2015-11-13 14:39:38 +01:00
|
|
|
|
/*---------------------------------------------------------------------------------------------
|
|
|
|
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
|
|
|
|
*--------------------------------------------------------------------------------------------*/
|
|
|
|
|
|
2018-03-15 15:56:43 +01:00
|
|
|
|
import * as assert from 'assert';
|
2018-04-10 16:13:44 +02:00
|
|
|
|
import * as fs from 'fs';
|
2020-06-20 10:47:29 +02:00
|
|
|
|
import * as encoding from 'vs/workbench/services/textfile/common/encoding';
|
2019-12-30 16:57:22 +01:00
|
|
|
|
import * as terminalEncoding from 'vs/base/node/terminalEncoding';
|
2020-06-17 10:45:58 +02:00
|
|
|
|
import * as streams from 'vs/base/common/stream';
|
2020-06-18 16:08:17 +02:00
|
|
|
|
import * as iconv from 'iconv-lite-umd';
|
2021-03-15 11:32:08 +01:00
|
|
|
|
import { getPathFromAmdModule } from 'vs/base/test/node/testUtils';
|
2020-06-17 10:45:58 +02:00
|
|
|
|
import { newWriteableBufferStream, VSBuffer, VSBufferReadableStream, streamToBufferReadableStream } from 'vs/base/common/buffer';
|
2021-02-22 10:49:59 +01:00
|
|
|
|
import { splitLines } from 'vs/base/common/strings';
|
2015-11-13 14:39:38 +01:00
|
|
|
|
|
2019-11-19 08:33:26 +01:00
|
|
|
|
/**
 * Detects a byte order mark at the start of `file`.
 *
 * Reads up to the first 3 bytes of the file and hands them to
 * `encoding.detectEncodingByBOMFromBuffer`.
 *
 * @param file path of the file to inspect
 * @returns whatever `detectEncodingByBOMFromBuffer` reports for those bytes,
 * or `null` when reading or detection throws (e.g. the file does not exist)
 */
export async function detectEncodingByBOM(file: string): Promise<typeof encoding.UTF16be | typeof encoding.UTF16le | typeof encoding.UTF8_with_bom | null> {
	try {
		const head = await readExactlyByFile(file, 3);

		return encoding.detectEncodingByBOMFromBuffer(head.buffer, head.bytesRead);
	} catch (error) {
		// ignore errors (like file not found)
		return null;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Outcome of `readExactlyByFile`.
 */
interface ReadResult {

	/** Number of bytes that were actually read into `buffer` (0 when the read failed). */
	bytesRead: number;

	/** The bytes read from the file, or `null` when the read failed. */
	buffer: VSBuffer | null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reads up to `totalBytes` bytes from the beginning of `file`.
 *
 * The file is read chunk by chunk until either `totalBytes` bytes have been
 * collected or the end of the file is reached. The file descriptor is always
 * closed before the promise settles.
 *
 * @param file path of the file to read
 * @param totalBytes maximum number of bytes to read
 * @returns the bytes together with the count actually read. A read error
 * other than `EISDIR` resolves with `{ buffer: null, bytesRead: 0 }`.
 * Rejects when the file cannot be opened or closed, or when the read fails
 * with `EISDIR` (the path is a folder).
 */
function readExactlyByFile(file: string, totalBytes: number): Promise<ReadResult> {
	return new Promise<ReadResult>((resolve, reject) => {
		fs.open(file, 'r', null, (openError, fd) => {
			if (openError) {
				reject(openError);
				return;
			}

			// Note: the tail of this buffer stays uninitialized when the file is
			// shorter than `totalBytes`; callers must honor `bytesRead`.
			const target = Buffer.allocUnsafe(totalBytes);
			let filled = 0;

			// Closes the descriptor first and only then settles the promise.
			const finish = (readError: Error | null, data: Buffer | null, count: number): void => {
				fs.close(fd, closeError => {
					if (closeError) {
						reject(closeError);
					} else if (readError && (<any>readError).code === 'EISDIR') {
						reject(readError); // we want to bubble this error up (file is actually a folder)
					} else {
						resolve({ buffer: data ? VSBuffer.wrap(data) : null, bytesRead: count });
					}
				});
			};

			const readNext = (): void => {
				fs.read(fd, target, filled, totalBytes - filled, null, (readError, bytesRead) => {
					if (readError) {
						finish(readError, null, 0);
						return;
					}

					filled += bytesRead;

					// Stop on end of file (nothing read) or once the buffer is full.
					if (bytesRead === 0 || filled === totalBytes) {
						finish(null, target, filled);
						return;
					}

					readNext();
				});
			};

			readNext();
		});
	});
}
|
|
|
|
|
|
2015-11-13 14:39:38 +01:00
|
|
|
|
suite('Encoding', () => {
|
2019-04-12 11:49:02 +02:00
|
|
|
|
|
|
|
|
|
// A path that does not exist must produce `null` rather than a rejection.
test('detectBOM does not return error for non existing file', async function () {
	const missingFile = getPathFromAmdModule(require, './fixtures/not-exist.css');

	assert.strictEqual(await detectEncodingByBOM(missingFile), null);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Fixture is expected to be reported as UTF-8 with BOM.
test('detectBOM UTF-8', async function () {
	const fixture = getPathFromAmdModule(require, './fixtures/some_utf8.css');

	assert.strictEqual(await detectEncodingByBOM(fixture), 'utf8bom');
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Fixture is expected to be reported as UTF-16 little endian.
test('detectBOM UTF-16 LE', async function () {
	const fixture = getPathFromAmdModule(require, './fixtures/some_utf16le.css');

	assert.strictEqual(await detectEncodingByBOM(fixture), 'utf16le');
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Fixture is expected to be reported as UTF-16 big endian.
test('detectBOM UTF-16 BE', async function () {
	const fixture = getPathFromAmdModule(require, './fixtures/some_utf16be.css');

	assert.strictEqual(await detectEncodingByBOM(fixture), 'utf16be');
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// For the ANSI fixture, BOM detection is expected to report `null`.
test('detectBOM ANSI', async () => {
	const fixture = getPathFromAmdModule(require, './fixtures/some_ansi.css');

	assert.strictEqual(await detectEncodingByBOM(fixture), null);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// For the `empty.txt` fixture, BOM detection is expected to report `null`.
// The title names the empty-file case so that it no longer duplicates the
// 'detectBOM ANSI' test, which made the two indistinguishable in reports.
test('detectBOM empty file', async function () {
	const file = getPathFromAmdModule(require, './fixtures/empty.txt');

	const detectedEncoding = await detectEncodingByBOM(file);
	assert.strictEqual(detectedEncoding, null);
});
|
2017-12-04 12:00:20 +01:00
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Resolving the terminal encoding must yield a non-empty encoding name.
test('resolve terminal encoding (detect)', async () => {
	const resolved = await terminalEncoding.resolveTerminalEncoding();

	assert.ok(resolved.length > 0);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// With VSCODE_CLI_ENCODING set, the resolved terminal encoding must be that
// value. The variable is put back to its previous state afterwards so that
// the override does not leak into tests that run later in the same process.
test('resolve terminal encoding (environment)', async function () {
	const previous = process.env['VSCODE_CLI_ENCODING'];
	process.env['VSCODE_CLI_ENCODING'] = 'utf16le';

	try {
		const enc = await terminalEncoding.resolveTerminalEncoding();
		assert.ok(await encoding.encodingExists(enc));
		assert.strictEqual(enc, 'utf16le');
	} finally {
		if (previous === undefined) {
			// assigning `undefined` would store the string 'undefined'
			delete process.env['VSCODE_CLI_ENCODING'];
		} else {
			process.env['VSCODE_CLI_ENCODING'] = previous;
		}
	}
});
|
2018-04-03 14:23:24 +02:00
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Detection looks at the first 512 bytes; this fixture must not seem binary.
test('detectEncodingFromBuffer (JSON saved as PNG)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.json.png'), 512);

	const { seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(seemsBinary, false);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Detection looks at the first 512 bytes; this fixture must seem binary.
test('detectEncodingFromBuffer (PNG saved as TXT)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.png.txt'), 512);

	const { seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(seemsBinary, true);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Detection looks at the first 512 bytes; this fixture must not seem binary.
test('detectEncodingFromBuffer (XML saved as PNG)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.xml.png'), 512);

	const { seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(seemsBinary, false);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Detection looks at the first 512 bytes; this fixture must seem binary.
test('detectEncodingFromBuffer (QWOFF saved as TXT)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.qwoff.txt'), 512);

	const { seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(seemsBinary, true);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Detection looks at the first 512 bytes; this fixture must not seem binary.
test('detectEncodingFromBuffer (CSS saved as QWOFF)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.css.qwoff'), 512);

	const { seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(seemsBinary, false);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Detection looks at the first 512 bytes; this fixture must seem binary.
test('detectEncodingFromBuffer (PDF)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.pdf'), 512);

	const { seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(seemsBinary, true);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Without a BOM the content must still be recognized as UTF-16 LE text.
test('detectEncodingFromBuffer (guess UTF-16 LE from content without BOM)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/utf16_le_nobom.txt'), 512);

	const { encoding: guessed, seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(guessed, encoding.UTF16le);
	assert.strictEqual(seemsBinary, false);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Without a BOM the content must still be recognized as UTF-16 BE text.
test('detectEncodingFromBuffer (guess UTF-16 BE from content without BOM)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/utf16_be_nobom.txt'), 512);

	const { encoding: guessed, seemsBinary } = encoding.detectEncodingFromBuffer(head);
	assert.strictEqual(guessed, encoding.UTF16be);
	assert.strictEqual(seemsBinary, false);
});
|
|
|
|
|
|
2019-11-12 09:57:53 +01:00
|
|
|
|
// Encoding guessing is enabled (second argument) and runs on up to 4096 bytes.
test('autoGuessEncoding (UTF8)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some_file.css'), 512 * 8);

	const { encoding: guessed } = await encoding.detectEncodingFromBuffer(head, true);
	assert.strictEqual(guessed, 'utf8');
});
|
|
|
|
|
|
|
|
|
|
// Encoding guessing is enabled; for this fixture no encoding is expected (`null`).
test('autoGuessEncoding (ASCII)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some_ansi.css'), 512 * 8);

	const { encoding: guessed } = await encoding.detectEncodingFromBuffer(head, true);
	assert.strictEqual(guessed, null);
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Encoding guessing is enabled and runs on up to 4096 bytes of the fixture.
test('autoGuessEncoding (ShiftJIS)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.shiftjis.txt'), 512 * 8);

	const { encoding: guessed } = await encoding.detectEncodingFromBuffer(head, true);
	assert.strictEqual(guessed, 'shiftjis');
});
|
|
|
|
|
|
2018-10-04 22:15:45 +02:00
|
|
|
|
// Encoding guessing is enabled and runs on up to 4096 bytes of the fixture.
test('autoGuessEncoding (CP1252)', async () => {
	const head = await readExactlyByFile(getPathFromAmdModule(require, './fixtures/some.cp1252.txt'), 512 * 8);

	const { encoding: guessed } = await encoding.detectEncodingFromBuffer(head, true);
	assert.strictEqual(guessed, 'windows1252');
});
|
2018-04-10 16:13:44 +02:00
|
|
|
|
|
2019-03-21 17:06:12 +01:00
|
|
|
|
/**
 * Reads the whole file at `path` and decodes it with iconv.
 *
 * @param path file to read
 * @param fileEncoding encoding name, converted via `encoding.toNodeEncoding`
 * before decoding
 * @returns the decoded file content; rejects when reading the file fails
 */
async function readAndDecodeFromDisk(path: string, fileEncoding: string | null) {
	return new Promise<string>((resolve, reject) => {
		fs.readFile(path, (error, bytes) => {
			if (error) {
				reject(error);
				return;
			}

			const nodeEncoding = encoding.toNodeEncoding(fileEncoding!);
			resolve(iconv.decode(bytes, nodeEncoding));
		});
	});
}
|
|
|
|
|
|
2020-06-17 10:45:58 +02:00
|
|
|
|
/**
 * Builds a buffer stream that delivers `buffers` asynchronously.
 *
 * Each buffer is written from its own `setTimeout` callback and the stream is
 * ended from a final one, so the stream is returned before any data has been
 * written to it.
 *
 * @param buffers chunks to write, in order
 * @returns the stream the chunks will be written to
 */
function newTestReadableStream(buffers: Buffer[]): VSBufferReadableStream {
	const sink = newWriteableBufferStream();

	for (const raw of buffers) {
		const chunk = VSBuffer.wrap(raw);
		setTimeout(() => {
			sink.write(chunk);
		});
	}

	// Scheduled after all writes above.
	setTimeout(() => {
		sink.end();
	});

	return sink;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Consumes `stream` to its end and concatenates all chunks into one string.
 *
 * @param stream string stream to drain
 * @returns the joined content of all chunks
 */
async function readAllAsString(stream: streams.ReadableStream<string>) {
	const joinChunks = (chunks: string[]): string => chunks.join('');

	return streams.consumeStream(stream, joinChunks);
}
|
|
|
|
|
|
|
|
|
|
// Three 3-byte chunks with a 4 byte detection minimum: all bytes must still
// come out of the decoded stream.
test('toDecodeStream - some stream', async () => {
	const abc = [65, 66, 67];
	const source = newTestReadableStream([Buffer.from(abc), Buffer.from(abc), Buffer.from(abc)]);

	const result = await encoding.toDecodeStream(source, {
		minBytesRequiredForDetection: 4,
		guessEncoding: false,
		overwriteEncoding: async guessed => guessed || encoding.UTF8
	});

	assert.ok(result.detected);
	assert.ok(result.stream);

	assert.strictEqual(await readAllAsString(result.stream), 'ABCABCABC');
});
|
|
|
|
|
|
|
|
|
|
// The source holds only 9 bytes while 64 are requested for detection: the
// stream must still resolve and deliver all content.
test('toDecodeStream - some stream, expect too much data', async () => {
	const abc = [65, 66, 67];
	const source = newTestReadableStream([Buffer.from(abc), Buffer.from(abc), Buffer.from(abc)]);

	const result = await encoding.toDecodeStream(source, {
		minBytesRequiredForDetection: 64,
		guessEncoding: false,
		overwriteEncoding: async guessed => guessed || encoding.UTF8
	});

	assert.ok(result.detected);
	assert.ok(result.stream);

	assert.strictEqual(await readAllAsString(result.stream), 'ABCABCABC');
});
|
|
|
|
|
|
|
|
|
|
// A source that ends without any data must decode to the empty string.
test('toDecodeStream - some stream, no data', async () => {
	const emptySource = newWriteableBufferStream();
	emptySource.end();

	const result = await encoding.toDecodeStream(emptySource, {
		minBytesRequiredForDetection: 512,
		guessEncoding: false,
		overwriteEncoding: async guessed => guessed || encoding.UTF8
	});

	assert.ok(result.detected);
	assert.ok(result.stream);

	assert.strictEqual(await readAllAsString(result.stream), '');
});
|
|
|
|
|
|
|
|
|
|
// The detected encoding must be utf16be, and the decoded stream must match
// decoding the same file directly from disk with that encoding.
test('toDecodeStream - encoding, utf16be', async () => {
	const fixture = getPathFromAmdModule(require, './fixtures/some_utf16be.css');
	const source = streamToBufferReadableStream(fs.createReadStream(fixture));

	const result = await encoding.toDecodeStream(source, {
		minBytesRequiredForDetection: 64,
		guessEncoding: false,
		overwriteEncoding: async guessed => guessed || encoding.UTF8
	});

	assert.strictEqual(result.detected.encoding, 'utf16be');
	assert.strictEqual(result.detected.seemsBinary, false);

	const fromDisk = await readAndDecodeFromDisk(fixture, result.detected.encoding);
	const fromStream = await readAllAsString(result.stream);
	assert.strictEqual(fromStream, fromDisk);
});
|
|
|
|
|
|
|
|
|
|
// For an empty file the decoded stream must match decoding the file directly
// from disk with the detected encoding.
test('toDecodeStream - empty file', async () => {
	const fixture = getPathFromAmdModule(require, './fixtures/empty.txt');
	const source = streamToBufferReadableStream(fs.createReadStream(fixture));

	const result = await encoding.toDecodeStream(source, {
		guessEncoding: false,
		overwriteEncoding: async guessed => guessed || encoding.UTF8
	});

	const fromDisk = await readAndDecodeFromDisk(fixture, result.detected.encoding);
	const fromStream = await readAllAsString(result.stream);
	assert.strictEqual(fromStream, fromDisk);
});
|
2020-06-17 10:45:58 +02:00
|
|
|
|
|
|
|
|
|
// Feeds emoji bytes one byte per chunk, with the final byte cut off, and
// expects the same text that TextDecoder yields for the truncated bytes.
// Must stay a `function` callback because it uses mocha's `this.skip()`.
test('toDecodeStream - decodes buffer entirely', async function () {
	if (!process.versions.electron) {
		this.skip(); // TODO@bpasero enable once we ship Electron 16
	}

	const complete = Buffer.from('🖥️💻💾');
	const truncated = complete.slice(0, complete.length - 1);

	// one single-byte buffer per byte of the truncated input
	const singleBytes: Buffer[] = Array.from({ length: truncated.length }, (_, index) => truncated.slice(index, index + 1));

	const result = await encoding.toDecodeStream(newTestReadableStream(singleBytes), {
		minBytesRequiredForDetection: 4,
		guessEncoding: false,
		overwriteEncoding: async guessed => guessed || encoding.UTF8
	});

	const expected = new TextDecoder().decode(truncated);
	const actual = await readAllAsString(result.stream);

	assert.strictEqual(actual, expected);
});
|
|
|
|
|
|
2020-07-08 17:29:54 +02:00
|
|
|
|
// The encoding is forced to 'gbk'; the decoded content must have the
// expected number of characters.
test('toDecodeStream - some stream (GBK issue #101856)', async () => {
	const fixture = getPathFromAmdModule(require, './fixtures/some_gbk.txt');
	const source = streamToBufferReadableStream(fs.createReadStream(fixture));

	const result = await encoding.toDecodeStream(source, {
		minBytesRequiredForDetection: 4,
		guessEncoding: false,
		overwriteEncoding: async () => 'gbk'
	});
	assert.ok(result.detected);
	assert.ok(result.stream);

	const decoded = await readAllAsString(result.stream);
	assert.strictEqual(decoded.length, 65537);
});
|
2020-07-08 12:03:33 +02:00
|
|
|
|
|
2021-02-22 10:49:59 +01:00
|
|
|
|
// The encoding is forced to 'utf-8'; a specific line of the decoded fixture
// must come out intact.
test('toDecodeStream - some stream (UTF-8 issue #102202)', async () => {
	const fixture = getPathFromAmdModule(require, './fixtures/issue_102202.txt');
	const source = streamToBufferReadableStream(fs.createReadStream(fixture));

	const result = await encoding.toDecodeStream(source, {
		minBytesRequiredForDetection: 4,
		guessEncoding: false,
		overwriteEncoding: async () => 'utf-8'
	});
	assert.ok(result.detected);
	assert.ok(result.stream);

	const decodedLines = splitLines(await readAllAsString(result.stream));

	assert.strictEqual(decodedLines[981].toString(), '啊啊啊啊啊啊aaa啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊,啊啊啊啊啊啊啊啊啊啊啊。');
});
|
|
|
|
|
|
2020-06-17 10:45:58 +02:00
|
|
|
|
// Encoding the decoded fixture through toEncodeReadable must give the same
// bytes as encoding it directly with iconv.
test('toEncodeReadable - encoding, utf16be', async () => {
	const fixture = getPathFromAmdModule(require, './fixtures/some_utf16be.css');
	const text = await readAndDecodeFromDisk(fixture, encoding.UTF16be);

	const iconvBytes = iconv.encode(text, encoding.toNodeEncoding(encoding.UTF16be));
	const expected = VSBuffer.wrap(iconvBytes).toString();

	const encodedReadable = await encoding.toEncodeReadable(streams.toReadable(text), encoding.UTF16be);
	const actual = streams.consumeReadable(encodedReadable, VSBuffer.concat).toString();

	assert.strictEqual(actual, expected);
});
|
|
|
|
|
|
|
|
|
|
// A readable that is exhausted right away must encode to empty output.
test('toEncodeReadable - empty readable to utf8', async () => {
	const emptyReadable: streams.Readable<string> = {
		read: () => null
	};

	const encodedReadable = await encoding.toEncodeReadable(emptyReadable, encoding.UTF8);
	const actual = streams.consumeReadable(encodedReadable, VSBuffer.concat).toString();

	assert.strictEqual(actual, '');
});
|
|
|
|
|
|
|
|
|
|
// For each UTF encoding: encoding an empty readable with `addBOM` must emit
// exactly the BOM that belongs to that encoding.
const bomCases = [
	{ utfEncoding: encoding.UTF8, relatedBom: encoding.UTF8_BOM },
	{ utfEncoding: encoding.UTF8_with_bom, relatedBom: encoding.UTF8_BOM },
	{ utfEncoding: encoding.UTF16be, relatedBom: encoding.UTF16be_BOM },
	{ utfEncoding: encoding.UTF16le, relatedBom: encoding.UTF16le_BOM }
];

for (const { utfEncoding, relatedBom } of bomCases) {
	test(`toEncodeReadable - empty readable to ${utfEncoding} with BOM`, async () => {
		const emptyReadable: streams.Readable<string> = {
			read: () => null
		};

		const pendingReadable = encoding.toEncodeReadable(emptyReadable, utfEncoding, { addBOM: true });

		const expected = VSBuffer.wrap(Buffer.from(relatedBom)).toString();
		const actual = streams.consumeReadable(await pendingReadable, VSBuffer.concat).toString();

		assert.strictEqual(actual, expected);
	});
}
|
2020-07-07 12:14:42 +02:00
|
|
|
|
|
|
|
|
|
// Every key of SUPPORTED_ENCODINGS must be known to iconv, except for
// UTF8_with_bom, which is skipped as an encoding from us.
test('encodingExists', async () => {
	for (const candidate in encoding.SUPPORTED_ENCODINGS) {
		if (candidate !== encoding.UTF8_with_bom) {
			assert.strictEqual(iconv.encodingExists(candidate), true, candidate);
		}
	}
});
|
2015-11-13 14:39:38 +01:00
|
|
|
|
});
|