Remote: Preserve BOM in UTF-8 when found (fixes #48826)

This commit is contained in:
Benjamin Pasero 2019-04-16 16:49:33 +02:00
parent 7d8d01a61e
commit eacb2d85df
13 changed files with 150 additions and 58 deletions

View file

@ -85,24 +85,24 @@ export class VSBuffer {
this.buffer.set(array.buffer, offset);
}
/**
 * Reads an unsigned 32-bit big-endian integer from this buffer's bytes,
 * starting at `offset`. Delegates to the free function of the same name,
 * passing it the underlying `this.buffer`.
 *
 * @param offset Index of the first (most significant) byte.
 * @returns The decoded number.
 */
readUint32BE(offset: number): number {
return readUint32BE(this.buffer, offset);
readUInt32BE(offset: number): number {
return readUInt32BE(this.buffer, offset);
}
/**
 * Writes `value` into this buffer as four bytes in big-endian order,
 * starting at `offset`. Delegates to the free function of the same name,
 * passing it the underlying `this.buffer`.
 *
 * @param value Number to encode.
 * @param offset Index that receives the most significant byte.
 */
writeUint32BE(value: number, offset: number): void {
writeUint32BE(this.buffer, value, offset);
writeUInt32BE(value: number, offset: number): void {
writeUInt32BE(this.buffer, value, offset);
}
/**
 * Returns the single byte stored at `offset` in this buffer by delegating
 * to the free function of the same name with the underlying `this.buffer`.
 *
 * @param offset Index of the byte to read.
 */
readUint8(offset: number): number {
return readUint8(this.buffer, offset);
readUInt8(offset: number): number {
return readUInt8(this.buffer, offset);
}
/**
 * Stores `value` as the byte at `offset` in this buffer by delegating to
 * the free function of the same name with the underlying `this.buffer`.
 *
 * @param value Byte value to store.
 * @param offset Index of the byte to overwrite.
 */
writeUint8(value: number, offset: number): void {
writeUint8(this.buffer, value, offset);
writeUInt8(value: number, offset: number): void {
writeUInt8(this.buffer, value, offset);
}
}
function readUint32BE(source: Uint8Array, offset: number): number {
function readUInt32BE(source: Uint8Array, offset: number): number {
return (
source[offset] * 2 ** 24
+ source[offset + 1] * 2 ** 16
@ -111,7 +111,7 @@ function readUint32BE(source: Uint8Array, offset: number): number {
);
}
function writeUint32BE(destination: Uint8Array, value: number, offset: number): void {
function writeUInt32BE(destination: Uint8Array, value: number, offset: number): void {
destination[offset + 3] = value;
value = value >>> 8;
destination[offset + 2] = value;
@ -121,11 +121,11 @@ function writeUint32BE(destination: Uint8Array, value: number, offset: number):
destination[offset] = value;
}
/**
 * Returns the element of `source` at index `offset`, i.e. one byte of the
 * given `Uint8Array`. No bounds check is performed here.
 */
function readUint8(source: Uint8Array, offset: number): number {
function readUInt8(source: Uint8Array, offset: number): number {
return source[offset];
}
/**
 * Stores `value` at index `offset` of `destination`. Because the target is
 * a `Uint8Array`, the assignment itself converts the number to a single
 * byte; no explicit range check or masking is done here.
 */
function writeUint8(destination: Uint8Array, value: number, offset: number): void {
function writeUInt8(destination: Uint8Array, value: number, offset: number): void {
destination[offset] = value;
}

View file

@ -8,6 +8,7 @@ import * as iconv from 'iconv-lite';
import { isLinux, isMacintosh } from 'vs/base/common/platform';
import { exec } from 'child_process';
import { Readable, Writable } from 'stream';
import { VSBuffer } from 'vs/base/common/buffer';
export const UTF8 = 'utf8';
export const UTF8_with_bom = 'utf8bom';
@ -160,7 +161,7 @@ function toNodeEncoding(enc: string | null): string {
return enc;
}
export function detectEncodingByBOMFromBuffer(buffer: Buffer | null, bytesRead: number): string | null {
export function detectEncodingByBOMFromBuffer(buffer: Buffer | VSBuffer | null, bytesRead: number): string | null {
if (!buffer || bytesRead < 2) {
return null;
}

View file

@ -216,10 +216,10 @@ class ProtocolReader extends Disposable {
// save new state => next time will read the body
this._state.readHead = false;
this._state.readLen = buff.readUint32BE(9);
this._state.messageType = <ProtocolMessageType>buff.readUint8(0);
this._state.id = buff.readUint32BE(1);
this._state.ack = buff.readUint32BE(5);
this._state.readLen = buff.readUInt32BE(9);
this._state.messageType = <ProtocolMessageType>buff.readUInt8(0);
this._state.id = buff.readUInt32BE(1);
this._state.ack = buff.readUInt32BE(5);
} else {
// buff is the body
const messageType = this._state.messageType;
@ -288,10 +288,10 @@ class ProtocolWriter {
msg.writtenTime = Date.now();
this.lastWriteTime = Date.now();
const header = VSBuffer.alloc(ProtocolConstants.HeaderLength);
header.writeUint8(msg.type, 0);
header.writeUint32BE(msg.id, 1);
header.writeUint32BE(msg.ack, 5);
header.writeUint32BE(msg.data.byteLength, 9);
header.writeUInt8(msg.type, 0);
header.writeUInt32BE(msg.id, 1);
header.writeUInt32BE(msg.ack, 5);
header.writeUInt32BE(msg.data.byteLength, 9);
this._writeSoon(header, msg.data);
}

View file

@ -166,17 +166,17 @@ enum DataType {
/**
 * Allocates a fresh 4-byte `VSBuffer` and writes `size` into it as a
 * big-endian unsigned 32-bit integer at offset 0.
 *
 * @param size The number to encode.
 * @returns The newly allocated 4-byte buffer.
 */
function createSizeBuffer(size: number): VSBuffer {
const result = VSBuffer.alloc(4);
result.writeUint32BE(size, 0);
result.writeUInt32BE(size, 0);
return result;
}
/**
 * Asks `reader` for its next 4 bytes and decodes them as a big-endian
 * unsigned 32-bit integer (the counterpart of `createSizeBuffer`).
 */
function readSizeBuffer(reader: IReader): number {
return reader.read(4).readUint32BE(0);
return reader.read(4).readUInt32BE(0);
}
/**
 * Allocates a fresh 1-byte `VSBuffer` and stores `value` in its only byte.
 *
 * @param value The byte value to store at offset 0.
 * @returns The newly allocated 1-byte buffer.
 */
function createOneByteBuffer(value: number): VSBuffer {
const result = VSBuffer.alloc(1);
result.writeUint8(value, 0);
result.writeUInt8(value, 0);
return result;
}
@ -225,7 +225,7 @@ function serialize(writer: IWriter, data: any): void {
}
function deserialize(reader: IReader): any {
const type = reader.read(1).readUint8(0);
const type = reader.read(1).readUInt8(0);
switch (type) {
case DataType.Undefined: return undefined;

View file

@ -136,10 +136,10 @@ suite('IPC, Socket Protocol', () => {
assert.equal(msg1.toString(), 'foobarfarboo');
const buffer = VSBuffer.alloc(1);
buffer.writeUint8(123, 0);
buffer.writeUInt8(123, 0);
a.send(buffer);
const msg2 = await bMessages.waitForOne();
assert.equal(msg2.readUint8(0), 123);
assert.equal(msg2.readUInt8(0), 123);
});

View file

@ -624,6 +624,12 @@ export interface IReadFileOptions {
*/
position?: number;
/**
* Is an integer specifying how many bytes to read from the file. By default, all bytes
* will be read.
*/
length?: number;
/**
* If provided, the size of the file will be checked against the limits.
*/

View file

@ -692,7 +692,7 @@ export class ExtHostExtensionService implements ExtHostExtensionServiceShape {
let buff = VSBuffer.alloc(size);
let value = Math.random() % 256;
for (let i = 0; i < size; i++) {
buff.writeUint8(value, i);
buff.writeUInt8(value, i);
}
return buff;
}

View file

@ -24,9 +24,9 @@ export function createMessageOfType(type: MessageType): VSBuffer {
const result = VSBuffer.alloc(1);
switch (type) {
case MessageType.Initialized: result.writeUint8(1, 0); break;
case MessageType.Ready: result.writeUint8(2, 0); break;
case MessageType.Terminate: result.writeUint8(3, 0); break;
case MessageType.Initialized: result.writeUInt8(1, 0); break;
case MessageType.Ready: result.writeUInt8(2, 0); break;
case MessageType.Terminate: result.writeUInt8(3, 0); break;
}
return result;
@ -37,7 +37,7 @@ export function isMessageOfType(message: VSBuffer, type: MessageType): boolean {
return false;
}
switch (message.readUint8(0)) {
switch (message.readUInt8(0)) {
case 1: return type === MessageType.Initialized;
case 2: return type === MessageType.Ready;
case 3: return type === MessageType.Terminate;

View file

@ -463,20 +463,20 @@ class MessageBuffer {
}
/**
 * Writes `n` as one byte at the current cursor (`this._offset`) of the
 * backing buffer, then advances the cursor by 1.
 */
public writeUInt8(n: number): void {
this._buff.writeUint8(n, this._offset); this._offset += 1;
this._buff.writeUInt8(n, this._offset); this._offset += 1;
}
/**
 * Reads one byte at the current cursor (`this._offset`) of the backing
 * buffer, advances the cursor by 1 and returns the byte.
 */
public readUInt8(): number {
const n = this._buff.readUint8(this._offset); this._offset += 1;
const n = this._buff.readUInt8(this._offset); this._offset += 1;
return n;
}
/**
 * Writes `n` as a big-endian unsigned 32-bit integer at the current cursor
 * (`this._offset`) of the backing buffer, then advances the cursor by 4.
 */
public writeUInt32(n: number): void {
this._buff.writeUint32BE(n, this._offset); this._offset += 4;
this._buff.writeUInt32BE(n, this._offset); this._offset += 4;
}
/**
 * Reads a big-endian unsigned 32-bit integer at the current cursor
 * (`this._offset`) of the backing buffer, advances the cursor by 4 and
 * returns the decoded number.
 */
public readUInt32(): number {
const n = this._buff.readUint32BE(this._offset); this._offset += 4;
const n = this._buff.readUInt32BE(this._offset); this._offset += 4;
return n;
}
@ -485,12 +485,12 @@ class MessageBuffer {
}
/**
 * Writes a length-prefixed byte string at the current cursor: first
 * `str.byteLength` as a single byte, then the bytes of `str` themselves.
 * The cursor ends up just past the written bytes.
 *
 * NOTE(review): the length prefix is one byte, so callers presumably
 * guarantee `str.byteLength <= 255` — confirm against the call sites.
 */
public writeShortString(str: VSBuffer): void {
this._buff.writeUint8(str.byteLength, this._offset); this._offset += 1;
this._buff.writeUInt8(str.byteLength, this._offset); this._offset += 1;
this._buff.set(str, this._offset); this._offset += str.byteLength;
}
/**
 * Reads a length-prefixed string at the current cursor: a one-byte length,
 * followed by that many bytes which are converted with `toString()`.
 * The cursor ends up just past the string's bytes.
 */
public readShortString(): string {
const strByteLength = this._buff.readUint8(this._offset); this._offset += 1;
const strByteLength = this._buff.readUInt8(this._offset); this._offset += 1;
const strBuff = this._buff.slice(this._offset, this._offset + strByteLength);
const str = strBuff.toString(); this._offset += strByteLength;
return str;
}
@ -501,19 +501,19 @@ class MessageBuffer {
}
/**
 * Writes a length-prefixed byte string at the current cursor: first
 * `str.byteLength` as a big-endian unsigned 32-bit integer, then the bytes
 * of `str`. The cursor ends up just past the written bytes.
 */
public writeLongString(str: VSBuffer): void {
this._buff.writeUint32BE(str.byteLength, this._offset); this._offset += 4;
this._buff.writeUInt32BE(str.byteLength, this._offset); this._offset += 4;
this._buff.set(str, this._offset); this._offset += str.byteLength;
}
/**
 * Reads a length-prefixed string at the current cursor: a 4-byte
 * big-endian length, followed by that many bytes which are converted with
 * `toString()`. The cursor ends up just past the string's bytes.
 */
public readLongString(): string {
const strByteLength = this._buff.readUint32BE(this._offset); this._offset += 4;
const strByteLength = this._buff.readUInt32BE(this._offset); this._offset += 4;
const strBuff = this._buff.slice(this._offset, this._offset + strByteLength);
const str = strBuff.toString(); this._offset += strByteLength;
return str;
}
/**
 * Writes `buff` at the current cursor with a length prefix: first
 * `buff.byteLength` as a big-endian unsigned 32-bit integer, then the
 * bytes of `buff`. The cursor ends up just past the written bytes.
 */
public writeBuffer(buff: VSBuffer): void {
this._buff.writeUint32BE(buff.byteLength, this._offset); this._offset += 4;
this._buff.writeUInt32BE(buff.byteLength, this._offset); this._offset += 4;
this._buff.set(buff, this._offset); this._offset += buff.byteLength;
}
@ -522,12 +522,12 @@ class MessageBuffer {
}
/**
 * Writes `buff` at the current cursor with a length prefix: first
 * `buff.byteLength` as a big-endian unsigned 32-bit integer, then the
 * bytes of `buff`. The cursor ends up just past the written bytes.
 */
public writeVSBuffer(buff: VSBuffer): void {
this._buff.writeUint32BE(buff.byteLength, this._offset); this._offset += 4;
this._buff.writeUInt32BE(buff.byteLength, this._offset); this._offset += 4;
this._buff.set(buff, this._offset); this._offset += buff.byteLength;
}
/**
 * Reads a length-prefixed buffer at the current cursor: a 4-byte
 * big-endian length, followed by that many bytes, which are returned as
 * the result of `slice` on the backing buffer. The cursor ends up just
 * past the returned bytes.
 */
public readVSBuffer(): VSBuffer {
const buffLength = this._buff.readUint32BE(this._offset); this._offset += 4;
const buffLength = this._buff.readUInt32BE(this._offset); this._offset += 4;
const buff = this._buff.slice(this._offset, this._offset + buffLength); this._offset += buffLength;
return buff;
}
@ -549,7 +549,7 @@ class MessageBuffer {
}
public writeMixedArray(arr: VSBuffer[], arrType: ArgType[]): void {
this._buff.writeUint8(arr.length, this._offset); this._offset += 1;
this._buff.writeUInt8(arr.length, this._offset); this._offset += 1;
for (let i = 0, len = arr.length; i < len; i++) {
const el = arr[i];
const elType = arrType[i];
@ -564,7 +564,7 @@ class MessageBuffer {
}
public readMixedArray(): Array<string | VSBuffer> {
const arrLen = this._buff.readUint8(this._offset); this._offset += 1;
const arrLen = this._buff.readUInt8(this._offset); this._offset += 1;
let arr: Array<string | VSBuffer> = new Array(arrLen);
for (let i = 0; i < arrLen; i++) {
const argType = <ArgType>this.readUInt8();

View file

@ -420,11 +420,13 @@ export class FileService extends Disposable implements IFileService {
const handle = await provider.open(resource, { create: false });
try {
let buffer = VSBuffer.alloc(this.BUFFER_SIZE);
let totalBytesRead = 0;
let posInFile = options && typeof options.position === 'number' ? options.position : 0;
let bytesRead = 0;
let allowedRemainingBytes = (options && typeof options.length === 'number') ? options.length : undefined;
let buffer = VSBuffer.alloc(Math.min(this.BUFFER_SIZE, typeof allowedRemainingBytes === 'number' ? allowedRemainingBytes : this.BUFFER_SIZE));
let posInFile = options && typeof options.position === 'number' ? options.position : 0;
let posInBuffer = 0;
do {
// read from source (handle) at current position (pos) into buffer (buffer) at
@ -435,19 +437,28 @@ export class FileService extends Disposable implements IFileService {
posInBuffer += bytesRead;
totalBytesRead += bytesRead;
if (typeof allowedRemainingBytes === 'number') {
allowedRemainingBytes -= bytesRead;
}
// when buffer full, create a new one and emit it through stream
if (posInBuffer === buffer.byteLength) {
stream.write(buffer);
buffer = VSBuffer.alloc(this.BUFFER_SIZE);
buffer = VSBuffer.alloc(Math.min(this.BUFFER_SIZE, typeof allowedRemainingBytes === 'number' ? allowedRemainingBytes : this.BUFFER_SIZE));
posInBuffer = 0;
}
} while (bytesRead > 0 && this.throwIfCancelled(token) && this.throwIfTooLarge(totalBytesRead, options));
} while (bytesRead > 0 && (typeof allowedRemainingBytes !== 'number' || allowedRemainingBytes > 0) && this.throwIfCancelled(token) && this.throwIfTooLarge(totalBytesRead, options));
// wrap up with last buffer
// wrap up with last buffer (also respect maxBytes if provided)
if (posInBuffer > 0) {
stream.write(buffer.slice(0, posInBuffer));
let lastChunkLength = posInBuffer;
if (typeof allowedRemainingBytes === 'number') {
lastChunkLength = Math.min(posInBuffer, allowedRemainingBytes);
}
stream.write(buffer.slice(0, lastChunkLength));
}
} catch (error) {
throw error;
@ -464,6 +475,11 @@ export class FileService extends Disposable implements IFileService {
buffer = buffer.slice(options.position);
}
// respect length option
if (options && typeof options.length === 'number') {
buffer = buffer.slice(0, options.length);
}
return bufferToStream(VSBuffer.wrap(buffer));
}

View file

@ -928,6 +928,67 @@ suite('Disk File Service', () => {
assert.equal(contents.value.toString(), 'mlaut');
});
// With the FileOpenReadWriteClose capability set, a read of small.txt
// limited to `length: 3` must produce exactly the text 'Sma'.
test('readFile - 3 bytes (ASCII) - buffered', async () => {
	setCapabilities(fileProvider, FileSystemProviderCapabilities.FileOpenReadWriteClose);

	const smallFile = URI.file(join(testDir, 'small.txt'));
	const { value } = await service.readFile(smallFile, { length: 3 });

	assert.equal(value.toString(), 'Sma');
});
// With the FileReadWrite capability set, a read of small.txt limited to
// `length: 3` must produce exactly the text 'Sma'.
test('readFile - 3 bytes (ASCII) - unbuffered', async () => {
	setCapabilities(fileProvider, FileSystemProviderCapabilities.FileReadWrite);

	const smallFile = URI.file(join(testDir, 'small.txt'));
	const { value } = await service.readFile(smallFile, { length: 3 });

	assert.equal(value.toString(), 'Sma');
});
// With the FileOpenReadWriteClose capability set, a read of lorem.txt
// limited to `length: 20000` must return a value of exactly 20000 bytes.
test('readFile - 20000 bytes (large) - buffered', async () => {
	setCapabilities(fileProvider, FileSystemProviderCapabilities.FileOpenReadWriteClose);

	const largeFile = URI.file(join(testDir, 'lorem.txt'));
	const { value } = await service.readFile(largeFile, { length: 20000 });

	assert.equal(value.byteLength, 20000);
});
// With the FileReadWrite capability set, a read of lorem.txt limited to
// `length: 20000` must return a value of exactly 20000 bytes.
test('readFile - 20000 bytes (large) - unbuffered', async () => {
	setCapabilities(fileProvider, FileSystemProviderCapabilities.FileReadWrite);

	const largeFile = URI.file(join(testDir, 'lorem.txt'));
	const { value } = await service.readFile(largeFile, { length: 20000 });

	assert.equal(value.byteLength, 20000);
});
// With the FileOpenReadWriteClose capability set, a read of lorem.txt
// limited to `length: 80000` must return a value of exactly 80000 bytes.
test('readFile - 80000 bytes (large) - buffered', async () => {
	setCapabilities(fileProvider, FileSystemProviderCapabilities.FileOpenReadWriteClose);

	const largeFile = URI.file(join(testDir, 'lorem.txt'));
	const { value } = await service.readFile(largeFile, { length: 80000 });

	assert.equal(value.byteLength, 80000);
});
// With the FileReadWrite capability set, a read of lorem.txt limited to
// `length: 80000` must return a value of exactly 80000 bytes.
test('readFile - 80000 bytes (large) - unbuffered', async () => {
	setCapabilities(fileProvider, FileSystemProviderCapabilities.FileReadWrite);

	const largeFile = URI.file(join(testDir, 'lorem.txt'));
	const { value } = await service.readFile(largeFile, { length: 80000 });

	assert.equal(value.byteLength, 80000);
});
test('readFile - FILE_IS_DIRECTORY', async () => {
const resource = URI.file(join(testDir, 'deep'));

View file

@ -418,7 +418,7 @@ export abstract class TextFileService extends Disposable implements ITextFileSer
// chunk for possibly being binary by looking for 0-bytes
// we limit this check to the first 512 bytes
for (let i = 0; i < buffer.byteLength && i < 512; i++) {
if (buffer.readUint8(i) === 0) {
if (buffer.readUInt8(i) === 0) {
throw new TextFileOperationError(nls.localize('fileBinaryError', "File seems to be binary and cannot be opened as text"), TextFileOperationResult.FILE_IS_BINARY, options);
}
}

View file

@ -9,7 +9,7 @@ import { TextFileService } from 'vs/workbench/services/textfile/common/textFileS
import { ITextFileService, ITextFileStreamContent, ITextFileContent, IResourceEncodings, IResourceEncoding, IReadTextFileOptions, IWriteTextFileOptions, stringToSnapshot, TextFileOperationResult, TextFileOperationError } from 'vs/workbench/services/textfile/common/textfiles';
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
import { URI } from 'vs/base/common/uri';
import { IFileStatWithMetadata, ICreateFileOptions, FileOperationError, FileOperationResult, IFileStreamContent } from 'vs/platform/files/common/files';
import { IFileStatWithMetadata, ICreateFileOptions, FileOperationError, FileOperationResult, IFileStreamContent, IFileService } from 'vs/platform/files/common/files';
import { Schemas } from 'vs/base/common/network';
import { exists, stat, chmod, rimraf } from 'vs/base/node/pfs';
import { join, dirname } from 'vs/base/common/path';
@ -17,7 +17,7 @@ import { isMacintosh, isLinux } from 'vs/base/common/platform';
import product from 'vs/platform/product/node/product';
import { ITextResourceConfigurationService } from 'vs/editor/common/services/resourceConfiguration';
import { IWorkspaceContextService } from 'vs/platform/workspace/common/workspace';
import { UTF8, UTF8_with_bom, UTF16be, UTF16le, encodingExists, IDetectedEncodingResult, detectEncodingByBOM, encodeStream, UTF8_BOM, UTF16be_BOM, UTF16le_BOM, toDecodeStream, IDecodeStreamResult } from 'vs/base/node/encoding';
import { UTF8, UTF8_with_bom, UTF16be, UTF16le, encodingExists, IDetectedEncodingResult, encodeStream, UTF8_BOM, UTF16be_BOM, UTF16le_BOM, toDecodeStream, IDecodeStreamResult, detectEncodingByBOMFromBuffer } from 'vs/base/node/encoding';
import { WORKSPACE_EXTENSION } from 'vs/platform/workspaces/common/workspaces';
import { joinPath, extname, isEqualOrParent } from 'vs/base/common/resources';
import { Disposable } from 'vs/base/common/lifecycle';
@ -371,7 +371,8 @@ export class EncodingOracle extends Disposable implements IResourceEncodings {
constructor(
@ITextResourceConfigurationService private textResourceConfigurationService: ITextResourceConfigurationService,
@IEnvironmentService private environmentService: IEnvironmentService,
@IWorkspaceContextService private contextService: IWorkspaceContextService
@IWorkspaceContextService private contextService: IWorkspaceContextService,
@IFileService private fileService: IFileService
) {
super();
@ -414,8 +415,15 @@ export class EncodingOracle extends Disposable implements IResourceEncodings {
// Ensure that we preserve an existing BOM if found for UTF8
// unless we are instructed to overwrite the encoding
const overwriteEncoding = options && options.overwriteEncoding;
if (!overwriteEncoding && encoding === UTF8 && resource.scheme === Schemas.file && await detectEncodingByBOM(resource.fsPath) === UTF8) {
return { encoding, addBOM: true };
if (!overwriteEncoding && encoding === UTF8) {
try {
const buffer = (await this.fileService.readFile(resource, { length: 3 })).value;
if (detectEncodingByBOMFromBuffer(buffer, buffer.byteLength) === UTF8) {
return { encoding, addBOM: true };
}
} catch (error) {
// ignore - file might not exist
}
}
return { encoding, addBOM: false };