Improve performance of language detection (#130006)

* initial move to worker

* move event handling to the untitledTextEditorModel

* reuse simpleWorker interfaces and classes

* use correct path to languageDetection

* have vscode-languagedetection be outside of the asar

* add telemetry

* don't unpackage anything from languagedetection because it's not needed

* add an integration test

* some of Ben's feedback

* rework worker code to avoid duplication

* add isDisposed check

* fix test

* Isi and Ben feedback part 2

* use RunOnceScheduler instead and try to fix the test using events

* Ben feedback part 3

* bump distro
This commit is contained in:
Tyler James Leonhardt 2021-08-06 11:56:14 -07:00 committed by GitHub
parent 3d9899db8e
commit cfcda1c048
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 541 additions and 213 deletions

View file

@ -39,6 +39,7 @@ const vscodeEntryPoints = _.flatten([
buildfile.base,
buildfile.workerExtensionHost,
buildfile.workerNotebook,
buildfile.workerLanguageDetection,
buildfile.workbenchDesktop,
buildfile.code
]);
@ -233,9 +234,6 @@ function packageTask(platform, arch, sourceFolderName, destinationFolderName, op
'**/node-pty/lib/worker/conoutSocketWorker.js',
'**/node-pty/lib/shared/conout.js',
'**/*.wasm',
// For language detection
'**/model.json',
'**/group1-shard1of1.bin'
], 'node_modules.asar'));
let all = es.merge(

View file

@ -0,0 +1,65 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import * as vscode from 'vscode';
import { asPromise, assertNoRpc, closeAllEditors } from '../utils';
// Integration test: typing JSON content into a fresh untitled document should
// cause the document's language to switch from 'plaintext' to 'json'.
suite('vscode - untitled automatic language detection', () => {
// After every test: run the shared `assertNoRpc` check from ../utils and close all editors.
teardown(async function () {
assertNoRpc();
await closeAllEditors();
});
test('test automatic language detection works', async () => {
// `openTextDocument()` without arguments; the document is expected to start as plaintext.
const doc = await vscode.workspace.openTextDocument();
const editor = await vscode.window.showTextDocument(doc);
assert.strictEqual(editor.document.languageId, 'plaintext');
// The test only makes sense when the experimental detection setting is turned on.
const settingResult = vscode.workspace.getConfiguration().get<boolean>('workbench.editor.untitled.experimentalLanguageDetection');
assert.ok(settingResult);
// Insert a tsconfig-like JSON payload at the very start of the document.
const result = await editor.edit(editBuilder => {
editBuilder.insert(new vscode.Position(0, 0), `{
"extends": "./tsconfig.base.json",
"compilerOptions": {
"removeComments": false,
"preserveConstEnums": true,
"sourceMap": false,
"outDir": "../out/vs",
"target": "es2020",
"types": [
"keytar",
"mocha",
"semver",
"sinon",
"winreg",
"trusted-types",
"wicg-file-system-access"
],
"plugins": [
{
"name": "tsec",
"exemptionConfig": "./tsec.exemptions.json"
}
]
},
"include": [
"./typings",
"./vs"
]
}`);
});
// `edit` resolves to a boolean; the insert must have been applied.
assert.ok(result);
// Changing the language triggers a file to be closed and opened again so wait for that event to happen.
// NOTE(review): 5000 is presumably a timeout in milliseconds for `asPromise` - confirm in ../utils.
const newDoc = await asPromise(vscode.workspace.onDidOpenTextDocument, 5000);
assert.strictEqual(newDoc.languageId, 'json');
});
});

View file

@ -5,5 +5,6 @@
"files.exclude": {
"**/files-exclude/**": true
},
"editor.minimap.enabled": false // see https://github.com/microsoft/vscode/issues/115747
"editor.minimap.enabled": false, // see https://github.com/microsoft/vscode/issues/115747
"workbench.editor.untitled.experimentalLanguageDetection": true
}

View file

@ -1,7 +1,7 @@
{
"name": "code-oss-dev",
"version": "1.60.0",
"distro": "60d5c7616e3d0825ded1a695b8a67199c6d577f0",
"distro": "61e4c454dd3e0928fdf44dce555922b34ea8e507",
"author": {
"name": "Microsoft Corporation"
},

View file

@ -17,6 +17,7 @@ exports.base = [{
exports.workerExtensionHost = [entrypoint('vs/workbench/services/extensions/worker/extensionHostWorker')];
exports.workerNotebook = [entrypoint('vs/workbench/contrib/notebook/common/services/notebookSimpleWorker')];
exports.workerLanguageDetection = [entrypoint('vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker')];
exports.workbenchDesktop = require('./vs/workbench/buildfile.desktop').collectModules();
exports.workbenchWeb = require('./vs/workbench/buildfile.web').collectModules();

View file

@ -76,7 +76,7 @@ export interface ICommonModel extends ILinkComputerTarget, IMirrorModel {
* Range of a word inside a model.
* @internal
*/
interface IWordRange {
export interface IWordRange {
/**
* The index where the word starts.
*/
@ -90,7 +90,7 @@ interface IWordRange {
/**
* @internal
*/
class MirrorModel extends BaseMirrorModel implements ICommonModel {
export class MirrorModel extends BaseMirrorModel implements ICommonModel {
public get uri(): URI {
return this._uri;
@ -326,7 +326,7 @@ declare const require: any;
export class EditorSimpleWorker implements IRequestHandler, IDisposable {
_requestHandlerBrand: any;
private readonly _host: EditorWorkerHost;
protected readonly _host: EditorWorkerHost;
private _models: { [uri: string]: MirrorModel; };
private readonly _foreignModuleFactory: IForeignModuleFactory | null;
private _foreignModule: any;

View file

@ -388,11 +388,15 @@ class SynchronousWorkerClient<T extends IDisposable> implements IWorkerClient<T>
}
}
/**
 * The part of a worker client that `EditorWorkerHost` depends on: a single
 * entry point for requests addressed to the host side.
 */
export interface IEditorWorkerClient {
/**
 * Handle a request identified by `method` with positional `args`.
 * NOTE(review): `fhr` presumably stands for "foreign host request" - confirm against implementers.
 */
fhr(method: string, args: any[]): Promise<any>;
}
export class EditorWorkerHost {
private readonly _workerClient: EditorWorkerClient;
private readonly _workerClient: IEditorWorkerClient;
constructor(workerClient: EditorWorkerClient) {
constructor(workerClient: IEditorWorkerClient) {
this._workerClient = workerClient;
}
@ -402,12 +406,12 @@ export class EditorWorkerHost {
}
}
export class EditorWorkerClient extends Disposable {
export class EditorWorkerClient extends Disposable implements IEditorWorkerClient {
private readonly _modelService: IModelService;
private readonly _keepIdleModels: boolean;
private _worker: IWorkerClient<EditorSimpleWorker> | null;
private readonly _workerFactory: DefaultWorkerFactory;
protected _worker: IWorkerClient<EditorSimpleWorker> | null;
protected readonly _workerFactory: DefaultWorkerFactory;
private _modelManager: EditorModelManager | null;
private _disposed = false;

View file

@ -55,7 +55,7 @@ import { ThemeColor, themeColorFromId } from 'vs/platform/theme/common/themeServ
import { ITelemetryData, ITelemetryService } from 'vs/platform/telemetry/common/telemetry';
import { SideBySideEditorInput } from 'vs/workbench/common/editor/sideBySideEditorInput';
import { ILanguageStatus, ILanguageStatusService } from 'vs/editor/common/services/languageStatusService';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
import { AutomaticLanguageDetectionLikelyWrongClassification, AutomaticLanguageDetectionLikelyWrongId, IAutomaticLanguageDetectionLikelyWrongData, ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
class SideBySideEditorEncodingSupport implements IEncodingSupport {
constructor(private primary: IEncodingSupport, private secondary: IEncodingSupport) { }
@ -1121,6 +1121,10 @@ export class ShowLanguageExtensionsAction extends Action {
}
}
/**
 * Quick pick entry for a language that was suggested by automatic language detection.
 */
interface IDetectedLanguageQuickPickItem extends IQuickPickItem {
/** Position of this language in the list of detected languages; 0 is the top guess. */
guessRank: number;
}
export class ChangeModeAction extends Action {
static readonly ID = 'workbench.action.editor.changeLanguageMode';
@ -1222,6 +1226,7 @@ export class ChangeModeAction extends Action {
picks.unshift(autoDetectMode);
} else if (detectedLanguages) {
// Add untitled detected languages
let index = detectedLanguages.length - 1;
for (const modeId of detectedLanguages.reverse()) {
const lang = this.modeService.getLanguageName(modeId) || 'unknown';
let description: string;
@ -1231,11 +1236,13 @@ export class ChangeModeAction extends Action {
description = localize('languageDescriptionConfigured', "({0})", modeId);
}
picks.unshift({
const pick: IDetectedLanguageQuickPickItem = {
label: lang,
iconClasses: getIconClassesForModeId(modeId),
description
});
description,
guessRank: index--,
};
picks.unshift(pick);
}
picks.unshift({ type: 'separator', label: localize('detectedLanguagesPicks', "detected languages (identifier)") });
@ -1284,6 +1291,18 @@ export class ChangeModeAction extends Action {
languageSelection = this.modeService.createByLanguageName(pick.label);
}
const guessRankOfPicked: number = (pick as IDetectedLanguageQuickPickItem).guessRank ?? -1;
// If we detected languages and they didn't choose the top detected language (which should also be the active language if automatic detection is enabled)
// then the automatic language detection was likely wrong and the user is correcting it. In this case, we want telemetry.
if (detectedLanguages.length && guessRankOfPicked !== 0) {
this.telemetryService.publicLog2<IAutomaticLanguageDetectionLikelyWrongData, AutomaticLanguageDetectionLikelyWrongClassification>(AutomaticLanguageDetectionLikelyWrongId, {
// For languages that weren't guessed, the guessRankOfPicked will be -1. This detail tells us if the user chose the language that was guessed or not.
choseOtherGuessedLanguage: guessRankOfPicked !== -1,
currentLanguageId: currentLanguageId ?? 'unknown',
nextLanguageId: languageSelection?.languageIdentifier.language ?? 'unknown'
});
}
// Change mode
if (typeof languageSelection !== 'undefined') {
modeSupport.setMode(languageSelection.languageIdentifier.language);

View file

@ -1,173 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Disposable } from 'vs/base/common/lifecycle';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService';
import { FileAccess } from 'vs/base/common/network';
import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection';
import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { IModeService } from 'vs/editor/common/services/modeService';
import { URI } from 'vs/base/common/uri';
import { isWeb } from 'vs/base/common/platform';
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
import { Registry } from 'vs/platform/registry/common/platform';
import { Extensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common/contributions';
import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle';
import { debounce } from 'vs/base/common/decorators';
import { IWorkingCopyService } from 'vs/workbench/services/workingCopy/common/workingCopyService';
import { IWorkingCopy } from 'vs/workbench/services/workingCopy/common/workingCopy';
/**
 * Language detection service that loads the `@vscode/vscode-languagedetection`
 * model in the same context as the service and reacts to working copy content
 * changes of untitled editors.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
// Threshold used both as the minimum confidence of a result and as the
// minimum confidence gap between consecutive results (see detectLanguagesImpl).
private static readonly expectedRelativeConfidence = 0.2;
// Configuration key that turns the feature on or off (looked up per language mode).
static readonly enablementSettingKey = 'workbench.editor.untitled.experimentalLanguageDetection';
// Set once loading the model operations failed; later detections bail out immediately.
private _loadFailed = false;
// Lazily created by getModelOperations() and reused afterwards.
private _modelOperations: ModelOperations | undefined;
_serviceBrand: undefined;
constructor(
@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
@IModeService private readonly _modeService: IModeService,
@IConfigurationService private readonly _configurationService: IConfigurationService,
@IUntitledTextEditorService private readonly _untitledTextEditorService: IUntitledTextEditorService,
@IWorkingCopyService _workingCopyService: IWorkingCopyService) {
super();
// Re-run detection whenever any working copy changes its content.
this._register(_workingCopyService.onDidChangeContent(e => this.handleChangeEvent(e)));
}
// NOTE(review): 600 is presumably a delay in milliseconds - confirm in vs/base/common/decorators.
@debounce(600)
private async handleChangeEvent(e: IWorkingCopy) {
// Only untitled models whose mode is enabled for detection and was not set explicitly are handled.
const untitledEditorModel = this._untitledTextEditorService.get(e.resource);
if (!untitledEditorModel
|| !this.isEnabledForMode(untitledEditorModel.getMode())
|| untitledEditorModel.hasModeSetExplicitly) {
return;
}
const value = this._untitledTextEditorService.getValue(e.resource);
if (!value) { return; }
const lang = await this.detectLanguage(value);
if (!lang) { return; }
// NOTE(review): the `false` argument presumably marks the mode as not explicitly set - confirm in setMode.
untitledEditorModel.setMode(lang, false);
}
/**
 * Returns the cached `ModelOperations`, creating it on first use by dynamically
 * importing the detection library and wiring up loaders for the model JSON and weights.
 */
private async getModelOperations(): Promise<ModelOperations> {
if (this._modelOperations) {
return this._modelOperations;
}
const { ModelOperations } = await import('@vscode/vscode-languagedetection');
this._modelOperations = new ModelOperations(
// Loader for model.json: built non-web products read from node_modules.asar.unpacked.
async () => {
const response = await fetch(this._environmentService.isBuilt && !isWeb
? FileAccess.asBrowserUri('../../../../../../node_modules.asar.unpacked/@vscode/vscode-languagedetection/model/model.json', require).toString(true)
: FileAccess.asBrowserUri('../../../../../../node_modules/@vscode/vscode-languagedetection/model/model.json', require).toString(true));
try {
const modelJSON = await response.json();
return modelJSON;
} catch (e) {
// The original parse error is replaced by a generic message.
const message = `Failed to parse model JSON.`;
throw new Error(message);
}
},
// Loader for the weights file, resolved with the same built/non-built rule.
async () => {
const response = await fetch(this._environmentService.isBuilt && !isWeb
? FileAccess.asBrowserUri('../../../../../../node_modules.asar.unpacked/@vscode/vscode-languagedetection/model/group1-shard1of1.bin', require).toString(true)
: FileAccess.asBrowserUri('../../../../../../node_modules/@vscode/vscode-languagedetection/model/group1-shard1of1.bin', require).toString(true));
const buffer = await response.arrayBuffer();
return buffer;
}
);
return this._register(this._modelOperations);
}
/** Whether the enablement setting is on for the given mode; an empty/undefined mode is never enabled. */
private isEnabledForMode(modeId: string | undefined): boolean {
return !!modeId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: modeId });
}
/**
 * Detects the first language yielded for the given text, or for the current value of the
 * untitled editor behind the given URI. Resolves to `undefined` when there is no content
 * or nothing is yielded.
 */
async detectLanguage(contentOrResource: string | URI): Promise<string | undefined> {
let content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource;
if (content) {
// Only the first yielded language is needed; returning ends the iteration.
for await (const language of this.detectLanguagesImpl(content)) {
return language;
}
}
return undefined;
}
/** Same as detectLanguage, but collects every yielded language in order. */
async detectLanguages(contentOrResource: string | URI): Promise<string[]> {
let content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource;
const languages: string[] = [];
if (content) {
for await (const language of this.detectLanguagesImpl(content)) {
languages.push(language);
}
}
return languages;
}
/**
 * Runs the model on `content` and yields mode ids for the accepted results.
 * Yields nothing when the model failed to load, produced no results, or the
 * first result is below the confidence threshold.
 */
private async * detectLanguagesImpl(content: string) {
if (this._loadFailed) {
return;
}
let modelOperations: ModelOperations | undefined;
try {
modelOperations = await this.getModelOperations();
} catch (e) {
// Remember the failure so the load is not retried.
this._loadFailed = true;
return;
}
const modelResults = await modelOperations.runModel(content);
if (!modelResults
|| modelResults.length === 0
|| modelResults[0].confidence < LanguageDetectionService.expectedRelativeConfidence) {
return;
}
// Buffer of candidates that are only yielded once a later result trails
// the last buffered one by at least the threshold.
const possibleLanguages: ModelResult[] = [modelResults[0]];
for (let current of modelResults) {
if (current === modelResults[0]) {
continue;
}
const currentHighest = possibleLanguages[possibleLanguages.length - 1];
if (currentHighest.confidence - current.confidence >= LanguageDetectionService.expectedRelativeConfidence) {
// Big enough gap: flush the buffered candidates, mapped to mode ids.
while (possibleLanguages.length) {
// TODO: see if there's a better way to do this.
// The detected id is treated as a file extension to look up the mode id.
const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${possibleLanguages.shift()!.languageId}`));
if (vscodeLanguageId) {
yield vscodeLanguageId;
}
}
if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) {
possibleLanguages.push(current);
continue;
}
return;
} else {
// Gap too small: keep buffering while results stay above the threshold;
// otherwise stop without yielding what is buffered.
if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) {
possibleLanguages.push(current);
continue;
}
return;
}
}
}
}
// Register the service as a workbench contribution for the `Eventually` lifecycle phase
// and as the singleton implementation of ILanguageDetectionService.
Registry.as<IWorkbenchContributionsRegistry>(Extensions.Workbench)
.registerWorkbenchContribution(LanguageDetectionService, LifecyclePhase.Eventually);
registerSingleton(ILanguageDetectionService, LanguageDetectionService);

View file

@ -0,0 +1,139 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection';
// import { IDisposable } from 'vs/base/common/lifecycle';
import { StopWatch } from 'vs/base/common/stopwatch';
// import { URI } from 'vs/base/common/uri';
import { IRequestHandler } from 'vs/base/common/worker/simpleWorker';
// import { IModelChangedEvent } from 'vs/editor/common/model/mirrorTextModel';
import { EditorSimpleWorker } from 'vs/editor/common/services/editorSimpleWorker';
import { EditorWorkerHost } from 'vs/editor/common/services/editorWorkerServiceImpl';
/**
 * Factory invoked on the worker side to build the request handler for
 * language detection.
 * @param host Channel back to the side that spawned the worker.
 * @internal
 */
export function create(host: EditorWorkerHost): IRequestHandler {
	const worker = new LanguageDetectionSimpleWorker(host, null);
	return worker;
}
/**
 * Worker that runs the language detection model against a mirrored model and
 * reports telemetry through its host.
 * @internal
 */
export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
	/** Minimum confidence of a result, and minimum gap between consecutive results. */
	private static readonly expectedRelativeConfidence = 0.2;

	/** Created lazily on first detection and reused afterwards. */
	private _modelOperations: ModelOperations | undefined;

	/** Once loading failed, every later detection yields nothing. */
	private _loadFailed: boolean = false;

	/**
	 * Detects the most likely language of the model identified by `uri`.
	 * Telemetry is only sent when a language was found.
	 * @returns the detected language id, or `undefined` when nothing was yielded.
	 */
	public async detectLanguage(uri: string): Promise<string | undefined> {
		const timer = new StopWatch(true);
		for await (const { languageId, confidence } of this.detectLanguagesImpl(uri)) {
			timer.stop();
			this._host.fhr('sendTelemetryEvent', [[languageId], [confidence], timer.elapsed()]);
			return languageId;
		}
		return undefined;
	}

	/**
	 * Detects all accepted languages of the model identified by `uri`, best first.
	 * Telemetry is always sent, even when the list is empty.
	 */
	public async detectLanguages(uri: string): Promise<string[]> {
		const timer = new StopWatch(true);
		const accepted: ModelResult[] = [];
		for await (const result of this.detectLanguagesImpl(uri)) {
			accepted.push(result);
		}
		timer.stop();

		const languageIds = accepted.map(result => result.languageId);
		const confidences = accepted.map(result => result.confidence);
		this._host.fhr('sendTelemetryEvent', [languageIds, confidences, timer.elapsed()]);
		return languageIds;
	}

	/**
	 * Returns the cached model operations, creating them on first use. The
	 * library entry point, the model JSON and the weights are all located via
	 * URIs requested from the host.
	 */
	private async getModelOperations(): Promise<ModelOperations> {
		const cached = this._modelOperations;
		if (cached) {
			return cached;
		}

		const indexJsUri: string = await this._host.fhr('getIndexJsUri', []);
		const { ModelOperations: ModelOperationsCtor } = await import(indexJsUri);

		// Only a JSON parse failure is rewrapped; a failing fetch propagates as is.
		const loadModelJson = async () => {
			const response = await fetch(await this._host.fhr('getModelJsonUri', []));
			try {
				return await response.json();
			} catch (e) {
				throw new Error(`Failed to parse model JSON.`);
			}
		};

		const loadWeights = async () => {
			const response = await fetch(await this._host.fhr('getWeightsUri', []));
			return response.arrayBuffer();
		};

		const operations = new ModelOperationsCtor(loadModelJson, loadWeights);
		this._modelOperations = operations;
		return operations;
	}

	/**
	 * Runs the model on the current value of the mirrored model and yields the
	 * accepted results in model order.
	 *
	 * Results are buffered and only yielded once a later result trails the last
	 * buffered one by at least the threshold. Iteration stops at the first
	 * result that is not above the threshold; anything still buffered at that
	 * point is dropped.
	 */
	private async * detectLanguagesImpl(uri: string): AsyncGenerator<ModelResult, void, unknown> {
		if (this._loadFailed) {
			return;
		}

		let operations: ModelOperations;
		try {
			operations = await this.getModelOperations();
		} catch (e) {
			console.log(e);
			this._loadFailed = true;
			return;
		}

		const model = this._getModel(uri);
		if (!model) {
			return;
		}

		const threshold = LanguageDetectionSimpleWorker.expectedRelativeConfidence;
		const ranked = await operations.runModel(model.getValue());
		if (!ranked || ranked.length === 0 || ranked[0].confidence < threshold) {
			return;
		}

		const top = ranked[0];
		const pending: ModelResult[] = [top];
		for (const candidate of ranked) {
			if (candidate === top) {
				continue;
			}

			const gap = pending[pending.length - 1].confidence - candidate.confidence;
			if (gap >= threshold) {
				// Clear winner(s): hand out everything buffered so far.
				yield* pending.splice(0, pending.length);
			}

			// Written as a negated `>` so that a NaN confidence also stops the iteration.
			if (!(candidate.confidence > threshold)) {
				return;
			}
			pending.push(candidate);
		}
	}
}

View file

@ -0,0 +1,201 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Disposable } from 'vs/base/common/lifecycle';
import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
import { FileAccess } from 'vs/base/common/network';
import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { IModeService } from 'vs/editor/common/services/modeService';
import { URI } from 'vs/base/common/uri';
import { isWeb } from 'vs/base/common/platform';
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
import { LanguageDetectionSimpleWorker } from 'vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker';
import { IModelService } from 'vs/editor/common/services/modelService';
import { SimpleWorkerClient } from 'vs/base/common/worker/simpleWorker';
import { ITelemetryService } from 'vs/platform/telemetry/common/telemetry';
import { EditorWorkerClient, EditorWorkerHost } from 'vs/editor/common/services/editorWorkerServiceImpl';
// Relative locations of the @vscode/vscode-languagedetection package, resolved
// with FileAccess.asBrowserUri(..., require) by LanguageDetectionService.
// The `.asar` variant is used when the product is built and not running on web.
const moduleLocation = '../../../../../../node_modules/@vscode/vscode-languagedetection';
const moduleLocationAsar = '../../../../../../node_modules.asar/@vscode/vscode-languagedetection';
/**
 * Language detection service that delegates the actual detection to a
 * `LanguageDetectionWorkerClient` and maps the detected language names to
 * mode ids.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	/** Configuration key that turns the feature on or off (looked up per language mode). */
	static readonly enablementSettingKey = 'workbench.editor.untitled.experimentalLanguageDetection';

	_serviceBrand: undefined;

	private readonly _languageDetectionWorkerClient: LanguageDetectionWorkerClient;

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@IModeService private readonly _modeService: IModeService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IModelService modelService: IModelService,
		@ITelemetryService telemetryService: ITelemetryService,
	) {
		super();

		// Built, non-web products load the package from the asar archive.
		const packageRoot = this._environmentService.isBuilt && !isWeb ? moduleLocationAsar : moduleLocation;
		const toBrowserUri = (relativePath: string): string =>
			FileAccess.asBrowserUri(`${packageRoot}/${relativePath}`, require).toString(true);

		// Register the client so that it is disposed together with this service
		// instead of being left behind.
		this._languageDetectionWorkerClient = this._register(new LanguageDetectionWorkerClient(
			modelService,
			telemetryService,
			// TODO: See if it's possible to bundle vscode-languagedetection
			toBrowserUri('dist/lib/index.js'),
			toBrowserUri('model/model.json'),
			toBrowserUri('model/group1-shard1of1.bin')));
	}

	/**
	 * @param modeId The mode to check.
	 * @returns whether the enablement setting is on for that mode; an empty mode id is never enabled.
	 */
	public isEnabledForMode(modeId: string): boolean {
		return !!modeId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: modeId });
	}

	/**
	 * Maps a detected language name to a mode id by treating the name as a
	 * file extension.
	 * @returns the mode id, or `undefined` when there is no language or no matching mode.
	 */
	private getModeId(language: string | undefined): string | undefined {
		if (!language) {
			return undefined;
		}
		return this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${language}`)) ?? undefined;
	}

	/**
	 * @param resource The resource to detect the language for.
	 * @returns the mode id of the detected language, or `undefined` when nothing was detected or it maps to no mode.
	 */
	async detectLanguage(resource: URI): Promise<string | undefined> {
		const language = await this._languageDetectionWorkerClient.detectLanguage(resource);
		return this.getModeId(language);
	}

	/**
	 * @param resource The resource to detect the languages for.
	 * @returns the mode ids of all detected languages, in detection order; languages without a matching mode are left out.
	 */
	async detectLanguages(resource: URI): Promise<string[]> {
		const detected = await this._languageDetectionWorkerClient.detectLanguages(resource);
		const modeIds: string[] = [];
		for (const language of detected) {
			const modeId = this.getModeId(language);
			if (modeId) {
				modeIds.push(modeId);
			}
		}
		return modeIds;
	}
}
/**
 * Handle to a worker of type `W`.
 */
export interface IWorkerClient<W> {
/** Resolves to the object through which the worker's methods are called. */
getProxyObject(): Promise<W>;
/** Releases the worker client. */
dispose(): void;
}
/**
 * Host-side object exposing the resource URIs needed for language detection
 * and a way to report detection statistics as telemetry.
 */
export class LanguageDetectionWorkerHost {
	constructor(
		private _indexJsUri: string,
		private _modelJsonUri: string,
		private _weightsUri: string,
		private _telemetryService: ITelemetryService,
	) {
	}

	/** @returns the URI of the detection library entry point passed to the constructor. */
	async getIndexJsUri(): Promise<string> {
		return this._indexJsUri;
	}

	/** @returns the URI of the model JSON passed to the constructor. */
	async getModelJsonUri(): Promise<string> {
		return this._modelJsonUri;
	}

	/** @returns the URI of the model weights passed to the constructor. */
	async getWeightsUri(): Promise<string> {
		return this._weightsUri;
	}

	/**
	 * Logs one detection run. Languages and confidences are flattened to
	 * comma separated strings before being sent.
	 * @param languages The detected language ids.
	 * @param confidences The confidence for each entry of `languages`.
	 * @param timeSpent How long the detection took.
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		type StatsEvent = { languages: string; confidences: string; timeSpent: number; };
		type StatsEventClassification = {
			languages: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
			confidences: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
			timeSpent: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
		};

		const payload: StatsEvent = {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		};
		this._telemetryService.publicLog2<StatsEvent, StatsEventClassification>('automaticlanguagedetection.stats', payload);
	}
}
/**
 * Worker client that lazily starts the language detection worker, answers
 * the worker's host requests and forwards detection calls to it.
 */
export class LanguageDetectionWorkerClient extends EditorWorkerClient {
	/** Created on first use by `_getOrCreateLanguageDetectionWorker`. */
	private worker: IWorkerClient<LanguageDetectionSimpleWorker> | undefined;

	constructor(
		modelService: IModelService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string
	) {
		super(modelService, true, 'languageDetectionWorkerService');
	}

	/** Returns the existing worker client, or creates and registers a new one. */
	private _getOrCreateLanguageDetectionWorker(): IWorkerClient<LanguageDetectionSimpleWorker> {
		if (this.worker) {
			return this.worker;
		}

		const created = this._register(new SimpleWorkerClient<LanguageDetectionSimpleWorker, EditorWorkerHost>(
			this._workerFactory,
			'vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker',
			new EditorWorkerHost(this)
		));
		this.worker = created;
		return created;
	}

	/** Resolves to the proxy of the language detection worker. */
	override async _getProxy(): Promise<LanguageDetectionSimpleWorker> {
		const client = this._getOrCreateLanguageDetectionWorker();
		return await client.getProxyObject();
	}

	/**
	 * Foreign host request: answers the requests specific to language
	 * detection and hands every other method to the base class.
	 */
	public override async fhr(method: string, args: any[]): Promise<any> {
		if (method === 'getIndexJsUri') {
			return this.getIndexJsUri();
		}
		if (method === 'getModelJsonUri') {
			return this.getModelJsonUri();
		}
		if (method === 'getWeightsUri') {
			return this.getWeightsUri();
		}
		if (method === 'sendTelemetryEvent') {
			const [languages, confidences, timeSpent] = [args[0], args[1], args[2]];
			return this.sendTelemetryEvent(languages, confidences, timeSpent);
		}
		return super.fhr(method, args);
	}

	/** @returns the URI of the detection library entry point passed to the constructor. */
	async getIndexJsUri(): Promise<string> {
		return this._indexJsUri;
	}

	/** @returns the URI of the model JSON passed to the constructor. */
	async getModelJsonUri(): Promise<string> {
		return this._modelJsonUri;
	}

	/** @returns the URI of the model weights passed to the constructor. */
	async getWeightsUri(): Promise<string> {
		return this._weightsUri;
	}

	/**
	 * Logs one detection run; languages and confidences are sent as comma
	 * separated strings.
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		const stats: ILanguageDetectionStats = {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		};
		this._telemetryService.publicLog2<ILanguageDetectionStats, LanguageDetectionStatsClassification>(LanguageDetectionStatsId, stats);
	}

	/**
	 * Syncs `resource` to the worker, then asks it for the most likely language.
	 */
	public async detectLanguage(resource: URI): Promise<string | undefined> {
		await this._withSyncedResources([resource]);
		const proxy = await this._getProxy();
		return proxy.detectLanguage(resource.toString());
	}

	/**
	 * Syncs `resource` to the worker, then asks it for all accepted languages.
	 */
	public async detectLanguages(resource: URI): Promise<string[]> {
		await this._withSyncedResources([resource]);
		const proxy = await this._getProxy();
		return proxy.detectLanguages(resource.toString());
	}
}
// Register LanguageDetectionService as the singleton implementation of ILanguageDetectionService.
registerSingleton(ILanguageDetectionService, LanguageDetectionService);

View file

@ -1,16 +0,0 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { URI } from 'vs/base/common/uri';
import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService');
export interface ILanguageDetectionService {
readonly _serviceBrand: undefined;
detectLanguage(contentOrResource: string | URI): Promise<string | undefined>;
detectLanguages(contentOrResource: string | URI): Promise<string[]>;
}

View file

@ -0,0 +1,63 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { URI } from 'vs/base/common/uri';
import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
// Service decorator/identifier; shares its name with the interface below.
export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService');
/**
 * Detects the language of a resource from its content.
 */
export interface ILanguageDetectionService {
readonly _serviceBrand: undefined;
/**
 * @param modeId The modeId to check if language detection is currently enabled.
 * @returns whether or not language detection is on for this language mode.
 */
isEnabledForMode(modeId: string): boolean;
/**
 * @param resource The resource to detect the language for.
 * @returns the language mode for the given resource or undefined if the model is not confident enough.
 */
detectLanguage(resource: URI): Promise<string | undefined>;
/**
 * @param resource The resource to detect the language for.
 * @returns all possible language modes detected in this resource.
 */
detectLanguages(resource: URI): Promise<string[]>;
}
//#region Telemetry events
// Event id, payload shape and classification for the "detection was likely wrong" event.
export const AutomaticLanguageDetectionLikelyWrongId = 'automaticlanguagedetection.likelywrong';
export interface IAutomaticLanguageDetectionLikelyWrongData {
// NOTE(review): presumably true when the picked language was one of the detected guesses - confirm at the logging site.
choseOtherGuessedLanguage: boolean;
currentLanguageId: string;
nextLanguageId: string;
}
export type AutomaticLanguageDetectionLikelyWrongClassification = {
choseOtherGuessedLanguage: { classification: 'SystemMetaData', purpose: 'FeatureInsight' },
currentLanguageId: { classification: 'SystemMetaData', purpose: 'FeatureInsight' },
nextLanguageId: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }
};
// Event id, payload shape and classification for per-detection statistics.
export const LanguageDetectionStatsId = 'automaticlanguagedetection.stats';
export interface ILanguageDetectionStats {
// `languages` and `confidences` are strings, not arrays.
// NOTE(review): presumably comma separated lists - confirm at the logging site.
languages: string;
confidences: string;
// NOTE(review): unit is not stated here; presumably milliseconds - confirm at the logging site.
timeSpent: number;
}
export type LanguageDetectionStatsClassification = {
languages: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
confidences: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
timeSpent: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
};
//#endregion

View file

@ -26,6 +26,9 @@ import { CancellationToken } from 'vs/base/common/cancellation';
import { getCharContainingOffset } from 'vs/base/common/strings';
import { UTF8 } from 'vs/workbench/services/textfile/common/encoding';
import { bufferToStream, VSBuffer, VSBufferReadableStream } from 'vs/base/common/buffer';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
import { PLAINTEXT_MODE_ID } from 'vs/editor/common/modes/modesRegistry';
import { RunOnceScheduler } from 'vs/base/common/async';
export interface IUntitledTextEditorModel extends ITextEditorModel, IModeSupport, IEncodingSupport, IWorkingCopy {
@ -93,6 +96,8 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
readonly capabilities = WorkingCopyCapabilities.Untitled;
// Runs `autoDetectLanguage` through a RunOnceScheduler configured with a delay of 600; registered for disposal with the model.
private readonly _autoDetectLanguageScheduler = this._register(new RunOnceScheduler(() => this.autoDetectLanguage(), 600));
//#region Name
private configuredLabelFormat: 'content' | 'name' = 'content';
@ -126,7 +131,8 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
@IWorkingCopyService private readonly workingCopyService: IWorkingCopyService,
@ITextFileService private readonly textFileService: ITextFileService,
@ILabelService private readonly labelService: ILabelService,
@IEditorService private readonly editorService: IEditorService
@IEditorService private readonly editorService: IEditorService,
@ILanguageDetectionService private readonly languageDetectionService: ILanguageDetectionService
) {
super(modelService, modeService);
@ -134,7 +140,7 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
this._register(this.workingCopyService.registerWorkingCopy(this));
if (preferredMode) {
this.setMode(preferredMode);
this.setModeInternal(preferredMode);
}
// Fetch config
@ -178,11 +184,14 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
private _hasModeSetExplicitly: boolean = false;
get hasModeSetExplicitly(): boolean { return this._hasModeSetExplicitly; }
override setMode(mode: string, setExplicitly = true): void {
override setMode(mode: string): void {
// Remember that an explicit mode was set
this._hasModeSetExplicitly = setExplicitly;
this._hasModeSetExplicitly = true;
this.setModeInternal(mode);
}
private setModeInternal(mode: string): void {
let actualMode: string | undefined = undefined;
if (mode === '${activeEditorLanguage}') {
// support the special '${activeEditorLanguage}' mode by
@ -368,6 +377,24 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
// Emit as general content change event
this._onDidChangeContent.fire();
// Try to detect language from content (debounced by some time to reduce pressure).
this._autoDetectLanguageScheduler.schedule();
}
/**
 * Detects the language of this model from its content and applies it, unless
 * a mode was set explicitly or detection is disabled for the current mode
 * (plain text is assumed when the model has no mode).
 *
 * The detected mode is applied through `setModeInternal`, so it does not
 * count as an explicitly set mode.
 */
private async autoDetectLanguage() {
	if (this.hasModeSetExplicitly || !this.languageDetectionService.isEnabledForMode(this.getMode() ?? PLAINTEXT_MODE_ID)) {
		return;
	}

	const lang = await this.languageDetectionService.detectLanguage(this.resource);
	if (!lang) {
		return;
	}

	// Detection is asynchronous, so the state checked above can be stale by now:
	// the model may have been disposed, or a mode may have been set explicitly in
	// the meantime. An explicit choice must never be replaced by a detected one.
	if (this.isDisposed() || this.hasModeSetExplicitly) {
		return;
	}

	this.setModeInternal(lang);
}
private updateNameFromFirstLine(textEditorModel: ITextModel): void {

View file

@ -279,7 +279,6 @@ suite('Untitled text editors', () => {
const service = accessor.untitledTextEditorService;
const input = instantiationService.createInstance(UntitledTextEditorInput, service.create({ mode }));
assert.ok(input.model.hasModeSetExplicitly);
assert.strictEqual(input.getMode(), mode);
const model = await input.resolve();

View file

@ -96,7 +96,7 @@ import 'vs/workbench/services/authentication/browser/authenticationService';
import 'vs/workbench/services/hover/browser/hoverService';
import 'vs/workbench/services/experiment/common/experimentService';
import 'vs/workbench/services/outline/browser/outlineService';
import 'vs/workbench/services/languageDetection/browser/languageDetectionService';
import 'vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl';
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
import { ExtensionGalleryService } from 'vs/platform/extensionManagement/common/extensionGalleryService';