Improve performance of language detection (#130006)
* initial move to worker * move event handling to the untitledTextEditorModel * reuse simpleWorker interfaces and classes * use correct path to languageDetection * have vscode-languagedetection be outside of the asar * add telemetry * don't unpackage anything from languagedetection because it's not needed * add an integration test * some of Ben's feedback * rework worker code to avoid duplication * add isDisposed check * fix test * Isi and Ben feedback part 2 * use RunOnceScheduler instead and try to fix the test using events * Ben feedback part 3 * bump distro
This commit is contained in:
parent
3d9899db8e
commit
cfcda1c048
|
@ -39,6 +39,7 @@ const vscodeEntryPoints = _.flatten([
|
|||
buildfile.base,
|
||||
buildfile.workerExtensionHost,
|
||||
buildfile.workerNotebook,
|
||||
buildfile.workerLanguageDetection,
|
||||
buildfile.workbenchDesktop,
|
||||
buildfile.code
|
||||
]);
|
||||
|
@ -233,9 +234,6 @@ function packageTask(platform, arch, sourceFolderName, destinationFolderName, op
|
|||
'**/node-pty/lib/worker/conoutSocketWorker.js',
|
||||
'**/node-pty/lib/shared/conout.js',
|
||||
'**/*.wasm',
|
||||
// For language detection
|
||||
'**/model.json',
|
||||
'**/group1-shard1of1.bin'
|
||||
], 'node_modules.asar'));
|
||||
|
||||
let all = es.merge(
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import * as assert from 'assert';
|
||||
import * as vscode from 'vscode';
|
||||
import { asPromise, assertNoRpc, closeAllEditors } from '../utils';
|
||||
|
||||
// Integration test: typing JSON into a fresh untitled editor should make the
// automatic language detection switch the document from `plaintext` to `json`.
suite('vscode - untitled automatic language detection', () => {

	// After every test: check that no RPC objects leaked, then close all editors
	// so the next test starts from a clean window.
	teardown(async function () {
		assertNoRpc();
		await closeAllEditors();
	});

	test('test automatic language detection works', async () => {
		const doc = await vscode.workspace.openTextDocument();
		const editor = await vscode.window.showTextDocument(doc);

		// A new untitled document starts out as plain text.
		assert.strictEqual(editor.document.languageId, 'plaintext');

		// The feature is opt-in; the test workspace settings are expected to enable it.
		const settingResult = vscode.workspace.getConfiguration().get<boolean>('workbench.editor.untitled.experimentalLanguageDetection');
		assert.ok(settingResult);

		// Changing the language triggers a file to be closed and opened again.
		// Subscribe BEFORE making the edit so the open event cannot fire while
		// nobody is listening, no matter how quickly detection runs.
		const reopened = asPromise(vscode.workspace.onDidOpenTextDocument, 5000);

		const edited = editor.edit(editBuilder => {
			editBuilder.insert(new vscode.Position(0, 0), `{
"extends": "./tsconfig.base.json",
"compilerOptions": {
"removeComments": false,
"preserveConstEnums": true,
"sourceMap": false,
"outDir": "../out/vs",
"target": "es2020",
"types": [
"keytar",
"mocha",
"semver",
"sinon",
"winreg",
"trusted-types",
"wicg-file-system-access"
],
"plugins": [
{
"name": "tsec",
"exemptionConfig": "./tsec.exemptions.json"
}
]
},
"include": [
"./typings",
"./vs"
]
}`);
		});

		// Await both together so a timeout of the event promise is always observed
		// by this test instead of surfacing later as an unhandled rejection.
		const [result, newDoc] = await Promise.all([edited, reopened]);

		assert.ok(result);
		assert.strictEqual(newDoc.languageId, 'json');
	});
});
|
|
@ -5,5 +5,6 @@
|
|||
"files.exclude": {
|
||||
"**/files-exclude/**": true
|
||||
},
|
||||
"editor.minimap.enabled": false // see https://github.com/microsoft/vscode/issues/115747
|
||||
"editor.minimap.enabled": false, // see https://github.com/microsoft/vscode/issues/115747
|
||||
"workbench.editor.untitled.experimentalLanguageDetection": true
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "code-oss-dev",
|
||||
"version": "1.60.0",
|
||||
"distro": "60d5c7616e3d0825ded1a695b8a67199c6d577f0",
|
||||
"distro": "61e4c454dd3e0928fdf44dce555922b34ea8e507",
|
||||
"author": {
|
||||
"name": "Microsoft Corporation"
|
||||
},
|
||||
|
|
|
@ -17,6 +17,7 @@ exports.base = [{
|
|||
|
||||
exports.workerExtensionHost = [entrypoint('vs/workbench/services/extensions/worker/extensionHostWorker')];
|
||||
exports.workerNotebook = [entrypoint('vs/workbench/contrib/notebook/common/services/notebookSimpleWorker')];
|
||||
exports.workerLanguageDetection = [entrypoint('vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker')];
|
||||
|
||||
exports.workbenchDesktop = require('./vs/workbench/buildfile.desktop').collectModules();
|
||||
exports.workbenchWeb = require('./vs/workbench/buildfile.web').collectModules();
|
||||
|
|
|
@ -76,7 +76,7 @@ export interface ICommonModel extends ILinkComputerTarget, IMirrorModel {
|
|||
* Range of a word inside a model.
|
||||
* @internal
|
||||
*/
|
||||
interface IWordRange {
|
||||
export interface IWordRange {
|
||||
/**
|
||||
* The index where the word starts.
|
||||
*/
|
||||
|
@ -90,7 +90,7 @@ interface IWordRange {
|
|||
/**
|
||||
* @internal
|
||||
*/
|
||||
class MirrorModel extends BaseMirrorModel implements ICommonModel {
|
||||
export class MirrorModel extends BaseMirrorModel implements ICommonModel {
|
||||
|
||||
public get uri(): URI {
|
||||
return this._uri;
|
||||
|
@ -326,7 +326,7 @@ declare const require: any;
|
|||
export class EditorSimpleWorker implements IRequestHandler, IDisposable {
|
||||
_requestHandlerBrand: any;
|
||||
|
||||
private readonly _host: EditorWorkerHost;
|
||||
protected readonly _host: EditorWorkerHost;
|
||||
private _models: { [uri: string]: MirrorModel; };
|
||||
private readonly _foreignModuleFactory: IForeignModuleFactory | null;
|
||||
private _foreignModule: any;
|
||||
|
|
|
@ -388,11 +388,15 @@ class SynchronousWorkerClient<T extends IDisposable> implements IWorkerClient<T>
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * The part of a worker client that `EditorWorkerHost` depends on.
 * Declaring it as an interface lets the host be constructed with any client
 * that can answer foreign host requests, not only `EditorWorkerClient`.
 */
export interface IEditorWorkerClient {
|
||||
// Foreign host request: invoke `method` with `args` and resolve with its result.
fhr(method: string, args: any[]): Promise<any>;
|
||||
}
|
||||
|
||||
export class EditorWorkerHost {
|
||||
|
||||
private readonly _workerClient: EditorWorkerClient;
|
||||
private readonly _workerClient: IEditorWorkerClient;
|
||||
|
||||
constructor(workerClient: EditorWorkerClient) {
|
||||
constructor(workerClient: IEditorWorkerClient) {
|
||||
this._workerClient = workerClient;
|
||||
}
|
||||
|
||||
|
@ -402,12 +406,12 @@ export class EditorWorkerHost {
|
|||
}
|
||||
}
|
||||
|
||||
export class EditorWorkerClient extends Disposable {
|
||||
export class EditorWorkerClient extends Disposable implements IEditorWorkerClient {
|
||||
|
||||
private readonly _modelService: IModelService;
|
||||
private readonly _keepIdleModels: boolean;
|
||||
private _worker: IWorkerClient<EditorSimpleWorker> | null;
|
||||
private readonly _workerFactory: DefaultWorkerFactory;
|
||||
protected _worker: IWorkerClient<EditorSimpleWorker> | null;
|
||||
protected readonly _workerFactory: DefaultWorkerFactory;
|
||||
private _modelManager: EditorModelManager | null;
|
||||
private _disposed = false;
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ import { ThemeColor, themeColorFromId } from 'vs/platform/theme/common/themeServ
|
|||
import { ITelemetryData, ITelemetryService } from 'vs/platform/telemetry/common/telemetry';
|
||||
import { SideBySideEditorInput } from 'vs/workbench/common/editor/sideBySideEditorInput';
|
||||
import { ILanguageStatus, ILanguageStatusService } from 'vs/editor/common/services/languageStatusService';
|
||||
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
|
||||
import { AutomaticLanguageDetectionLikelyWrongClassification, AutomaticLanguageDetectionLikelyWrongId, IAutomaticLanguageDetectionLikelyWrongData, ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
|
||||
|
||||
class SideBySideEditorEncodingSupport implements IEncodingSupport {
|
||||
constructor(private primary: IEncodingSupport, private secondary: IEncodingSupport) { }
|
||||
|
@ -1121,6 +1121,10 @@ export class ShowLanguageExtensionsAction extends Action {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Quick pick entry for a language suggested by automatic language detection.
 */
interface IDetectedLanguageQuickPickItem extends IQuickPickItem {
|
||||
// Position of this language in the list of detected languages; 0 is the top guess.
guessRank: number;
|
||||
}
|
||||
|
||||
export class ChangeModeAction extends Action {
|
||||
|
||||
static readonly ID = 'workbench.action.editor.changeLanguageMode';
|
||||
|
@ -1222,6 +1226,7 @@ export class ChangeModeAction extends Action {
|
|||
picks.unshift(autoDetectMode);
|
||||
} else if (detectedLanguages) {
|
||||
// Add untitled detected languages
|
||||
let index = detectedLanguages.length - 1;
|
||||
for (const modeId of detectedLanguages.reverse()) {
|
||||
const lang = this.modeService.getLanguageName(modeId) || 'unknown';
|
||||
let description: string;
|
||||
|
@ -1231,11 +1236,13 @@ export class ChangeModeAction extends Action {
|
|||
description = localize('languageDescriptionConfigured', "({0})", modeId);
|
||||
}
|
||||
|
||||
picks.unshift({
|
||||
const pick: IDetectedLanguageQuickPickItem = {
|
||||
label: lang,
|
||||
iconClasses: getIconClassesForModeId(modeId),
|
||||
description
|
||||
});
|
||||
description,
|
||||
guessRank: index--,
|
||||
};
|
||||
picks.unshift(pick);
|
||||
}
|
||||
|
||||
picks.unshift({ type: 'separator', label: localize('detectedLanguagesPicks', "detected languages (identifier)") });
|
||||
|
@ -1284,6 +1291,18 @@ export class ChangeModeAction extends Action {
|
|||
languageSelection = this.modeService.createByLanguageName(pick.label);
|
||||
}
|
||||
|
||||
const guessRankOfPicked: number = (pick as IDetectedLanguageQuickPickItem).guessRank ?? -1;
|
||||
// If we detected languages and they didn't choose the top detected language (which should also be the active language if automatic detection is enabled)
|
||||
// then the automatic language detection was likely wrong and the user is correcting it. In this case, we want telemetry.
|
||||
if (detectedLanguages.length && guessRankOfPicked !== 0) {
|
||||
this.telemetryService.publicLog2<IAutomaticLanguageDetectionLikelyWrongData, AutomaticLanguageDetectionLikelyWrongClassification>(AutomaticLanguageDetectionLikelyWrongId, {
|
||||
// For languages that weren't guessed, the guessRankOfPicked will be -1. This detail tells us if the user chose the language that was guessed or not.
|
||||
choseOtherGuessedLanguage: guessRankOfPicked !== -1,
|
||||
currentLanguageId: currentLanguageId ?? 'unknown',
|
||||
nextLanguageId: languageSelection?.languageIdentifier.language ?? 'unknown'
|
||||
});
|
||||
}
|
||||
|
||||
// Change mode
|
||||
if (typeof languageSelection !== 'undefined') {
|
||||
modeSupport.setMode(languageSelection.languageIdentifier.language);
|
||||
|
|
|
@ -1,173 +0,0 @@
|
|||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import { Disposable } from 'vs/base/common/lifecycle';
|
||||
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
|
||||
import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService';
|
||||
import { FileAccess } from 'vs/base/common/network';
|
||||
import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection';
|
||||
import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService';
|
||||
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
|
||||
import { IModeService } from 'vs/editor/common/services/modeService';
|
||||
import { URI } from 'vs/base/common/uri';
|
||||
import { isWeb } from 'vs/base/common/platform';
|
||||
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
|
||||
import { Registry } from 'vs/platform/registry/common/platform';
|
||||
import { Extensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common/contributions';
|
||||
import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle';
|
||||
import { debounce } from 'vs/base/common/decorators';
|
||||
import { IWorkingCopyService } from 'vs/workbench/services/workingCopy/common/workingCopyService';
|
||||
import { IWorkingCopy } from 'vs/workbench/services/workingCopy/common/workingCopy';
|
||||
|
||||
/**
 * Main-thread language detection for untitled editors.
 *
 * Listens to working copy content changes, runs the
 * `@vscode/vscode-languagedetection` model over the untitled editor's text and
 * applies the best guess as the editor mode unless a mode was set explicitly.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	/**
	 * Used both as the minimum confidence a guess needs and as the confidence gap
	 * that separates one group of guesses from the next.
	 */
	private static readonly expectedRelativeConfidence = 0.2;
	static readonly enablementSettingKey = 'workbench.editor.untitled.experimentalLanguageDetection';

	/** Set once loading the detection library failed; detection is skipped afterwards. */
	private _loadFailed = false;
	/** Lazily created model wrapper, see `getModelOperations`. */
	private _modelOperations: ModelOperations | undefined;
	_serviceBrand: undefined;

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@IModeService private readonly _modeService: IModeService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IUntitledTextEditorService private readonly _untitledTextEditorService: IUntitledTextEditorService,
		@IWorkingCopyService _workingCopyService: IWorkingCopyService) {
		super();

		this._register(_workingCopyService.onDidChangeContent(e => this.handleChangeEvent(e)));
	}

	/**
	 * Re-detects the language of the changed working copy when it is an untitled
	 * editor, detection is enabled for its current mode and the user has not
	 * picked a mode explicitly.
	 */
	@debounce(600)
	private async handleChangeEvent(e: IWorkingCopy) {
		const untitledEditorModel = this._untitledTextEditorService.get(e.resource);
		if (!untitledEditorModel
			|| !this.isEnabledForMode(untitledEditorModel.getMode())
			|| untitledEditorModel.hasModeSetExplicitly) {
			return;
		}

		const value = this._untitledTextEditorService.getValue(e.resource);
		if (!value) { return; }
		const lang = await this.detectLanguage(value);
		if (!lang) { return; }
		untitledEditorModel.setMode(lang, false);
	}

	/**
	 * Builds the browser URI of a file inside the detection library's `model`
	 * folder. Built non-web products read it from `node_modules.asar.unpacked`,
	 * everything else from `node_modules`.
	 */
	private getModelAssetUri(fileName: string): string {
		const modelFolder = this._environmentService.isBuilt && !isWeb
			? '../../../../../../node_modules.asar.unpacked/@vscode/vscode-languagedetection/model'
			: '../../../../../../node_modules/@vscode/vscode-languagedetection/model';
		return FileAccess.asBrowserUri(`${modelFolder}/${fileName}`, require).toString(true);
	}

	/**
	 * Returns the shared `ModelOperations`, importing the library and creating
	 * the instance on first use. The model JSON and weights are fetched by the
	 * two loader callbacks handed to `ModelOperations`.
	 */
	private async getModelOperations(): Promise<ModelOperations> {
		if (this._modelOperations) {
			return this._modelOperations;
		}

		const { ModelOperations } = await import('@vscode/vscode-languagedetection');

		// Another caller may have completed initialization while the import was
		// pending; reuse its instance instead of creating and registering a second one.
		if (this._modelOperations) {
			return this._modelOperations;
		}

		const modelOperations = new ModelOperations(
			async () => {
				const response = await fetch(this.getModelAssetUri('model.json'));
				try {
					const modelJSON = await response.json();
					return modelJSON;
				} catch (e) {
					const message = `Failed to parse model JSON.`;
					throw new Error(message);
				}
			},
			async () => {
				const response = await fetch(this.getModelAssetUri('group1-shard1of1.bin'));
				const buffer = await response.arrayBuffer();
				return buffer;
			}
		);

		this._modelOperations = modelOperations;
		return this._register(modelOperations);
	}

	/** Whether the enablement setting is on for the given mode; `false` without a mode id. */
	private isEnabledForMode(modeId: string | undefined): boolean {
		return !!modeId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: modeId });
	}

	/**
	 * Detects the most likely language.
	 * @param contentOrResource the text itself, or the resource of an untitled editor whose value is used
	 * @returns the first detected mode id, or `undefined` when there is no content or no guess
	 */
	async detectLanguage(contentOrResource: string | URI): Promise<string | undefined> {
		const content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource;

		if (content) {
			// Only the first yielded guess is needed; returning ends the generator.
			for await (const language of this.detectLanguagesImpl(content)) {
				return language;
			}
		}
		return undefined;
	}

	/**
	 * Detects all plausible languages, in the order the detection yields them.
	 * @param contentOrResource the text itself, or the resource of an untitled editor whose value is used
	 * @returns the detected mode ids; empty when there is no content or no guess
	 */
	async detectLanguages(contentOrResource: string | URI): Promise<string[]> {
		const content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource;

		const languages: string[] = [];
		if (content) {
			for await (const language of this.detectLanguagesImpl(content)) {
				languages.push(language);
			}
		}
		return languages;
	}

	/**
	 * Runs the model and yields mode ids group by group.
	 *
	 * Guesses are queued while consecutive confidences stay within
	 * `expectedRelativeConfidence` of each other. A queued group is yielded as
	 * soon as the next result trails it by at least that gap. Iteration stops at
	 * the first result whose confidence is not above the threshold.
	 */
	private async * detectLanguagesImpl(content: string) {
		if (this._loadFailed) {
			return;
		}

		let modelOperations: ModelOperations | undefined;
		try {
			modelOperations = await this.getModelOperations();
		} catch (e) {
			this._loadFailed = true;
			return;
		}

		const threshold = LanguageDetectionService.expectedRelativeConfidence;
		const modelResults = await modelOperations.runModel(content);
		if (!modelResults
			|| modelResults.length === 0
			|| modelResults[0].confidence < threshold) {
			return;
		}

		const possibleLanguages: ModelResult[] = [modelResults[0]];

		for (const current of modelResults) {

			// The top result already seeds the queue.
			if (current === modelResults[0]) {
				continue;
			}

			const currentHighest = possibleLanguages[possibleLanguages.length - 1];

			if (currentHighest.confidence - current.confidence >= threshold) {
				// Clear gap to the queued group: flush it.
				while (possibleLanguages.length) {
					// TODO: see if there's a better way to do this.
					const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${possibleLanguages.shift()!.languageId}`));
					if (vscodeLanguageId) {
						yield vscodeLanguageId;
					}
				}
			}

			// With or without a flush, a confident result is queued; anything else ends detection.
			if (current.confidence > threshold) {
				possibleLanguages.push(current);
				continue;
			}
			return;
		}
	}
}
|
||||
|
||||
// Registers the service as a workbench contribution for the `Eventually`
// lifecycle phase and, below, as the `ILanguageDetectionService` singleton.
// NOTE(review): the same class is registered through two mechanisms; presumably
// this can create two instances that both listen for content changes — confirm.
Registry.as<IWorkbenchContributionsRegistry>(Extensions.Workbench)
|
||||
.registerWorkbenchContribution(LanguageDetectionService, LifecyclePhase.Eventually);
|
||||
registerSingleton(ILanguageDetectionService, LanguageDetectionService);
|
|
@ -0,0 +1,139 @@
|
|||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection';
|
||||
// import { IDisposable } from 'vs/base/common/lifecycle';
|
||||
import { StopWatch } from 'vs/base/common/stopwatch';
|
||||
// import { URI } from 'vs/base/common/uri';
|
||||
import { IRequestHandler } from 'vs/base/common/worker/simpleWorker';
|
||||
// import { IModelChangedEvent } from 'vs/editor/common/model/mirrorTextModel';
|
||||
import { EditorSimpleWorker } from 'vs/editor/common/services/editorSimpleWorker';
|
||||
import { EditorWorkerHost } from 'vs/editor/common/services/editorWorkerServiceImpl';
|
||||
|
||||
/**
 * Worker-side factory: wraps the given host in a language detection worker.
 * Called on the worker side.
 *
 * @param host channel used to send requests back to the main thread
 * @returns the request handler serving the detection calls
 * @internal
 */
export function create(host: EditorWorkerHost): IRequestHandler {
	// The second constructor argument is passed as `null`, as before.
	const detectionWorker = new LanguageDetectionSimpleWorker(host, null);
	return detectionWorker;
}
|
||||
|
||||
/**
 * Worker-side language detection. Runs the `@vscode/vscode-languagedetection`
 * model over a mirrored model and reports raw model results; the library and
 * asset locations are requested from the host.
 *
 * @internal
 */
export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
	/**
	 * Used both as the minimum confidence a guess needs and as the confidence gap
	 * that separates one group of guesses from the next.
	 */
	private static readonly expectedRelativeConfidence = 0.2;

	/** Lazily created model wrapper, see `getModelOperations`. */
	private _modelOperations: ModelOperations | undefined;
	/** Set once obtaining the model operations failed; detection is skipped afterwards. */
	private _loadFailed: boolean = false;

	/**
	 * Detects the most likely language of the model with the given uri.
	 * @param uri string form of the model's resource
	 * @returns the language id of the first guess, or `undefined` when there is none
	 */
	public async detectLanguage(uri: string): Promise<string | undefined> {
		const stopWatch = new StopWatch(true);
		for await (const language of this.detectLanguagesImpl(uri)) {
			stopWatch.stop();
			// Not awaited: reporting telemetry does not delay the result.
			this._host.fhr('sendTelemetryEvent', [[language.languageId], [language.confidence], stopWatch.elapsed()]);
			return language.languageId;
		}
		return undefined;
	}

	/**
	 * Detects all plausible languages of the model with the given uri.
	 * @param uri string form of the model's resource
	 * @returns the language ids in the order the detection yields them
	 */
	public async detectLanguages(uri: string): Promise<string[]> {
		const languages: string[] = [];
		const confidences: number[] = [];
		const stopWatch = new StopWatch(true);
		for await (const language of this.detectLanguagesImpl(uri)) {
			languages.push(language.languageId);
			confidences.push(language.confidence);
		}
		stopWatch.stop();

		// Not awaited: reporting telemetry does not delay the result.
		this._host.fhr('sendTelemetryEvent', [languages, confidences, stopWatch.elapsed()]);
		return languages;
	}

	/**
	 * Returns the shared `ModelOperations`, importing the library from the uri
	 * supplied by the host and creating the instance on first use. The model JSON
	 * and weights are fetched by the two loader callbacks handed to
	 * `ModelOperations`, each asking the host for the asset uri.
	 */
	private async getModelOperations(): Promise<ModelOperations> {
		if (this._modelOperations) {
			return this._modelOperations;
		}

		const uri: string = await this._host.fhr('getIndexJsUri', []);
		const { ModelOperations } = await import(uri);

		// Another detection request may have completed initialization while the two
		// awaits above were pending; reuse its instance rather than building a second one.
		if (this._modelOperations) {
			return this._modelOperations;
		}

		const modelOperations: ModelOperations = new ModelOperations(
			async () => {
				const response = await fetch(await this._host.fhr('getModelJsonUri', []));
				try {
					const modelJSON = await response.json();
					return modelJSON;
				} catch (e) {
					const message = `Failed to parse model JSON.`;
					throw new Error(message);
				}
			},
			async () => {
				const response = await fetch(await this._host.fhr('getWeightsUri', []));
				const buffer = await response.arrayBuffer();
				return buffer;
			}
		);

		this._modelOperations = modelOperations;
		return modelOperations;
	}

	/**
	 * Runs the model over the mirrored model's text and yields results group by group.
	 *
	 * Guesses are queued while consecutive confidences stay within
	 * `expectedRelativeConfidence` of each other. A queued group is yielded as
	 * soon as the next result trails it by at least that gap. Iteration stops at
	 * the first result whose confidence is not above the threshold.
	 */
	private async * detectLanguagesImpl(uri: string): AsyncGenerator<ModelResult, void, unknown> {
		if (this._loadFailed) {
			return;
		}

		let modelOperations: ModelOperations | undefined;
		try {
			modelOperations = await this.getModelOperations();
		} catch (e) {
			// Report the failure on the error channel and stop retrying for this worker.
			console.error(e);
			this._loadFailed = true;
			return;
		}

		const content = this._getModel(uri);
		if (!content) {
			return;
		}

		const threshold = LanguageDetectionSimpleWorker.expectedRelativeConfidence;
		const modelResults = await modelOperations.runModel(content.getValue());
		if (!modelResults
			|| modelResults.length === 0
			|| modelResults[0].confidence < threshold) {
			return;
		}

		const possibleLanguages: ModelResult[] = [modelResults[0]];

		for (const current of modelResults) {

			// The top result already seeds the queue.
			if (current === modelResults[0]) {
				continue;
			}

			const currentHighest = possibleLanguages[possibleLanguages.length - 1];

			if (currentHighest.confidence - current.confidence >= threshold) {
				// Clear gap to the queued group: flush it.
				while (possibleLanguages.length) {
					yield possibleLanguages.shift()!;
				}
			}

			// With or without a flush, a confident result is queued; anything else ends detection.
			if (current.confidence > threshold) {
				possibleLanguages.push(current);
				continue;
			}
			return;
		}
	}
}
|
|
@ -0,0 +1,201 @@
|
|||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import { Disposable } from 'vs/base/common/lifecycle';
|
||||
import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
|
||||
import { FileAccess } from 'vs/base/common/network';
|
||||
import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService';
|
||||
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
|
||||
import { IModeService } from 'vs/editor/common/services/modeService';
|
||||
import { URI } from 'vs/base/common/uri';
|
||||
import { isWeb } from 'vs/base/common/platform';
|
||||
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
|
||||
import { LanguageDetectionSimpleWorker } from 'vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker';
|
||||
import { IModelService } from 'vs/editor/common/services/modelService';
|
||||
import { SimpleWorkerClient } from 'vs/base/common/worker/simpleWorker';
|
||||
import { ITelemetryService } from 'vs/platform/telemetry/common/telemetry';
|
||||
import { EditorWorkerClient, EditorWorkerHost } from 'vs/editor/common/services/editorWorkerServiceImpl';
|
||||
|
||||
// Relative locations of the language detection library, resolved to browser
// URIs by the service below. The plain `node_modules` path is the default.
const moduleLocation = '../../../../../../node_modules/@vscode/vscode-languagedetection';
|
||||
// Location used instead when the product is built and not running on web.
const moduleLocationAsar = '../../../../../../node_modules.asar/@vscode/vscode-languagedetection';
|
||||
/**
 * Main-thread facade of automatic language detection. The detection itself
 * runs in a worker reached through `LanguageDetectionWorkerClient`; this class
 * resolves the asset locations, and maps the worker's language ids to mode ids.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	static readonly enablementSettingKey = 'workbench.editor.untitled.experimentalLanguageDetection';

	_serviceBrand: undefined;

	private readonly _languageDetectionWorkerClient: LanguageDetectionWorkerClient;

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@IModeService private readonly _modeService: IModeService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IModelService modelService: IModelService,
		@ITelemetryService telemetryService: ITelemetryService,
	) {
		super();

		// TODO: See if it's possible to bundle vscode-languagedetection
		// Built non-web products load the library from the asar location.
		const moduleRoot = this._environmentService.isBuilt && !isWeb ? moduleLocationAsar : moduleLocation;
		const toBrowserUri = (relativePath: string): string =>
			FileAccess.asBrowserUri(`${moduleRoot}/${relativePath}`, require).toString(true);

		// Registered so the client is disposed together with this service
		// instead of staying alive after it.
		this._languageDetectionWorkerClient = this._register(new LanguageDetectionWorkerClient(
			modelService,
			telemetryService,
			toBrowserUri('dist/lib/index.js'),
			toBrowserUri('model/model.json'),
			toBrowserUri('model/group1-shard1of1.bin')
		));
	}

	/** Whether the enablement setting is on for the given mode; `false` for an empty mode id. */
	public isEnabledForMode(modeId: string): boolean {
		return !!modeId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: modeId });
	}

	/**
	 * Maps a language id reported by the worker to a mode id by resolving a
	 * file path that uses the language id as its extension.
	 * @returns the mode id, or `undefined` when there is no language or no match
	 */
	private getModeId(language: string | undefined): string | undefined {
		if (!language) {
			return undefined;
		}
		return this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${language}`)) ?? undefined;
	}

	/**
	 * Detects the most likely language of the given resource.
	 * @returns the mode id of the best guess, or `undefined` when nothing was detected or it maps to no mode
	 */
	async detectLanguage(resource: URI): Promise<string | undefined> {
		const language = await this._languageDetectionWorkerClient.detectLanguage(resource);
		return this.getModeId(language);
	}

	/**
	 * Detects all plausible languages of the given resource.
	 * @returns the mode ids in detection order; guesses that map to no mode are dropped
	 */
	async detectLanguages(resource: URI): Promise<string[]> {
		const languages = await this._languageDetectionWorkerClient.detectLanguages(resource);

		const modeIds: string[] = [];
		for (const language of languages) {
			const modeId = this.getModeId(language);
			if (modeId) {
				modeIds.push(modeId);
			}
		}
		return modeIds;
	}
}
|
||||
|
||||
/**
 * Handle to a worker of type `W` as used by `LanguageDetectionWorkerClient`.
 */
export interface IWorkerClient<W> {
|
||||
// Resolves with the proxy object through which the worker's methods are called.
getProxyObject(): Promise<W>;
|
||||
// Releases the worker client.
dispose(): void;
|
||||
}
|
||||
|
||||
/**
 * Host-side answers for the language detection worker: hands out the URIs of
 * the library entry point, the model JSON and the model weights, and publishes
 * detection statistics as telemetry.
 */
export class LanguageDetectionWorkerHost {
	constructor(
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string,
		private readonly _telemetryService: ITelemetryService,
	) { }

	/** Resolves with the URI of the library's `index.js`. */
	async getIndexJsUri(): Promise<string> {
		return this._indexJsUri;
	}

	/** Resolves with the URI of the model JSON. */
	async getModelJsonUri(): Promise<string> {
		return this._modelJsonUri;
	}

	/** Resolves with the URI of the model weights. */
	async getWeightsUri(): Promise<string> {
		return this._weightsUri;
	}

	/**
	 * Publishes one `automaticlanguagedetection.stats` event.
	 * @param languages detected language ids, sent comma-joined
	 * @param confidences confidences of the guesses, sent comma-joined
	 * @param timeSpent elapsed time as reported by the caller
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		type StatsEvent = { languages: string; confidences: string; timeSpent: number; };
		type StatsEventClassification = {
			languages: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
			confidences: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
			timeSpent: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
		};

		const joinedLanguages = languages.join(',');
		const joinedConfidences = confidences.join(',');
		const payload: StatsEvent = {
			languages: joinedLanguages,
			confidences: joinedConfidences,
			timeSpent
		};

		this._telemetryService.publicLog2<StatsEvent, StatsEventClassification>('automaticlanguagedetection.stats', payload);
	}
}
|
||||
|
||||
/**
 * Main-thread client of the language detection worker. It creates the worker
 * on demand, answers the worker's foreign host requests (asset URIs and
 * telemetry) and forwards detection calls after syncing the resource.
 */
export class LanguageDetectionWorkerClient extends EditorWorkerClient {
	/** Created on first use by `_getOrCreateLanguageDetectionWorker`. */
	private _detectionWorker: IWorkerClient<LanguageDetectionSimpleWorker> | undefined;

	constructor(
		modelService: IModelService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _indexJsUri: string,
		private readonly _modelJsonUri: string,
		private readonly _weightsUri: string
	) {
		super(modelService, true, 'languageDetectionWorkerService');
	}

	/** Returns the worker client, creating and registering it on the first call. */
	private _getOrCreateLanguageDetectionWorker(): IWorkerClient<LanguageDetectionSimpleWorker> {
		if (this._detectionWorker) {
			return this._detectionWorker;
		}

		const host = new EditorWorkerHost(this);
		const created = new SimpleWorkerClient<LanguageDetectionSimpleWorker, EditorWorkerHost>(
			this._workerFactory,
			'vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker',
			host
		);
		this._detectionWorker = this._register(created);
		return this._detectionWorker;
	}

	/** Resolves with the proxy of the language detection worker. */
	override async _getProxy(): Promise<LanguageDetectionSimpleWorker> {
		const workerClient = this._getOrCreateLanguageDetectionWorker();
		return await workerClient.getProxyObject();
	}

	/**
	 * Foreign host request: serves the requests specific to language detection
	 * and hands every other method to the base class.
	 */
	public override async fhr(method: string, args: any[]): Promise<any> {
		if (method === 'getIndexJsUri') {
			return this.getIndexJsUri();
		}
		if (method === 'getModelJsonUri') {
			return this.getModelJsonUri();
		}
		if (method === 'getWeightsUri') {
			return this.getWeightsUri();
		}
		if (method === 'sendTelemetryEvent') {
			const [languages, confidences, timeSpent] = [args[0], args[1], args[2]];
			return this.sendTelemetryEvent(languages, confidences, timeSpent);
		}
		return super.fhr(method, args);
	}

	/** Resolves with the URI of the library's `index.js`. */
	async getIndexJsUri() {
		return this._indexJsUri;
	}

	/** Resolves with the URI of the model JSON. */
	async getModelJsonUri() {
		return this._modelJsonUri;
	}

	/** Resolves with the URI of the model weights. */
	async getWeightsUri() {
		return this._weightsUri;
	}

	/**
	 * Publishes the detection statistics event.
	 * @param languages detected language ids, sent comma-joined
	 * @param confidences confidences of the guesses, sent comma-joined
	 * @param timeSpent elapsed time as reported by the worker
	 */
	async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise<void> {
		const stats: ILanguageDetectionStats = {
			languages: languages.join(','),
			confidences: confidences.join(','),
			timeSpent
		};
		this._telemetryService.publicLog2<ILanguageDetectionStats, LanguageDetectionStatsClassification>(LanguageDetectionStatsId, stats);
	}

	/** Syncs the resource to the worker, then asks it for the single best guess. */
	public async detectLanguage(resource: URI): Promise<string | undefined> {
		await this._withSyncedResources([resource]);
		const proxy = await this._getProxy();
		return proxy.detectLanguage(resource.toString());
	}

	/** Syncs the resource to the worker, then asks it for all plausible guesses. */
	public async detectLanguages(resource: URI): Promise<string[]> {
		await this._withSyncedResources([resource]);
		const proxy = await this._getProxy();
		return proxy.detectLanguages(resource.toString());
	}
}
|
||||
|
||||
// Register LanguageDetectionService as the singleton for the ILanguageDetectionService identifier.
registerSingleton(ILanguageDetectionService, LanguageDetectionService);
|
|
@ -1,16 +0,0 @@
|
|||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import { URI } from 'vs/base/common/uri';
|
||||
import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
|
||||
|
||||
/** Decorator created for the `'ILanguageDetectionService'` service id. */
export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService');

/**
 * Language detection contract that accepts either raw content or a resource.
 */
export interface ILanguageDetectionService {
	readonly _serviceBrand: undefined;

	/**
	 * @param contentOrResource Text content, or the URI of a resource, to inspect.
	 * @returns a single language, or `undefined` (presumably when none could be determined — confirm with the implementation).
	 */
	detectLanguage(contentOrResource: string | URI): Promise<string | undefined>;

	/**
	 * @param contentOrResource Text content, or the URI of a resource, to inspect.
	 * @returns a list of languages.
	 */
	detectLanguages(contentOrResource: string | URI): Promise<string[]>;
}
|
|
@ -0,0 +1,63 @@
|
|||
/*---------------------------------------------------------------------------------------------
|
||||
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import { URI } from 'vs/base/common/uri';
|
||||
import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
|
||||
|
||||
/** Decorator created for the `'ILanguageDetectionService'` service id. */
export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService');

/**
 * Language detection contract that works on resources.
 */
export interface ILanguageDetectionService {
	readonly _serviceBrand: undefined;

	/**
	 * Tells whether language detection is currently enabled for a language mode.
	 *
	 * @param modeId The id of the language mode to check.
	 * @returns `true` when language detection is on for that mode, `false` otherwise.
	 */
	isEnabledForMode(modeId: string): boolean;

	/**
	 * Detects the language mode of a resource.
	 *
	 * @param resource The resource whose language should be detected.
	 * @returns the language mode of the resource, or `undefined` when the model is not confident enough.
	 */
	detectLanguage(resource: URI): Promise<string | undefined>;

	/**
	 * Detects every candidate language mode of a resource.
	 *
	 * @param resource The resource whose languages should be detected.
	 * @returns all possible language modes detected in the resource.
	 */
	detectLanguages(resource: URI): Promise<string[]>;
}
|
||||
|
||||
//#region Telemetry events
|
||||
|
||||
/** Telemetry event id for the "likely wrong" automatic language detection event. */
export const AutomaticLanguageDetectionLikelyWrongId = 'automaticlanguagedetection.likelywrong';

/**
 * Payload of the `AutomaticLanguageDetectionLikelyWrongId` event.
 * NOTE(review): field meanings are inferred from their names only — confirm at the call site.
 */
export interface IAutomaticLanguageDetectionLikelyWrongData {
	choseOtherGuessedLanguage: boolean;
	currentLanguageId: string;
	nextLanguageId: string;
}

/** Telemetry classification for each field of `IAutomaticLanguageDetectionLikelyWrongData`. */
export type AutomaticLanguageDetectionLikelyWrongClassification = {
	choseOtherGuessedLanguage: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
	currentLanguageId: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
	nextLanguageId: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
};
|
||||
|
||||
/** Telemetry event id for language detection statistics. */
export const LanguageDetectionStatsId = 'automaticlanguagedetection.stats';

/**
 * Payload of the `LanguageDetectionStatsId` event.
 */
export interface ILanguageDetectionStats {
	/** Languages as a single string (the worker client sends them comma-joined). */
	languages: string;
	/** Confidences as a single string (the worker client sends them comma-joined). */
	confidences: string;
	/** Time spent on detection; unit not visible here — TODO confirm. */
	timeSpent: number;
}

/** Telemetry classification for each field of `ILanguageDetectionStats`. */
export type LanguageDetectionStatsClassification = {
	languages: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
	confidences: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
	timeSpent: { classification: 'SystemMetaData', purpose: 'FeatureInsight' };
};
|
||||
|
||||
//#endregion
|
|
@ -26,6 +26,9 @@ import { CancellationToken } from 'vs/base/common/cancellation';
|
|||
import { getCharContainingOffset } from 'vs/base/common/strings';
|
||||
import { UTF8 } from 'vs/workbench/services/textfile/common/encoding';
|
||||
import { bufferToStream, VSBuffer, VSBufferReadableStream } from 'vs/base/common/buffer';
|
||||
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
|
||||
import { PLAINTEXT_MODE_ID } from 'vs/editor/common/modes/modesRegistry';
|
||||
import { RunOnceScheduler } from 'vs/base/common/async';
|
||||
|
||||
export interface IUntitledTextEditorModel extends ITextEditorModel, IModeSupport, IEncodingSupport, IWorkingCopy {
|
||||
|
||||
|
@ -93,6 +96,8 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
|
|||
|
||||
readonly capabilities = WorkingCopyCapabilities.Untitled;
|
||||
|
||||
// Scheduler that runs `autoDetectLanguage` with a delay of 600 (presumably milliseconds — confirm
// against RunOnceScheduler), so detection is debounced instead of running on every content change.
// Registered via `_register` so it is disposed together with this model.
private readonly _autoDetectLanguageScheduler = this._register(new RunOnceScheduler(() => this.autoDetectLanguage(), 600));
|
||||
|
||||
//#region Name
|
||||
|
||||
private configuredLabelFormat: 'content' | 'name' = 'content';
|
||||
|
@ -126,7 +131,8 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
|
|||
@IWorkingCopyService private readonly workingCopyService: IWorkingCopyService,
|
||||
@ITextFileService private readonly textFileService: ITextFileService,
|
||||
@ILabelService private readonly labelService: ILabelService,
|
||||
@IEditorService private readonly editorService: IEditorService
|
||||
@IEditorService private readonly editorService: IEditorService,
|
||||
@ILanguageDetectionService private readonly languageDetectionService: ILanguageDetectionService
|
||||
) {
|
||||
super(modelService, modeService);
|
||||
|
||||
|
@ -134,7 +140,7 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
|
|||
this._register(this.workingCopyService.registerWorkingCopy(this));
|
||||
|
||||
if (preferredMode) {
|
||||
this.setMode(preferredMode);
|
||||
this.setModeInternal(preferredMode);
|
||||
}
|
||||
|
||||
// Fetch config
|
||||
|
@ -178,11 +184,14 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
|
|||
private _hasModeSetExplicitly: boolean = false;
|
||||
get hasModeSetExplicitly(): boolean { return this._hasModeSetExplicitly; }
|
||||
|
||||
override setMode(mode: string, setExplicitly = true): void {
|
||||
|
||||
override setMode(mode: string): void {
|
||||
// Remember that an explicit mode was set
|
||||
this._hasModeSetExplicitly = setExplicitly;
|
||||
this._hasModeSetExplicitly = true;
|
||||
|
||||
this.setModeInternal(mode);
|
||||
}
|
||||
|
||||
private setModeInternal(mode: string): void {
|
||||
let actualMode: string | undefined = undefined;
|
||||
if (mode === '${activeEditorLanguage}') {
|
||||
// support the special '${activeEditorLanguage}' mode by
|
||||
|
@ -368,6 +377,24 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt
|
|||
|
||||
// Emit as general content change event
|
||||
this._onDidChangeContent.fire();
|
||||
|
||||
// Try to detect language from content (debounced by some time to reduce pressure).
|
||||
this._autoDetectLanguageScheduler.schedule();
|
||||
}
|
||||
|
||||
/**
 * Detects the language of this model's resource and applies it as the mode.
 *
 * Nothing happens when a mode was set explicitly, when detection is not enabled
 * for the current mode (`PLAINTEXT_MODE_ID` is used if no mode is set), or when
 * the detection service returns no language.
 */
private async autoDetectLanguage(): Promise<void> {
	if (this.hasModeSetExplicitly || !this.languageDetectionService.isEnabledForMode(this.getMode() ?? PLAINTEXT_MODE_ID)) {
		return;
	}

	const lang = await this.languageDetectionService.detectLanguage(this.resource);
	if (!lang) {
		return;
	}

	// Detection is asynchronous: while it was pending the model may have been
	// disposed, or a mode may have been set explicitly. A detected language must
	// never overwrite an explicit choice, so both conditions are checked again.
	if (this.isDisposed() || this.hasModeSetExplicitly) {
		return;
	}

	this.setModeInternal(lang);
}
|
||||
|
||||
private updateNameFromFirstLine(textEditorModel: ITextModel): void {
|
||||
|
|
|
@ -279,7 +279,6 @@ suite('Untitled text editors', () => {
|
|||
const service = accessor.untitledTextEditorService;
|
||||
const input = instantiationService.createInstance(UntitledTextEditorInput, service.create({ mode }));
|
||||
|
||||
assert.ok(input.model.hasModeSetExplicitly);
|
||||
assert.strictEqual(input.getMode(), mode);
|
||||
|
||||
const model = await input.resolve();
|
||||
|
|
|
@ -96,7 +96,7 @@ import 'vs/workbench/services/authentication/browser/authenticationService';
|
|||
import 'vs/workbench/services/hover/browser/hoverService';
|
||||
import 'vs/workbench/services/experiment/common/experimentService';
|
||||
import 'vs/workbench/services/outline/browser/outlineService';
|
||||
import 'vs/workbench/services/languageDetection/browser/languageDetectionService';
|
||||
import 'vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl';
|
||||
|
||||
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
|
||||
import { ExtensionGalleryService } from 'vs/platform/extensionManagement/common/extensionGalleryService';
|
||||
|
|
Loading…
Reference in a new issue