diff --git a/build/gulpfile.vscode.js b/build/gulpfile.vscode.js index ad0e4717a66..0400dfa2426 100644 --- a/build/gulpfile.vscode.js +++ b/build/gulpfile.vscode.js @@ -39,6 +39,7 @@ const vscodeEntryPoints = _.flatten([ buildfile.base, buildfile.workerExtensionHost, buildfile.workerNotebook, + buildfile.workerLanguageDetection, buildfile.workbenchDesktop, buildfile.code ]); @@ -233,9 +234,6 @@ function packageTask(platform, arch, sourceFolderName, destinationFolderName, op '**/node-pty/lib/worker/conoutSocketWorker.js', '**/node-pty/lib/shared/conout.js', '**/*.wasm', - // For language detection - '**/model.json', - '**/group1-shard1of1.bin' ], 'node_modules.asar')); let all = es.merge( diff --git a/extensions/vscode-api-tests/src/singlefolder-tests/untitled.languagedetection.test.ts b/extensions/vscode-api-tests/src/singlefolder-tests/untitled.languagedetection.test.ts new file mode 100644 index 00000000000..9c1e4b3cd29 --- /dev/null +++ b/extensions/vscode-api-tests/src/singlefolder-tests/untitled.languagedetection.test.ts @@ -0,0 +1,65 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import * as assert from 'assert'; +import * as vscode from 'vscode'; +import { asPromise, assertNoRpc, closeAllEditors } from '../utils'; + +suite('vscode - untitled automatic language detection', () => { + + teardown(async function () { + assertNoRpc(); + await closeAllEditors(); + }); + + test('test automatic language detection works', async () => { + const doc = await vscode.workspace.openTextDocument(); + const editor = await vscode.window.showTextDocument(doc); + + assert.strictEqual(editor.document.languageId, 'plaintext'); + + const settingResult = vscode.workspace.getConfiguration().get('workbench.editor.untitled.experimentalLanguageDetection'); + assert.ok(settingResult); + + const result = await editor.edit(editBuilder => { + editBuilder.insert(new vscode.Position(0, 0), `{ + "extends": "./tsconfig.base.json", + "compilerOptions": { + "removeComments": false, + "preserveConstEnums": true, + "sourceMap": false, + "outDir": "../out/vs", + "target": "es2020", + "types": [ + "keytar", + "mocha", + "semver", + "sinon", + "winreg", + "trusted-types", + "wicg-file-system-access" + ], + "plugins": [ + { + "name": "tsec", + "exemptionConfig": "./tsec.exemptions.json" + } + ] + }, + "include": [ + "./typings", + "./vs" + ] +}`); + }); + + assert.ok(result); + + // Changing the language triggers a file to be closed and opened again so wait for that event to happen. 
+ const newDoc = await asPromise(vscode.workspace.onDidOpenTextDocument, 5000); + + assert.strictEqual(newDoc.languageId, 'json'); + }); +}); diff --git a/extensions/vscode-api-tests/testWorkspace/.vscode/settings.json b/extensions/vscode-api-tests/testWorkspace/.vscode/settings.json index 31b22c56533..a62f521614a 100644 --- a/extensions/vscode-api-tests/testWorkspace/.vscode/settings.json +++ b/extensions/vscode-api-tests/testWorkspace/.vscode/settings.json @@ -5,5 +5,6 @@ "files.exclude": { "**/files-exclude/**": true }, - "editor.minimap.enabled": false // see https://github.com/microsoft/vscode/issues/115747 + "editor.minimap.enabled": false, // see https://github.com/microsoft/vscode/issues/115747 + "workbench.editor.untitled.experimentalLanguageDetection": true } diff --git a/package.json b/package.json index 1f482eb957f..d2d7c749c79 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "code-oss-dev", "version": "1.60.0", - "distro": "60d5c7616e3d0825ded1a695b8a67199c6d577f0", + "distro": "61e4c454dd3e0928fdf44dce555922b34ea8e507", "author": { "name": "Microsoft Corporation" }, diff --git a/src/buildfile.js b/src/buildfile.js index c0400de76c0..b0cf8c1bdc6 100644 --- a/src/buildfile.js +++ b/src/buildfile.js @@ -17,6 +17,7 @@ exports.base = [{ exports.workerExtensionHost = [entrypoint('vs/workbench/services/extensions/worker/extensionHostWorker')]; exports.workerNotebook = [entrypoint('vs/workbench/contrib/notebook/common/services/notebookSimpleWorker')]; +exports.workerLanguageDetection = [entrypoint('vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker')]; exports.workbenchDesktop = require('./vs/workbench/buildfile.desktop').collectModules(); exports.workbenchWeb = require('./vs/workbench/buildfile.web').collectModules(); diff --git a/src/vs/editor/common/services/editorSimpleWorker.ts b/src/vs/editor/common/services/editorSimpleWorker.ts index e5e93423f69..a2aad3ebc0e 100644 --- 
a/src/vs/editor/common/services/editorSimpleWorker.ts +++ b/src/vs/editor/common/services/editorSimpleWorker.ts @@ -76,7 +76,7 @@ export interface ICommonModel extends ILinkComputerTarget, IMirrorModel { * Range of a word inside a model. * @internal */ -interface IWordRange { +export interface IWordRange { /** * The index where the word starts. */ @@ -90,7 +90,7 @@ interface IWordRange { /** * @internal */ -class MirrorModel extends BaseMirrorModel implements ICommonModel { +export class MirrorModel extends BaseMirrorModel implements ICommonModel { public get uri(): URI { return this._uri; @@ -326,7 +326,7 @@ declare const require: any; export class EditorSimpleWorker implements IRequestHandler, IDisposable { _requestHandlerBrand: any; - private readonly _host: EditorWorkerHost; + protected readonly _host: EditorWorkerHost; private _models: { [uri: string]: MirrorModel; }; private readonly _foreignModuleFactory: IForeignModuleFactory | null; private _foreignModule: any; diff --git a/src/vs/editor/common/services/editorWorkerServiceImpl.ts b/src/vs/editor/common/services/editorWorkerServiceImpl.ts index 2763dd1280d..36ff35d4177 100644 --- a/src/vs/editor/common/services/editorWorkerServiceImpl.ts +++ b/src/vs/editor/common/services/editorWorkerServiceImpl.ts @@ -388,11 +388,15 @@ class SynchronousWorkerClient implements IWorkerClient } } +export interface IEditorWorkerClient { + fhr(method: string, args: any[]): Promise; +} + export class EditorWorkerHost { - private readonly _workerClient: EditorWorkerClient; + private readonly _workerClient: IEditorWorkerClient; - constructor(workerClient: EditorWorkerClient) { + constructor(workerClient: IEditorWorkerClient) { this._workerClient = workerClient; } @@ -402,12 +406,12 @@ export class EditorWorkerHost { } } -export class EditorWorkerClient extends Disposable { +export class EditorWorkerClient extends Disposable implements IEditorWorkerClient { private readonly _modelService: IModelService; private readonly 
_keepIdleModels: boolean; - private _worker: IWorkerClient | null; - private readonly _workerFactory: DefaultWorkerFactory; + protected _worker: IWorkerClient | null; + protected readonly _workerFactory: DefaultWorkerFactory; private _modelManager: EditorModelManager | null; private _disposed = false; diff --git a/src/vs/workbench/browser/parts/editor/editorStatus.ts b/src/vs/workbench/browser/parts/editor/editorStatus.ts index 0b17954bad7..0b58ae151bb 100644 --- a/src/vs/workbench/browser/parts/editor/editorStatus.ts +++ b/src/vs/workbench/browser/parts/editor/editorStatus.ts @@ -55,7 +55,7 @@ import { ThemeColor, themeColorFromId } from 'vs/platform/theme/common/themeServ import { ITelemetryData, ITelemetryService } from 'vs/platform/telemetry/common/telemetry'; import { SideBySideEditorInput } from 'vs/workbench/common/editor/sideBySideEditorInput'; import { ILanguageStatus, ILanguageStatusService } from 'vs/editor/common/services/languageStatusService'; -import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection'; +import { AutomaticLanguageDetectionLikelyWrongClassification, AutomaticLanguageDetectionLikelyWrongId, IAutomaticLanguageDetectionLikelyWrongData, ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService'; class SideBySideEditorEncodingSupport implements IEncodingSupport { constructor(private primary: IEncodingSupport, private secondary: IEncodingSupport) { } @@ -1121,6 +1121,10 @@ export class ShowLanguageExtensionsAction extends Action { } } +interface IDetectedLanguageQuickPickItem extends IQuickPickItem { + guessRank: number; +} + export class ChangeModeAction extends Action { static readonly ID = 'workbench.action.editor.changeLanguageMode'; @@ -1222,6 +1226,7 @@ export class ChangeModeAction extends Action { picks.unshift(autoDetectMode); } else if (detectedLanguages) { // Add untitled detected languages + let index = detectedLanguages.length 
- 1; for (const modeId of detectedLanguages.reverse()) { const lang = this.modeService.getLanguageName(modeId) || 'unknown'; let description: string; @@ -1231,11 +1236,13 @@ export class ChangeModeAction extends Action { description = localize('languageDescriptionConfigured', "({0})", modeId); } - picks.unshift({ + const pick: IDetectedLanguageQuickPickItem = { label: lang, iconClasses: getIconClassesForModeId(modeId), - description - }); + description, + guessRank: index--, + }; + picks.unshift(pick); } picks.unshift({ type: 'separator', label: localize('detectedLanguagesPicks', "detected languages (identifier)") }); @@ -1284,6 +1291,18 @@ export class ChangeModeAction extends Action { languageSelection = this.modeService.createByLanguageName(pick.label); } + const guessRankOfPicked: number = (pick as IDetectedLanguageQuickPickItem).guessRank ?? -1; + // If we detected languages and they didn't choose the top detected language (which should also be the active language if automatic detection is enabled) + // then the automatic language detection was likely wrong and the user is correcting it. In this case, we want telemetry. + if (detectedLanguages.length && guessRankOfPicked !== 0) { + this.telemetryService.publicLog2(AutomaticLanguageDetectionLikelyWrongId, { + // For languages that weren't guessed, the guessRankOfPicked will be -1. This detail tells us if the user chose the language that was guessed or not. + choseOtherGuessedLanguage: guessRankOfPicked !== -1, + currentLanguageId: currentLanguageId ?? 'unknown', + nextLanguageId: languageSelection?.languageIdentifier.language ?? 
'unknown' + }); + } + // Change mode if (typeof languageSelection !== 'undefined') { modeSupport.setMode(languageSelection.languageIdentifier.language); diff --git a/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts b/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts deleted file mode 100644 index 7c7543ec7f3..00000000000 --- a/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts +++ /dev/null @@ -1,173 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -import { Disposable } from 'vs/base/common/lifecycle'; -import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection'; -import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService'; -import { FileAccess } from 'vs/base/common/network'; -import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection'; -import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService'; -import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; -import { IModeService } from 'vs/editor/common/services/modeService'; -import { URI } from 'vs/base/common/uri'; -import { isWeb } from 'vs/base/common/platform'; -import { registerSingleton } from 'vs/platform/instantiation/common/extensions'; -import { Registry } from 'vs/platform/registry/common/platform'; -import { Extensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common/contributions'; -import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle'; -import { debounce } from 
'vs/base/common/decorators'; -import { IWorkingCopyService } from 'vs/workbench/services/workingCopy/common/workingCopyService'; -import { IWorkingCopy } from 'vs/workbench/services/workingCopy/common/workingCopy'; - -export class LanguageDetectionService extends Disposable implements ILanguageDetectionService { - private static readonly expectedRelativeConfidence = 0.2; - static readonly enablementSettingKey = 'workbench.editor.untitled.experimentalLanguageDetection'; - - private _loadFailed = false; - private _modelOperations: ModelOperations | undefined; - _serviceBrand: undefined; - - constructor( - @IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService, - @IModeService private readonly _modeService: IModeService, - @IConfigurationService private readonly _configurationService: IConfigurationService, - @IUntitledTextEditorService private readonly _untitledTextEditorService: IUntitledTextEditorService, - @IWorkingCopyService _workingCopyService: IWorkingCopyService) { - super(); - - this._register(_workingCopyService.onDidChangeContent(e => this.handleChangeEvent(e))); - } - - @debounce(600) - private async handleChangeEvent(e: IWorkingCopy) { - const untitledEditorModel = this._untitledTextEditorService.get(e.resource); - if (!untitledEditorModel - || !this.isEnabledForMode(untitledEditorModel.getMode()) - || untitledEditorModel.hasModeSetExplicitly) { - return; - } - - const value = this._untitledTextEditorService.getValue(e.resource); - if (!value) { return; } - const lang = await this.detectLanguage(value); - if (!lang) { return; } - untitledEditorModel.setMode(lang, false); - } - - private async getModelOperations(): Promise { - if (this._modelOperations) { - return this._modelOperations; - } - - const { ModelOperations } = await import('@vscode/vscode-languagedetection'); - this._modelOperations = new ModelOperations( - async () => { - const response = await fetch(this._environmentService.isBuilt && !isWeb - ? 
FileAccess.asBrowserUri('../../../../../../node_modules.asar.unpacked/@vscode/vscode-languagedetection/model/model.json', require).toString(true) - : FileAccess.asBrowserUri('../../../../../../node_modules/@vscode/vscode-languagedetection/model/model.json', require).toString(true)); - try { - const modelJSON = await response.json(); - return modelJSON; - } catch (e) { - const message = `Failed to parse model JSON.`; - throw new Error(message); - } - }, - async () => { - const response = await fetch(this._environmentService.isBuilt && !isWeb - ? FileAccess.asBrowserUri('../../../../../../node_modules.asar.unpacked/@vscode/vscode-languagedetection/model/group1-shard1of1.bin', require).toString(true) - : FileAccess.asBrowserUri('../../../../../../node_modules/@vscode/vscode-languagedetection/model/group1-shard1of1.bin', require).toString(true)); - const buffer = await response.arrayBuffer(); - return buffer; - } - ); - - return this._register(this._modelOperations); - } - - private isEnabledForMode(modeId: string | undefined): boolean { - return !!modeId && this._configurationService.getValue(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: modeId }); - } - - async detectLanguage(contentOrResource: string | URI): Promise { - let content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource; - - if (content) { - for await (const language of this.detectLanguagesImpl(content)) { - return language; - } - } - return undefined; - } - - async detectLanguages(contentOrResource: string | URI): Promise { - let content: string | undefined = URI.isUri(contentOrResource) ? 
this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource; - - const languages: string[] = []; - if (content) { - for await (const language of this.detectLanguagesImpl(content)) { - languages.push(language); - } - } - return languages; - } - - private async * detectLanguagesImpl(content: string) { - if (this._loadFailed) { - return; - } - - let modelOperations: ModelOperations | undefined; - try { - modelOperations = await this.getModelOperations(); - } catch (e) { - this._loadFailed = true; - return; - } - - const modelResults = await modelOperations.runModel(content); - if (!modelResults - || modelResults.length === 0 - || modelResults[0].confidence < LanguageDetectionService.expectedRelativeConfidence) { - return; - } - - const possibleLanguages: ModelResult[] = [modelResults[0]]; - - for (let current of modelResults) { - - if (current === modelResults[0]) { - continue; - } - - const currentHighest = possibleLanguages[possibleLanguages.length - 1]; - - if (currentHighest.confidence - current.confidence >= LanguageDetectionService.expectedRelativeConfidence) { - while (possibleLanguages.length) { - // TODO: see if there's a better way to do this. 
- const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${possibleLanguages.shift()!.languageId}`)); - if (vscodeLanguageId) { - yield vscodeLanguageId; - } - } - if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) { - possibleLanguages.push(current); - continue; - } - return; - } else { - if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) { - possibleLanguages.push(current); - continue; - } - return; - } - } - } -} - -Registry.as(Extensions.Workbench) - .registerWorkbenchContribution(LanguageDetectionService, LifecyclePhase.Eventually); -registerSingleton(ILanguageDetectionService, LanguageDetectionService); diff --git a/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts b/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts new file mode 100644 index 00000000000..961f8e1fce8 --- /dev/null +++ b/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts @@ -0,0 +1,139 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection'; +// import { IDisposable } from 'vs/base/common/lifecycle'; +import { StopWatch } from 'vs/base/common/stopwatch'; +// import { URI } from 'vs/base/common/uri'; +import { IRequestHandler } from 'vs/base/common/worker/simpleWorker'; +// import { IModelChangedEvent } from 'vs/editor/common/model/mirrorTextModel'; +import { EditorSimpleWorker } from 'vs/editor/common/services/editorSimpleWorker'; +import { EditorWorkerHost } from 'vs/editor/common/services/editorWorkerServiceImpl'; + +/** + * Called on the worker side + * @internal + */ +export function create(host: EditorWorkerHost): IRequestHandler { + return new LanguageDetectionSimpleWorker(host, null); +} + +/** + * @internal + */ +export class LanguageDetectionSimpleWorker extends EditorSimpleWorker { + private static readonly expectedRelativeConfidence = 0.2; + + private _modelOperations: ModelOperations | undefined; + private _loadFailed: boolean = false; + + public async detectLanguage(uri: string): Promise { + const stopWatch = new StopWatch(true); + for await (const language of this.detectLanguagesImpl(uri)) { + stopWatch.stop(); + this._host.fhr('sendTelemetryEvent', [[language.languageId], [language.confidence], stopWatch.elapsed()]); + return language.languageId; + } + return undefined; + } + + public async detectLanguages(uri: string): Promise { + const languages: string[] = []; + const confidences: number[] = []; + const stopWatch = new StopWatch(true); + for await (const language of this.detectLanguagesImpl(uri)) { + languages.push(language.languageId); + confidences.push(language.confidence); + } + stopWatch.stop(); + + this._host.fhr('sendTelemetryEvent', [languages, confidences, stopWatch.elapsed()]); + return languages; + } + + private async getModelOperations(): Promise { + if (this._modelOperations) { + return 
this._modelOperations; + } + + const uri: string = await this._host.fhr('getIndexJsUri', []); + // const uri = await this.host.getIndexJsUri(); + const { ModelOperations } = await import(uri); + this._modelOperations = new ModelOperations( + async () => { + const response = await fetch(await this._host.fhr('getModelJsonUri', [])); + try { + const modelJSON = await response.json(); + return modelJSON; + } catch (e) { + const message = `Failed to parse model JSON.`; + throw new Error(message); + } + }, + async () => { + const response = await fetch(await this._host.fhr('getWeightsUri', [])); + const buffer = await response.arrayBuffer(); + return buffer; + } + ); + + return this._modelOperations!; + } + + private async * detectLanguagesImpl(uri: string): AsyncGenerator { + if (this._loadFailed) { + return; + } + + let modelOperations: ModelOperations | undefined; + try { + modelOperations = await this.getModelOperations(); + } catch (e) { + console.log(e); + this._loadFailed = true; + return; + } + + const content = this._getModel(uri); + if (!content) { + return; + } + + const modelResults = await modelOperations.runModel(content.getValue()); + if (!modelResults + || modelResults.length === 0 + || modelResults[0].confidence < LanguageDetectionSimpleWorker.expectedRelativeConfidence) { + return; + } + + const possibleLanguages: ModelResult[] = [modelResults[0]]; + + for (let current of modelResults) { + + if (current === modelResults[0]) { + continue; + } + + const currentHighest = possibleLanguages[possibleLanguages.length - 1]; + + if (currentHighest.confidence - current.confidence >= LanguageDetectionSimpleWorker.expectedRelativeConfidence) { + while (possibleLanguages.length) { + yield possibleLanguages.shift()!; + } + if (current.confidence > LanguageDetectionSimpleWorker.expectedRelativeConfidence) { + possibleLanguages.push(current); + continue; + } + return; + } else { + if (current.confidence > LanguageDetectionSimpleWorker.expectedRelativeConfidence) { + 
possibleLanguages.push(current); + continue; + } + return; + } + } + } +} diff --git a/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts b/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts new file mode 100644 index 00000000000..57caa4911b1 --- /dev/null +++ b/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts @@ -0,0 +1,201 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { Disposable } from 'vs/base/common/lifecycle'; +import { ILanguageDetectionService, ILanguageDetectionStats, LanguageDetectionStatsClassification, LanguageDetectionStatsId } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService'; +import { FileAccess } from 'vs/base/common/network'; +import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService'; +import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; +import { IModeService } from 'vs/editor/common/services/modeService'; +import { URI } from 'vs/base/common/uri'; +import { isWeb } from 'vs/base/common/platform'; +import { registerSingleton } from 'vs/platform/instantiation/common/extensions'; +import { LanguageDetectionSimpleWorker } from 'vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker'; +import { IModelService } from 'vs/editor/common/services/modelService'; +import { SimpleWorkerClient } from 'vs/base/common/worker/simpleWorker'; +import { ITelemetryService } from 'vs/platform/telemetry/common/telemetry'; +import { EditorWorkerClient, EditorWorkerHost } from 
'vs/editor/common/services/editorWorkerServiceImpl'; + +const moduleLocation = '../../../../../../node_modules/@vscode/vscode-languagedetection'; +const moduleLocationAsar = '../../../../../../node_modules.asar/@vscode/vscode-languagedetection'; +export class LanguageDetectionService extends Disposable implements ILanguageDetectionService { + static readonly enablementSettingKey = 'workbench.editor.untitled.experimentalLanguageDetection'; + + _serviceBrand: undefined; + + private _languageDetectionWorkerClient: LanguageDetectionWorkerClient; + + constructor( + @IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService, + @IModeService private readonly _modeService: IModeService, + @IConfigurationService private readonly _configurationService: IConfigurationService, + @IModelService modelService: IModelService, + @ITelemetryService telemetryService: ITelemetryService, + ) { + super(); + + this._languageDetectionWorkerClient = new LanguageDetectionWorkerClient( + modelService, + telemetryService, + // TODO: See if it's possible to bundle vscode-languagedetection + this._environmentService.isBuilt && !isWeb + ? FileAccess.asBrowserUri(`${moduleLocationAsar}/dist/lib/index.js`, require).toString(true) + : FileAccess.asBrowserUri(`${moduleLocation}/dist/lib/index.js`, require).toString(true), + this._environmentService.isBuilt && !isWeb + ? FileAccess.asBrowserUri(`${moduleLocationAsar}/model/model.json`, require).toString(true) + : FileAccess.asBrowserUri(`${moduleLocation}/model/model.json`, require).toString(true), + this._environmentService.isBuilt && !isWeb + ? 
FileAccess.asBrowserUri(`${moduleLocationAsar}/model/group1-shard1of1.bin`, require).toString(true) + : FileAccess.asBrowserUri(`${moduleLocation}/model/group1-shard1of1.bin`, require).toString(true)); + } + + public isEnabledForMode(modeId: string): boolean { + return !!modeId && this._configurationService.getValue(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: modeId }); + } + + private getModeId(language: string | undefined): string | undefined { + if (!language) { + return undefined; + } + return this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${language}`)) ?? undefined; + } + + async detectLanguage(resource: URI): Promise { + const language = await this._languageDetectionWorkerClient.detectLanguage(resource); + if (language) { + return this.getModeId(language); + } + return undefined; + } + + async detectLanguages(resource: URI): Promise { + const languages: Array = await this._languageDetectionWorkerClient.detectLanguages(resource); + for (let i = 0; i < languages.length; i++) { + const modeId = this.getModeId(languages[i]); + languages[i] = modeId ? 
modeId : undefined; + } + + return languages.filter((l?: T): l is T => Boolean(l)); + } +} + +export interface IWorkerClient { + getProxyObject(): Promise; + dispose(): void; +} + +export class LanguageDetectionWorkerHost { + constructor( + private _indexJsUri: string, + private _modelJsonUri: string, + private _weightsUri: string, + private _telemetryService: ITelemetryService, + ) { + } + + async getIndexJsUri() { + return this._indexJsUri; + } + + async getModelJsonUri() { + return this._modelJsonUri; + } + + async getWeightsUri() { + return this._weightsUri; + } + + async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise { + type LanguageDetectionStats = { languages: string; confidences: string; timeSpent: number; }; + type LanguageDetectionStatsClassification = { + languages: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }; + confidences: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }; + timeSpent: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }; + }; + + this._telemetryService.publicLog2('automaticlanguagedetection.stats', { + languages: languages.join(','), + confidences: confidences.join(','), + timeSpent + }); + } +} + +export class LanguageDetectionWorkerClient extends EditorWorkerClient { + private worker: IWorkerClient | undefined; + + constructor( + modelService: IModelService, + private readonly _telemetryService: ITelemetryService, + private readonly _indexJsUri: string, + private readonly _modelJsonUri: string, + private readonly _weightsUri: string + ) { + super(modelService, true, 'languageDetectionWorkerService'); + } + + private _getOrCreateLanguageDetectionWorker(): IWorkerClient { + if (!this.worker) { + + this.worker = this._register(new SimpleWorkerClient( + this._workerFactory, + 'vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker', + new EditorWorkerHost(this) + )); + } + return this.worker; + } + + override async 
_getProxy(): Promise { + return await this._getOrCreateLanguageDetectionWorker().getProxyObject(); + } + + // foreign host request + public override async fhr(method: string, args: any[]): Promise { + switch (method) { + case 'getIndexJsUri': + return this.getIndexJsUri(); + case 'getModelJsonUri': + return this.getModelJsonUri(); + case 'getWeightsUri': + return this.getWeightsUri(); + case 'sendTelemetryEvent': + return this.sendTelemetryEvent(args[0], args[1], args[2]); + default: + return super.fhr(method, args); + } + } + + async getIndexJsUri() { + return this._indexJsUri; + } + + async getModelJsonUri() { + return this._modelJsonUri; + } + + async getWeightsUri() { + return this._weightsUri; + } + + async sendTelemetryEvent(languages: string[], confidences: number[], timeSpent: number): Promise { + this._telemetryService.publicLog2(LanguageDetectionStatsId, { + languages: languages.join(','), + confidences: confidences.join(','), + timeSpent + }); + } + + public async detectLanguage(resource: URI): Promise { + await this._withSyncedResources([resource]); + return (await this._getProxy()).detectLanguage(resource.toString()); + } + public async detectLanguages(resource: URI): Promise { + await this._withSyncedResources([resource]); + return (await this._getProxy()).detectLanguages(resource.toString()); + } +} + +registerSingleton(ILanguageDetectionService, LanguageDetectionService); diff --git a/src/vs/workbench/services/languageDetection/common/languageDetection.ts b/src/vs/workbench/services/languageDetection/common/languageDetection.ts deleted file mode 100644 index 05d612e5d33..00000000000 --- a/src/vs/workbench/services/languageDetection/common/languageDetection.ts +++ /dev/null @@ -1,16 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. 
- *--------------------------------------------------------------------------------------------*/ - -import { URI } from 'vs/base/common/uri'; -import { createDecorator } from 'vs/platform/instantiation/common/instantiation'; - -export const ILanguageDetectionService = createDecorator('ILanguageDetectionService'); - -export interface ILanguageDetectionService { - readonly _serviceBrand: undefined; - - detectLanguage(contentOrResource: string | URI): Promise; - detectLanguages(contentOrResource: string | URI): Promise; -} diff --git a/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts b/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts new file mode 100644 index 00000000000..ed38c41b3d0 --- /dev/null +++ b/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts @@ -0,0 +1,63 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { URI } from 'vs/base/common/uri'; +import { createDecorator } from 'vs/platform/instantiation/common/instantiation'; + +export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService'); + +export interface ILanguageDetectionService { + readonly _serviceBrand: undefined; + + /** + * @param modeId The modeId to check if language detection is currently enabled. + * @returns whether or not language detection is on for this language mode. + */ + isEnabledForMode(modeId: string): boolean; + + /** + * @param resource The resource to detect the language for. + * @returns the language mode for the given resource or undefined if the model is not confident enough.
+ */ + detectLanguage(resource: URI): Promise<string | undefined>; + + /** + * @param resource The resource to detect the language for. + * @returns all possible language modes detected in this resource. + */ + detectLanguages(resource: URI): Promise<string[]>; +} + +//#region Telemetry events + +export const AutomaticLanguageDetectionLikelyWrongId = 'automaticlanguagedetection.likelywrong'; + +export interface IAutomaticLanguageDetectionLikelyWrongData { + choseOtherGuessedLanguage: boolean; + currentLanguageId: string; + nextLanguageId: string; +} + +export type AutomaticLanguageDetectionLikelyWrongClassification = { + choseOtherGuessedLanguage: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }, + currentLanguageId: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }, + nextLanguageId: { classification: 'SystemMetaData', purpose: 'FeatureInsight' } +}; + +export const LanguageDetectionStatsId = 'automaticlanguagedetection.stats'; + +export interface ILanguageDetectionStats { + languages: string; + confidences: string; + timeSpent: number; +} + +export type LanguageDetectionStatsClassification = { + languages: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }; + confidences: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }; + timeSpent: { classification: 'SystemMetaData', purpose: 'FeatureInsight' }; +}; + +//#endregion diff --git a/src/vs/workbench/services/untitled/common/untitledTextEditorModel.ts b/src/vs/workbench/services/untitled/common/untitledTextEditorModel.ts index 70a58661bb5..eeb14ba65a4 100644 --- a/src/vs/workbench/services/untitled/common/untitledTextEditorModel.ts +++ b/src/vs/workbench/services/untitled/common/untitledTextEditorModel.ts @@ -26,6 +26,9 @@ import { CancellationToken } from 'vs/base/common/cancellation'; import { getCharContainingOffset } from 'vs/base/common/strings'; import { UTF8 } from 'vs/workbench/services/textfile/common/encoding'; import { bufferToStream, VSBuffer, VSBufferReadableStream } from
'vs/base/common/buffer'; +import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService'; +import { PLAINTEXT_MODE_ID } from 'vs/editor/common/modes/modesRegistry'; +import { RunOnceScheduler } from 'vs/base/common/async'; export interface IUntitledTextEditorModel extends ITextEditorModel, IModeSupport, IEncodingSupport, IWorkingCopy { @@ -93,6 +96,8 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt readonly capabilities = WorkingCopyCapabilities.Untitled; + private readonly _autoDetectLanguageScheduler = this._register(new RunOnceScheduler(() => this.autoDetectLanguage(), 600)); + //#region Name private configuredLabelFormat: 'content' | 'name' = 'content'; @@ -126,7 +131,8 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt @IWorkingCopyService private readonly workingCopyService: IWorkingCopyService, @ITextFileService private readonly textFileService: ITextFileService, @ILabelService private readonly labelService: ILabelService, - @IEditorService private readonly editorService: IEditorService + @IEditorService private readonly editorService: IEditorService, + @ILanguageDetectionService private readonly languageDetectionService: ILanguageDetectionService ) { super(modelService, modeService); @@ -134,7 +140,7 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt this._register(this.workingCopyService.registerWorkingCopy(this)); if (preferredMode) { - this.setMode(preferredMode); + this.setModeInternal(preferredMode); } // Fetch config @@ -178,11 +184,14 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt private _hasModeSetExplicitly: boolean = false; get hasModeSetExplicitly(): boolean { return this._hasModeSetExplicitly; } - override setMode(mode: string, setExplicitly = true): void { - + override setMode(mode: string): void { // Remember that an explicit mode was set -
this._hasModeSetExplicitly = setExplicitly; + this._hasModeSetExplicitly = true; + this.setModeInternal(mode); + } + + private setModeInternal(mode: string): void { let actualMode: string | undefined = undefined; if (mode === '${activeEditorLanguage}') { // support the special '${activeEditorLanguage}' mode by @@ -368,6 +377,24 @@ export class UntitledTextEditorModel extends BaseTextEditorModel implements IUnt // Emit as general content change event this._onDidChangeContent.fire(); + + // Try to detect language from content (debounced by some time to reduce pressure). + this._autoDetectLanguageScheduler.schedule(); + } + + private async autoDetectLanguage(): Promise<void> { + if (this.hasModeSetExplicitly || !this.languageDetectionService.isEnabledForMode(this.getMode() ?? PLAINTEXT_MODE_ID)) { + return; + } + + const lang = await this.languageDetectionService.detectLanguage(this.resource); + if (!lang) { + return; + } + + if (!this.isDisposed() && !this.hasModeSetExplicitly) { // re-check after the await: an explicit mode set while detection was pending must win + this.setModeInternal(lang); + } } private updateNameFromFirstLine(textEditorModel: ITextModel): void { diff --git a/src/vs/workbench/services/untitled/test/browser/untitledTextEditor.test.ts b/src/vs/workbench/services/untitled/test/browser/untitledTextEditor.test.ts index df321b26e20..36f299ccc07 100644 --- a/src/vs/workbench/services/untitled/test/browser/untitledTextEditor.test.ts +++ b/src/vs/workbench/services/untitled/test/browser/untitledTextEditor.test.ts @@ -279,7 +279,6 @@ suite('Untitled text editors', () => { const service = accessor.untitledTextEditorService; const input = instantiationService.createInstance(UntitledTextEditorInput, service.create({ mode })); - assert.ok(input.model.hasModeSetExplicitly); assert.strictEqual(input.getMode(), mode); const model = await input.resolve(); diff --git a/src/vs/workbench/workbench.common.main.ts b/src/vs/workbench/workbench.common.main.ts index b237a1b4a98..d738cca8fcc 100644 --- a/src/vs/workbench/workbench.common.main.ts +++ b/src/vs/workbench/workbench.common.main.ts @@ -96,7
+96,7 @@ import 'vs/workbench/services/authentication/browser/authenticationService'; import 'vs/workbench/services/hover/browser/hoverService'; import 'vs/workbench/services/experiment/common/experimentService'; import 'vs/workbench/services/outline/browser/outlineService'; -import 'vs/workbench/services/languageDetection/browser/languageDetectionService'; +import 'vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl'; import { registerSingleton } from 'vs/platform/instantiation/common/extensions'; import { ExtensionGalleryService } from 'vs/platform/extensionManagement/common/extensionGalleryService';