From c69945154d0d8b410003f1609ec0f6b556c95fbd Mon Sep 17 00:00:00 2001 From: Tyler Leonhardt Date: Thu, 22 Jul 2021 14:01:39 -0700 Subject: [PATCH] improve heuristics around language detection and show detected languages in language picker --- package.json | 2 +- remote/package.json | 2 +- remote/web/package.json | 2 +- remote/web/yarn.lock | 8 +- remote/yarn.lock | 8 +- .../browser/parts/editor/editorStatus.ts | 30 +++++- .../browser/languageDetectionService.ts | 95 +++++++++++-------- .../common/languageDetection.ts | 4 +- yarn.lock | 8 +- 9 files changed, 102 insertions(+), 57 deletions(-) diff --git a/package.json b/package.json index bb4785b5921..f63a89570db 100644 --- a/package.json +++ b/package.json @@ -58,7 +58,7 @@ "extensions-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js extensions-ci" }, "dependencies": { - "@vscode/vscode-languagedetection": "1.0.12", + "@vscode/vscode-languagedetection": "1.0.13", "applicationinsights": "1.0.8", "chokidar": "3.5.1", "eslint-plugin-header": "3.1.1", diff --git a/remote/package.json b/remote/package.json index fb334085459..fa0b4cb9a88 100644 --- a/remote/package.json +++ b/remote/package.json @@ -3,7 +3,7 @@ "version": "0.0.0", "private": true, "dependencies": { - "@vscode/vscode-languagedetection": "1.0.12", + "@vscode/vscode-languagedetection": "1.0.13", "applicationinsights": "1.0.8", "chokidar": "3.5.1", "cookie": "^0.4.0", diff --git a/remote/web/package.json b/remote/web/package.json index 28be4df40b6..fad73b5de4a 100644 --- a/remote/web/package.json +++ b/remote/web/package.json @@ -3,7 +3,7 @@ "version": "0.0.0", "private": true, "dependencies": { - "@vscode/vscode-languagedetection": "1.0.12", + "@vscode/vscode-languagedetection": "1.0.13", "iconv-lite-umd": "0.6.8", "jschardet": "3.0.0", "tas-client-umd": "0.1.4", diff --git a/remote/web/yarn.lock b/remote/web/yarn.lock index d3f17fccae4..b50090fe752 100644 --- a/remote/web/yarn.lock +++ b/remote/web/yarn.lock @@ -2,10 +2,10 @@ # yarn lockfile v1 -"@vscode/vscode-languagedetection@1.0.12": - version "1.0.12" - resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa" - integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg== +"@vscode/vscode-languagedetection@1.0.13": + version "1.0.13" + resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.13.tgz#1af4c6473d3f971121c4fbc7f14292779ef4588f" + integrity sha512-mIWgBE79ECB3raUtXHjiamDIPOESq/2nYogldOrmwpIfSssXeA3LOe0PjjNW7vuY7kisD/AvEviNlpjWr+mVBg== iconv-lite-umd@0.6.8: version "0.6.8" diff --git a/remote/yarn.lock b/remote/yarn.lock index 5a0ed3a4eba..8af39ad90b8 100644 --- a/remote/yarn.lock +++ b/remote/yarn.lock @@ -7,10 +7,10 @@ resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82" integrity sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw== -"@vscode/vscode-languagedetection@1.0.12": - version "1.0.12" - resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa" - integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg== +"@vscode/vscode-languagedetection@1.0.13": + version "1.0.13" + resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.13.tgz#1af4c6473d3f971121c4fbc7f14292779ef4588f" + integrity sha512-mIWgBE79ECB3raUtXHjiamDIPOESq/2nYogldOrmwpIfSssXeA3LOe0PjjNW7vuY7kisD/AvEviNlpjWr+mVBg== agent-base@4: version "4.2.0" diff --git a/src/vs/workbench/browser/parts/editor/editorStatus.ts b/src/vs/workbench/browser/parts/editor/editorStatus.ts index 17f309b24cf..03153d30b2a 100644 --- a/src/vs/workbench/browser/parts/editor/editorStatus.ts +++ b/src/vs/workbench/browser/parts/editor/editorStatus.ts @@ -56,6 +56,7 @@ import { ITelemetryData, ITelemetryService } from 'vs/platform/telemetry/common/ import { SideBySideEditorInput } from 'vs/workbench/common/editor/sideBySideEditorInput'; import { ILanguageStatus, ILanguageStatusService } from 'vs/editor/common/services/languageStatusService'; import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService'; +import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection'; class SideBySideEditorEncodingSupport implements IEncodingSupport { constructor(private primary: IEncodingSupport, private secondary: IEncodingSupport) { } @@ -1141,7 +1142,8 @@ export class ChangeModeAction extends Action { @IPreferencesService private readonly preferencesService: IPreferencesService, @IInstantiationService private readonly instantiationService: IInstantiationService, @ITextFileService private readonly textFileService: ITextFileService, - @ITelemetryService private readonly telemetryService: ITelemetryService + @ITelemetryService private readonly telemetryService: ITelemetryService, + @ILanguageDetectionService private readonly languageDetectionService: ILanguageDetectionService, ) { super(actionId, actionLabel); } @@ -1189,9 +1191,7 @@ export class ChangeModeAction extends Action { }; }); - if (hasLanguageSupport) { - picks.unshift({ type: 'separator', label: localize('languagesPicks', "languages (identifier)") }); - } + picks.unshift({ type: 'separator', label: localize('languagesPicks', "languages (identifier)") }); // Offer action to configure via settings let configureModeAssociations: IQuickPickItem | undefined; @@ -1218,6 +1218,28 @@ export class ChangeModeAction extends Action { if (hasLanguageSupport) { picks.unshift(autoDetectMode); + } else if (resource) { + // Handle language detection + const detectedLanguages = await this.languageDetectionService.detectLanguages(resource); + if (detectedLanguages) { + for (const modeId of detectedLanguages.reverse()) { + const lang = this.modeService.getLanguageName(modeId) || 'unknown'; + let description: string; + if (currentLanguageId === lang) { + description = localize('languageDescriptionCurrent', "({0}) - Current Language", modeId); + } else { + description = localize('languageDescriptionConfigured', "({0})", modeId); + } + + picks.unshift({ + label: lang, + iconClasses: getIconClassesForModeId(modeId), + description + }); + } + + picks.unshift({ type: 'separator', label: localize('detectedLanguagesPicks', "detected languages (identifier)") }); + } } const pick = await this.quickInputService.pick(picks, { placeHolder: localize('pickLanguage', "Select Language Mode"), matchOnDescription: true }); diff --git a/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts b/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts index 2d66454d071..ef78bf4f445 100644 --- a/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts +++ b/src/vs/workbench/services/languageDetection/browser/languageDetectionService.ts @@ -7,7 +7,7 @@ import { Disposable } from 'vs/base/common/lifecycle'; import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection'; import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService'; import { FileAccess } from 'vs/base/common/network'; -import type { ModelOperations } from '@vscode/vscode-languagedetection'; +import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection'; import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService'; import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; import { IModeService } from 'vs/editor/common/services/modeService'; @@ -20,7 +20,7 @@ import { Extensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle'; export class LanguageDetectionService extends Disposable implements ILanguageDetectionService { - private static readonly expectedConfidence = 0.6; + private static readonly expectedRelativeConfidence = 0.2; private _loadFailed = false; private _modelOperations: ModelOperations | undefined; @@ -80,7 +80,30 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet return this._register(this._modelOperations); } - async detectLanguage(content: string): Promise { + async detectLanguage(contentOrResource: string | URI): Promise { + let content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource; + + if (content) { + for await (const language of this.detectLanguagesImpl(content)) { + return language; + } + } + return undefined; + } + + async detectLanguages(contentOrResource: string | URI): Promise { + let content: string | undefined = URI.isUri(contentOrResource) ? this._untitledTextEditorService.getValue(contentOrResource) : contentOrResource; + + const languages: string[] = []; + if (content) { + for await (const language of this.detectLanguagesImpl(content)) { + languages.push(language); + } + } + return languages; + } + + private async * detectLanguagesImpl(content: string) { if (this._loadFailed) { return; } @@ -98,43 +121,41 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet return; } - let { languageId, confidence } = modelResults[0]; - - // TODO: this is the place where we can improve the results of the model with know hueristics (popular languages, etc). - - // For ts/js and c/cpp we "add" the confidence of the other language to ensure better results - switch (languageId) { - case 'ts': - if (modelResults[1].languageId === 'js') { - confidence += modelResults[1].confidence; - } - break; - case 'js': - if (modelResults[1].languageId === 'ts') { - confidence += modelResults[1].confidence; - } - break; - case 'c': - if (modelResults[1].languageId === 'cpp') { - confidence += modelResults[1].confidence; - } - break; - case 'cpp': - if (modelResults[1].languageId === 'c') { - confidence += modelResults[1].confidence; - } - break; - default: - break; - } - - if (confidence < LanguageDetectionService.expectedConfidence) { + if (modelResults[0].confidence < LanguageDetectionService.expectedRelativeConfidence) { return; } - // TODO: see if there's a better way to do this. - const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${languageId}`)); - return vscodeLanguageId ?? undefined; + const possibleLanguages: ModelResult[] = [modelResults[0]]; + + for (let current of modelResults) { + + if (current === modelResults[0]) { + continue; + } + + const currentHighest = possibleLanguages[possibleLanguages.length - 1]; + + if (currentHighest.confidence - current.confidence >= LanguageDetectionService.expectedRelativeConfidence) { + while (possibleLanguages.length) { + // TODO: see if there's a better way to do this. + const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${possibleLanguages.shift()!.languageId}`)); + if (vscodeLanguageId) { + yield vscodeLanguageId; + } + } + if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) { + possibleLanguages.push(current); + continue; + } + return; + } else { + if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) { + possibleLanguages.push(current); + continue; + } + return; + } + } } } diff --git a/src/vs/workbench/services/languageDetection/common/languageDetection.ts b/src/vs/workbench/services/languageDetection/common/languageDetection.ts index 9c88bbe8fc6..05d612e5d33 100644 --- a/src/vs/workbench/services/languageDetection/common/languageDetection.ts +++ b/src/vs/workbench/services/languageDetection/common/languageDetection.ts @@ -3,6 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import { URI } from 'vs/base/common/uri'; import { createDecorator } from 'vs/platform/instantiation/common/instantiation'; export const ILanguageDetectionService = createDecorator('ILanguageDetectionService'); @@ -10,5 +11,6 @@ export const ILanguageDetectionService = createDecorator; + detectLanguage(contentOrResource: string | URI): Promise; + detectLanguages(contentOrResource: string | URI): Promise; } diff --git a/yarn.lock b/yarn.lock index 2835dc958e2..52b0b47e547 100644 --- a/yarn.lock +++ b/yarn.lock @@ -765,10 +765,10 @@ resolved "https://registry.yarnpkg.com/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz#aa58042711d6e3275dd37dc597e5d31e8c290a44" integrity sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q== -"@vscode/vscode-languagedetection@1.0.12": - version "1.0.12" - resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa" - integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg== +"@vscode/vscode-languagedetection@1.0.13": + version "1.0.13" + resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.13.tgz#1af4c6473d3f971121c4fbc7f14292779ef4588f" + integrity sha512-mIWgBE79ECB3raUtXHjiamDIPOESq/2nYogldOrmwpIfSssXeA3LOe0PjjNW7vuY7kisD/AvEviNlpjWr+mVBg== "@webassemblyjs/ast@1.11.0": version "1.11.0"