improve heuristics around language detection and show detected languages in language picker

This commit is contained in:
Tyler Leonhardt 2021-07-22 14:01:39 -07:00
parent ff922dc231
commit c69945154d
No known key found for this signature in database
GPG key ID: 1BC2B6244363E77E
9 changed files with 102 additions and 57 deletions

View file

@ -58,7 +58,7 @@
"extensions-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js extensions-ci"
},
"dependencies": {
"@vscode/vscode-languagedetection": "1.0.12",
"@vscode/vscode-languagedetection": "1.0.13",
"applicationinsights": "1.0.8",
"chokidar": "3.5.1",
"eslint-plugin-header": "3.1.1",

View file

@ -3,7 +3,7 @@
"version": "0.0.0",
"private": true,
"dependencies": {
"@vscode/vscode-languagedetection": "1.0.12",
"@vscode/vscode-languagedetection": "1.0.13",
"applicationinsights": "1.0.8",
"chokidar": "3.5.1",
"cookie": "^0.4.0",

View file

@ -3,7 +3,7 @@
"version": "0.0.0",
"private": true,
"dependencies": {
"@vscode/vscode-languagedetection": "1.0.12",
"@vscode/vscode-languagedetection": "1.0.13",
"iconv-lite-umd": "0.6.8",
"jschardet": "3.0.0",
"tas-client-umd": "0.1.4",

View file

@ -2,10 +2,10 @@
# yarn lockfile v1
"@vscode/vscode-languagedetection@1.0.12":
version "1.0.12"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa"
integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg==
"@vscode/vscode-languagedetection@1.0.13":
version "1.0.13"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.13.tgz#1af4c6473d3f971121c4fbc7f14292779ef4588f"
integrity sha512-mIWgBE79ECB3raUtXHjiamDIPOESq/2nYogldOrmwpIfSssXeA3LOe0PjjNW7vuY7kisD/AvEviNlpjWr+mVBg==
iconv-lite-umd@0.6.8:
version "0.6.8"

View file

@ -7,10 +7,10 @@
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82"
integrity sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==
"@vscode/vscode-languagedetection@1.0.12":
version "1.0.12"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa"
integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg==
"@vscode/vscode-languagedetection@1.0.13":
version "1.0.13"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.13.tgz#1af4c6473d3f971121c4fbc7f14292779ef4588f"
integrity sha512-mIWgBE79ECB3raUtXHjiamDIPOESq/2nYogldOrmwpIfSssXeA3LOe0PjjNW7vuY7kisD/AvEviNlpjWr+mVBg==
agent-base@4:
version "4.2.0"

View file

@ -56,6 +56,7 @@ import { ITelemetryData, ITelemetryService } from 'vs/platform/telemetry/common/
import { SideBySideEditorInput } from 'vs/workbench/common/editor/sideBySideEditorInput';
import { ILanguageStatus, ILanguageStatusService } from 'vs/editor/common/services/languageStatusService';
import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
class SideBySideEditorEncodingSupport implements IEncodingSupport {
constructor(private primary: IEncodingSupport, private secondary: IEncodingSupport) { }
@ -1141,7 +1142,8 @@ export class ChangeModeAction extends Action {
@IPreferencesService private readonly preferencesService: IPreferencesService,
@IInstantiationService private readonly instantiationService: IInstantiationService,
@ITextFileService private readonly textFileService: ITextFileService,
@ITelemetryService private readonly telemetryService: ITelemetryService
@ITelemetryService private readonly telemetryService: ITelemetryService,
@ILanguageDetectionService private readonly languageDetectionService: ILanguageDetectionService,
) {
super(actionId, actionLabel);
}
@ -1189,9 +1191,7 @@ export class ChangeModeAction extends Action {
};
});
if (hasLanguageSupport) {
picks.unshift({ type: 'separator', label: localize('languagesPicks', "languages (identifier)") });
}
picks.unshift({ type: 'separator', label: localize('languagesPicks', "languages (identifier)") });
// Offer action to configure via settings
let configureModeAssociations: IQuickPickItem | undefined;
@ -1218,6 +1218,28 @@ export class ChangeModeAction extends Action {
if (hasLanguageSupport) {
picks.unshift(autoDetectMode);
} else if (resource) {
// Handle language detection
const detectedLanguages = await this.languageDetectionService.detectLanguages(resource);
if (detectedLanguages) {
for (const modeId of detectedLanguages.reverse()) {
const lang = this.modeService.getLanguageName(modeId) || 'unknown';
let description: string;
if (currentLanguageId === lang) {
description = localize('languageDescriptionCurrent', "({0}) - Current Language", modeId);
} else {
description = localize('languageDescriptionConfigured', "({0})", modeId);
}
picks.unshift({
label: lang,
iconClasses: getIconClassesForModeId(modeId),
description
});
}
picks.unshift({ type: 'separator', label: localize('detectedLanguagesPicks', "detected languages (identifier)") });
}
}
const pick = await this.quickInputService.pick(picks, { placeHolder: localize('pickLanguage', "Select Language Mode"), matchOnDescription: true });

View file

@ -7,7 +7,7 @@ import { Disposable } from 'vs/base/common/lifecycle';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService';
import { FileAccess } from 'vs/base/common/network';
import type { ModelOperations } from '@vscode/vscode-languagedetection';
import type { ModelOperations, ModelResult } from '@vscode/vscode-languagedetection';
import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { IModeService } from 'vs/editor/common/services/modeService';
@ -20,7 +20,7 @@ import { Extensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common
import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle';
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
private static readonly expectedConfidence = 0.6;
private static readonly expectedRelativeConfidence = 0.2;
private _loadFailed = false;
private _modelOperations: ModelOperations | undefined;
@ -80,7 +80,30 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
return this._register(this._modelOperations);
}
async detectLanguage(content: string): Promise<string | undefined> {
/**
 * Detects a single language for the given input.
 *
 * @param contentOrResource Either the text to analyse, or a URI whose text is
 * looked up through the untitled text editor service.
 * @returns The first language yielded by `detectLanguagesImpl`, or `undefined`
 * when there is no (non-empty) text or nothing is yielded.
 */
async detectLanguage(contentOrResource: string | URI): Promise<string | undefined> {
	const text: string | undefined = URI.isUri(contentOrResource)
		? this._untitledTextEditorService.getValue(contentOrResource)
		: contentOrResource;

	// Nothing to analyse (missing or empty text).
	if (!text) {
		return undefined;
	}

	// Only the first yielded candidate is of interest, so leave the loop right away.
	for await (const candidate of this.detectLanguagesImpl(text)) {
		return candidate;
	}

	return undefined;
}
/**
 * Detects all candidate languages for the given input.
 *
 * @param contentOrResource Either the text to analyse, or a URI whose text is
 * looked up through the untitled text editor service.
 * @returns Every language yielded by `detectLanguagesImpl`, in the order they
 * were yielded; an empty array when there is no (non-empty) text.
 */
async detectLanguages(contentOrResource: string | URI): Promise<string[]> {
	const text: string | undefined = URI.isUri(contentOrResource)
		? this._untitledTextEditorService.getValue(contentOrResource)
		: contentOrResource;

	const detected: string[] = [];

	// Nothing to analyse (missing or empty text).
	if (!text) {
		return detected;
	}

	for await (const candidate of this.detectLanguagesImpl(text)) {
		detected.push(candidate);
	}

	return detected;
}
private async * detectLanguagesImpl(content: string) {
if (this._loadFailed) {
return;
}
@ -98,43 +121,41 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
return;
}
let { languageId, confidence } = modelResults[0];
// TODO: this is the place where we can improve the results of the model with known heuristics (popular languages, etc).
// For ts/js and c/cpp we "add" the confidence of the other language to ensure better results
switch (languageId) {
case 'ts':
if (modelResults[1].languageId === 'js') {
confidence += modelResults[1].confidence;
}
break;
case 'js':
if (modelResults[1].languageId === 'ts') {
confidence += modelResults[1].confidence;
}
break;
case 'c':
if (modelResults[1].languageId === 'cpp') {
confidence += modelResults[1].confidence;
}
break;
case 'cpp':
if (modelResults[1].languageId === 'c') {
confidence += modelResults[1].confidence;
}
break;
default:
break;
}
if (confidence < LanguageDetectionService.expectedConfidence) {
if (modelResults[0].confidence < LanguageDetectionService.expectedRelativeConfidence) {
return;
}
// TODO: see if there's a better way to do this.
const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${languageId}`));
return vscodeLanguageId ?? undefined;
const possibleLanguages: ModelResult[] = [modelResults[0]];
for (let current of modelResults) {
if (current === modelResults[0]) {
continue;
}
const currentHighest = possibleLanguages[possibleLanguages.length - 1];
if (currentHighest.confidence - current.confidence >= LanguageDetectionService.expectedRelativeConfidence) {
while (possibleLanguages.length) {
// TODO: see if there's a better way to do this.
const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${possibleLanguages.shift()!.languageId}`));
if (vscodeLanguageId) {
yield vscodeLanguageId;
}
}
if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) {
possibleLanguages.push(current);
continue;
}
return;
} else {
if (current.confidence > LanguageDetectionService.expectedRelativeConfidence) {
possibleLanguages.push(current);
continue;
}
return;
}
}
}
}

View file

@ -3,6 +3,7 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { URI } from 'vs/base/common/uri';
import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService');
@ -10,5 +11,6 @@ export const ILanguageDetectionService = createDecorator<ILanguageDetectionServi
/**
 * Service contract for detecting the language of a piece of text.
 */
export interface ILanguageDetectionService {
// Brand member used together with the `createDecorator` service identifier of the same name.
readonly _serviceBrand: undefined;
/**
 * Detects a single language for the given text.
 * @param content The text to analyse.
 * @returns A promise resolving to the detected language, or `undefined` if none was detected.
 */
detectLanguage(content: string): Promise<string | undefined>;
/**
 * Detects a single language for the given text or resource.
 * @param contentOrResource The text to analyse, or a URI identifying where the text comes from
 * (NOTE(review): how a URI is resolved to text is up to the implementation — confirm there).
 * @returns A promise resolving to the detected language, or `undefined` if none was detected.
 */
detectLanguage(contentOrResource: string | URI): Promise<string | undefined>;
/**
 * Detects the candidate languages for the given text or resource.
 * @param contentOrResource The text to analyse, or a URI identifying where the text comes from.
 * @returns A promise resolving to an array of detected languages.
 */
detectLanguages(contentOrResource: string | URI): Promise<string[]>;
}

View file

@ -765,10 +765,10 @@
resolved "https://registry.yarnpkg.com/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz#aa58042711d6e3275dd37dc597e5d31e8c290a44"
integrity sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q==
"@vscode/vscode-languagedetection@1.0.12":
version "1.0.12"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa"
integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg==
"@vscode/vscode-languagedetection@1.0.13":
version "1.0.13"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.13.tgz#1af4c6473d3f971121c4fbc7f14292779ef4588f"
integrity sha512-mIWgBE79ECB3raUtXHjiamDIPOESq/2nYogldOrmwpIfSssXeA3LOe0PjjNW7vuY7kisD/AvEviNlpjWr+mVBg==
"@webassemblyjs/ast@1.11.0":
version "1.11.0"