Clean up lexical classifier (#20123)
This commit is contained in:
parent
a551c4cd64
commit
53796eed59
|
@ -1260,10 +1260,12 @@ namespace ts {
|
|||
return result;
|
||||
}
|
||||
|
||||
export function arrayToNumericMap<T>(array: ReadonlyArray<T>, makeKey: (value: T) => number): T[] {
|
||||
const result: T[] = [];
|
||||
export function arrayToNumericMap<T>(array: ReadonlyArray<T>, makeKey: (value: T) => number): T[];
|
||||
export function arrayToNumericMap<T, V>(array: ReadonlyArray<T>, makeKey: (value: T) => number, makeValue: (value: T) => V): V[];
|
||||
export function arrayToNumericMap<T, V>(array: ReadonlyArray<T>, makeKey: (value: T) => number, makeValue?: (value: T) => V): V[] {
|
||||
const result: V[] = [];
|
||||
for (const value of array) {
|
||||
result[makeKey(value)] = value;
|
||||
result[makeKey(value)] = makeValue ? makeValue(value) : value as any as V;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -5078,16 +5078,7 @@ namespace ts {
|
|||
}
|
||||
|
||||
export function isStringTextContainingNode(node: Node) {
|
||||
switch (node.kind) {
|
||||
case SyntaxKind.StringLiteral:
|
||||
case SyntaxKind.TemplateHead:
|
||||
case SyntaxKind.TemplateMiddle:
|
||||
case SyntaxKind.TemplateTail:
|
||||
case SyntaxKind.NoSubstitutionTemplateLiteral:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return node.kind === SyntaxKind.StringLiteral || isTemplateLiteralKind(node.kind);
|
||||
}
|
||||
|
||||
// Identifiers
|
||||
|
|
|
@ -1,177 +1,50 @@
|
|||
namespace ts {
|
||||
/// Classifier
|
||||
export function createClassifier(): Classifier {
|
||||
const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);
|
||||
|
||||
/// We do not have a full parser support to know when we should parse a regex or not
|
||||
/// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
|
||||
/// we have a series of divide operator. this list allows us to be more accurate by ruling out
|
||||
/// locations where a regexp cannot exist.
|
||||
const noRegexTable: boolean[] = [];
|
||||
noRegexTable[SyntaxKind.Identifier] = true;
|
||||
noRegexTable[SyntaxKind.StringLiteral] = true;
|
||||
noRegexTable[SyntaxKind.NumericLiteral] = true;
|
||||
noRegexTable[SyntaxKind.RegularExpressionLiteral] = true;
|
||||
noRegexTable[SyntaxKind.ThisKeyword] = true;
|
||||
noRegexTable[SyntaxKind.PlusPlusToken] = true;
|
||||
noRegexTable[SyntaxKind.MinusMinusToken] = true;
|
||||
noRegexTable[SyntaxKind.CloseParenToken] = true;
|
||||
noRegexTable[SyntaxKind.CloseBracketToken] = true;
|
||||
noRegexTable[SyntaxKind.CloseBraceToken] = true;
|
||||
noRegexTable[SyntaxKind.TrueKeyword] = true;
|
||||
noRegexTable[SyntaxKind.FalseKeyword] = true;
|
||||
|
||||
// Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
|
||||
// classification on template strings. Because of the context free nature of templates,
|
||||
// the only precise way to classify a template portion would be by propagating the stack across
|
||||
// lines, just as we do with the end-of-line state. However, this is a burden for implementers,
|
||||
// and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
|
||||
// flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
|
||||
// Situations in which this fails are
|
||||
// 1) When template strings are nested across different lines:
|
||||
// `hello ${ `world
|
||||
// ` }`
|
||||
//
|
||||
// Where on the second line, you will get the closing of a template,
|
||||
// a closing curly, and a new template.
|
||||
//
|
||||
// 2) When substitution expressions have curly braces and the curly brace falls on the next line:
|
||||
// `hello ${ () => {
|
||||
// return "world" } } `
|
||||
//
|
||||
// Where on the second line, you will get the 'return' keyword,
|
||||
// a string literal, and a template end consisting of '} } `'.
|
||||
const templateStack: SyntaxKind[] = [];
|
||||
|
||||
/** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
|
||||
function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind) {
|
||||
if (isAccessibilityModifier(keyword1)) {
|
||||
if (keyword2 === SyntaxKind.GetKeyword ||
|
||||
keyword2 === SyntaxKind.SetKeyword ||
|
||||
keyword2 === SyntaxKind.ConstructorKeyword ||
|
||||
keyword2 === SyntaxKind.StaticKeyword) {
|
||||
|
||||
// Allow things like "public get", "public constructor" and "public static".
|
||||
// These are all legal.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Any other keyword following "public" is actually an identifier an not a real
|
||||
// keyword.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Assume any other keyword combination is legal. This can be refined in the future
|
||||
// if there are more cases we want the classifier to be better at.
|
||||
return true;
|
||||
}
|
||||
|
||||
function convertClassifications(classifications: Classifications, text: string): ClassificationResult {
|
||||
const entries: ClassificationInfo[] = [];
|
||||
const dense = classifications.spans;
|
||||
let lastEnd = 0;
|
||||
|
||||
for (let i = 0; i < dense.length; i += 3) {
|
||||
const start = dense[i];
|
||||
const length = dense[i + 1];
|
||||
const type = <ClassificationType>dense[i + 2];
|
||||
|
||||
// Make a whitespace entry between the last item and this one.
|
||||
if (lastEnd >= 0) {
|
||||
const whitespaceLength = start - lastEnd;
|
||||
if (whitespaceLength > 0) {
|
||||
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
|
||||
}
|
||||
}
|
||||
|
||||
entries.push({ length, classification: convertClassification(type) });
|
||||
lastEnd = start + length;
|
||||
}
|
||||
|
||||
const whitespaceLength = text.length - lastEnd;
|
||||
if (whitespaceLength > 0) {
|
||||
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
|
||||
}
|
||||
|
||||
return { entries, finalLexState: classifications.endOfLineState };
|
||||
}
|
||||
|
||||
function convertClassification(type: ClassificationType): TokenClass {
|
||||
switch (type) {
|
||||
case ClassificationType.comment: return TokenClass.Comment;
|
||||
case ClassificationType.keyword: return TokenClass.Keyword;
|
||||
case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
|
||||
case ClassificationType.operator: return TokenClass.Operator;
|
||||
case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
|
||||
case ClassificationType.whiteSpace: return TokenClass.Whitespace;
|
||||
case ClassificationType.punctuation: return TokenClass.Punctuation;
|
||||
case ClassificationType.identifier:
|
||||
case ClassificationType.className:
|
||||
case ClassificationType.enumName:
|
||||
case ClassificationType.interfaceName:
|
||||
case ClassificationType.moduleName:
|
||||
case ClassificationType.typeParameterName:
|
||||
case ClassificationType.typeAliasName:
|
||||
case ClassificationType.text:
|
||||
case ClassificationType.parameterName:
|
||||
default:
|
||||
return TokenClass.Identifier;
|
||||
}
|
||||
}
|
||||
|
||||
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
|
||||
return convertClassifications(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
|
||||
return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
|
||||
}
|
||||
|
||||
// If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
|
||||
// we will be more conservative in order to avoid conflicting with the syntactic classifier.
|
||||
function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
|
||||
let offset = 0;
|
||||
let token = SyntaxKind.Unknown;
|
||||
let lastNonTriviaToken = SyntaxKind.Unknown;
|
||||
|
||||
// Empty out the template stack for reuse.
|
||||
while (templateStack.length > 0) {
|
||||
templateStack.pop();
|
||||
}
|
||||
|
||||
// If we're in a string literal, then prepend: "\
|
||||
// (and a newline). That way when we lex we'll think we're still in a string literal.
|
||||
// Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
|
||||
// classification on template strings. Because of the context free nature of templates,
|
||||
// the only precise way to classify a template portion would be by propagating the stack across
|
||||
// lines, just as we do with the end-of-line state. However, this is a burden for implementers,
|
||||
// and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
|
||||
// flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
|
||||
// Situations in which this fails are
|
||||
// 1) When template strings are nested across different lines:
|
||||
// `hello ${ `world
|
||||
// ` }`
|
||||
//
|
||||
// If we're in a multiline comment, then prepend: /*
|
||||
// (and a newline). That way when we lex we'll think we're still in a multiline comment.
|
||||
switch (lexState) {
|
||||
case EndOfLineState.InDoubleQuoteStringLiteral:
|
||||
text = "\"\\\n" + text;
|
||||
offset = 3;
|
||||
break;
|
||||
case EndOfLineState.InSingleQuoteStringLiteral:
|
||||
text = "'\\\n" + text;
|
||||
offset = 3;
|
||||
break;
|
||||
case EndOfLineState.InMultiLineCommentTrivia:
|
||||
text = "/*\n" + text;
|
||||
offset = 3;
|
||||
break;
|
||||
case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
|
||||
text = "`\n" + text;
|
||||
offset = 2;
|
||||
break;
|
||||
case EndOfLineState.InTemplateMiddleOrTail:
|
||||
text = "}\n" + text;
|
||||
offset = 2;
|
||||
// falls through
|
||||
case EndOfLineState.InTemplateSubstitutionPosition:
|
||||
templateStack.push(SyntaxKind.TemplateHead);
|
||||
break;
|
||||
// Where on the second line, you will get the closing of a template,
|
||||
// a closing curly, and a new template.
|
||||
//
|
||||
// 2) When substitution expressions have curly braces and the curly brace falls on the next line:
|
||||
// `hello ${ () => {
|
||||
// return "world" } } `
|
||||
//
|
||||
// Where on the second line, you will get the 'return' keyword,
|
||||
// a string literal, and a template end consisting of '} } `'.
|
||||
const templateStack: SyntaxKind[] = [];
|
||||
|
||||
const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
|
||||
text = prefix + text;
|
||||
const offset = prefix.length;
|
||||
if (pushTemplate) {
|
||||
templateStack.push(SyntaxKind.TemplateHead);
|
||||
}
|
||||
|
||||
scanner.setText(text);
|
||||
|
||||
const result: Classifications = {
|
||||
endOfLineState: EndOfLineState.None,
|
||||
spans: []
|
||||
};
|
||||
let endOfLineState = EndOfLineState.None;
|
||||
const spans: number[] = [];
|
||||
|
||||
// We can run into an unfortunate interaction between the lexical and syntactic classifier
|
||||
// when the user is typing something generic. Consider the case where the user types:
|
||||
|
@ -196,57 +69,65 @@ namespace ts {
|
|||
|
||||
do {
|
||||
token = scanner.scan();
|
||||
|
||||
if (!isTrivia(token)) {
|
||||
if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastNonTriviaToken]) {
|
||||
if (scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
|
||||
handleToken();
|
||||
lastNonTriviaToken = token;
|
||||
}
|
||||
const end = scanner.getTextPos();
|
||||
pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
|
||||
if (end >= text.length) {
|
||||
const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
|
||||
if (end !== undefined) {
|
||||
endOfLineState = end;
|
||||
}
|
||||
}
|
||||
} while (token !== SyntaxKind.EndOfFileToken);
|
||||
|
||||
function handleToken(): void {
|
||||
switch (token) {
|
||||
case SyntaxKind.SlashToken:
|
||||
case SyntaxKind.SlashEqualsToken:
|
||||
if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
|
||||
token = SyntaxKind.RegularExpressionLiteral;
|
||||
}
|
||||
}
|
||||
else if (lastNonTriviaToken === SyntaxKind.DotToken && isKeyword(token)) {
|
||||
token = SyntaxKind.Identifier;
|
||||
}
|
||||
else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
|
||||
// We have two keywords in a row. Only treat the second as a keyword if
|
||||
// it's a sequence that could legally occur in the language. Otherwise
|
||||
// treat it as an identifier. This way, if someone writes "private var"
|
||||
// we recognize that 'var' is actually an identifier here.
|
||||
token = SyntaxKind.Identifier;
|
||||
}
|
||||
else if (lastNonTriviaToken === SyntaxKind.Identifier &&
|
||||
token === SyntaxKind.LessThanToken) {
|
||||
// Could be the start of something generic. Keep track of that by bumping
|
||||
// up the current count of generic contexts we may be in.
|
||||
angleBracketStack++;
|
||||
}
|
||||
else if (token === SyntaxKind.GreaterThanToken && angleBracketStack > 0) {
|
||||
// If we think we're currently in something generic, then mark that that
|
||||
// generic entity is complete.
|
||||
angleBracketStack--;
|
||||
}
|
||||
else if (token === SyntaxKind.AnyKeyword ||
|
||||
token === SyntaxKind.StringKeyword ||
|
||||
token === SyntaxKind.NumberKeyword ||
|
||||
token === SyntaxKind.BooleanKeyword ||
|
||||
token === SyntaxKind.SymbolKeyword) {
|
||||
break;
|
||||
case SyntaxKind.LessThanToken:
|
||||
if (lastNonTriviaToken === SyntaxKind.Identifier) {
|
||||
// Could be the start of something generic. Keep track of that by bumping
|
||||
// up the current count of generic contexts we may be in.
|
||||
angleBracketStack++;
|
||||
}
|
||||
break;
|
||||
case SyntaxKind.GreaterThanToken:
|
||||
if (angleBracketStack > 0) {
|
||||
// If we think we're currently in something generic, then mark that that
|
||||
// generic entity is complete.
|
||||
angleBracketStack--;
|
||||
}
|
||||
break;
|
||||
case SyntaxKind.AnyKeyword:
|
||||
case SyntaxKind.StringKeyword:
|
||||
case SyntaxKind.NumberKeyword:
|
||||
case SyntaxKind.BooleanKeyword:
|
||||
case SyntaxKind.SymbolKeyword:
|
||||
if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
|
||||
// If it looks like we're could be in something generic, don't classify this
|
||||
// as a keyword. We may just get overwritten by the syntactic classifier,
|
||||
// causing a noisy experience for the user.
|
||||
token = SyntaxKind.Identifier;
|
||||
}
|
||||
}
|
||||
else if (token === SyntaxKind.TemplateHead) {
|
||||
break;
|
||||
case SyntaxKind.TemplateHead:
|
||||
templateStack.push(token);
|
||||
}
|
||||
else if (token === SyntaxKind.OpenBraceToken) {
|
||||
break;
|
||||
case SyntaxKind.OpenBraceToken:
|
||||
// If we don't have anything on the template stack,
|
||||
// then we aren't trying to keep track of a previously scanned template head.
|
||||
if (templateStack.length > 0) {
|
||||
templateStack.push(token);
|
||||
}
|
||||
}
|
||||
else if (token === SyntaxKind.CloseBraceToken) {
|
||||
break;
|
||||
case SyntaxKind.CloseBraceToken:
|
||||
// If we don't have anything on the template stack,
|
||||
// then we aren't trying to keep track of a previously scanned template head.
|
||||
if (templateStack.length > 0) {
|
||||
|
@ -268,202 +149,300 @@ namespace ts {
|
|||
templateStack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lastNonTriviaToken = token;
|
||||
}
|
||||
|
||||
processToken();
|
||||
}
|
||||
while (token !== SyntaxKind.EndOfFileToken);
|
||||
|
||||
return result;
|
||||
|
||||
function processToken(): void {
|
||||
const start = scanner.getTokenPos();
|
||||
const end = scanner.getTextPos();
|
||||
|
||||
addResult(start, end, classFromKind(token));
|
||||
|
||||
if (end >= text.length) {
|
||||
if (token === SyntaxKind.StringLiteral) {
|
||||
// Check to see if we finished up on a multiline string literal.
|
||||
const tokenText = scanner.getTokenText();
|
||||
if (scanner.isUnterminated()) {
|
||||
const lastCharIndex = tokenText.length - 1;
|
||||
|
||||
let numBackslashes = 0;
|
||||
while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
|
||||
numBackslashes++;
|
||||
}
|
||||
|
||||
// If we have an odd number of backslashes, then the multiline string is unclosed
|
||||
if (numBackslashes & 1) {
|
||||
const quoteChar = tokenText.charCodeAt(0);
|
||||
result.endOfLineState = quoteChar === CharacterCodes.doubleQuote
|
||||
? EndOfLineState.InDoubleQuoteStringLiteral
|
||||
: EndOfLineState.InSingleQuoteStringLiteral;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (!isKeyword(token)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (token === SyntaxKind.MultiLineCommentTrivia) {
|
||||
// Check to see if the multiline comment was unclosed.
|
||||
if (scanner.isUnterminated()) {
|
||||
result.endOfLineState = EndOfLineState.InMultiLineCommentTrivia;
|
||||
|
||||
if (lastNonTriviaToken === SyntaxKind.DotToken) {
|
||||
token = SyntaxKind.Identifier;
|
||||
}
|
||||
}
|
||||
else if (isTemplateLiteralKind(token)) {
|
||||
if (scanner.isUnterminated()) {
|
||||
if (token === SyntaxKind.TemplateTail) {
|
||||
result.endOfLineState = EndOfLineState.InTemplateMiddleOrTail;
|
||||
}
|
||||
else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) {
|
||||
result.endOfLineState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
|
||||
}
|
||||
else {
|
||||
Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
|
||||
}
|
||||
else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
|
||||
// We have two keywords in a row. Only treat the second as a keyword if
|
||||
// it's a sequence that could legally occur in the language. Otherwise
|
||||
// treat it as an identifier. This way, if someone writes "private var"
|
||||
// we recognize that 'var' is actually an identifier here.
|
||||
token = SyntaxKind.Identifier;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { endOfLineState, spans };
|
||||
}
|
||||
|
||||
return { getClassificationsForLine, getEncodedLexicalClassifications };
|
||||
}
|
||||
|
||||
/// We do not have a full parser support to know when we should parse a regex or not
|
||||
/// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
|
||||
/// we have a series of divide operator. this list allows us to be more accurate by ruling out
|
||||
/// locations where a regexp cannot exist.
|
||||
const noRegexTable: true[] = ts.arrayToNumericMap<SyntaxKind, true>([
|
||||
SyntaxKind.Identifier,
|
||||
SyntaxKind.StringLiteral,
|
||||
SyntaxKind.NumericLiteral,
|
||||
SyntaxKind.RegularExpressionLiteral,
|
||||
SyntaxKind.ThisKeyword,
|
||||
SyntaxKind.PlusPlusToken,
|
||||
SyntaxKind.MinusMinusToken,
|
||||
SyntaxKind.CloseParenToken,
|
||||
SyntaxKind.CloseBracketToken,
|
||||
SyntaxKind.CloseBraceToken,
|
||||
SyntaxKind.TrueKeyword,
|
||||
SyntaxKind.FalseKeyword,
|
||||
], token => token, () => true);
|
||||
|
||||
function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
|
||||
switch (token) {
|
||||
case SyntaxKind.StringLiteral: {
|
||||
// Check to see if we finished up on a multiline string literal.
|
||||
if (!scanner.isUnterminated()) return undefined;
|
||||
|
||||
const tokenText = scanner.getTokenText();
|
||||
const lastCharIndex = tokenText.length - 1;
|
||||
let numBackslashes = 0;
|
||||
while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
|
||||
numBackslashes++;
|
||||
}
|
||||
|
||||
// If we have an odd number of backslashes, then the multiline string is unclosed
|
||||
if ((numBackslashes & 1) === 0) return undefined;
|
||||
return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
|
||||
}
|
||||
case SyntaxKind.MultiLineCommentTrivia:
|
||||
// Check to see if the multiline comment was unclosed.
|
||||
return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
|
||||
default:
|
||||
if (isTemplateLiteralKind(token)) {
|
||||
if (!scanner.isUnterminated()) {
|
||||
return undefined;
|
||||
}
|
||||
else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) {
|
||||
result.endOfLineState = EndOfLineState.InTemplateSubstitutionPosition;
|
||||
switch (token) {
|
||||
case SyntaxKind.TemplateTail:
|
||||
return EndOfLineState.InTemplateMiddleOrTail;
|
||||
case SyntaxKind.NoSubstitutionTemplateLiteral:
|
||||
return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
|
||||
default:
|
||||
throw Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
|
||||
}
|
||||
}
|
||||
}
|
||||
return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
|
||||
}
|
||||
}
|
||||
|
||||
function addResult(start: number, end: number, classification: ClassificationType): void {
|
||||
if (classification === ClassificationType.whiteSpace) {
|
||||
// Don't bother with whitespace classifications. They're not needed.
|
||||
return;
|
||||
}
|
||||
function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
|
||||
if (classification === ClassificationType.whiteSpace) {
|
||||
// Don't bother with whitespace classifications. They're not needed.
|
||||
return;
|
||||
}
|
||||
|
||||
if (start === 0 && offset > 0) {
|
||||
// We're classifying the first token, and this was a case where we prepended
|
||||
// text. We should consider the start of this token to be at the start of
|
||||
// the original text.
|
||||
start += offset;
|
||||
}
|
||||
if (start === 0 && offset > 0) {
|
||||
// We're classifying the first token, and this was a case where we prepended text.
|
||||
// We should consider the start of this token to be at the start of the original text.
|
||||
start += offset;
|
||||
}
|
||||
|
||||
// All our tokens are in relation to the augmented text. Move them back to be
|
||||
// relative to the original text.
|
||||
start -= offset;
|
||||
end -= offset;
|
||||
const length = end - start;
|
||||
const length = end - start;
|
||||
if (length > 0) {
|
||||
// All our tokens are in relation to the augmented text. Move them back to be
|
||||
// relative to the original text.
|
||||
result.push(start - offset, length, classification);
|
||||
}
|
||||
}
|
||||
|
||||
if (length > 0) {
|
||||
result.spans.push(start);
|
||||
result.spans.push(length);
|
||||
result.spans.push(classification);
|
||||
function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
|
||||
const entries: ClassificationInfo[] = [];
|
||||
const dense = classifications.spans;
|
||||
let lastEnd = 0;
|
||||
|
||||
for (let i = 0; i < dense.length; i += 3) {
|
||||
const start = dense[i];
|
||||
const length = dense[i + 1];
|
||||
const type = <ClassificationType>dense[i + 2];
|
||||
|
||||
// Make a whitespace entry between the last item and this one.
|
||||
if (lastEnd >= 0) {
|
||||
const whitespaceLength = start - lastEnd;
|
||||
if (whitespaceLength > 0) {
|
||||
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
|
||||
}
|
||||
}
|
||||
|
||||
entries.push({ length, classification: convertClassification(type) });
|
||||
lastEnd = start + length;
|
||||
}
|
||||
|
||||
function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
|
||||
switch (token) {
|
||||
case SyntaxKind.AsteriskToken:
|
||||
case SyntaxKind.SlashToken:
|
||||
case SyntaxKind.PercentToken:
|
||||
case SyntaxKind.PlusToken:
|
||||
case SyntaxKind.MinusToken:
|
||||
case SyntaxKind.LessThanLessThanToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
|
||||
case SyntaxKind.LessThanToken:
|
||||
case SyntaxKind.GreaterThanToken:
|
||||
case SyntaxKind.LessThanEqualsToken:
|
||||
case SyntaxKind.GreaterThanEqualsToken:
|
||||
case SyntaxKind.InstanceOfKeyword:
|
||||
case SyntaxKind.InKeyword:
|
||||
case SyntaxKind.AsKeyword:
|
||||
case SyntaxKind.EqualsEqualsToken:
|
||||
case SyntaxKind.ExclamationEqualsToken:
|
||||
case SyntaxKind.EqualsEqualsEqualsToken:
|
||||
case SyntaxKind.ExclamationEqualsEqualsToken:
|
||||
case SyntaxKind.AmpersandToken:
|
||||
case SyntaxKind.CaretToken:
|
||||
case SyntaxKind.BarToken:
|
||||
case SyntaxKind.AmpersandAmpersandToken:
|
||||
case SyntaxKind.BarBarToken:
|
||||
case SyntaxKind.BarEqualsToken:
|
||||
case SyntaxKind.AmpersandEqualsToken:
|
||||
case SyntaxKind.CaretEqualsToken:
|
||||
case SyntaxKind.LessThanLessThanEqualsToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanEqualsToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
|
||||
case SyntaxKind.PlusEqualsToken:
|
||||
case SyntaxKind.MinusEqualsToken:
|
||||
case SyntaxKind.AsteriskEqualsToken:
|
||||
case SyntaxKind.SlashEqualsToken:
|
||||
case SyntaxKind.PercentEqualsToken:
|
||||
case SyntaxKind.EqualsToken:
|
||||
case SyntaxKind.CommaToken:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
const whitespaceLength = text.length - lastEnd;
|
||||
if (whitespaceLength > 0) {
|
||||
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
|
||||
}
|
||||
|
||||
function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
|
||||
switch (token) {
|
||||
case SyntaxKind.PlusToken:
|
||||
case SyntaxKind.MinusToken:
|
||||
case SyntaxKind.TildeToken:
|
||||
case SyntaxKind.ExclamationToken:
|
||||
case SyntaxKind.PlusPlusToken:
|
||||
case SyntaxKind.MinusMinusToken:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return { entries, finalLexState: classifications.endOfLineState };
|
||||
}
|
||||
|
||||
function convertClassification(type: ClassificationType): TokenClass {
|
||||
switch (type) {
|
||||
case ClassificationType.comment: return TokenClass.Comment;
|
||||
case ClassificationType.keyword: return TokenClass.Keyword;
|
||||
case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
|
||||
case ClassificationType.operator: return TokenClass.Operator;
|
||||
case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
|
||||
case ClassificationType.whiteSpace: return TokenClass.Whitespace;
|
||||
case ClassificationType.punctuation: return TokenClass.Punctuation;
|
||||
case ClassificationType.identifier:
|
||||
case ClassificationType.className:
|
||||
case ClassificationType.enumName:
|
||||
case ClassificationType.interfaceName:
|
||||
case ClassificationType.moduleName:
|
||||
case ClassificationType.typeParameterName:
|
||||
case ClassificationType.typeAliasName:
|
||||
case ClassificationType.text:
|
||||
case ClassificationType.parameterName:
|
||||
return TokenClass.Identifier;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
|
||||
function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
|
||||
if (!isAccessibilityModifier(keyword1)) {
|
||||
// Assume any other keyword combination is legal.
|
||||
// This can be refined in the future if there are more cases we want the classifier to be better at.
|
||||
return true;
|
||||
}
|
||||
switch (keyword2) {
|
||||
case SyntaxKind.GetKeyword:
|
||||
case SyntaxKind.SetKeyword:
|
||||
case SyntaxKind.ConstructorKeyword:
|
||||
case SyntaxKind.StaticKeyword:
|
||||
return true; // Allow things like "public get", "public constructor" and "public static".
|
||||
default:
|
||||
return false; // Any other keyword following "public" is actually an identifier, not a real keyword.
|
||||
}
|
||||
}
|
||||
|
||||
function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
|
||||
// If we're in a string literal, then prepend: "\
|
||||
// (and a newline). That way when we lex we'll think we're still in a string literal.
|
||||
//
|
||||
// If we're in a multiline comment, then prepend: /*
|
||||
// (and a newline). That way when we lex we'll think we're still in a multiline comment.
|
||||
switch (lexState) {
|
||||
case EndOfLineState.InDoubleQuoteStringLiteral:
|
||||
return { prefix: "\"\\\n" };
|
||||
case EndOfLineState.InSingleQuoteStringLiteral:
|
||||
return { prefix: "'\\\n" };
|
||||
case EndOfLineState.InMultiLineCommentTrivia:
|
||||
return { prefix: "/*\n" };
|
||||
case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
|
||||
return { prefix: "`\n" };
|
||||
case EndOfLineState.InTemplateMiddleOrTail:
|
||||
return { prefix: "}\n", pushTemplate: true };
|
||||
case EndOfLineState.InTemplateSubstitutionPosition:
|
||||
return { prefix: "", pushTemplate: true };
|
||||
case EndOfLineState.None:
|
||||
return { prefix: "" };
|
||||
default:
|
||||
throw Debug.assertNever(lexState);
|
||||
}
|
||||
}
|
||||
|
||||
function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
|
||||
switch (token) {
|
||||
case SyntaxKind.AsteriskToken:
|
||||
case SyntaxKind.SlashToken:
|
||||
case SyntaxKind.PercentToken:
|
||||
case SyntaxKind.PlusToken:
|
||||
case SyntaxKind.MinusToken:
|
||||
case SyntaxKind.LessThanLessThanToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
|
||||
case SyntaxKind.LessThanToken:
|
||||
case SyntaxKind.GreaterThanToken:
|
||||
case SyntaxKind.LessThanEqualsToken:
|
||||
case SyntaxKind.GreaterThanEqualsToken:
|
||||
case SyntaxKind.InstanceOfKeyword:
|
||||
case SyntaxKind.InKeyword:
|
||||
case SyntaxKind.AsKeyword:
|
||||
case SyntaxKind.EqualsEqualsToken:
|
||||
case SyntaxKind.ExclamationEqualsToken:
|
||||
case SyntaxKind.EqualsEqualsEqualsToken:
|
||||
case SyntaxKind.ExclamationEqualsEqualsToken:
|
||||
case SyntaxKind.AmpersandToken:
|
||||
case SyntaxKind.CaretToken:
|
||||
case SyntaxKind.BarToken:
|
||||
case SyntaxKind.AmpersandAmpersandToken:
|
||||
case SyntaxKind.BarBarToken:
|
||||
case SyntaxKind.BarEqualsToken:
|
||||
case SyntaxKind.AmpersandEqualsToken:
|
||||
case SyntaxKind.CaretEqualsToken:
|
||||
case SyntaxKind.LessThanLessThanEqualsToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanEqualsToken:
|
||||
case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
|
||||
case SyntaxKind.PlusEqualsToken:
|
||||
case SyntaxKind.MinusEqualsToken:
|
||||
case SyntaxKind.AsteriskEqualsToken:
|
||||
case SyntaxKind.SlashEqualsToken:
|
||||
case SyntaxKind.PercentEqualsToken:
|
||||
case SyntaxKind.EqualsToken:
|
||||
case SyntaxKind.CommaToken:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
|
||||
switch (token) {
|
||||
case SyntaxKind.PlusToken:
|
||||
case SyntaxKind.MinusToken:
|
||||
case SyntaxKind.TildeToken:
|
||||
case SyntaxKind.ExclamationToken:
|
||||
case SyntaxKind.PlusPlusToken:
|
||||
case SyntaxKind.MinusMinusToken:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function classFromKind(token: SyntaxKind): ClassificationType {
|
||||
if (isKeyword(token)) {
|
||||
return ClassificationType.keyword;
|
||||
}
|
||||
else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
|
||||
return ClassificationType.operator;
|
||||
}
|
||||
else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
|
||||
return ClassificationType.punctuation;
|
||||
}
|
||||
|
||||
function isKeyword(token: SyntaxKind): boolean {
|
||||
return token >= SyntaxKind.FirstKeyword && token <= SyntaxKind.LastKeyword;
|
||||
}
|
||||
|
||||
function classFromKind(token: SyntaxKind): ClassificationType {
|
||||
if (isKeyword(token)) {
|
||||
return ClassificationType.keyword;
|
||||
}
|
||||
else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
|
||||
return ClassificationType.operator;
|
||||
}
|
||||
else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
|
||||
return ClassificationType.punctuation;
|
||||
}
|
||||
|
||||
switch (token) {
|
||||
case SyntaxKind.NumericLiteral:
|
||||
return ClassificationType.numericLiteral;
|
||||
case SyntaxKind.StringLiteral:
|
||||
switch (token) {
|
||||
case SyntaxKind.NumericLiteral:
|
||||
return ClassificationType.numericLiteral;
|
||||
case SyntaxKind.StringLiteral:
|
||||
return ClassificationType.stringLiteral;
|
||||
case SyntaxKind.RegularExpressionLiteral:
|
||||
return ClassificationType.regularExpressionLiteral;
|
||||
case SyntaxKind.ConflictMarkerTrivia:
|
||||
case SyntaxKind.MultiLineCommentTrivia:
|
||||
case SyntaxKind.SingleLineCommentTrivia:
|
||||
return ClassificationType.comment;
|
||||
case SyntaxKind.WhitespaceTrivia:
|
||||
case SyntaxKind.NewLineTrivia:
|
||||
return ClassificationType.whiteSpace;
|
||||
case SyntaxKind.Identifier:
|
||||
default:
|
||||
if (isTemplateLiteralKind(token)) {
|
||||
return ClassificationType.stringLiteral;
|
||||
case SyntaxKind.RegularExpressionLiteral:
|
||||
return ClassificationType.regularExpressionLiteral;
|
||||
case SyntaxKind.ConflictMarkerTrivia:
|
||||
case SyntaxKind.MultiLineCommentTrivia:
|
||||
case SyntaxKind.SingleLineCommentTrivia:
|
||||
return ClassificationType.comment;
|
||||
case SyntaxKind.WhitespaceTrivia:
|
||||
case SyntaxKind.NewLineTrivia:
|
||||
return ClassificationType.whiteSpace;
|
||||
case SyntaxKind.Identifier:
|
||||
default:
|
||||
if (isTemplateLiteralKind(token)) {
|
||||
return ClassificationType.stringLiteral;
|
||||
}
|
||||
return ClassificationType.identifier;
|
||||
}
|
||||
}
|
||||
return ClassificationType.identifier;
|
||||
}
|
||||
|
||||
return {
|
||||
getClassificationsForLine,
|
||||
getEncodedLexicalClassifications
|
||||
};
|
||||
}
|
||||
|
||||
/* @internal */
|
||||
export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: UnderscoreEscapedMap<true>, span: TextSpan): ClassifiedSpan[] {
|
||||
return convertClassifications(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
|
||||
return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
|
||||
}
|
||||
|
||||
function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
|
||||
|
@ -583,7 +562,7 @@ namespace ts {
|
|||
}
|
||||
}
|
||||
|
||||
function convertClassifications(classifications: Classifications): ClassifiedSpan[] {
|
||||
function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
|
||||
Debug.assert(classifications.spans.length % 3 === 0);
|
||||
const dense = classifications.spans;
|
||||
const result: ClassifiedSpan[] = [];
|
||||
|
@ -599,7 +578,7 @@ namespace ts {
|
|||
|
||||
/* @internal */
|
||||
export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
|
||||
return convertClassifications(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
|
||||
return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
|
||||
}
|
||||
|
||||
/* @internal */
|
||||
|
|
Loading…
Reference in a new issue