From 53796eed59798739227f1c91231f647945d0c88b Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 20 Nov 2017 16:25:51 -0800 Subject: [PATCH] Clean up lexical classifier (#20123) --- src/compiler/core.ts | 8 +- src/compiler/utilities.ts | 11 +- src/services/classifier.ts | 711 ++++++++++++++++++------------------- 3 files changed, 351 insertions(+), 379 deletions(-) diff --git a/src/compiler/core.ts b/src/compiler/core.ts index 867a22b92a..fdf888b0cf 100644 --- a/src/compiler/core.ts +++ b/src/compiler/core.ts @@ -1260,10 +1260,12 @@ namespace ts { return result; } - export function arrayToNumericMap(array: ReadonlyArray, makeKey: (value: T) => number): T[] { - const result: T[] = []; + export function arrayToNumericMap(array: ReadonlyArray, makeKey: (value: T) => number): T[]; + export function arrayToNumericMap(array: ReadonlyArray, makeKey: (value: T) => number, makeValue: (value: T) => V): V[]; + export function arrayToNumericMap(array: ReadonlyArray, makeKey: (value: T) => number, makeValue?: (value: T) => V): V[] { + const result: V[] = []; for (const value of array) { - result[makeKey(value)] = value; + result[makeKey(value)] = makeValue ? makeValue(value) : value as any as V; } return result; } diff --git a/src/compiler/utilities.ts b/src/compiler/utilities.ts index 4bc519c2b8..46835bd94d 100644 --- a/src/compiler/utilities.ts +++ b/src/compiler/utilities.ts @@ -5078,16 +5078,7 @@ namespace ts { } export function isStringTextContainingNode(node: Node) { - switch (node.kind) { - case SyntaxKind.StringLiteral: - case SyntaxKind.TemplateHead: - case SyntaxKind.TemplateMiddle: - case SyntaxKind.TemplateTail: - case SyntaxKind.NoSubstitutionTemplateLiteral: - return true; - default: - return false; - } + return node.kind === SyntaxKind.StringLiteral || isTemplateLiteralKind(node.kind); } // Identifiers diff --git a/src/services/classifier.ts b/src/services/classifier.ts index af7d84300e..32b3dac2cc 100644 --- a/src/services/classifier.ts +++ b/src/services/classifier.ts @@ -1,177 +1,50 @@ namespace ts { - /// Classifier export function createClassifier(): Classifier { const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false); - /// We do not have a full parser support to know when we should parse a regex or not - /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where - /// we have a series of divide operator. this list allows us to be more accurate by ruling out - /// locations where a regexp cannot exist. - const noRegexTable: boolean[] = []; - noRegexTable[SyntaxKind.Identifier] = true; - noRegexTable[SyntaxKind.StringLiteral] = true; - noRegexTable[SyntaxKind.NumericLiteral] = true; - noRegexTable[SyntaxKind.RegularExpressionLiteral] = true; - noRegexTable[SyntaxKind.ThisKeyword] = true; - noRegexTable[SyntaxKind.PlusPlusToken] = true; - noRegexTable[SyntaxKind.MinusMinusToken] = true; - noRegexTable[SyntaxKind.CloseParenToken] = true; - noRegexTable[SyntaxKind.CloseBracketToken] = true; - noRegexTable[SyntaxKind.CloseBraceToken] = true; - noRegexTable[SyntaxKind.TrueKeyword] = true; - noRegexTable[SyntaxKind.FalseKeyword] = true; - - // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact) - // classification on template strings. Because of the context free nature of templates, - // the only precise way to classify a template portion would be by propagating the stack across - // lines, just as we do with the end-of-line state. However, this is a burden for implementers, - // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead - // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state. - // Situations in which this fails are - // 1) When template strings are nested across different lines: - // `hello ${ `world - // ` }` - // - // Where on the second line, you will get the closing of a template, - // a closing curly, and a new template. - // - // 2) When substitution expressions have curly braces and the curly brace falls on the next line: - // `hello ${ () => { - // return "world" } } ` - // - // Where on the second line, you will get the 'return' keyword, - // a string literal, and a template end consisting of '} } `'. - const templateStack: SyntaxKind[] = []; - - /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */ - function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind) { - if (isAccessibilityModifier(keyword1)) { - if (keyword2 === SyntaxKind.GetKeyword || - keyword2 === SyntaxKind.SetKeyword || - keyword2 === SyntaxKind.ConstructorKeyword || - keyword2 === SyntaxKind.StaticKeyword) { - - // Allow things like "public get", "public constructor" and "public static". - // These are all legal. - return true; - } - - // Any other keyword following "public" is actually an identifier an not a real - // keyword. - return false; - } - - // Assume any other keyword combination is legal. This can be refined in the future - // if there are more cases we want the classifier to be better at. - return true; - } - - function convertClassifications(classifications: Classifications, text: string): ClassificationResult { - const entries: ClassificationInfo[] = []; - const dense = classifications.spans; - let lastEnd = 0; - - for (let i = 0; i < dense.length; i += 3) { - const start = dense[i]; - const length = dense[i + 1]; - const type = dense[i + 2]; - - // Make a whitespace entry between the last item and this one. - if (lastEnd >= 0) { - const whitespaceLength = start - lastEnd; - if (whitespaceLength > 0) { - entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); - } - } - - entries.push({ length, classification: convertClassification(type) }); - lastEnd = start + length; - } - - const whitespaceLength = text.length - lastEnd; - if (whitespaceLength > 0) { - entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); - } - - return { entries, finalLexState: classifications.endOfLineState }; - } - - function convertClassification(type: ClassificationType): TokenClass { - switch (type) { - case ClassificationType.comment: return TokenClass.Comment; - case ClassificationType.keyword: return TokenClass.Keyword; - case ClassificationType.numericLiteral: return TokenClass.NumberLiteral; - case ClassificationType.operator: return TokenClass.Operator; - case ClassificationType.stringLiteral: return TokenClass.StringLiteral; - case ClassificationType.whiteSpace: return TokenClass.Whitespace; - case ClassificationType.punctuation: return TokenClass.Punctuation; - case ClassificationType.identifier: - case ClassificationType.className: - case ClassificationType.enumName: - case ClassificationType.interfaceName: - case ClassificationType.moduleName: - case ClassificationType.typeParameterName: - case ClassificationType.typeAliasName: - case ClassificationType.text: - case ClassificationType.parameterName: - default: - return TokenClass.Identifier; - } - } - function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult { - return convertClassifications(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text); + return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text); } // If there is a syntactic classifier ('syntacticClassifierAbsent' is false), // we will be more conservative in order to avoid conflicting with the syntactic classifier. function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications { - let offset = 0; let token = SyntaxKind.Unknown; let lastNonTriviaToken = SyntaxKind.Unknown; - // Empty out the template stack for reuse. - while (templateStack.length > 0) { - templateStack.pop(); - } - - // If we're in a string literal, then prepend: "\ - // (and a newline). That way when we lex we'll think we're still in a string literal. + // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact) + // classification on template strings. Because of the context free nature of templates, + // the only precise way to classify a template portion would be by propagating the stack across + // lines, just as we do with the end-of-line state. However, this is a burden for implementers, + // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead + // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state. + // Situations in which this fails are + // 1) When template strings are nested across different lines: + // `hello ${ `world + // ` }` // - // If we're in a multiline comment, then prepend: /* - // (and a newline). That way when we lex we'll think we're still in a multiline comment. - switch (lexState) { - case EndOfLineState.InDoubleQuoteStringLiteral: - text = "\"\\\n" + text; - offset = 3; - break; - case EndOfLineState.InSingleQuoteStringLiteral: - text = "'\\\n" + text; - offset = 3; - break; - case EndOfLineState.InMultiLineCommentTrivia: - text = "/*\n" + text; - offset = 3; - break; - case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate: - text = "`\n" + text; - offset = 2; - break; - case EndOfLineState.InTemplateMiddleOrTail: - text = "}\n" + text; - offset = 2; - // falls through - case EndOfLineState.InTemplateSubstitutionPosition: - templateStack.push(SyntaxKind.TemplateHead); - break; + // Where on the second line, you will get the closing of a template, + // a closing curly, and a new template. + // + // 2) When substitution expressions have curly braces and the curly brace falls on the next line: + // `hello ${ () => { + // return "world" } } ` + // + // Where on the second line, you will get the 'return' keyword, + // a string literal, and a template end consisting of '} } `'. + const templateStack: SyntaxKind[] = []; + + const { prefix, pushTemplate } = getPrefixFromLexState(lexState); + text = prefix + text; + const offset = prefix.length; + if (pushTemplate) { + templateStack.push(SyntaxKind.TemplateHead); } scanner.setText(text); - const result: Classifications = { - endOfLineState: EndOfLineState.None, - spans: [] - }; + let endOfLineState = EndOfLineState.None; + const spans: number[] = []; // We can run into an unfortunate interaction between the lexical and syntactic classifier // when the user is typing something generic. Consider the case where the user types: @@ -196,57 +69,65 @@ namespace ts { do { token = scanner.scan(); - if (!isTrivia(token)) { - if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastNonTriviaToken]) { - if (scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) { + handleToken(); + lastNonTriviaToken = token; + } + const end = scanner.getTextPos(); + pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans); + if (end >= text.length) { + const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack)); + if (end !== undefined) { + endOfLineState = end; + } + } + } while (token !== SyntaxKind.EndOfFileToken); + + function handleToken(): void { + switch (token) { + case SyntaxKind.SlashToken: + case SyntaxKind.SlashEqualsToken: + if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) { token = SyntaxKind.RegularExpressionLiteral; } - } - else if (lastNonTriviaToken === SyntaxKind.DotToken && isKeyword(token)) { - token = SyntaxKind.Identifier; - } - else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) { - // We have two keywords in a row. Only treat the second as a keyword if - // it's a sequence that could legally occur in the language. Otherwise - // treat it as an identifier. This way, if someone writes "private var" - // we recognize that 'var' is actually an identifier here. - token = SyntaxKind.Identifier; - } - else if (lastNonTriviaToken === SyntaxKind.Identifier && - token === SyntaxKind.LessThanToken) { - // Could be the start of something generic. Keep track of that by bumping - // up the current count of generic contexts we may be in. - angleBracketStack++; - } - else if (token === SyntaxKind.GreaterThanToken && angleBracketStack > 0) { - // If we think we're currently in something generic, then mark that that - // generic entity is complete. - angleBracketStack--; - } - else if (token === SyntaxKind.AnyKeyword || - token === SyntaxKind.StringKeyword || - token === SyntaxKind.NumberKeyword || - token === SyntaxKind.BooleanKeyword || - token === SyntaxKind.SymbolKeyword) { + break; + case SyntaxKind.LessThanToken: + if (lastNonTriviaToken === SyntaxKind.Identifier) { + // Could be the start of something generic. Keep track of that by bumping + // up the current count of generic contexts we may be in. + angleBracketStack++; + } + break; + case SyntaxKind.GreaterThanToken: + if (angleBracketStack > 0) { + // If we think we're currently in something generic, then mark that that + // generic entity is complete. + angleBracketStack--; + } + break; + case SyntaxKind.AnyKeyword: + case SyntaxKind.StringKeyword: + case SyntaxKind.NumberKeyword: + case SyntaxKind.BooleanKeyword: + case SyntaxKind.SymbolKeyword: if (angleBracketStack > 0 && !syntacticClassifierAbsent) { // If it looks like we're could be in something generic, don't classify this // as a keyword. We may just get overwritten by the syntactic classifier, // causing a noisy experience for the user. token = SyntaxKind.Identifier; } - } - else if (token === SyntaxKind.TemplateHead) { + break; + case SyntaxKind.TemplateHead: templateStack.push(token); - } - else if (token === SyntaxKind.OpenBraceToken) { + break; + case SyntaxKind.OpenBraceToken: // If we don't have anything on the template stack, // then we aren't trying to keep track of a previously scanned template head. if (templateStack.length > 0) { templateStack.push(token); } - } - else if (token === SyntaxKind.CloseBraceToken) { + break; + case SyntaxKind.CloseBraceToken: // If we don't have anything on the template stack, // then we aren't trying to keep track of a previously scanned template head. if (templateStack.length > 0) { @@ -268,202 +149,300 @@ namespace ts { templateStack.pop(); } } - } - - lastNonTriviaToken = token; - } - - processToken(); - } - while (token !== SyntaxKind.EndOfFileToken); - - return result; - - function processToken(): void { - const start = scanner.getTokenPos(); - const end = scanner.getTextPos(); - - addResult(start, end, classFromKind(token)); - - if (end >= text.length) { - if (token === SyntaxKind.StringLiteral) { - // Check to see if we finished up on a multiline string literal. - const tokenText = scanner.getTokenText(); - if (scanner.isUnterminated()) { - const lastCharIndex = tokenText.length - 1; - - let numBackslashes = 0; - while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) { - numBackslashes++; - } - - // If we have an odd number of backslashes, then the multiline string is unclosed - if (numBackslashes & 1) { - const quoteChar = tokenText.charCodeAt(0); - result.endOfLineState = quoteChar === CharacterCodes.doubleQuote - ? EndOfLineState.InDoubleQuoteStringLiteral - : EndOfLineState.InSingleQuoteStringLiteral; - } + break; + default: + if (!isKeyword(token)) { + break; } - } - else if (token === SyntaxKind.MultiLineCommentTrivia) { - // Check to see if the multiline comment was unclosed. - if (scanner.isUnterminated()) { - result.endOfLineState = EndOfLineState.InMultiLineCommentTrivia; + + if (lastNonTriviaToken === SyntaxKind.DotToken) { + token = SyntaxKind.Identifier; } - } - else if (isTemplateLiteralKind(token)) { - if (scanner.isUnterminated()) { - if (token === SyntaxKind.TemplateTail) { - result.endOfLineState = EndOfLineState.InTemplateMiddleOrTail; - } - else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) { - result.endOfLineState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate; - } - else { - Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token); - } + else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) { + // We have two keywords in a row. Only treat the second as a keyword if + // it's a sequence that could legally occur in the language. Otherwise + // treat it as an identifier. This way, if someone writes "private var" + // we recognize that 'var' is actually an identifier here. + token = SyntaxKind.Identifier; } + } + } + + return { endOfLineState, spans }; + } + + return { getClassificationsForLine, getEncodedLexicalClassifications }; + } + + /// We do not have a full parser support to know when we should parse a regex or not + /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where + /// we have a series of divide operator. this list allows us to be more accurate by ruling out + /// locations where a regexp cannot exist. + const noRegexTable: true[] = ts.arrayToNumericMap([ + SyntaxKind.Identifier, + SyntaxKind.StringLiteral, + SyntaxKind.NumericLiteral, + SyntaxKind.RegularExpressionLiteral, + SyntaxKind.ThisKeyword, + SyntaxKind.PlusPlusToken, + SyntaxKind.MinusMinusToken, + SyntaxKind.CloseParenToken, + SyntaxKind.CloseBracketToken, + SyntaxKind.CloseBraceToken, + SyntaxKind.TrueKeyword, + SyntaxKind.FalseKeyword, + ], token => token, () => true); + + function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined { + switch (token) { + case SyntaxKind.StringLiteral: { + // Check to see if we finished up on a multiline string literal. + if (!scanner.isUnterminated()) return undefined; + + const tokenText = scanner.getTokenText(); + const lastCharIndex = tokenText.length - 1; + let numBackslashes = 0; + while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) { + numBackslashes++; + } + + // If we have an odd number of backslashes, then the multiline string is unclosed + if ((numBackslashes & 1) === 0) return undefined; + return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral; + } + case SyntaxKind.MultiLineCommentTrivia: + // Check to see if the multiline comment was unclosed. + return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined; + default: + if (isTemplateLiteralKind(token)) { + if (!scanner.isUnterminated()) { + return undefined; } - else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) { - result.endOfLineState = EndOfLineState.InTemplateSubstitutionPosition; + switch (token) { + case SyntaxKind.TemplateTail: + return EndOfLineState.InTemplateMiddleOrTail; + case SyntaxKind.NoSubstitutionTemplateLiteral: + return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate; + default: + throw Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token); } } - } + return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined; + } + } - function addResult(start: number, end: number, classification: ClassificationType): void { - if (classification === ClassificationType.whiteSpace) { - // Don't bother with whitespace classifications. They're not needed. - return; - } + function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push): void { + if (classification === ClassificationType.whiteSpace) { + // Don't bother with whitespace classifications. They're not needed. + return; + } - if (start === 0 && offset > 0) { - // We're classifying the first token, and this was a case where we prepended - // text. We should consider the start of this token to be at the start of - // the original text. - start += offset; - } + if (start === 0 && offset > 0) { + // We're classifying the first token, and this was a case where we prepended text. + // We should consider the start of this token to be at the start of the original text. + start += offset; + } - // All our tokens are in relation to the augmented text. Move them back to be - // relative to the original text. - start -= offset; - end -= offset; - const length = end - start; + const length = end - start; + if (length > 0) { + // All our tokens are in relation to the augmented text. Move them back to be + // relative to the original text. + result.push(start - offset, length, classification); + } + } - if (length > 0) { - result.spans.push(start); - result.spans.push(length); - result.spans.push(classification); + function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult { + const entries: ClassificationInfo[] = []; + const dense = classifications.spans; + let lastEnd = 0; + + for (let i = 0; i < dense.length; i += 3) { + const start = dense[i]; + const length = dense[i + 1]; + const type = dense[i + 2]; + + // Make a whitespace entry between the last item and this one. + if (lastEnd >= 0) { + const whitespaceLength = start - lastEnd; + if (whitespaceLength > 0) { + entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); } } + + entries.push({ length, classification: convertClassification(type) }); + lastEnd = start + length; } - function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean { - switch (token) { - case SyntaxKind.AsteriskToken: - case SyntaxKind.SlashToken: - case SyntaxKind.PercentToken: - case SyntaxKind.PlusToken: - case SyntaxKind.MinusToken: - case SyntaxKind.LessThanLessThanToken: - case SyntaxKind.GreaterThanGreaterThanToken: - case SyntaxKind.GreaterThanGreaterThanGreaterThanToken: - case SyntaxKind.LessThanToken: - case SyntaxKind.GreaterThanToken: - case SyntaxKind.LessThanEqualsToken: - case SyntaxKind.GreaterThanEqualsToken: - case SyntaxKind.InstanceOfKeyword: - case SyntaxKind.InKeyword: - case SyntaxKind.AsKeyword: - case SyntaxKind.EqualsEqualsToken: - case SyntaxKind.ExclamationEqualsToken: - case SyntaxKind.EqualsEqualsEqualsToken: - case SyntaxKind.ExclamationEqualsEqualsToken: - case SyntaxKind.AmpersandToken: - case SyntaxKind.CaretToken: - case SyntaxKind.BarToken: - case SyntaxKind.AmpersandAmpersandToken: - case SyntaxKind.BarBarToken: - case SyntaxKind.BarEqualsToken: - case SyntaxKind.AmpersandEqualsToken: - case SyntaxKind.CaretEqualsToken: - case SyntaxKind.LessThanLessThanEqualsToken: - case SyntaxKind.GreaterThanGreaterThanEqualsToken: - case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken: - case SyntaxKind.PlusEqualsToken: - case SyntaxKind.MinusEqualsToken: - case SyntaxKind.AsteriskEqualsToken: - case SyntaxKind.SlashEqualsToken: - case SyntaxKind.PercentEqualsToken: - case SyntaxKind.EqualsToken: - case SyntaxKind.CommaToken: - return true; - default: - return false; - } + const whitespaceLength = text.length - lastEnd; + if (whitespaceLength > 0) { + entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); } - function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean { - switch (token) { - case SyntaxKind.PlusToken: - case SyntaxKind.MinusToken: - case SyntaxKind.TildeToken: - case SyntaxKind.ExclamationToken: - case SyntaxKind.PlusPlusToken: - case SyntaxKind.MinusMinusToken: - return true; - default: - return false; - } + return { entries, finalLexState: classifications.endOfLineState }; + } + + function convertClassification(type: ClassificationType): TokenClass { + switch (type) { + case ClassificationType.comment: return TokenClass.Comment; + case ClassificationType.keyword: return TokenClass.Keyword; + case ClassificationType.numericLiteral: return TokenClass.NumberLiteral; + case ClassificationType.operator: return TokenClass.Operator; + case ClassificationType.stringLiteral: return TokenClass.StringLiteral; + case ClassificationType.whiteSpace: return TokenClass.Whitespace; + case ClassificationType.punctuation: return TokenClass.Punctuation; + case ClassificationType.identifier: + case ClassificationType.className: + case ClassificationType.enumName: + case ClassificationType.interfaceName: + case ClassificationType.moduleName: + case ClassificationType.typeParameterName: + case ClassificationType.typeAliasName: + case ClassificationType.text: + case ClassificationType.parameterName: + return TokenClass.Identifier; + } + } + + /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */ + function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean { + if (!isAccessibilityModifier(keyword1)) { + // Assume any other keyword combination is legal. + // This can be refined in the future if there are more cases we want the classifier to be better at. + return true; + } + switch (keyword2) { + case SyntaxKind.GetKeyword: + case SyntaxKind.SetKeyword: + case SyntaxKind.ConstructorKeyword: + case SyntaxKind.StaticKeyword: + return true; // Allow things like "public get", "public constructor" and "public static". + default: + return false; // Any other keyword following "public" is actually an identifier, not a real keyword. + } + } + + function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } { + // If we're in a string literal, then prepend: "\ + // (and a newline). That way when we lex we'll think we're still in a string literal. + // + // If we're in a multiline comment, then prepend: /* + // (and a newline). That way when we lex we'll think we're still in a multiline comment. + switch (lexState) { + case EndOfLineState.InDoubleQuoteStringLiteral: + return { prefix: "\"\\\n" }; + case EndOfLineState.InSingleQuoteStringLiteral: + return { prefix: "'\\\n" }; + case EndOfLineState.InMultiLineCommentTrivia: + return { prefix: "/*\n" }; + case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate: + return { prefix: "`\n" }; + case EndOfLineState.InTemplateMiddleOrTail: + return { prefix: "}\n", pushTemplate: true }; + case EndOfLineState.InTemplateSubstitutionPosition: + return { prefix: "", pushTemplate: true }; + case EndOfLineState.None: + return { prefix: "" }; + default: + throw Debug.assertNever(lexState); + } + } + + function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean { + switch (token) { + case SyntaxKind.AsteriskToken: + case SyntaxKind.SlashToken: + case SyntaxKind.PercentToken: + case SyntaxKind.PlusToken: + case SyntaxKind.MinusToken: + case SyntaxKind.LessThanLessThanToken: + case SyntaxKind.GreaterThanGreaterThanToken: + case SyntaxKind.GreaterThanGreaterThanGreaterThanToken: + case SyntaxKind.LessThanToken: + case SyntaxKind.GreaterThanToken: + case SyntaxKind.LessThanEqualsToken: + case SyntaxKind.GreaterThanEqualsToken: + case SyntaxKind.InstanceOfKeyword: + case SyntaxKind.InKeyword: + case SyntaxKind.AsKeyword: + case SyntaxKind.EqualsEqualsToken: + case SyntaxKind.ExclamationEqualsToken: + case SyntaxKind.EqualsEqualsEqualsToken: + case SyntaxKind.ExclamationEqualsEqualsToken: + case SyntaxKind.AmpersandToken: + case SyntaxKind.CaretToken: + case SyntaxKind.BarToken: + case SyntaxKind.AmpersandAmpersandToken: + case SyntaxKind.BarBarToken: + case SyntaxKind.BarEqualsToken: + case SyntaxKind.AmpersandEqualsToken: + case SyntaxKind.CaretEqualsToken: + case SyntaxKind.LessThanLessThanEqualsToken: + case SyntaxKind.GreaterThanGreaterThanEqualsToken: + case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken: + case SyntaxKind.PlusEqualsToken: + case SyntaxKind.MinusEqualsToken: + case SyntaxKind.AsteriskEqualsToken: + case SyntaxKind.SlashEqualsToken: + case SyntaxKind.PercentEqualsToken: + case SyntaxKind.EqualsToken: + case SyntaxKind.CommaToken: + return true; + default: + return false; + } + } + + function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean { + switch (token) { + case SyntaxKind.PlusToken: + case SyntaxKind.MinusToken: + case SyntaxKind.TildeToken: + case SyntaxKind.ExclamationToken: + case SyntaxKind.PlusPlusToken: + case SyntaxKind.MinusMinusToken: + return true; + default: + return false; + } + } + + function classFromKind(token: SyntaxKind): ClassificationType { + if (isKeyword(token)) { + return ClassificationType.keyword; + } + else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) { + return ClassificationType.operator; + } + else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) { + return ClassificationType.punctuation; } - function isKeyword(token: SyntaxKind): boolean { - return token >= SyntaxKind.FirstKeyword && token <= SyntaxKind.LastKeyword; - } - - function classFromKind(token: SyntaxKind): ClassificationType { - if (isKeyword(token)) { - return ClassificationType.keyword; - } - else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) { - return ClassificationType.operator; - } - else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) { - return ClassificationType.punctuation; - } - - switch (token) { - case SyntaxKind.NumericLiteral: - return ClassificationType.numericLiteral; - case SyntaxKind.StringLiteral: + switch (token) { + case SyntaxKind.NumericLiteral: + return ClassificationType.numericLiteral; + case SyntaxKind.StringLiteral: + return ClassificationType.stringLiteral; + case SyntaxKind.RegularExpressionLiteral: + return ClassificationType.regularExpressionLiteral; + case SyntaxKind.ConflictMarkerTrivia: + case SyntaxKind.MultiLineCommentTrivia: + case SyntaxKind.SingleLineCommentTrivia: + return ClassificationType.comment; + case SyntaxKind.WhitespaceTrivia: + case SyntaxKind.NewLineTrivia: + return ClassificationType.whiteSpace; + case SyntaxKind.Identifier: + default: + if (isTemplateLiteralKind(token)) { return ClassificationType.stringLiteral; - case SyntaxKind.RegularExpressionLiteral: - return ClassificationType.regularExpressionLiteral; - case SyntaxKind.ConflictMarkerTrivia: - case SyntaxKind.MultiLineCommentTrivia: - case SyntaxKind.SingleLineCommentTrivia: - return ClassificationType.comment; - case SyntaxKind.WhitespaceTrivia: - case SyntaxKind.NewLineTrivia: - return ClassificationType.whiteSpace; - case SyntaxKind.Identifier: - default: - if (isTemplateLiteralKind(token)) { - return ClassificationType.stringLiteral; - } - return ClassificationType.identifier; - } + } + return ClassificationType.identifier; } - - return { - getClassificationsForLine, - getEncodedLexicalClassifications - }; } /* @internal */ export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: UnderscoreEscapedMap, span: TextSpan): ClassifiedSpan[] { - return convertClassifications(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span)); + return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span)); } function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) { @@ -583,7 +562,7 @@ namespace ts { } } - function convertClassifications(classifications: Classifications): ClassifiedSpan[] { + function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] { Debug.assert(classifications.spans.length % 3 === 0); const dense = classifications.spans; const result: ClassifiedSpan[] = []; @@ -599,7 +578,7 @@ namespace ts { /* @internal */ export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] { - return convertClassifications(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span)); + return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span)); } /* @internal */