Clean up lexical classifier (#20123)

2017-11-20 16:25:51 -08:00 · 2017-11-20 16:25:51 -08:00 · 53796eed59
parent a551c4cd64
commit 53796eed59
3 changed files with 351 additions and 379 deletions
--- a/src/compiler/core.ts
+++ b/src/compiler/core.ts
@ -1260,10 +1260,12 @@ namespace ts {
        return result;
    }

-    export function arrayToNumericMap<T>(array: ReadonlyArray<T>, makeKey: (value: T) => number): T[] {
-        const result: T[] = [];
+    export function arrayToNumericMap<T>(array: ReadonlyArray<T>, makeKey: (value: T) => number): T[];
+    export function arrayToNumericMap<T, V>(array: ReadonlyArray<T>, makeKey: (value: T) => number, makeValue: (value: T) => V): V[];
+    export function arrayToNumericMap<T, V>(array: ReadonlyArray<T>, makeKey: (value: T) => number, makeValue?: (value: T) => V): V[] {
+        const result: V[] = [];
        for (const value of array) {
-            result[makeKey(value)] = value;
+            result[makeKey(value)] = makeValue ? makeValue(value) : value as any as V;
        }
        return result;
    }
--- a/src/compiler/utilities.ts
+++ b/src/compiler/utilities.ts
@ -5078,16 +5078,7 @@ namespace ts {
    }

    export function isStringTextContainingNode(node: Node) {
-        switch (node.kind) {
-            case SyntaxKind.StringLiteral:
-            case SyntaxKind.TemplateHead:
-            case SyntaxKind.TemplateMiddle:
-            case SyntaxKind.TemplateTail:
-            case SyntaxKind.NoSubstitutionTemplateLiteral:
-                return true;
-            default:
-                return false;
-        }
+        return node.kind === SyntaxKind.StringLiteral || isTemplateLiteralKind(node.kind);
    }

    // Identifiers
--- a/src/services/classifier.ts
+++ b/src/services/classifier.ts
@ -1,177 +1,50 @@
 namespace ts {
-    /// Classifier
    export function createClassifier(): Classifier {
        const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);

-        /// We do not have a full parser support to know when we should parse a regex or not
-        /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
-        /// we have a series of divide operator. this list allows us to be more accurate by ruling out
-        /// locations where a regexp cannot exist.
-        const noRegexTable: boolean[] = [];
-        noRegexTable[SyntaxKind.Identifier] = true;
-        noRegexTable[SyntaxKind.StringLiteral] = true;
-        noRegexTable[SyntaxKind.NumericLiteral] = true;
-        noRegexTable[SyntaxKind.RegularExpressionLiteral] = true;
-        noRegexTable[SyntaxKind.ThisKeyword] = true;
-        noRegexTable[SyntaxKind.PlusPlusToken] = true;
-        noRegexTable[SyntaxKind.MinusMinusToken] = true;
-        noRegexTable[SyntaxKind.CloseParenToken] = true;
-        noRegexTable[SyntaxKind.CloseBracketToken] = true;
-        noRegexTable[SyntaxKind.CloseBraceToken] = true;
-        noRegexTable[SyntaxKind.TrueKeyword] = true;
-        noRegexTable[SyntaxKind.FalseKeyword] = true;
-
-        // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
-        // classification on template strings. Because of the context free nature of templates,
-        // the only precise way to classify a template portion would be by propagating the stack across
-        // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
-        // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
-        // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
-        // Situations in which this fails are
-        //  1) When template strings are nested across different lines:
-        //          `hello ${ `world
-        //          ` }`
-        //
-        //     Where on the second line, you will get the closing of a template,
-        //     a closing curly, and a new template.
-        //
-        //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
-        //          `hello ${ () => {
-        //          return "world" } } `
-        //
-        //     Where on the second line, you will get the 'return' keyword,
-        //     a string literal, and a template end consisting of '} } `'.
-        const templateStack: SyntaxKind[] = [];
-
-        /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
-        function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind) {
-            if (isAccessibilityModifier(keyword1)) {
-                if (keyword2 === SyntaxKind.GetKeyword ||
-                    keyword2 === SyntaxKind.SetKeyword ||
-                    keyword2 === SyntaxKind.ConstructorKeyword ||
-                    keyword2 === SyntaxKind.StaticKeyword) {
-
-                    // Allow things like "public get", "public constructor" and "public static".
-                    // These are all legal.
-                    return true;
-                }
-
-                // Any other keyword following "public" is actually an identifier an not a real
-                // keyword.
-                return false;
-            }
-
-            // Assume any other keyword combination is legal.  This can be refined in the future
-            // if there are more cases we want the classifier to be better at.
-            return true;
-        }
-
-        function convertClassifications(classifications: Classifications, text: string): ClassificationResult {
-            const entries: ClassificationInfo[] = [];
-            const dense = classifications.spans;
-            let lastEnd = 0;
-
-            for (let i = 0; i < dense.length; i += 3) {
-                const start = dense[i];
-                const length = dense[i + 1];
-                const type = <ClassificationType>dense[i + 2];
-
-                // Make a whitespace entry between the last item and this one.
-                if (lastEnd >= 0) {
-                    const whitespaceLength = start - lastEnd;
-                    if (whitespaceLength > 0) {
-                        entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
-                    }
-                }
-
-                entries.push({ length, classification: convertClassification(type) });
-                lastEnd = start + length;
-            }
-
-            const whitespaceLength = text.length - lastEnd;
-            if (whitespaceLength > 0) {
-                entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
-            }
-
-            return { entries, finalLexState: classifications.endOfLineState };
-        }
-
-        function convertClassification(type: ClassificationType): TokenClass {
-            switch (type) {
-                case ClassificationType.comment: return TokenClass.Comment;
-                case ClassificationType.keyword: return TokenClass.Keyword;
-                case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
-                case ClassificationType.operator: return TokenClass.Operator;
-                case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
-                case ClassificationType.whiteSpace: return TokenClass.Whitespace;
-                case ClassificationType.punctuation: return TokenClass.Punctuation;
-                case ClassificationType.identifier:
-                case ClassificationType.className:
-                case ClassificationType.enumName:
-                case ClassificationType.interfaceName:
-                case ClassificationType.moduleName:
-                case ClassificationType.typeParameterName:
-                case ClassificationType.typeAliasName:
-                case ClassificationType.text:
-                case ClassificationType.parameterName:
-                default:
-                    return TokenClass.Identifier;
-            }
-        }
-
        function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
-            return convertClassifications(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
+            return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
        }

        // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
        // we will be more conservative in order to avoid conflicting with the syntactic classifier.
        function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
-            let offset = 0;
            let token = SyntaxKind.Unknown;
            let lastNonTriviaToken = SyntaxKind.Unknown;

-            // Empty out the template stack for reuse.
-            while (templateStack.length > 0) {
-                templateStack.pop();
-            }
-
-            // If we're in a string literal, then prepend: "\
-            // (and a newline).  That way when we lex we'll think we're still in a string literal.
+            // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
+            // classification on template strings. Because of the context free nature of templates,
+            // the only precise way to classify a template portion would be by propagating the stack across
+            // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
+            // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
+            // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
+            // Situations in which this fails are
+            //  1) When template strings are nested across different lines:
+            //          `hello ${ `world
+            //          ` }`
            //
-            // If we're in a multiline comment, then prepend: /*
-            // (and a newline).  That way when we lex we'll think we're still in a multiline comment.
-            switch (lexState) {
-                case EndOfLineState.InDoubleQuoteStringLiteral:
-                    text = "\"\\\n" + text;
-                    offset = 3;
-                    break;
-                case EndOfLineState.InSingleQuoteStringLiteral:
-                    text = "'\\\n" + text;
-                    offset = 3;
-                    break;
-                case EndOfLineState.InMultiLineCommentTrivia:
-                    text = "/*\n" + text;
-                    offset = 3;
-                    break;
-                case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
-                    text = "`\n" + text;
-                    offset = 2;
-                    break;
-                case EndOfLineState.InTemplateMiddleOrTail:
-                    text = "}\n" + text;
-                    offset = 2;
-                    // falls through
-                case EndOfLineState.InTemplateSubstitutionPosition:
-                    templateStack.push(SyntaxKind.TemplateHead);
-                    break;
+            //     Where on the second line, you will get the closing of a template,
+            //     a closing curly, and a new template.
+            //
+            //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
+            //          `hello ${ () => {
+            //          return "world" } } `
+            //
+            //     Where on the second line, you will get the 'return' keyword,
+            //     a string literal, and a template end consisting of '} } `'.
+            const templateStack: SyntaxKind[] = [];
+
+            const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
+            text = prefix + text;
+            const offset = prefix.length;
+            if (pushTemplate) {
+                templateStack.push(SyntaxKind.TemplateHead);
            }

            scanner.setText(text);

-            const result: Classifications = {
-                endOfLineState: EndOfLineState.None,
-                spans: []
-            };
+            let endOfLineState = EndOfLineState.None;
+            const spans: number[] = [];

            // We can run into an unfortunate interaction between the lexical and syntactic classifier
            // when the user is typing something generic.  Consider the case where the user types:
@ -196,57 +69,65 @@ namespace ts {

            do {
                token = scanner.scan();
-
                if (!isTrivia(token)) {
-                    if ((token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) && !noRegexTable[lastNonTriviaToken]) {
-                        if (scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
+                    handleToken();
+                    lastNonTriviaToken = token;
+                }
+                const end = scanner.getTextPos();
+                pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
+                if (end >= text.length) {
+                    const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
+                    if (end !== undefined) {
+                        endOfLineState = end;
+                    }
+                }
+            } while (token !== SyntaxKind.EndOfFileToken);
+
+            function handleToken(): void {
+                switch (token) {
+                    case SyntaxKind.SlashToken:
+                    case SyntaxKind.SlashEqualsToken:
+                        if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                            token = SyntaxKind.RegularExpressionLiteral;
                        }
-                    }
-                    else if (lastNonTriviaToken === SyntaxKind.DotToken && isKeyword(token)) {
-                        token = SyntaxKind.Identifier;
-                    }
-                    else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
-                        // We have two keywords in a row.  Only treat the second as a keyword if
-                        // it's a sequence that could legally occur in the language.  Otherwise
-                        // treat it as an identifier.  This way, if someone writes "private var"
-                        // we recognize that 'var' is actually an identifier here.
-                        token = SyntaxKind.Identifier;
-                    }
-                    else if (lastNonTriviaToken === SyntaxKind.Identifier &&
-                        token === SyntaxKind.LessThanToken) {
-                        // Could be the start of something generic.  Keep track of that by bumping
-                        // up the current count of generic contexts we may be in.
-                        angleBracketStack++;
-                    }
-                    else if (token === SyntaxKind.GreaterThanToken && angleBracketStack > 0) {
-                        // If we think we're currently in something generic, then mark that that
-                        // generic entity is complete.
-                        angleBracketStack--;
-                    }
-                    else if (token === SyntaxKind.AnyKeyword ||
-                        token === SyntaxKind.StringKeyword ||
-                        token === SyntaxKind.NumberKeyword ||
-                        token === SyntaxKind.BooleanKeyword ||
-                        token === SyntaxKind.SymbolKeyword) {
+                        break;
+                    case SyntaxKind.LessThanToken:
+                        if (lastNonTriviaToken === SyntaxKind.Identifier) {
+                            // Could be the start of something generic.  Keep track of that by bumping
+                            // up the current count of generic contexts we may be in.
+                            angleBracketStack++;
+                        }
+                        break;
+                    case SyntaxKind.GreaterThanToken:
+                        if (angleBracketStack > 0) {
+                            // If we think we're currently in something generic, then mark that that
+                            // generic entity is complete.
+                            angleBracketStack--;
+                        }
+                        break;
+                    case SyntaxKind.AnyKeyword:
+                    case SyntaxKind.StringKeyword:
+                    case SyntaxKind.NumberKeyword:
+                    case SyntaxKind.BooleanKeyword:
+                    case SyntaxKind.SymbolKeyword:
                        if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
                            // If it looks like we could be in something generic, don't classify this
                            // as a keyword.  We may just get overwritten by the syntactic classifier,
                            // causing a noisy experience for the user.
                            token = SyntaxKind.Identifier;
                        }
-                    }
-                    else if (token === SyntaxKind.TemplateHead) {
+                        break;
+                    case SyntaxKind.TemplateHead:
                        templateStack.push(token);
-                    }
-                    else if (token === SyntaxKind.OpenBraceToken) {
+                        break;
+                    case SyntaxKind.OpenBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            templateStack.push(token);
                        }
-                    }
-                    else if (token === SyntaxKind.CloseBraceToken) {
+                        break;
+                    case SyntaxKind.CloseBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
@ -268,202 +149,300 @@ namespace ts {
                                templateStack.pop();
                            }
                        }
-                    }
-
-                    lastNonTriviaToken = token;
-                }
-
-                processToken();
-            }
-            while (token !== SyntaxKind.EndOfFileToken);
-
-            return result;
-
-            function processToken(): void {
-                const start = scanner.getTokenPos();
-                const end = scanner.getTextPos();
-
-                addResult(start, end, classFromKind(token));
-
-                if (end >= text.length) {
-                    if (token === SyntaxKind.StringLiteral) {
-                        // Check to see if we finished up on a multiline string literal.
-                        const tokenText = scanner.getTokenText();
-                        if (scanner.isUnterminated()) {
-                            const lastCharIndex = tokenText.length - 1;
-
-                            let numBackslashes = 0;
-                            while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
-                                numBackslashes++;
-                            }
-
-                            // If we have an odd number of backslashes, then the multiline string is unclosed
-                            if (numBackslashes & 1) {
-                                const quoteChar = tokenText.charCodeAt(0);
-                                result.endOfLineState = quoteChar === CharacterCodes.doubleQuote
-                                    ? EndOfLineState.InDoubleQuoteStringLiteral
-                                    : EndOfLineState.InSingleQuoteStringLiteral;
-                            }
+                        break;
+                    default:
+                        if (!isKeyword(token)) {
+                            break;
                        }
-                    }
-                    else if (token === SyntaxKind.MultiLineCommentTrivia) {
-                        // Check to see if the multiline comment was unclosed.
-                        if (scanner.isUnterminated()) {
-                            result.endOfLineState = EndOfLineState.InMultiLineCommentTrivia;
+
+                        if (lastNonTriviaToken === SyntaxKind.DotToken) {
+                            token = SyntaxKind.Identifier;
                        }
-                    }
-                    else if (isTemplateLiteralKind(token)) {
-                        if (scanner.isUnterminated()) {
-                            if (token === SyntaxKind.TemplateTail) {
-                                result.endOfLineState = EndOfLineState.InTemplateMiddleOrTail;
-                            }
-                            else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) {
-                                result.endOfLineState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
-                            }
-                            else {
-                                Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
-                            }
+                        else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
+                            // We have two keywords in a row.  Only treat the second as a keyword if
+                            // it's a sequence that could legally occur in the language.  Otherwise
+                            // treat it as an identifier.  This way, if someone writes "private var"
+                            // we recognize that 'var' is actually an identifier here.
+                            token = SyntaxKind.Identifier;
                        }
+                }
+            }
+
+            return { endOfLineState, spans };
+        }
+
+        return { getClassificationsForLine, getEncodedLexicalClassifications };
+    }
+
+    /// We do not have full parser support to know whether a slash should be parsed as a regex or not.
+    /// If we considered every slash token to be a regex, we would misclassify cases like "1/2/3", where
+    /// we have a series of divide operators. This table, indexed by the preceding non-trivia token,
+    /// allows us to be more accurate by ruling out locations where a regexp cannot exist.
+    const noRegexTable: true[] = ts.arrayToNumericMap<SyntaxKind, true>([
+        SyntaxKind.Identifier,
+        SyntaxKind.StringLiteral,
+        SyntaxKind.NumericLiteral,
+        SyntaxKind.RegularExpressionLiteral,
+        SyntaxKind.ThisKeyword,
+        SyntaxKind.PlusPlusToken,
+        SyntaxKind.MinusMinusToken,
+        SyntaxKind.CloseParenToken,
+        SyntaxKind.CloseBracketToken,
+        SyntaxKind.CloseBraceToken,
+        SyntaxKind.TrueKeyword,
+        SyntaxKind.FalseKeyword,
+    ], token => token, () => true);
+
+    function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
+        switch (token) {
+            case SyntaxKind.StringLiteral: {
+                // Check to see if we finished up on a multiline string literal.
+                if (!scanner.isUnterminated()) return undefined;
+
+                const tokenText = scanner.getTokenText();
+                const lastCharIndex = tokenText.length - 1;
+                let numBackslashes = 0;
+                while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
+                    numBackslashes++;
+                }
+
+                // If we have an odd number of backslashes, then the multiline string is unclosed
+                if ((numBackslashes & 1) === 0) return undefined;
+                return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
+            }
+            case SyntaxKind.MultiLineCommentTrivia:
+                // Check to see if the multiline comment was unclosed.
+                return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
+            default:
+                if (isTemplateLiteralKind(token)) {
+                    if (!scanner.isUnterminated()) {
+                        return undefined;
                    }
-                    else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) {
-                        result.endOfLineState = EndOfLineState.InTemplateSubstitutionPosition;
+                    switch (token) {
+                        case SyntaxKind.TemplateTail:
+                            return EndOfLineState.InTemplateMiddleOrTail;
+                        case SyntaxKind.NoSubstitutionTemplateLiteral:
+                            return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
+                        default:
+                            throw Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
                    }
                }
-            }
+                return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
+        }
+    }

-            function addResult(start: number, end: number, classification: ClassificationType): void {
-                if (classification === ClassificationType.whiteSpace) {
-                    // Don't bother with whitespace classifications.  They're not needed.
-                    return;
-                }
+    function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
+        if (classification === ClassificationType.whiteSpace) {
+            // Don't bother with whitespace classifications.  They're not needed.
+            return;
+        }

-                if (start === 0 && offset > 0) {
-                    // We're classifying the first token, and this was a case where we prepended
-                    // text.  We should consider the start of this token to be at the start of
-                    // the original text.
-                    start += offset;
-                }
+        if (start === 0 && offset > 0) {
+            // We're classifying the first token, and this was a case where we prepended text.
+            // We should consider the start of this token to be at the start of the original text.
+            start += offset;
+        }

-                // All our tokens are in relation to the augmented text.  Move them back to be
-                // relative to the original text.
-                start -= offset;
-                end -= offset;
-                const length = end - start;
+        const length = end - start;
+        if (length > 0) {
+            // All our tokens are in relation to the augmented text.  Move them back to be
+            // relative to the original text.
+            result.push(start - offset, length, classification);
+        }
+    }

-                if (length > 0) {
-                    result.spans.push(start);
-                    result.spans.push(length);
-                    result.spans.push(classification);
+    function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
+        const entries: ClassificationInfo[] = [];
+        const dense = classifications.spans;
+        let lastEnd = 0;
+
+        for (let i = 0; i < dense.length; i += 3) {
+            const start = dense[i];
+            const length = dense[i + 1];
+            const type = <ClassificationType>dense[i + 2];
+
+            // Make a whitespace entry between the last item and this one.
+            if (lastEnd >= 0) {
+                const whitespaceLength = start - lastEnd;
+                if (whitespaceLength > 0) {
+                    entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
                }
            }
+
+            entries.push({ length, classification: convertClassification(type) });
+            lastEnd = start + length;
        }

-        function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
-            switch (token) {
-                case SyntaxKind.AsteriskToken:
-                case SyntaxKind.SlashToken:
-                case SyntaxKind.PercentToken:
-                case SyntaxKind.PlusToken:
-                case SyntaxKind.MinusToken:
-                case SyntaxKind.LessThanLessThanToken:
-                case SyntaxKind.GreaterThanGreaterThanToken:
-                case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
-                case SyntaxKind.LessThanToken:
-                case SyntaxKind.GreaterThanToken:
-                case SyntaxKind.LessThanEqualsToken:
-                case SyntaxKind.GreaterThanEqualsToken:
-                case SyntaxKind.InstanceOfKeyword:
-                case SyntaxKind.InKeyword:
-                case SyntaxKind.AsKeyword:
-                case SyntaxKind.EqualsEqualsToken:
-                case SyntaxKind.ExclamationEqualsToken:
-                case SyntaxKind.EqualsEqualsEqualsToken:
-                case SyntaxKind.ExclamationEqualsEqualsToken:
-                case SyntaxKind.AmpersandToken:
-                case SyntaxKind.CaretToken:
-                case SyntaxKind.BarToken:
-                case SyntaxKind.AmpersandAmpersandToken:
-                case SyntaxKind.BarBarToken:
-                case SyntaxKind.BarEqualsToken:
-                case SyntaxKind.AmpersandEqualsToken:
-                case SyntaxKind.CaretEqualsToken:
-                case SyntaxKind.LessThanLessThanEqualsToken:
-                case SyntaxKind.GreaterThanGreaterThanEqualsToken:
-                case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
-                case SyntaxKind.PlusEqualsToken:
-                case SyntaxKind.MinusEqualsToken:
-                case SyntaxKind.AsteriskEqualsToken:
-                case SyntaxKind.SlashEqualsToken:
-                case SyntaxKind.PercentEqualsToken:
-                case SyntaxKind.EqualsToken:
-                case SyntaxKind.CommaToken:
-                    return true;
-                default:
-                    return false;
-            }
+        const whitespaceLength = text.length - lastEnd;
+        if (whitespaceLength > 0) {
+            entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
        }

-        function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
-            switch (token) {
-                case SyntaxKind.PlusToken:
-                case SyntaxKind.MinusToken:
-                case SyntaxKind.TildeToken:
-                case SyntaxKind.ExclamationToken:
-                case SyntaxKind.PlusPlusToken:
-                case SyntaxKind.MinusMinusToken:
-                    return true;
-                default:
-                    return false;
-            }
+        return { entries, finalLexState: classifications.endOfLineState };
+    }
+
+    /**
+     * Collapses a fine-grained `ClassificationType` into the coarser `TokenClass`
+     * reported in a `ClassificationResult` entry.
+     *
+     * Comment, keyword, numeric literal, operator, string literal, whitespace and
+     * punctuation each map to their own `TokenClass`. All name-like classifications
+     * map to `TokenClass.Identifier`.
+     *
+     * The `default` arm is kept so that a classification not listed here is still
+     * reported as an identifier, as it was before this function was hoisted, rather
+     * than falling off the end of the switch and yielding `undefined`.
+     */
+    function convertClassification(type: ClassificationType): TokenClass {
+        switch (type) {
+            case ClassificationType.comment: return TokenClass.Comment;
+            case ClassificationType.keyword: return TokenClass.Keyword;
+            case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
+            case ClassificationType.operator: return TokenClass.Operator;
+            case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
+            case ClassificationType.whiteSpace: return TokenClass.Whitespace;
+            case ClassificationType.punctuation: return TokenClass.Punctuation;
+            case ClassificationType.identifier:
+            case ClassificationType.className:
+            case ClassificationType.enumName:
+            case ClassificationType.interfaceName:
+            case ClassificationType.moduleName:
+            case ClassificationType.typeParameterName:
+            case ClassificationType.typeAliasName:
+            case ClassificationType.text:
+            case ClassificationType.parameterName:
+            default:
+                // Name-like classifications, plus anything not handled above.
+                return TokenClass.Identifier;
+        }
+    }
+
+    /**
+     * Decides whether 'keyword2' may legally come directly after 'keyword1' in some language construct.
+     * Only accessibility modifiers restrict what may follow them; every other pairing is accepted.
+     */
+    function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
+        if (isAccessibilityModifier(keyword1)) {
+            // Allow things like "public get", "public constructor" and "public static".
+            // Any other keyword following "public" is actually an identifier, not a real keyword.
+            return keyword2 === SyntaxKind.GetKeyword
+                || keyword2 === SyntaxKind.SetKeyword
+                || keyword2 === SyntaxKind.ConstructorKeyword
+                || keyword2 === SyntaxKind.StaticKeyword;
+        }
+
+        // Assume any other keyword combination is legal.
+        // This can be refined in the future if there are more cases we want the classifier to be better at.
+        return true;
+    }
+
+    /**
+     * Picks the text to prepend to a line so that lexing resumes in the state the previous line ended in.
+     *
+     * @param lexState The end-of-line state carried over from the previous line.
+     * @returns The prefix text, plus 'pushTemplate: true' for InTemplateMiddleOrTail and
+     * InTemplateSubstitutionPosition. NOTE(review): how 'pushTemplate' is consumed is up to the
+     * caller - confirm there.
+     */
+    function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
+        switch (lexState) {
+            // Nothing carried over from the previous line: nothing to prepend.
+            case EndOfLineState.None:
+                return { prefix: "" };
+
+            // Inside a string literal: prepend the opening quote, a backslash and a newline.
+            // That way when we lex we'll think we're still in a string literal.
+            case EndOfLineState.InSingleQuoteStringLiteral:
+                return { prefix: "'\\\n" };
+            case EndOfLineState.InDoubleQuoteStringLiteral:
+                return { prefix: "\"\\\n" };
+
+            // Inside a multiline comment: prepend the comment opener and a newline.
+            // That way when we lex we'll think we're still in a multiline comment.
+            case EndOfLineState.InMultiLineCommentTrivia:
+                return { prefix: "/*\n" };
+
+            // Template states: a backtick or a closing brace (each followed by a newline), or no text at all.
+            case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
+                return { prefix: "`\n" };
+            case EndOfLineState.InTemplateMiddleOrTail:
+                return { prefix: "}\n", pushTemplate: true };
+            case EndOfLineState.InTemplateSubstitutionPosition:
+                return { prefix: "", pushTemplate: true };
+
+            default:
+                throw Debug.assertNever(lexState);
+        }
+    }
+
+    /**
+     * Reports whether 'token' is one of the kinds treated here as a binary expression operator:
+     * the arithmetic, shift, relational, equality, bitwise, logical and assignment operator tokens,
+     * the 'instanceof', 'in' and 'as' keywords, and the comma token.
+     */
+    function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
+        switch (token) {
+            // Arithmetic
+            case SyntaxKind.PlusToken:
+            case SyntaxKind.MinusToken:
+            case SyntaxKind.AsteriskToken:
+            case SyntaxKind.SlashToken:
+            case SyntaxKind.PercentToken:
+            // Shifts
+            case SyntaxKind.LessThanLessThanToken:
+            case SyntaxKind.GreaterThanGreaterThanToken:
+            case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
+            // Relational, including the keyword operators
+            case SyntaxKind.LessThanToken:
+            case SyntaxKind.GreaterThanToken:
+            case SyntaxKind.LessThanEqualsToken:
+            case SyntaxKind.GreaterThanEqualsToken:
+            case SyntaxKind.InstanceOfKeyword:
+            case SyntaxKind.InKeyword:
+            case SyntaxKind.AsKeyword:
+            // Equality
+            case SyntaxKind.EqualsEqualsToken:
+            case SyntaxKind.ExclamationEqualsToken:
+            case SyntaxKind.EqualsEqualsEqualsToken:
+            case SyntaxKind.ExclamationEqualsEqualsToken:
+            // Bitwise and logical
+            case SyntaxKind.AmpersandToken:
+            case SyntaxKind.BarToken:
+            case SyntaxKind.CaretToken:
+            case SyntaxKind.AmpersandAmpersandToken:
+            case SyntaxKind.BarBarToken:
+            // Assignment
+            case SyntaxKind.EqualsToken:
+            case SyntaxKind.PlusEqualsToken:
+            case SyntaxKind.MinusEqualsToken:
+            case SyntaxKind.AsteriskEqualsToken:
+            case SyntaxKind.SlashEqualsToken:
+            case SyntaxKind.PercentEqualsToken:
+            case SyntaxKind.LessThanLessThanEqualsToken:
+            case SyntaxKind.GreaterThanGreaterThanEqualsToken:
+            case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
+            case SyntaxKind.AmpersandEqualsToken:
+            case SyntaxKind.BarEqualsToken:
+            case SyntaxKind.CaretEqualsToken:
+            // Comma
+            case SyntaxKind.CommaToken:
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Reports whether 'token' is one of the kinds treated here as a prefix unary operator:
+     * the plus, minus, tilde, exclamation, plus-plus or minus-minus token.
+     */
+    function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
+        return token === SyntaxKind.PlusToken
+            || token === SyntaxKind.MinusToken
+            || token === SyntaxKind.TildeToken
+            || token === SyntaxKind.ExclamationToken
+            || token === SyntaxKind.PlusPlusToken
+            || token === SyntaxKind.MinusMinusToken;
+    }
+
+    function classFromKind(token: SyntaxKind): ClassificationType { // Maps a token kind to its ClassificationType: keyword, operator and punctuation checks come first, then individual literal and trivia kinds.
+        if (isKeyword(token)) {
+            return ClassificationType.keyword;
+        }
+        else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) { // Tested before the punctuation range below; presumably operator tokens also lie inside that range - confirm.
+            return ClassificationType.operator;
+        }
+        else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
+            return ClassificationType.punctuation;
        }

-        function isKeyword(token: SyntaxKind): boolean {
-            return token >= SyntaxKind.FirstKeyword && token <= SyntaxKind.LastKeyword;
-        }
-
-        function classFromKind(token: SyntaxKind): ClassificationType {
-            if (isKeyword(token)) {
-                return ClassificationType.keyword;
-            }
-            else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
-                return ClassificationType.operator;
-            }
-            else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
-                return ClassificationType.punctuation;
-            }
-
-            switch (token) {
-                case SyntaxKind.NumericLiteral:
-                    return ClassificationType.numericLiteral;
-                case SyntaxKind.StringLiteral:
+        switch (token) {
+            case SyntaxKind.NumericLiteral:
+                return ClassificationType.numericLiteral;
+            case SyntaxKind.StringLiteral:
+                return ClassificationType.stringLiteral;
+            case SyntaxKind.RegularExpressionLiteral:
+                return ClassificationType.regularExpressionLiteral;
+            case SyntaxKind.ConflictMarkerTrivia: // Conflict markers are classified as comments, like the two comment trivia kinds below.
+            case SyntaxKind.MultiLineCommentTrivia:
+            case SyntaxKind.SingleLineCommentTrivia:
+                return ClassificationType.comment;
+            case SyntaxKind.WhitespaceTrivia:
+            case SyntaxKind.NewLineTrivia:
+                return ClassificationType.whiteSpace;
+            case SyntaxKind.Identifier:
+            default: // Template literal kinds are classified as string literals; any other remaining kind is classified as an identifier.
+                if (isTemplateLiteralKind(token)) {
                    return ClassificationType.stringLiteral;
-                case SyntaxKind.RegularExpressionLiteral:
-                    return ClassificationType.regularExpressionLiteral;
-                case SyntaxKind.ConflictMarkerTrivia:
-                case SyntaxKind.MultiLineCommentTrivia:
-                case SyntaxKind.SingleLineCommentTrivia:
-                    return ClassificationType.comment;
-                case SyntaxKind.WhitespaceTrivia:
-                case SyntaxKind.NewLineTrivia:
-                    return ClassificationType.whiteSpace;
-                case SyntaxKind.Identifier:
-                default:
-                    if (isTemplateLiteralKind(token)) {
-                        return ClassificationType.stringLiteral;
-                    }
-                    return ClassificationType.identifier;
-            }
+                }
+                return ClassificationType.identifier;
        }
-
-        return {
-            getClassificationsForLine,
-            getEncodedLexicalClassifications
-        };
    }

    /* @internal */
    export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: UnderscoreEscapedMap<true>, span: TextSpan): ClassifiedSpan[] {
-        return convertClassifications(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
+        return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span)); // Expands the encoded semantic classifications into a ClassifiedSpan array.
    }

    function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
@ -583,7 +562,7 @@ namespace ts {
        }
    }

-    function convertClassifications(classifications: Classifications): ClassifiedSpan[] {
+    function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
        Debug.assert(classifications.spans.length % 3 === 0);
        const dense = classifications.spans;
        const result: ClassifiedSpan[] = [];
@ -599,7 +578,7 @@ namespace ts {

    /* @internal */
    export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
-        return convertClassifications(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
+        return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span)); // Expands the encoded syntactic classifications into a ClassifiedSpan array.
    }

    /* @internal */