Fixes #115662: Add support for escaping @ as @@ in regular expressions to avoid replacement

2021-02-03 13:57:18 +01:00 · 2021-02-03 13:57:18 +01:00 · 538f72e2a7
commit 538f72e2a7
parent 06c0dbe616
4 changed files with 93 additions and 26 deletions
--- a/src/vs/editor/standalone/common/monarch/monarchCompile.ts
+++ b/src/vs/editor/standalone/common/monarch/monarchCompile.ts
@@ -81,12 +81,21 @@ function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false):
 /**
 * Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set, and the 'u' flag if 'unicode' is set.
 * Also replaces @\w+ or sequences with the content of the specified attribute
+ * @\w+ replacement can be avoided by escaping `@` signs with another `@` sign.
+ * @example /@attr/ will be replaced with the value of lexer[attr]
+ * @example /@@text/ will not be replaced and will become /@text/.
 */
 function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
 	let n = 0;
-	while (str.indexOf('@') >= 0 && n < 5) { // at most 5 expansions
-		n++;
-		str = str.replace(/@(\w+)/g, function (s, attr?) {
+	let hadExpansion: boolean;
+	do {
+		hadExpansion = false;
+		str = str.replace(/(.|^)@(\w+)/g, function (s, charBeforeAtSign, attr?) {
+			if (charBeforeAtSign === '@') {
+				// do not expand @@
+				return s;
+			}
+			hadExpansion = true;
 			let sub = '';
 			if (typeof (lexer[attr]) === 'string') {
 				sub = lexer[attr];
@@ -99,9 +108,13 @@ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
 					throw monarchCommon.createError(lexer, 'attribute reference \'' + attr + '\' must be a string, used at: ' + str);
 				}
 			}
-			return (monarchCommon.empty(sub) ? '' : '(?:' + sub + ')');
+			return charBeforeAtSign + (monarchCommon.empty(sub) ? '' : '(?:' + sub + ')');
 		});
-	}
+		n++;
+	} while (hadExpansion && n < 5);
+
+	// handle escaped @@
+	str = str.replace(/@@/g, '@');

 	let flags = (lexer.ignoreCase ? 'i' : '') + (lexer.unicode ? 'u' : '');
 	return new RegExp(str, flags);
--- a/src/vs/editor/standalone/common/monarch/monarchTypes.ts
+++ b/src/vs/editor/standalone/common/monarch/monarchTypes.ts
@@ -46,6 +46,10 @@ export interface IMonarchLanguage {
 	 * Defaults to false
 	 */
 	includeLF?: boolean;
+	/**
+	 * Other keys that can be referred to by the tokenizer.
+	 */
+	[key: string]: any;
 }

 /**
--- a/src/vs/editor/standalone/test/monarch/monarch.test.ts
+++ b/src/vs/editor/standalone/test/monarch/monarch.test.ts
@@ -19,6 +19,17 @@ suite('Monarch', () => {
 		return new MonarchTokenizer(modeService, null!, languageId, compile(languageId, language));
 	}

+	function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
+		const actualTokens: Token[][] = [];
+		let state = tokenizer.getInitialState();
+		for (const line of lines) {
+			const result = tokenizer.tokenize(line, true, state, 0);
+			actualTokens.push(result.tokens);
+			state = result.endState;
+		}
+		return actualTokens;
+	}
+
 	test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
 		const modeService = new ModeServiceImpl();
 		const innerModeRegistration = ModesRegistry.registerLanguage({
@@ -65,13 +76,7 @@ suite('Monarch', () => {
 			`""")`,
 		];

-		const actualTokens: Token[][] = [];
-		let state = tokenizer.getInitialState();
-		for (const line of lines) {
-			const result = tokenizer.tokenize(line, true, state, 0);
-			actualTokens.push(result.tokens);
-			state = result.endState;
-		}
+		const actualTokens = getTokens(tokenizer, lines);

 		assert.deepStrictEqual(actualTokens, [
 			[
@@ -140,13 +145,7 @@ suite('Monarch', () => {
 			`But the line was empty. This line should not be commented.`,
 		];

-		const actualTokens: Token[][] = [];
-		let state = tokenizer.getInitialState();
-		for (const line of lines) {
-			const result = tokenizer.tokenize(line, true, state, 0);
-			actualTokens.push(result.tokens);
-			state = result.endState;
-		}
+		const actualTokens = getTokens(tokenizer, lines);

 		assert.deepStrictEqual(actualTokens, [
 			[new Token(0, 'comment.test', 'test')],
@@ -190,13 +189,7 @@ suite('Monarch', () => {
 			`PRINT 2*3:*FX200, 3`
 		];

-		const actualTokens: Token[][] = [];
-		let state = tokenizer.getInitialState();
-		for (const line of lines) {
-			const result = tokenizer.tokenize(line, true, state, 0);
-			actualTokens.push(result.tokens);
-			state = result.endState;
-		}
+		const actualTokens = getTokens(tokenizer, lines);

 		assert.deepStrictEqual(actualTokens, [
 			[
@@ -218,4 +211,57 @@ suite('Monarch', () => {
 		]);
 	});

+	test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
+		const modeService = new ModeServiceImpl();
+
+		const tokenizer1 = createMonarchTokenizer(modeService, 'test', {
+			ignoreCase: false,
+			uselessReplaceKey1: '@uselessReplaceKey2',
+			uselessReplaceKey2: '@uselessReplaceKey3',
+			uselessReplaceKey3: '@uselessReplaceKey4',
+			uselessReplaceKey4: '@uselessReplaceKey5',
+			uselessReplaceKey5: '@ham' || '',
+			tokenizer: {
+				root: [
+					{
+						regex: /@\w+/.test('@ham')
+							? new RegExp(`^${'@uselessReplaceKey1'}$`)
+							: new RegExp(`^${'@ham'}$`),
+						action: { token: 'ham' }
+					},
+				],
+			},
+		});
+
+		const tokenizer2 = createMonarchTokenizer(modeService, 'test', {
+			ignoreCase: false,
+			tokenizer: {
+				root: [
+					{
+						regex: /@@ham/,
+						action: { token: 'ham' }
+					},
+				],
+			},
+		});
+
+		const lines = [
+			`@ham`
+		];
+
+		const actualTokens1 = getTokens(tokenizer1, lines);
+		assert.deepStrictEqual(actualTokens1, [
+			[
+				new Token(0, 'ham.test', 'test'),
+			]
+		]);
+
+		const actualTokens2 = getTokens(tokenizer2, lines);
+		assert.deepStrictEqual(actualTokens2, [
+			[
+				new Token(0, 'ham.test', 'test'),
+			]
+		]);
+	});
+
 });
--- a/src/vs/monaco.d.ts
+++ b/src/vs/monaco.d.ts
@@ -6505,6 +6505,10 @@ declare namespace monaco.languages {
 		 * Defaults to false
 		 */
 		includeLF?: boolean;
+		/**
+		 * Other keys that can be referred to by the tokenizer.
+		 */
+		[key: string]: any;
 	}

 	/**