Fixes #115662: Add support for escaping @ as @@ in regular expressions to avoid replacement

This commit is contained in:
Alexandru Dima 2021-02-03 13:57:18 +01:00
parent 06c0dbe616
commit 538f72e2a7
No known key found for this signature in database
GPG key ID: 6E58D7B045760DA0
4 changed files with 93 additions and 26 deletions

View file

@ -81,12 +81,21 @@ function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false):
/**
* Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set, and the 'u' flag if 'unicode' is set.
* Also replaces @\w+ sequences with the content of the specified attribute.
* @\w+ replacement can be avoided by escaping `@` signs with another `@` sign.
* @example /@attr/ will be replaced with the value of lexer[attr]
* @example /@@text/ will not be replaced and will become /@text/.
*/
function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
let n = 0;
while (str.indexOf('@') >= 0 && n < 5) { // at most 5 expansions
n++;
str = str.replace(/@(\w+)/g, function (s, attr?) {
let hadExpansion: boolean;
do {
hadExpansion = false;
str = str.replace(/(.|^)@(\w+)/g, function (s, charBeforeAtSign, attr?) {
if (charBeforeAtSign === '@') {
// do not expand @@
return s;
}
hadExpansion = true;
let sub = '';
if (typeof (lexer[attr]) === 'string') {
sub = lexer[attr];
@ -99,9 +108,13 @@ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
throw monarchCommon.createError(lexer, 'attribute reference \'' + attr + '\' must be a string, used at: ' + str);
}
}
return (monarchCommon.empty(sub) ? '' : '(?:' + sub + ')');
return charBeforeAtSign + (monarchCommon.empty(sub) ? '' : '(?:' + sub + ')');
});
}
n++;
} while (hadExpansion && n < 5);
// handle escaped @@
str = str.replace(/@@/g, '@');
let flags = (lexer.ignoreCase ? 'i' : '') + (lexer.unicode ? 'u' : '');
return new RegExp(str, flags);

View file

@ -46,6 +46,10 @@ export interface IMonarchLanguage {
* Defaults to false
*/
includeLF?: boolean;
/**
* Other keys that can be referred to by the tokenizer.
*/
[key: string]: any;
}
/**

View file

@ -19,6 +19,17 @@ suite('Monarch', () => {
return new MonarchTokenizer(modeService, null!, languageId, compile(languageId, language));
}
/**
 * Tokenizes `lines` in order and collects the tokens produced for each line.
 * Starts from the tokenizer's initial state; the end state returned for one
 * line is passed in as the start state of the following line.
 */
function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
let currentState = tokenizer.getInitialState();
return lines.map((text) => {
const lineResult = tokenizer.tokenize(text, true, currentState, 0);
// carry the state forward before handing back this line's tokens
currentState = lineResult.endState;
return lineResult.tokens;
});
}
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
const modeService = new ModeServiceImpl();
const innerModeRegistration = ModesRegistry.registerLanguage({
@ -65,13 +76,7 @@ suite('Monarch', () => {
`""")`,
];
const actualTokens: Token[][] = [];
let state = tokenizer.getInitialState();
for (const line of lines) {
const result = tokenizer.tokenize(line, true, state, 0);
actualTokens.push(result.tokens);
state = result.endState;
}
const actualTokens = getTokens(tokenizer, lines);
assert.deepStrictEqual(actualTokens, [
[
@ -140,13 +145,7 @@ suite('Monarch', () => {
`But the line was empty. This line should not be commented.`,
];
const actualTokens: Token[][] = [];
let state = tokenizer.getInitialState();
for (const line of lines) {
const result = tokenizer.tokenize(line, true, state, 0);
actualTokens.push(result.tokens);
state = result.endState;
}
const actualTokens = getTokens(tokenizer, lines);
assert.deepStrictEqual(actualTokens, [
[new Token(0, 'comment.test', 'test')],
@ -190,13 +189,7 @@ suite('Monarch', () => {
`PRINT 2*3:*FX200, 3`
];
const actualTokens: Token[][] = [];
let state = tokenizer.getInitialState();
for (const line of lines) {
const result = tokenizer.tokenize(line, true, state, 0);
actualTokens.push(result.tokens);
state = result.endState;
}
const actualTokens = getTokens(tokenizer, lines);
assert.deepStrictEqual(actualTokens, [
[
@ -218,4 +211,57 @@ suite('Monarch', () => {
]);
});
// Regression test for issue #115662: tokenizes the single line `@ham` with two
// different grammars and expects each to yield one 'ham.test' token at offset 0.
test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
const modeService = new ModeServiceImpl();
// Grammar 1: the rule's regex references `@uselessReplaceKey1`; the attributes
// uselessReplaceKey1..uselessReplaceKey5 form a chain of references that ends
// in the string '@ham'.
const tokenizer1 = createMonarchTokenizer(modeService, 'test', {
ignoreCase: false,
uselessReplaceKey1: '@uselessReplaceKey2',
uselessReplaceKey2: '@uselessReplaceKey3',
uselessReplaceKey3: '@uselessReplaceKey4',
uselessReplaceKey4: '@uselessReplaceKey5',
// `'@ham' || ''` evaluates to '@ham' (a non-empty string is truthy).
uselessReplaceKey5: '@ham' || '',
tokenizer: {
root: [
{
// `/@\w+/.test('@ham')` is always true, so the first RegExp
// (source `^@uselessReplaceKey1$`) is the one that gets used.
regex: /@\w+/.test('@ham')
? new RegExp(`^${'@uselessReplaceKey1'}$`)
: new RegExp(`^${'@ham'}$`),
action: { token: 'ham' }
},
],
},
});
// Grammar 2: no extra attributes; the rule's regex writes the `@` sign doubled (`@@ham`).
const tokenizer2 = createMonarchTokenizer(modeService, 'test', {
ignoreCase: false,
tokenizer: {
root: [
{
regex: /@@ham/,
action: { token: 'ham' }
},
],
},
});
// Input shared by both tokenizers: one line containing the literal text `@ham`.
const lines = [
`@ham`
];
const actualTokens1 = getTokens(tokenizer1, lines);
assert.deepStrictEqual(actualTokens1, [
[
new Token(0, 'ham.test', 'test'),
]
]);
// The second grammar is expected to produce exactly the same tokens for `@ham`.
const actualTokens2 = getTokens(tokenizer2, lines);
assert.deepStrictEqual(actualTokens2, [
[
new Token(0, 'ham.test', 'test'),
]
]);
});
});

4
src/vs/monaco.d.ts vendored
View file

@ -6505,6 +6505,10 @@ declare namespace monaco.languages {
* Defaults to false
*/
includeLF?: boolean;
/**
* Other keys that can be referred to by the tokenizer.
*/
[key: string]: any;
}
/**