Fixes #115662: Add support for escaping `@` as `@@` in regular expressions to avoid replacement
This commit is contained in:
parent
06c0dbe616
commit
538f72e2a7
4 changed files with 93 additions and 26 deletions
|
@ -81,12 +81,21 @@ function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false):
|
|||
/**
|
||||
* Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set, and the 'u' flag if 'unicode' is set.
|
||||
* Also replaces @\w+ sequences with the content of the specified lexer attribute
|
||||
* @\w+ replacement can be avoided by escaping `@` signs with another `@` sign.
|
||||
* @example /@attr/ will be replaced with the value of lexer[attr]
|
||||
* @example /@@text/ will not be replaced and will become /@text/.
|
||||
*/
|
||||
function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
|
||||
let n = 0;
|
||||
while (str.indexOf('@') >= 0 && n < 5) { // at most 5 expansions
|
||||
n++;
|
||||
str = str.replace(/@(\w+)/g, function (s, attr?) {
|
||||
let hadExpansion: boolean;
|
||||
do {
|
||||
hadExpansion = false;
|
||||
str = str.replace(/(.|^)@(\w+)/g, function (s, charBeforeAtSign, attr?) {
|
||||
if (charBeforeAtSign === '@') {
|
||||
// do not expand @@
|
||||
return s;
|
||||
}
|
||||
hadExpansion = true;
|
||||
let sub = '';
|
||||
if (typeof (lexer[attr]) === 'string') {
|
||||
sub = lexer[attr];
|
||||
|
@ -99,9 +108,13 @@ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
|
|||
throw monarchCommon.createError(lexer, 'attribute reference \'' + attr + '\' must be a string, used at: ' + str);
|
||||
}
|
||||
}
|
||||
return (monarchCommon.empty(sub) ? '' : '(?:' + sub + ')');
|
||||
return charBeforeAtSign + (monarchCommon.empty(sub) ? '' : '(?:' + sub + ')');
|
||||
});
|
||||
}
|
||||
n++;
|
||||
} while (hadExpansion && n < 5);
|
||||
|
||||
// handle escaped @@
|
||||
str = str.replace(/@@/g, '@');
|
||||
|
||||
let flags = (lexer.ignoreCase ? 'i' : '') + (lexer.unicode ? 'u' : '');
|
||||
return new RegExp(str, flags);
|
||||
|
|
|
@ -46,6 +46,10 @@ export interface IMonarchLanguage {
|
|||
* Defaults to false
|
||||
*/
|
||||
includeLF?: boolean;
|
||||
/**
|
||||
* Other keys that can be referred to by the tokenizer.
|
||||
*/
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -19,6 +19,17 @@ suite('Monarch', () => {
|
|||
return new MonarchTokenizer(modeService, null!, languageId, compile(languageId, language));
|
||||
}
|
||||
|
||||
/**
 * Tokenizes `lines` in order with `tokenizer` and collects the tokens produced for each line.
 * Tokenization starts from the tokenizer's initial state; the end state returned for one line
 * is used as the starting state for the next line.
 *
 * @param tokenizer The tokenizer to run over the lines.
 * @param lines The input lines, processed first to last.
 * @returns One token array per input line, in input order.
 */
function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
|
||||
const actualTokens: Token[][] = [];
|
||||
let state = tokenizer.getInitialState();
|
||||
for (const line of lines) {
|
||||
// NOTE(review): the `true` and `0` arguments are presumably "has EOL" and "offset delta" — confirm against MonarchTokenizer.tokenize.
const result = tokenizer.tokenize(line, true, state, 0);
|
||||
actualTokens.push(result.tokens);
|
||||
// Carry the end state forward so the next line continues where this one stopped.
state = result.endState;
|
||||
}
|
||||
return actualTokens;
|
||||
}
|
||||
|
||||
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
|
||||
const modeService = new ModeServiceImpl();
|
||||
const innerModeRegistration = ModesRegistry.registerLanguage({
|
||||
|
@ -65,13 +76,7 @@ suite('Monarch', () => {
|
|||
`""")`,
|
||||
];
|
||||
|
||||
const actualTokens: Token[][] = [];
|
||||
let state = tokenizer.getInitialState();
|
||||
for (const line of lines) {
|
||||
const result = tokenizer.tokenize(line, true, state, 0);
|
||||
actualTokens.push(result.tokens);
|
||||
state = result.endState;
|
||||
}
|
||||
const actualTokens = getTokens(tokenizer, lines);
|
||||
|
||||
assert.deepStrictEqual(actualTokens, [
|
||||
[
|
||||
|
@ -140,13 +145,7 @@ suite('Monarch', () => {
|
|||
`But the line was empty. This line should not be commented.`,
|
||||
];
|
||||
|
||||
const actualTokens: Token[][] = [];
|
||||
let state = tokenizer.getInitialState();
|
||||
for (const line of lines) {
|
||||
const result = tokenizer.tokenize(line, true, state, 0);
|
||||
actualTokens.push(result.tokens);
|
||||
state = result.endState;
|
||||
}
|
||||
const actualTokens = getTokens(tokenizer, lines);
|
||||
|
||||
assert.deepStrictEqual(actualTokens, [
|
||||
[new Token(0, 'comment.test', 'test')],
|
||||
|
@ -190,13 +189,7 @@ suite('Monarch', () => {
|
|||
`PRINT 2*3:*FX200, 3`
|
||||
];
|
||||
|
||||
const actualTokens: Token[][] = [];
|
||||
let state = tokenizer.getInitialState();
|
||||
for (const line of lines) {
|
||||
const result = tokenizer.tokenize(line, true, state, 0);
|
||||
actualTokens.push(result.tokens);
|
||||
state = result.endState;
|
||||
}
|
||||
const actualTokens = getTokens(tokenizer, lines);
|
||||
|
||||
assert.deepStrictEqual(actualTokens, [
|
||||
[
|
||||
|
@ -218,4 +211,57 @@ suite('Monarch', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
// Regression test for issue #115662: a grammar must be able to match a literal `@word` in the input.
// Two grammars are built and both are expected to tokenize the single line `@ham` as one 'ham' token.
test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
|
||||
const modeService = new ModeServiceImpl();
|
||||

||||
// Grammar 1: a chain of five attributes, each referencing the next, ending in the literal text '@ham'.
// NOTE(review): this appears to rely on compileRegExp stopping after 5 expansion passes, so that the
// final '@ham' is left unexpanded and matched literally — confirm against compileRegExp.
const tokenizer1 = createMonarchTokenizer(modeService, 'test', {
|
||||
ignoreCase: false,
|
||||
uselessReplaceKey1: '@uselessReplaceKey2',
|
||||
uselessReplaceKey2: '@uselessReplaceKey3',
|
||||
uselessReplaceKey3: '@uselessReplaceKey4',
|
||||
uselessReplaceKey4: '@uselessReplaceKey5',
|
||||
// `'@ham' || ''` always evaluates to '@ham' because a non-empty string is truthy.
uselessReplaceKey5: '@ham' || '',
|
||||
tokenizer: {
|
||||
root: [
|
||||
{
|
||||
// /@\w+/.test('@ham') is always true, so the first branch (`^@uselessReplaceKey1$`) is the one used.
regex: /@\w+/.test('@ham')
|
||||
? new RegExp(`^${'@uselessReplaceKey1'}$`)
|
||||
: new RegExp(`^${'@ham'}$`),
|
||||
action: { token: 'ham' }
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||

||||
// Grammar 2: uses the `@@` escape directly; `@@ham` is expected to match the literal text `@ham`
// rather than being replaced by an attribute value.
const tokenizer2 = createMonarchTokenizer(modeService, 'test', {
|
||||
ignoreCase: false,
|
||||
tokenizer: {
|
||||
root: [
|
||||
{
|
||||
regex: /@@ham/,
|
||||
action: { token: 'ham' }
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||

||||
// Single input line shared by both tokenizers.
const lines = [
|
||||
`@ham`
|
||||
];
|
||||

||||
// Both grammars must yield exactly one token starting at offset 0 with type 'ham.test'.
// NOTE(review): the third Token argument is presumably the language id — confirm against Token's constructor.
const actualTokens1 = getTokens(tokenizer1, lines);
|
||||
assert.deepStrictEqual(actualTokens1, [
|
||||
[
|
||||
new Token(0, 'ham.test', 'test'),
|
||||
]
|
||||
]);
|
||||

||||
const actualTokens2 = getTokens(tokenizer2, lines);
|
||||
assert.deepStrictEqual(actualTokens2, [
|
||||
[
|
||||
new Token(0, 'ham.test', 'test'),
|
||||
]
|
||||
]);
|
||||
});
|
||||
|
||||
});
|
||||
|
|
4
src/vs/monaco.d.ts
vendored
4
src/vs/monaco.d.ts
vendored
|
@ -6505,6 +6505,10 @@ declare namespace monaco.languages {
|
|||
* Defaults to false
|
||||
*/
|
||||
includeLF?: boolean;
|
||||
/**
|
||||
* Other keys that can be referred to by the tokenizer.
|
||||
*/
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue