search: intelligently normalize crlf in regex search
Fixes https://github.com/microsoft/vscode/issues/100569
This commit is contained in:
parent
7dde16206f
commit
5afc5cd160
|
@ -67,6 +67,7 @@
|
|||
"vscode-nsfw": "1.2.9",
|
||||
"vscode-oniguruma": "1.3.1",
|
||||
"vscode-proxy-agent": "^0.5.2",
|
||||
"vscode-regexpp": "^3.1.0",
|
||||
"vscode-ripgrep": "^1.11.0",
|
||||
"vscode-sqlite3": "4.0.10",
|
||||
"vscode-textmate": "5.2.0",
|
||||
|
@ -194,4 +195,4 @@
|
|||
"windows-mutex": "0.3.0",
|
||||
"windows-process-tree": "0.2.4"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ import { URI } from 'vs/base/common/uri';
|
|||
import { Progress } from 'vs/platform/progress/common/progress';
|
||||
import { IExtendedExtensionSearchOptions, SearchError, SearchErrorCode, serializeSearchError } from 'vs/workbench/services/search/common/search';
|
||||
import { Range, TextSearchComplete, TextSearchContext, TextSearchMatch, TextSearchOptions, TextSearchPreviewOptions, TextSearchQuery, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';
|
||||
import { RegExpParser, RegExpVisitor, AST as ReAST } from 'vscode-regexpp';
|
||||
import { rgPath } from 'vscode-ripgrep';
|
||||
import { anchorGlob, createTextSearchResult, IOutputChannel, Maybe } from './ripgrepSearchUtils';
|
||||
|
||||
|
@ -541,10 +542,78 @@ export interface IRgSubmatch {
|
|||
|
||||
export type IRgBytesOrText = { bytes: string } | { text: string };
|
||||
|
||||
/** Tells whether the given regex AST node is a lookbehind assertion, e.g. `(?<=\n)`. */
function isLookBehind(node: ReAST.Node): boolean {
	if (node.type !== 'Assertion') {
		return false;
	}
	return node.kind === 'lookbehind';
}
|
||||
|
||||
/**
 * Rewrites the `\n` escapes of a regex source so that they also accept a
 * CRLF line ending (`\r?\n`).
 *
 * The pattern is parsed into an AST and each `\n` character is rewritten
 * according to where it sits:
 * - plain character:            `fo\no`     -> `fo\r?\no`
 * - directly quantified:        `fo\n+o`    -> `fo(?:\r?\n)+o`
 * - inside a bracket expression `[a-z\n1]`  -> `(?:[a-z1]|\r?\n)`
 * - endpoint of a class range   `[\n-a]`    -> left untouched
 * - anywhere inside a lookbehind            -> left untouched, see #100569
 * - inside a negated bracket expression     -> left untouched
 *
 * @param pattern The regex source to rewrite.
 * @returns The rewritten source, or `pattern` unchanged when it cannot be parsed.
 */
export function fixRegexNewline(pattern: string): string {
	// The pattern is parsed anew on every call; nothing is cached.
	let re: ReAST.Pattern;
	try {
		re = new RegExpParser().parsePattern(pattern);
	} catch {
		// Not something the parser accepts: hand it back as-is.
		return pattern;
	}

	let output = '';
	let lastEmittedIndex = 0;
	const replace = (start: number, end: number, text: string) => {
		if (start < lastEmittedIndex) {
			// This range was already swallowed by an earlier replacement (e.g. a
			// second \n in a bracket expression that has been rewritten as a whole).
			// Emitting again would duplicate output.
			return;
		}

		output += pattern.slice(lastEmittedIndex, start) + text;
		lastEmittedIndex = end;
	};

	// Stack of the enclosing nodes we care about, innermost first.
	const context: ReAST.Node[] = [];
	const visitor = new RegExpVisitor({
		onCharacterEnter(char) {
			if (char.raw !== '\\n') {
				return;
			}

			const parent = context[0];
			if (!parent) {
				// simple char, \n -> \r?\n
				replace(char.start, char.end, '\\r?\\n');
			} else if (context.some(isLookBehind)) {
				// no-op in a lookbehind, see #100569
			} else if (parent.type === 'CharacterClass') {
				if (parent.negate) {
					// Hoisting \n out of [^...] as an alternative would invert the
					// meaning of the class, so negated classes are left as written.
					return;
				}

				// in a bracket expr, [a-z\n] -> (?:[a-z]|\r?\n)
				const otherContent = pattern.slice(parent.start + 1, char.start) + pattern.slice(char.end, parent.end - 1);
				replace(parent.start, parent.end, otherContent === '' ? '\\r?\\n' : `(?:[${otherContent}]|\\r?\\n)`);
			} else if (parent.type === 'Quantifier') {
				// group it so that the quantifier applies to the whole \r?\n
				replace(char.start, char.end, '(?:\\r?\\n)');
			}
			// Any other parent (e.g. a character class range) is left untouched.
		},
		onQuantifierEnter(node) {
			context.unshift(node);
		},
		onQuantifierLeave() {
			context.shift();
		},
		onCharacterClassRangeEnter(node) {
			context.unshift(node);
		},
		onCharacterClassRangeLeave() {
			context.shift();
		},
		onCharacterClassEnter(node) {
			context.unshift(node);
		},
		onCharacterClassLeave() {
			context.shift();
		},
		onAssertionEnter(node) {
			// Only lookbehinds are tracked. They go on the same end of the stack as
			// every other node so that enter/leave stay balanced when nested.
			if (isLookBehind(node)) {
				context.unshift(node);
			}
		},
		onAssertionLeave(node) {
			if (isLookBehind(node)) {
				context.shift();
			}
		},
	});

	visitor.visit(re);
	// Flush whatever follows the last replacement.
	output += pattern.slice(lastEmittedIndex);
	return output;
}
|
||||
|
||||
export function fixNewline(pattern: string): string {
|
||||
|
|
|
@ -28,7 +28,24 @@ suite('RipgrepTextSearchEngine', () => {
|
|||
assert.equal(unicodeEscapesToPCRE2(''), '');
|
||||
});
|
||||
|
||||
// Checks the rewritten regex *source* produced by fixRegexNewline.
test('fixRegexNewline - src', () => {
	// Each row is [input pattern, expected rewritten pattern].
	const ttable = [
		// nothing to rewrite
		['foo', 'foo'],
		// unparsable patterns come back unchanged
		['invalid(', 'invalid('],
		// plain \n characters
		['fo\\no', 'fo\\r?\\no'],
		['f\\no\\no', 'f\\r?\\no\\r?\\no'],
		// \n inside a bracket expression is hoisted into an alternation
		['f[a-z\\n1]', 'f(?:[a-z1]|\\r?\\n)'],
		// \n as a range endpoint is left alone
		['f[\\n-a]', 'f[\\n-a]'],
		// \n inside a lookbehind is left alone
		['(?<=\\n)\\w', '(?<=\\n)\\w'],
		// quantified \n is wrapped in a group
		['fo\\n+o', 'fo(?:\\r?\\n)+o'],
	];

	for (const [input, expected] of ttable) {
		assert.equal(fixRegexNewline(input), expected, `${input} -> ${expected}`);
	}
});
|
||||
|
||||
test('fixRegexNewline - re', () => {
|
||||
function testFixRegexNewline([inputReg, testStr, shouldMatch]: readonly [string, string, boolean]): void {
|
||||
const fixed = fixRegexNewline(inputReg);
|
||||
const reg = new RegExp(fixed);
|
||||
|
@ -48,10 +65,12 @@ suite('RipgrepTextSearchEngine', () => {
|
|||
|
||||
['foo\\n+abc', 'foo\r\nabc', true],
|
||||
['foo\\n+abc', 'foo\n\n\nabc', true],
|
||||
['foo\\n+abc', 'foo\r\n\r\n\r\nabc', true],
|
||||
['foo[\\n-9]+abc', 'foo1abc', true],
|
||||
] as const).forEach(testFixRegexNewline);
|
||||
});
|
||||
|
||||
test('fixNewline', () => {
|
||||
test('fixNewline - matching', () => {
|
||||
function testFixNewline([inputReg, testStr, shouldMatch = true]: readonly [string, string, boolean?]): void {
|
||||
const fixed = fixNewline(inputReg);
|
||||
const reg = new RegExp(fixed);
|
||||
|
|
|
@ -9735,6 +9735,11 @@ vscode-proxy-agent@^0.5.2:
|
|||
https-proxy-agent "^2.2.3"
|
||||
socks-proxy-agent "^4.0.1"
|
||||
|
||||
vscode-regexpp@^3.1.0:
|
||||
version "3.1.0"
|
||||
resolved "https://registry.yarnpkg.com/vscode-regexpp/-/vscode-regexpp-3.1.0.tgz#42d059b6fffe99bd42939c0d013f632f0cad823f"
|
||||
integrity sha512-pqtN65VC1jRLawfluX4Y80MMG0DHJydWhe5ZwMHewZD6sys4LbU6lHwFAHxeuaVE6Y6+xZOtAw+9hvq7/0ejkg==
|
||||
|
||||
vscode-ripgrep@^1.11.0:
|
||||
version "1.11.0"
|
||||
resolved "https://registry.yarnpkg.com/vscode-ripgrep/-/vscode-ripgrep-1.11.0.tgz#2874adea1753545590a315f02f36bed05b9e2380"
|
||||
|
|
Loading…
Reference in a new issue