acdbd10626
* Amend scanner to support astral characters in identifiers when parsing es6+ * Use charSize helper rather than one-off maybe advance helper * Update script to emit informative comment, run in unicode 12.1 environment * Add suggested change
28 lines
1.5 KiB
TypeScript
28 lines
1.5 KiB
TypeScript
|
|
/** Largest code point value scanned by this script (U+10FFFF). */
const MAX_UNICODE_CODEPOINT = 0x10FFFF;

// ID_Start, with the Other_ID_Start code points explicitly included for back compat -
// see http://www.unicode.org/reports/tr31/#Introduction
const ID_START_PATTERN = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;

// ID_Continue, with the Other_ID_Continue code points explicitly included likewise.
const ID_CONTINUE_PATTERN = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;

/**
 * Tests whether a string contains a character allowed at the start of an identifier.
 *
 * The pattern is unanchored, so any matching character in `c` yields `true`;
 * the pattern has no `g`/`y` flag, so repeated calls are stateless.
 *
 * @param {string} c - Text to test.
 * @returns {boolean} `true` when `c` contains an ID_Start / Other_ID_Start character.
 */
const isStart = c => ID_START_PATTERN.test(c);

/**
 * Tests whether a string contains a character allowed after the first position of an identifier.
 *
 * Every start character is also accepted as a part character.
 *
 * @param {string} c - Text to test.
 * @returns {boolean} `true` when `c` contains an ID_Continue / Other_ID_Continue character,
 *   or anything accepted by `isStart`.
 */
const isPart = c => ID_CONTINUE_PATTERN.test(c) || isStart(c);
/**
 * Scans code points 0 through `maxCodePoint` (inclusive) and records each
 * contiguous run of code points accepted by `isMember`.
 *
 * @param {(c: string) => boolean} isMember - Predicate called with the one-code-point string for each value.
 * @param {number} [maxCodePoint] - Last code point to examine; defaults to MAX_UNICODE_CODEPOINT.
 * @returns {number[]} Flat list of inclusive bounds: [first, last, first, last, ...].
 */
function collectRanges(isMember, maxCodePoint = MAX_UNICODE_CODEPOINT) {
    const bounds = [];
    let insideRun = false;
    for (let codePoint = 0; codePoint <= maxCodePoint; codePoint++) {
        if (isMember(String.fromCodePoint(codePoint)) === insideRun) {
            continue;
        }
        // Entering a run: this code point is its first member.
        // Leaving a run: the previous code point was its last member.
        bounds.push(insideRun ? codePoint - 1 : codePoint);
        insideRun = !insideRun;
    }
    if (insideRun) {
        // Close a run that extends to the end of the scan so bounds always come in pairs.
        bounds.push(maxCodePoint);
    }
    return bounds;
}

// Inclusive [first, last] pairs of code points accepted by isStart.
const starts = collectRanges(isStart);

// Inclusive [first, last] pairs of code points accepted by isPart.
const parts = collectRanges(isPart);

// Emit the generated source on stdout: first a header comment recording the
// node and unicode versions the tables were produced with, then the two tables.
console.log(`/**
* Generated by scripts/regenerate-unicode-identifier-parts.js on node ${process.version} with unicode ${process.versions.unicode}
* based on http://www.unicode.org/reports/tr31/ and https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
* unicodeESNextIdentifierStart corresponds to the ID_Start and Other_ID_Start property, and
* unicodeESNextIdentifierPart corresponds to ID_Continue, Other_ID_Continue, plus ID_Start and Other_ID_Start
*/`);
console.log(`const unicodeESNextIdentifierStart = [${starts.join(", ")}];`);
console.log(`const unicodeESNextIdentifierPart = [${parts.join(", ")}];`);