[ML] Fix HTML named characters encoding (#72060)

* [ML] improve special characters encoding

* [ML] update renovate.json5
This commit is contained in:
Dima Arnautov 2020-07-17 16:37:10 +02:00 committed by GitHub
parent 937314ad11
commit 8f442f8318
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 33 additions and 12 deletions

View file

@ -418,6 +418,14 @@
'@types/has-ansi',
],
},
{
groupSlug: 'he',
groupName: 'he related packages',
packageNames: [
'he',
'@types/he',
],
},
{
groupSlug: 'history',
groupName: 'history related packages',

View file

@ -72,6 +72,7 @@
"@types/graphql": "^0.13.2",
"@types/gulp": "^4.0.6",
"@types/hapi__wreck": "^15.0.1",
"@types/he": "^1.1.1",
"@types/hoist-non-react-statics": "^3.3.1",
"@types/history": "^4.7.3",
"@types/jest": "^25.2.3",
@ -265,6 +266,7 @@
"graphql-tools": "^3.0.2",
"h2o2": "^8.1.2",
"handlebars": "4.7.6",
"he": "^1.2.0",
"history": "4.9.0",
"history-extra": "^5.0.1",
"i18n-iso-countries": "^4.3.1",

View file

@ -126,8 +126,11 @@ describe('ML - string utils', () => {
expect(mlEscape('foo<bar')).toBe('foo&lt;bar');
expect(mlEscape('foo>bar')).toBe('foo&gt;bar');
expect(mlEscape('foo"bar')).toBe('foo&quot;bar');
expect(mlEscape("foo'bar")).toBe('foo&#39;bar');
expect(mlEscape('foo/bar')).toBe('foo&#x2F;bar');
expect(mlEscape("foo'bar")).toBe('foo&apos;bar');
expect(mlEscape('foo/bar')).toBe('foo&sol;bar');
expect(mlEscape('escape © everything ≠ / 𝌆 \\')).toBe(
'escape&#x20;&copy;&#x20;everything&#x20;&ne;&#x20;&sol;&#x20;&#xD834;&#xDF06;&#x20;&#x5C;'
);
});
});

View file

@ -9,6 +9,7 @@
*/
import _ from 'lodash';
import d3 from 'd3';
import he from 'he';
import { CustomUrlAnomalyRecordDoc } from '../../../common/types/custom_urls';
import { Detector } from '../../../common/types/anomaly_detection_jobs';
@ -105,15 +106,17 @@ export function toLocaleString(x: number | undefined | null): string {
/**
 * Escapes a string for safe insertion into HTML.
 *
 * Every non-word character (anything matched by `\W`, i.e. everything except
 * `[A-Za-z0-9_]`) is replaced by the HTML character reference that
 * `he.encode` produces for it, preferring named references where one exists.
 * Word characters are passed through unchanged.
 *
 * @param str - Value to escape; it is coerced with `String()` first.
 * @returns The escaped string.
 */
export function mlEscape(str: string): string {
  // It's not possible to use "he" encoding directly
  // because \ and / characters are not going to be replaced without
  // encodeEverything option. But with this option enabled
  // each word character is encoded as well.
  // Hence only the non-word characters are handed to "he", one match at a time.
  // NOTE: the pattern has no `u` flag, so a symbol outside the BMP is matched
  // as two separate surrogate code units and each one is encoded on its own.
  return String(str).replace(/\W/g, (s) =>
    he.encode(s, {
      useNamedReferences: true,
      encodeEverything: true,
      allowUnsafeSymbols: false,
    })
  );
}
// Escapes reserved characters for use in Elasticsearch query terms.

View file

@ -5141,6 +5141,11 @@
resolved "https://registry.yarnpkg.com/@types/has-ansi/-/has-ansi-3.0.0.tgz#636403dc4e0b2649421c4158e5c404416f3f0330"
integrity sha512-H3vFOwfLlFEC0MOOrcSkus8PCnMCzz4N0EqUbdJZCdDhBTfkAu86aRYA+MTxjKW6jCpUvxcn4715US8g+28BMA==
"@types/he@^1.1.1":
version "1.1.1"
resolved "https://registry.yarnpkg.com/@types/he/-/he-1.1.1.tgz#19e14033c4ee8f1a702c74dcc6182664839ac2b7"
integrity sha512-jpzrsR1ns0n3kyWt92QfOUQhIuJGQ9+QGa7M62rO6toe98woQjnsnzjdMtsQXCdvjjmqjS2ZBCC7xKw0cdzU+Q==
"@types/history@*":
version "4.7.2"
resolved "https://registry.yarnpkg.com/@types/history/-/history-4.7.2.tgz#0e670ea254d559241b6eeb3894f8754991e73220"
@ -17063,7 +17068,7 @@ hawk@~6.0.2:
hoek "4.x.x"
sntp "2.x.x"
he@1.2.0, he@1.2.x, he@^1.1.1:
he@1.2.0, he@1.2.x, he@^1.1.1, he@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==