[ML] Fixing missing final new line character issue (#109274) (#110050)

* [ML] Fixing missing final new line character issue

* adding tests

* tiny refactor

* test fixes based on review

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>

Co-authored-by: James Gowdy <jgowdy@elastic.co>
This commit is contained in:
Kibana Machine 2021-08-25 11:20:32 -04:00 committed by GitHub
parent 1d6ca80c7e
commit 66ed66ae8b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 95 additions and 5 deletions

View file

@ -40,7 +40,10 @@ export abstract class Importer implements IImporter {
let remainder = 0;
for (let i = 0; i < parts; i++) {
const byteArray = decoder.decode(data.slice(i * size - remainder, (i + 1) * size));
const { success, docs, remainder: tempRemainder } = this._createDocs(byteArray);
const { success, docs, remainder: tempRemainder } = this._createDocs(
byteArray,
i === parts - 1
);
if (success) {
this._docArray = this._docArray.concat(docs);
remainder = tempRemainder;
@ -52,7 +55,7 @@ export abstract class Importer implements IImporter {
return { success: true };
}
protected abstract _createDocs(t: string): CreateDocsResponse;
protected abstract _createDocs(t: string, isLastPart: boolean): CreateDocsResponse;
public async initializeImport(
index: string,

View file

@ -30,7 +30,7 @@ export class MessageImporter extends Importer {
// multiline_start_pattern regex
// if it does, it is a legitimate end of line and can be pushed into the list,
// if not, it must be a newline char inside a field value, so keep looking.
protected _createDocs(text: string): CreateDocsResponse {
protected _createDocs(text: string, isLastPart: boolean): CreateDocsResponse {
let remainder = 0;
try {
const docs: Doc[] = [];
@ -39,9 +39,17 @@ export class MessageImporter extends Importer {
let line = '';
for (let i = 0; i < text.length; i++) {
const char = text[i];
const isLastChar = i === text.length - 1;
if (char === '\n') {
message = this._processLine(docs, message, line);
line = '';
} else if (isLastPart && isLastChar) {
// if this is the end of the last line and the last chunk of data,
// add the remainder as a final line.
// just in case the last line doesn't end in a new line char.
line += char;
message = this._processLine(docs, message, line);
line = '';
} else {
line += char;
}

View file

@ -13,7 +13,7 @@ export class NdjsonImporter extends Importer {
super();
}
protected _createDocs(json: string): CreateDocsResponse {
protected _createDocs(json: string, isLastPart: boolean): CreateDocsResponse {
let remainder = 0;
try {
const splitJson = json.split(/}\s*\n/);

View file

@ -111,6 +111,7 @@ export default function ({ getService }: FtrProviderContext) {
totalFieldsCount: 12,
fieldTypeFiltersResultCount: 4,
fieldNameFiltersResultCount: 1,
ingestedDocCount: 20,
},
},
{
@ -152,6 +153,51 @@ export default function ({ getService }: FtrProviderContext) {
totalFieldsCount: 3,
fieldTypeFiltersResultCount: 1,
fieldNameFiltersResultCount: 1,
ingestedDocCount: 13,
},
},
{
suiteSuffix: 'with a file with a missing new line char at the end',
filePath: path.join(__dirname, 'files_to_import', 'missing_end_of_file_newline.csv'),
indexName: 'user-import_3',
createIndexPattern: false,
fieldTypeFilters: [],
fieldNameFilters: [],
expected: {
results: {
title: 'missing_end_of_file_newline.csv',
numberOfFields: 3,
},
metricFields: [
{
fieldName: 'value',
type: ML_JOB_FIELD_TYPES.NUMBER,
docCountFormatted: '3 (100%)',
exampleCount: 3,
topValuesCount: 3,
},
],
nonMetricFields: [
{
fieldName: 'title',
type: ML_JOB_FIELD_TYPES.UNKNOWN,
docCountFormatted: '3 (100%)',
exampleCount: 3,
},
{
fieldName: 'description',
type: ML_JOB_FIELD_TYPES.KEYWORD,
docCountFormatted: '3 (100%)',
exampleCount: 3,
},
],
visibleMetricFieldsCount: 0,
totalMetricFieldsCount: 0,
populatedFieldsCount: 3,
totalFieldsCount: 3,
fieldTypeFiltersResultCount: 3,
fieldNameFiltersResultCount: 3,
ingestedDocCount: 3,
},
},
];
@ -271,6 +317,10 @@ export default function ({ getService }: FtrProviderContext) {
await ml.testExecution.logTestStep('imports the file');
await ml.dataVisualizerFileBased.startImportAndWaitForProcessing();
await ml.dataVisualizerFileBased.assertIngestedDocCount(
testData.expected.ingestedDocCount
);
await ml.testExecution.logTestStep('creates filebeat config');
await ml.dataVisualizerFileBased.selectCreateFilebeatConfig();

View file

@ -11,4 +11,4 @@ POINT (-2.509384 51.40959),On or near Barnard Walk,
POINT (-2.495055 51.422132),On or near Cross Street,
POINT (-2.509384 51.40959),On or near Barnard Walk,
POINT (-2.495055 51.422132),On or near Cross Street,
POINT (-2.509126 51.416137),On or near St Francis Road,
POINT (-2.509126 51.416137),On or near St Francis Road,

1 Coordinates Location Context
11 POINT (-2.495055 51.422132) On or near Cross Street
12 POINT (-2.509384 51.40959) On or near Barnard Walk
13 POINT (-2.495055 51.422132) On or near Cross Street
14 POINT (-2.509126 51.416137) On or near St Francis Road

View file

@ -0,0 +1,4 @@
title,description,value
first title,this is the first description,22
second title,this is the second description,66
third title,this is the third description,88
1 title description value
2 first title this is the first description 22
3 second title this is the second description 66
4 third title this is the third description 88

View file

@ -285,6 +285,20 @@ export function MachineLearningCommonUIProvider({
await this.assertRowsNumberPerPage(testSubj, rowsNumber);
},
/**
 * Looks up the description paired with a given title inside a description list.
 *
 * Locates the element for `testSubj`, gathers its `dt` and `dd` descendants, and
 * resolves to the `.html()` of the `dd` found at the same position as the first
 * `dt` whose `.html()` is strictly equal to `title`.
 *
 * @param testSubj test subject selector of the element containing the list
 * @param title exact `.html()` content of the `dt` to look for
 * @returns the `.html()` of the matching `dd`, or `null` when no `dt` matches
 */
async getEuiDescriptionListDescriptionFromTitle(testSubj: string, title: string) {
  const listRoot = await testSubjects.find(testSubj);
  const termElements = await listRoot.findAllByTagName('dt');
  const detailElements = await listRoot.findAllByTagName('dd');

  // Terms and details are paired purely by their index in the two lists.
  for (const [position, termElement] of termElements.entries()) {
    const termDom = await termElement.parseDomContent();
    if (termDom.html() !== title) {
      continue;
    }
    const detailDom = await detailElements[position].parseDomContent();
    return detailDom.html();
  }

  return null;
},
async changeToSpace(spaceId: string) {
await PageObjects.spaceSelector.openSpacesNav();
await PageObjects.spaceSelector.goToSpecificSpace(spaceId);

View file

@ -132,6 +132,17 @@ export function MachineLearningDataVisualizerFileBasedProvider(
});
},
/**
 * Asserts the "Documents ingested" value shown in the file import success callout.
 *
 * Reads the description paired with the 'Documents ingested' title inside the
 * 'dataVisualizerFileImportSuccessCallout' element and checks it against `count`
 * using `eql`.
 *
 * @param count expected number of ingested documents
 */
async assertIngestedDocCount(count: number) {
  const ingestedValue = await mlCommonUI.getEuiDescriptionListDescriptionFromTitle(
    'dataVisualizerFileImportSuccessCallout',
    'Documents ingested'
  );
  const failureMessage = `Expected Documents ingested count to be '${count}' (got '${ingestedValue}')`;
  expect(ingestedValue).to.eql(count, failureMessage);
},
async selectCreateFilebeatConfig() {
await testSubjects.scrollIntoView('fileDataVisFilebeatConfigLink', {
bottomOffset: fixedFooterHeight,