Improve highlighting by using highlight_query with all_fields enabled (#9671)

* Add all_fields to highlight query to improve highlighting

* Refactor highlighting and move out of _flatten

* Make changes as per @bargs' requests

* Add documentation about highlightAll setting

* Fix docs typo

* Remove unused function

* Remove unused code
This commit is contained in:
Lukas Olson 2017-02-03 10:44:00 -07:00 committed by GitHub
parent 36821edf52
commit 909b8c7a0e
15 changed files with 268 additions and 129 deletions

View file

@ -35,6 +35,8 @@ document.
`discover:sampleSize`:: The number of rows to show in the Discover table.
`doc_table:highlight`:: Highlight results in Discover and Saved Searches Dashboard. Highlighting makes request slow when
working on big documents. Set this property to `false` to disable highlighting.
`doc_table:highlight:all_fields`:: Improves highlighting by using a separate `highlight_query` that uses `all_fields` mode on
`query_string` queries. Set to `false` if you are using a `default_field` in your index.
`courier:maxSegmentCount`:: Kibana splits requests in the Discover app into segments to limit the size of requests sent to
the Elasticsearch cluster. This setting constrains the length of the segment list. Long segment lists can significantly
increase request processing time.
@ -86,4 +88,4 @@ Markdown.
`timelion:default_rows`:: The default number of rows to use on a timelion sheet.
`timelion:graphite.url`:: [experimental] Used with graphite queries, this it the URL of your host
`timelion:quandl.key`:: [experimental] Used with quandl queries, this is your API key from www.quandl.com
`state:storeInSessionStorage`:: [experimental] Kibana tracks UI state in the URL, which can lead to problems when there is a lot of information there and the URL gets very long. Enabling this will store parts of the state in your browser session instead, to keep the URL shorter.
`state:storeInSessionStorage`:: [experimental] Kibana tracks UI state in the URL, which can lead to problems when there is a lot of information there and the URL gets very long. Enabling this will store parts of the state in your browser session instead, to keep the URL shorter.

View file

@ -13,7 +13,6 @@ import 'ui/courier';
import 'ui/index_patterns';
import 'ui/state_management/app_state';
import 'ui/timefilter';
import 'ui/highlight/highlight_tags';
import 'ui/share';
import VisProvider from 'ui/vis';
import DocTitleProvider from 'ui/doc_title';
@ -89,7 +88,7 @@ app.directive('discoverApp', function () {
});
function discoverController($scope, config, courier, $route, $window, Notifier,
AppState, timefilter, Promise, Private, kbnUrl, highlightTags) {
AppState, timefilter, Promise, Private, kbnUrl) {
const Vis = Private(VisProvider);
const docTitle = Private(DocTitleProvider);
@ -143,7 +142,9 @@ function discoverController($scope, config, courier, $route, $window, Notifier,
// the actual courier.SearchSource
$scope.searchSource = savedSearch.searchSource;
$scope.indexPattern = resolveIndexPatternLoading();
$scope.searchSource.set('index', $scope.indexPattern);
$scope.searchSource
.set('index', $scope.indexPattern)
.highlightAll(true);
if (savedSearch.id) {
docTitle.change(savedSearch.title);
@ -475,22 +476,12 @@ function discoverController($scope, config, courier, $route, $window, Notifier,
kbnUrl.change('/discover');
};
$scope.updateDataSource = Promise.method(function () {
$scope.updateDataSource = Promise.method(function updateDataSource() {
$scope.searchSource
.size($scope.opts.sampleSize)
.sort(getSort($state.sort, $scope.indexPattern))
.query(!$state.query ? null : $state.query)
.set('filter', queryFilter.getFilters());
if (config.get('doc_table:highlight')) {
$scope.searchSource.highlight({
pre_tags: [highlightTags.pre],
post_tags: [highlightTags.post],
fields: { '*': {} },
require_field_match: false,
fragment_size: 2147483647 // Limit of an integer.
});
}
});
// TODO: On array fields, negating does not negate the combination, rather all terms

View file

@ -8,12 +8,14 @@ import ErrorHandlersProvider from '../_error_handlers';
import FetchProvider from '../fetch';
import DecorateQueryProvider from './_decorate_query';
import FieldWildcardProvider from '../../field_wildcard';
import { getHighlightRequestProvider } from '../../highlight';
export default function SourceAbstractFactory(Private, Promise, PromiseEmitter) {
const requestQueue = Private(RequestQueueProvider);
const errorHandlers = Private(ErrorHandlersProvider);
const courierFetch = Private(FetchProvider);
const { fieldWildcardFilter } = Private(FieldWildcardProvider);
const getHighlightRequest = Private(getHighlightRequestProvider);
function SourceAbstract(initialState, strategy) {
const self = this;
@ -371,6 +373,13 @@ export default function SourceAbstractFactory(Private, Promise, PromiseEmitter)
delete flatState.filters;
}
if (flatState.highlightAll != null) {
if (flatState.highlightAll && flatState.body.query) {
flatState.body.highlight = getHighlightRequest(flatState.body.query);
}
delete flatState.highlightAll;
}
// re-write filters within filter aggregations
(function recurse(aggBranch) {
if (!aggBranch) return;

View file

@ -93,6 +93,7 @@ export default function SearchSourceFactory(Promise, Private, config) {
'filter',
'sort',
'highlight',
'highlightAll',
'aggs',
'from',
'size',
@ -178,7 +179,6 @@ export default function SearchSourceFactory(Promise, Private, config) {
});
};
/******
* PRIVATE APIS
******/
@ -249,6 +249,7 @@ export default function SearchSourceFactory(Promise, Private, config) {
case 'index':
case 'type':
case 'id':
case 'highlightAll':
if (key && state[key] == null) {
state[key] = val;
}

View file

@ -1,73 +0,0 @@
import angular from 'angular';
import expect from 'expect.js';
import ngMock from 'ng_mock';
import 'ui/highlight';
describe('Highlight', function () {
let filter;
let tags;
beforeEach(ngMock.module('kibana'));
beforeEach(ngMock.inject(function (highlightFilter, highlightTags) {
filter = highlightFilter;
tags = highlightTags;
}));
const text = '' +
'Bacon ipsum dolor amet pork loin pork cow pig beef chuck ground round shankle sirloin landjaeger kevin ' +
'venison sausage ribeye tongue. Chicken bacon ball tip pork. Brisket pork capicola spare ribs pastrami rump ' +
'sirloin, t-bone ham shoulder jerky turducken bresaola. Chicken cow beef picanha. Picanha hamburger alcatra ' +
'cupim. Salami capicola boudin pork belly shank picanha.';
it('should not modify text if highlight is empty', function () {
expect(filter(text, undefined)).to.be(text);
expect(filter(text, null)).to.be(text);
expect(filter(text, [])).to.be(text);
});
it('should highlight a single result', function () {
const highlights = [
tags.pre + 'hamburger' + tags.post + ' alcatra cupim. Salami capicola boudin pork belly shank picanha.'
];
const result = filter(text, highlights);
expect(result.indexOf('<mark>hamburger</mark>')).to.be.greaterThan(-1);
expect(result.split('<mark>hamburger</mark>').length).to.be(text.split('hamburger').length);
});
it('should highlight multiple results', function () {
const highlights = [
'kevin venison sausage ribeye tongue. ' + tags.pre + 'Chicken' + tags.post + ' bacon ball tip pork. Brisket ' +
'pork capicola spare ribs pastrami rump sirloin, t-bone ham shoulder jerky turducken bresaola. ' + tags.pre +
'Chicken' + tags.post + ' cow beef picanha. Picanha'
];
const result = filter(text, highlights);
expect(result.indexOf('<mark>Chicken</mark>')).to.be.greaterThan(-1);
expect(result.split('<mark>Chicken</mark>').length).to.be(text.split('Chicken').length);
});
it('should highlight multiple hits in a result', function () {
const highlights = [
'Bacon ipsum dolor amet ' + tags.pre + 'pork' + tags.post + ' loin ' +
'' + tags.pre + 'pork' + tags.post + ' cow pig beef chuck ground round shankle ' +
'sirloin landjaeger',
'kevin venison sausage ribeye tongue. Chicken bacon ball tip ' +
'' + tags.pre + 'pork' + tags.post + '. Brisket ' +
'' + tags.pre + 'pork' + tags.post + ' capicola spare ribs',
'hamburger alcatra cupim. Salami capicola boudin ' + tags.pre + 'pork' + tags.post + ' ' +
'belly shank picanha.'
];
const result = filter(text, highlights);
expect(result.indexOf('<mark>pork</mark>')).to.be.greaterThan(-1);
expect(result.split('<mark>pork</mark>').length).to.be(text.split('pork').length);
});
it('should accept an object and return a string containing its properties', function () {
const obj = { foo: 1, bar: 2 };
const result = filter(obj, null);
expect(result.indexOf('' + obj)).to.be(-1);
expect(result.indexOf('foo')).to.be.greaterThan(-1);
expect(result.indexOf('bar')).to.be.greaterThan(-1);
});
});

View file

@ -0,0 +1,69 @@
import expect from 'expect.js';
import highlightTags from '../highlight_tags';
import htmlTags from '../html_tags';
import getHighlightHtml from '../highlight_html';
describe('getHighlightHtml', function () {
const text = '' +
'Bacon ipsum dolor amet pork loin pork cow pig beef chuck ground round shankle sirloin landjaeger kevin ' +
'venison sausage ribeye tongue. Chicken bacon ball tip pork. Brisket pork capicola spare ribs pastrami rump ' +
'sirloin, t-bone ham shoulder jerky turducken bresaola. Chicken cow beef picanha. Picanha hamburger alcatra ' +
'cupim. Salami capicola boudin pork belly shank picanha.';
it('should not modify text if highlight is empty', function () {
expect(getHighlightHtml(text, undefined)).to.be(text);
expect(getHighlightHtml(text, null)).to.be(text);
expect(getHighlightHtml(text, [])).to.be(text);
});
it('should preserve escaped text', function () {
const highlights = ['<foo>'];
const result = getHighlightHtml('&lt;foo&gt;', highlights);
expect(result.indexOf('<foo>')).to.be(-1);
expect(result.indexOf('&lt;foo&gt;')).to.be.greaterThan(-1);
});
it('should highlight a single result', function () {
const highlights = [
highlightTags.pre + 'hamburger' + highlightTags.post + ' alcatra cupim. Salami capicola boudin pork belly shank picanha.'
];
const result = getHighlightHtml(text, highlights);
expect(result.indexOf(htmlTags.pre + 'hamburger' + htmlTags.post)).to.be.greaterThan(-1);
expect(result.split(htmlTags.pre + 'hamburger' + htmlTags.post).length).to.be(text.split('hamburger').length);
});
it('should highlight multiple results', function () {
const highlights = [
'kevin venison sausage ribeye tongue. ' + highlightTags.pre + 'Chicken' + highlightTags.post + ' bacon ball tip pork. Brisket ' +
'pork capicola spare ribs pastrami rump sirloin, t-bone ham shoulder jerky turducken bresaola. ' + highlightTags.pre +
'Chicken' + highlightTags.post + ' cow beef picanha. Picanha'
];
const result = getHighlightHtml(text, highlights);
expect(result.indexOf(htmlTags.pre + 'Chicken' + htmlTags.post)).to.be.greaterThan(-1);
expect(result.split(htmlTags.pre + 'Chicken' + htmlTags.post).length).to.be(text.split('Chicken').length);
});
it('should highlight multiple hits in a result', function () {
const highlights = [
'Bacon ipsum dolor amet ' + highlightTags.pre + 'pork' + highlightTags.post + ' loin ' +
'' + highlightTags.pre + 'pork' + highlightTags.post + ' cow pig beef chuck ground round shankle ' +
'sirloin landjaeger',
'kevin venison sausage ribeye tongue. Chicken bacon ball tip ' +
'' + highlightTags.pre + 'pork' + highlightTags.post + '. Brisket ' +
'' + highlightTags.pre + 'pork' + highlightTags.post + ' capicola spare ribs',
'hamburger alcatra cupim. Salami capicola boudin ' + highlightTags.pre + 'pork' + highlightTags.post + ' ' +
'belly shank picanha.'
];
const result = getHighlightHtml(text, highlights);
expect(result.indexOf(htmlTags.pre + 'pork' + htmlTags.post)).to.be.greaterThan(-1);
expect(result.split(htmlTags.pre + 'pork' + htmlTags.post).length).to.be(text.split('pork').length);
});
it('should accept an object and return a string containing its properties', function () {
const obj = { foo: 1, bar: 2 };
const result = getHighlightHtml(obj, null);
expect(result.indexOf('' + obj)).to.be(-1);
expect(result.indexOf('foo')).to.be.greaterThan(-1);
expect(result.indexOf('bar')).to.be.greaterThan(-1);
});
});

View file

@ -0,0 +1,82 @@
import expect from 'expect.js';
import ngMock from 'ng_mock';
import getHighlightRequestProvider from '../highlight_request';
describe('getHighlightRequest', () => {
const queryStringQuery = { query_string: { query: 'foo' } };
const rangeQuery = { range: { '@timestamp': { gte: 0, lte: 0 } } };
const boolQueryWithSingleCondition = {
bool: {
must: queryStringQuery,
}
};
const boolQueryWithMultipleConditions = {
bool: {
must: [
queryStringQuery,
rangeQuery
]
}
};
let config;
let previousHighlightConfig;
let previousAllFieldsConfig;
let getHighlightRequest;
beforeEach(ngMock.module('kibana'));
beforeEach(ngMock.inject(function (_config_) {
config = _config_;
previousHighlightConfig = config.get('doc_table:highlight');
previousAllFieldsConfig = config.get('doc_table:highlight:all_fields');
}));
afterEach(() => {
config.set('doc_table:highlight', previousHighlightConfig);
config.set('doc_table:highlight:all_fields', previousAllFieldsConfig);
});
it('should be a function', () => {
getHighlightRequest = getHighlightRequestProvider(config);
expect(getHighlightRequest).to.be.a(Function);
});
it('should add the all_fields param with query_string query without modifying original query', () => {
getHighlightRequest = getHighlightRequestProvider(config);
const request = getHighlightRequest(queryStringQuery);
expect(request.fields['*']).to.have.property('highlight_query');
expect(request.fields['*'].highlight_query.query_string).to.have.property('all_fields');
expect(queryStringQuery.query_string).to.not.have.property('all_fields');
});
it('should add the all_fields param with bool query with single condition without modifying original query', () => {
getHighlightRequest = getHighlightRequestProvider(config);
const request = getHighlightRequest(boolQueryWithSingleCondition);
expect(request.fields['*']).to.have.property('highlight_query');
expect(request.fields['*'].highlight_query.bool.must.query_string).to.have.property('all_fields');
expect(queryStringQuery.query_string).to.not.have.property('all_fields');
});
it('should add the all_fields param with bool query with multiple conditions without modifying original query', () => {
getHighlightRequest = getHighlightRequestProvider(config);
const request = getHighlightRequest(boolQueryWithMultipleConditions);
expect(request.fields['*']).to.have.property('highlight_query');
expect(request.fields['*'].highlight_query.bool.must).to.have.length(boolQueryWithMultipleConditions.bool.must.length);
expect(request.fields['*'].highlight_query.bool.must[0].query_string).to.have.property('all_fields');
expect(queryStringQuery.query_string).to.not.have.property('all_fields');
});
it('should return undefined if highlighting is turned off', () => {
config.set('doc_table:highlight', false);
getHighlightRequest = getHighlightRequestProvider(config);
const request = getHighlightRequest(queryStringQuery);
expect(request).to.be(undefined);
});
it('should not add the highlight_query param if all_fields is turned off', () => {
config.set('doc_table:highlight:all_fields', false);
getHighlightRequest = getHighlightRequestProvider(config);
const request = getHighlightRequest(queryStringQuery);
expect(request.fields['*']).to.not.have.property('highlight_query');
});
});

View file

@ -1,31 +1,7 @@
import 'ui/highlight/highlight_tags';
import _ from 'lodash';
import angular from 'angular';
import uiModules from 'ui/modules';
import getHighlightHtml from './highlight_html';
import getHighlightRequestProvider from './highlight_request';
const module = uiModules.get('kibana');
module.filter('highlight', function (highlightTags) {
return function (formatted, highlight) {
if (typeof formatted === 'object') formatted = angular.toJson(formatted);
_.each(highlight, function (section) {
section = _.escape(section);
// Strip out the highlight tags to compare against the formatted string
const untagged = section
.split(highlightTags.pre).join('')
.split(highlightTags.post).join('');
// Replace all highlight tags with proper html tags
const tagged = section
.split(highlightTags.pre).join('<mark>')
.split(highlightTags.post).join('</mark>');
// Replace all instances of the untagged string with the properly tagged string
formatted = formatted.split(untagged).join(tagged);
});
return formatted;
};
});
export default {
getHighlightHtml,
getHighlightRequestProvider
};

View file

@ -0,0 +1,29 @@
import _ from 'lodash';
import angular from 'angular';
import highlightTags from './highlight_tags';
import htmlTags from './html_tags';
export default function getHighlightHtml(fieldValue, highlights) {
let highlightHtml = (typeof fieldValue === 'object')
? angular.toJson(fieldValue)
: fieldValue;
_.each(highlights, function (highlight) {
const escapedHighlight = _.escape(highlight);
// Strip out the highlight tags to compare against the field text
const untaggedHighlight = escapedHighlight
.split(highlightTags.pre).join('')
.split(highlightTags.post).join('');
// Replace all highlight tags with proper html tags
const taggedHighlight = escapedHighlight
.split(highlightTags.pre).join(htmlTags.pre)
.split(highlightTags.post).join(htmlTags.post);
// Replace all instances of the untagged string with the properly tagged string
highlightHtml = highlightHtml.split(untaggedHighlight).join(taggedHighlight);
});
return highlightHtml;
}

View file

@ -0,0 +1,44 @@
import _ from 'lodash';
import highlightTags from './highlight_tags';
const FRAGMENT_SIZE = Math.pow(2, 31) - 1; // Max allowed value for fragment_size (limit of a java int)
/**
* Returns a clone of the query with `"all_fields": true` set on any `query_string` queries
*/
function getHighlightQuery(query) {
const clone = _.cloneDeep(query);
if (_.has(clone, 'query_string')) {
clone.query_string.all_fields = true;
} else if (_.has(clone, 'bool.must')) {
if (Array.isArray(clone.bool.must)) {
clone.bool.must = clone.bool.must.map(getHighlightQuery);
} else {
clone.bool.must = getHighlightQuery(clone.bool.must);
}
}
return clone;
}
export default function getHighlightRequestProvider(config) {
if (!config.get('doc_table:highlight')) {
return _.noop;
}
return function getHighlightRequest(query) {
const fieldsParams = config.get('doc_table:highlight:all_fields')
? { highlight_query: getHighlightQuery(query) }
: {};
return {
pre_tags: [highlightTags.pre],
post_tags: [highlightTags.post],
fields: {
'*': fieldsParams
},
fragment_size: FRAGMENT_SIZE
};
};
}

View file

@ -1,10 +1,7 @@
import uiModules from 'ui/modules';
const module = uiModules.get('kibana');
// By default, ElasticSearch surrounds matched values in <em></em>. This is not ideal because it is possible that
// the value could contain <em></em> in the value. We define these custom tags that we would never expect to see
// inside a field value.
module.constant('highlightTags', {
export default {
pre: '@kibana-highlighted-field@',
post: '@/kibana-highlighted-field@'
});
};

View file

@ -0,0 +1,5 @@
// These are the html tags that will replace the highlight tags.
export default {
pre: '<mark>',
post: '</mark>'
};

View file

@ -1,7 +1,7 @@
import _ from 'lodash';
import angular from 'angular';
import 'ui/highlight';
export default function contentTypesProvider(highlightFilter) {
import { getHighlightHtml } from 'ui/highlight';
export default function contentTypesProvider() {
const types = {
html: function (format, convert) {
@ -47,7 +47,7 @@ export default function contentTypesProvider(highlightFilter) {
if (!hit || !hit.highlight || !hit.highlight[field.name]) {
return formatted;
} else {
return highlightFilter(formatted, hit.highlight[field.name]);
return getHighlightHtml(formatted, hit.highlight[field.name]);
}
}

View file

@ -3,7 +3,9 @@ import 'ui/field_format_editor/pattern/pattern';
import 'ui/stringify/icons';
import IndexPatternsFieldFormatProvider from 'ui/index_patterns/_field_format/field_format';
import urlTemplate from 'ui/stringify/editors/url.html';
export default function UrlFormatProvider(Private, highlightFilter) {
import { getHighlightHtml } from 'ui/highlight';
export default function UrlFormatProvider(Private) {
const FieldFormat = Private(IndexPatternsFieldFormatProvider);
@ -94,7 +96,7 @@ export default function UrlFormatProvider(Private, highlightFilter) {
return '<img src="' + url + '" alt="' + label + '" title="' + label + '">';
default:
if (hit && hit.highlight && hit.highlight[field.name]) {
label = highlightFilter(label, hit.highlight[field.name]);
label = getHighlightHtml(label, hit.highlight[field.name]);
}
return '<a href="' + url + '" target="_blank">' + label + '</a>';

View file

@ -75,6 +75,11 @@ export default function defaultSettingsProvider() {
description: 'Highlight results in Discover and Saved Searches Dashboard.' +
'Highlighting makes requests slow when working on big documents.',
},
'doc_table:highlight:all_fields': {
value: true,
description: 'Improves highlighting by using a separate "highlight_query" that uses "all_fields" mode on "query_string" queries. ' +
'Set to false if you are using a "default_field" in your index.',
},
'courier:maxSegmentCount': {
value: 30,
description: 'Requests in discover are split into segments to prevent massive requests from being sent to ' +