minio/vendor/gopkg.in/olivere/elastic.v5/search_queries_more_like_this.go
Aditya Manthramurthy a2a8d54bb6 Add access format support for Elasticsearch notification target (#4006)
This change adds `access` format support for notifications to an
Elasticsearch server, and it refactors `namespace` format support.

In the case of `access` format, for each event in Minio, a JSON
document is inserted into Elasticsearch with its timestamp set to the
event's timestamp, and with the ID generated automatically by
elasticsearch. No events are modified or deleted in this mode.

In the case of `namespace` format, for each event in Minio, a JSON
document keyed by the bucket and object name is updated in
Elasticsearch. When an object is created or overwritten in Minio, a
new document is inserted into, or the existing document is updated in,
the Elasticsearch index. If an object is deleted in Minio, the
corresponding document is deleted from the Elasticsearch index.

Additionally, this change upgrades Elasticsearch support to the 5.x
series. This is a breaking change, and users of previous elasticsearch
versions should upgrade.

Also updates documentation on Elasticsearch notification target usage
and has a link to an elasticsearch upgrade guide.

This is the last patch that finally resolves #3928.
2017-03-31 14:11:27 -07:00

413 lines
12 KiB
Go

// Copyright 2012-present Oliver Eilhard. All rights reserved.
// Use of this source code is governed by a MIT-license.
// See http://olivere.mit-license.org/license.txt for details.
package elastic
import "errors"
// MoreLikeThisQuery (MLT query) finds documents that are "like" a given
// set of documents. In order to do so, MLT selects a set of representative
// terms of these input documents, forms a query using these terms, executes
// the query and returns the results. The user controls the input documents,
// how the terms should be selected and how the query is formed.
//
// Values are built with NewMoreLikeThisQuery and configured through the
// chainable setter methods; Source turns the configuration into the
// JSON-serializable query body.
//
// For more details, see
// https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-mlt-query.html
type MoreLikeThisQuery struct {
// fields is emitted as "fields"; it is omitted when empty.
fields []string
// docs holds the items emitted as "like". Source returns an error when
// it is empty.
docs []*MoreLikeThisQueryItem
// unlikeDocs holds the items emitted as "unlike"; omitted when empty.
unlikeDocs []*MoreLikeThisQueryItem
// The pointer-typed settings below stay nil until their setter is called
// and are left out of the generated query while nil. String settings are
// left out while they are empty.
include *bool
minimumShouldMatch string
minTermFreq *int
maxQueryTerms *int
// stopWords is emitted as "stop_words"; it is omitted when empty.
stopWords []string
minDocFreq *int
maxDocFreq *int
minWordLen *int
maxWordLen *int
boostTerms *float64
boost *float64
analyzer string
failOnUnsupportedField *bool
// queryName is emitted as "_name".
queryName string
}
// NewMoreLikeThisQuery returns a MoreLikeThisQuery whose field, stop-word,
// like and unlike lists are empty (but non-nil) and whose optional settings
// are all unset.
func NewMoreLikeThisQuery() *MoreLikeThisQuery {
	q := new(MoreLikeThisQuery)
	q.fields = []string{}
	q.stopWords = []string{}
	q.docs = []*MoreLikeThisQueryItem{}
	q.unlikeDocs = []*MoreLikeThisQueryItem{}
	return q
}
// Field appends the given field names to the query's field list and
// returns q so that calls can be chained.
func (q *MoreLikeThisQuery) Field(fields ...string) *MoreLikeThisQuery {
	q.fields = append(q.fields, fields...)
	return q
}
// StopWord appends the given words to the query's stop-word list and
// returns q. Any word in this list is considered "uninteresting" and is
// ignored. Even if your analyzer allows stop words, you might want to tell
// the MoreLikeThis code to ignore them, since for the purposes of document
// similarity it seems reasonable to assume that "a stop word is never
// interesting".
func (q *MoreLikeThisQuery) StopWord(stopWords ...string) *MoreLikeThisQuery {
	q.stopWords = append(q.stopWords, stopWords...)
	return q
}
// LikeText adds one "like" item per given text, in order, so that documents
// similar to those texts are found. It returns q for chaining.
func (q *MoreLikeThisQuery) LikeText(likeTexts ...string) *MoreLikeThisQuery {
	for i := range likeTexts {
		q.docs = append(q.docs, NewMoreLikeThisQueryItem().LikeText(likeTexts[i]))
	}
	return q
}
// LikeItems appends the given items to the list of documents that results
// should be "like". It returns q for chaining.
func (q *MoreLikeThisQuery) LikeItems(docs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
	q.docs = append(q.docs, docs...)
	return q
}
// IgnoreLikeText adds one "unlike" item per given text, in order; terms
// should not be selected from these texts. It returns q for chaining.
func (q *MoreLikeThisQuery) IgnoreLikeText(ignoreLikeText ...string) *MoreLikeThisQuery {
	for i := range ignoreLikeText {
		q.unlikeDocs = append(q.unlikeDocs, NewMoreLikeThisQueryItem().LikeText(ignoreLikeText[i]))
	}
	return q
}
// IgnoreLikeItems appends the given items to the list of documents from
// which terms should not be selected. It returns q for chaining.
func (q *MoreLikeThisQuery) IgnoreLikeItems(ignoreDocs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
	q.unlikeDocs = append(q.unlikeDocs, ignoreDocs...)
	return q
}
// Ids adds one "like" item per given document id, in order, so that
// documents similar to the referenced ones are found. It returns q for
// chaining.
func (q *MoreLikeThisQuery) Ids(ids ...string) *MoreLikeThisQuery {
	for i := range ids {
		q.docs = append(q.docs, NewMoreLikeThisQueryItem().Id(ids[i]))
	}
	return q
}
// Include controls whether the input documents should themselves be part
// of the returned results. Defaults to false. It returns q for chaining.
func (q *MoreLikeThisQuery) Include(include bool) *MoreLikeThisQuery {
	flag := include
	q.include = &flag
	return q
}
// MinimumShouldMatch sets how many terms of the generated query must match,
// expressed in the common "minimum should match" syntax.
// The default value is "30%".
//
// Elasticsearch versions before 2.0 called this "PercentTermsToMatch".
func (q *MoreLikeThisQuery) MinimumShouldMatch(minimumShouldMatch string) *MoreLikeThisQuery {
	q.minimumShouldMatch = minimumShouldMatch
	return q
}
// MinTermFreq sets the frequency below which terms of the source document
// are ignored. The default frequency is 2. It returns q for chaining.
func (q *MoreLikeThisQuery) MinTermFreq(minTermFreq int) *MoreLikeThisQuery {
	freq := minTermFreq
	q.minTermFreq = &freq
	return q
}
// MaxQueryTerms caps the number of query terms that any generated query
// will include. It defaults to 25. It returns q for chaining.
func (q *MoreLikeThisQuery) MaxQueryTerms(maxQueryTerms int) *MoreLikeThisQuery {
	limit := maxQueryTerms
	q.maxQueryTerms = &limit
	return q
}
// MinDocFreq sets the minimum document frequency: words that do not occur
// in at least this many documents are ignored. The default is 5.
// It returns q for chaining.
func (q *MoreLikeThisQuery) MinDocFreq(minDocFreq int) *MoreLikeThisQuery {
	freq := minDocFreq
	q.minDocFreq = &freq
	return q
}
// MaxDocFreq sets the maximum document frequency: words that appear in
// more than this many documents are ignored. It defaults to unbounded.
// It returns q for chaining.
func (q *MoreLikeThisQuery) MaxDocFreq(maxDocFreq int) *MoreLikeThisQuery {
	freq := maxDocFreq
	q.maxDocFreq = &freq
	return q
}
// MinWordLen sets the minimum word length; shorter words are ignored.
// It defaults to 0. It returns q for chaining.
func (q *MoreLikeThisQuery) MinWordLen(minWordLen int) *MoreLikeThisQuery {
	length := minWordLen
	q.minWordLen = &length
	return q
}
// MaxWordLen sets the maximum word length; longer words are ignored.
// Defaults to unbounded (0). It returns q for chaining.
func (q *MoreLikeThisQuery) MaxWordLen(maxWordLen int) *MoreLikeThisQuery {
	length := maxWordLen
	q.maxWordLen = &length
	return q
}
// BoostTerms sets the factor applied when boosting terms.
// It defaults to 1. It returns q for chaining.
func (q *MoreLikeThisQuery) BoostTerms(boostTerms float64) *MoreLikeThisQuery {
	factor := boostTerms
	q.boostTerms = &factor
	return q
}
// Analyzer names the analyzer that will be used to analyze the text.
// Defaults to the analyzer associated with the field.
// It returns q for chaining.
func (q *MoreLikeThisQuery) Analyzer(analyzer string) *MoreLikeThisQuery {
	q.analyzer = analyzer
	return q
}
// Boost sets the boost value of this query and returns q for chaining.
func (q *MoreLikeThisQuery) Boost(boost float64) *MoreLikeThisQuery {
	factor := boost
	q.boost = &factor
	return q
}
// FailOnUnsupportedField selects whether the query fails or returns no
// result when it is run against a field that is not supported, such as a
// binary/numeric field. It returns q for chaining.
func (q *MoreLikeThisQuery) FailOnUnsupportedField(fail bool) *MoreLikeThisQuery {
	flag := fail
	q.failOnUnsupportedField = &flag
	return q
}
// QueryName assigns a name to the query, which can be used when searching
// for matched_filters per hit. It returns q for chaining.
func (q *MoreLikeThisQuery) QueryName(queryName string) *MoreLikeThisQuery {
	q.queryName = queryName
	return q
}
// Source builds the JSON-serializable body of the MLT query.
//
// The result is a map of the form
//
//	{"more_like_this": {"like": [...], "fields": [...], ...}}
//
// in which every optional setting appears only if it was set on q. An
// error is returned if nothing was registered to be "liked" (via LikeText,
// LikeItems or Ids), if a liked or unliked item is nil, or if serializing
// one of those items fails.
func (q *MoreLikeThisQuery) Source() (interface{}, error) {
	if len(q.docs) == 0 {
		return nil, errors.New(`more_like_this requires some documents to be "liked"`)
	}

	params := make(map[string]interface{})
	// Emit the query under the name used by the Elasticsearch reference
	// documentation; the short form "mlt" is only a deprecated alias.
	source := map[string]interface{}{"more_like_this": params}

	if len(q.fields) > 0 {
		params["fields"] = q.fields
	}

	likes, err := moreLikeThisItemSources(q.docs)
	if err != nil {
		return nil, err
	}
	params["like"] = likes

	if len(q.unlikeDocs) > 0 {
		dontLikes, err := moreLikeThisItemSources(q.unlikeDocs)
		if err != nil {
			return nil, err
		}
		params["unlike"] = dontLikes
	}

	if q.minimumShouldMatch != "" {
		params["minimum_should_match"] = q.minimumShouldMatch
	}
	if q.minTermFreq != nil {
		params["min_term_freq"] = *q.minTermFreq
	}
	if q.maxQueryTerms != nil {
		params["max_query_terms"] = *q.maxQueryTerms
	}
	if len(q.stopWords) > 0 {
		params["stop_words"] = q.stopWords
	}
	if q.minDocFreq != nil {
		params["min_doc_freq"] = *q.minDocFreq
	}
	if q.maxDocFreq != nil {
		params["max_doc_freq"] = *q.maxDocFreq
	}
	if q.minWordLen != nil {
		params["min_word_len"] = *q.minWordLen
	}
	if q.maxWordLen != nil {
		params["max_word_len"] = *q.maxWordLen
	}
	if q.boostTerms != nil {
		params["boost_terms"] = *q.boostTerms
	}
	if q.boost != nil {
		params["boost"] = *q.boost
	}
	if q.analyzer != "" {
		params["analyzer"] = q.analyzer
	}
	if q.failOnUnsupportedField != nil {
		params["fail_on_unsupported_field"] = *q.failOnUnsupportedField
	}
	if q.queryName != "" {
		params["_name"] = q.queryName
	}
	if q.include != nil {
		params["include"] = *q.include
	}

	return source, nil
}

// moreLikeThisItemSources serializes each item through its Source method,
// preserving order. It stops at the first nil item or serialization error.
func moreLikeThisItemSources(items []*MoreLikeThisQueryItem) ([]interface{}, error) {
	out := make([]interface{}, 0, len(items))
	for _, item := range items {
		if item == nil {
			// Report a caller mistake as an error instead of panicking on
			// the nil receiver inside item.Source.
			return nil, errors.New("more_like_this item must not be nil")
		}
		src, err := item.Source()
		if err != nil {
			return nil, err
		}
		out = append(out, src)
	}
	return out, nil
}
// -- MoreLikeThisQueryItem --
// MoreLikeThisQueryItem represents a single item of a MoreLikeThisQuery
// to be "liked" or "unliked".
//
// An item is either a plain text (likeText) or a document description made
// up of the remaining fields; Source decides which form is serialized.
type MoreLikeThisQueryItem struct {
// likeText, when non-empty, makes Source return just this string and
// ignore every other field.
likeText string
// index, typ and id are emitted as "_index", "_type" and "_id" when
// non-empty.
index string
typ string
id string
// doc is emitted as "doc" when non-nil.
doc interface{}
// fields is emitted as "fields" when non-empty.
fields []string
// routing is emitted as "_routing" when non-empty.
routing string
// fsc, when non-nil, is serialized and emitted as "_source".
fsc *FetchSourceContext
// version is emitted as "_version" only when it is >= 0;
// NewMoreLikeThisQueryItem initializes it to -1.
version int64
// versionType is emitted as "_version_type" when non-empty.
versionType string
}
// NewMoreLikeThisQueryItem returns an empty MoreLikeThisQueryItem whose
// version is -1, i.e. below the threshold at which Source emits "_version".
func NewMoreLikeThisQueryItem() *MoreLikeThisQueryItem {
	item := new(MoreLikeThisQueryItem)
	item.version = -1
	return item
}
// LikeText sets the text to be "liked" and returns item for chaining.
func (item *MoreLikeThisQueryItem) LikeText(likeText string) *MoreLikeThisQueryItem {
	item.likeText = likeText
	return item
}
// Index sets the index of the item and returns item for chaining.
func (item *MoreLikeThisQueryItem) Index(index string) *MoreLikeThisQueryItem {
	item.index = index
	return item
}
// Type sets the document type of the item and returns item for chaining.
func (item *MoreLikeThisQueryItem) Type(typ string) *MoreLikeThisQueryItem {
	item.typ = typ
	return item
}
// Id sets the document id of the item and returns item for chaining.
func (item *MoreLikeThisQueryItem) Id(id string) *MoreLikeThisQueryItem {
	item.id = id
	return item
}
// Doc sets a raw document template for the item and returns item for
// chaining.
func (item *MoreLikeThisQueryItem) Doc(doc interface{}) *MoreLikeThisQueryItem {
	item.doc = doc
	return item
}
// Fields appends the given names to the item's list of fields and returns
// item for chaining.
func (item *MoreLikeThisQueryItem) Fields(fields ...string) *MoreLikeThisQueryItem {
	item.fields = append(item.fields, fields...)
	return item
}
// Routing sets the routing value associated with the item and returns item
// for chaining.
func (item *MoreLikeThisQueryItem) Routing(routing string) *MoreLikeThisQueryItem {
	item.routing = routing
	return item
}
// FetchSourceContext sets the fetch source context of the item, which
// controls if and how _source should be returned. It returns item for
// chaining.
func (item *MoreLikeThisQueryItem) FetchSourceContext(fsc *FetchSourceContext) *MoreLikeThisQueryItem {
	item.fsc = fsc
	return item
}
// Version sets the version of the item and returns item for chaining.
func (item *MoreLikeThisQueryItem) Version(version int64) *MoreLikeThisQueryItem {
	item.version = version
	return item
}
// VersionType sets the version type of the item and returns item for
// chaining.
func (item *MoreLikeThisQueryItem) VersionType(versionType string) *MoreLikeThisQueryItem {
	item.versionType = versionType
	return item
}
// Source returns the JSON-serializable form of the item.
//
// An item with a non-empty like text serializes to that bare string and
// every other setting is ignored. Otherwise the result is a map holding
// only the settings that were provided. An error is returned only if the
// fetch source context fails to serialize.
func (item *MoreLikeThisQueryItem) Source() (interface{}, error) {
	if text := item.likeText; text != "" {
		return text, nil
	}

	out := map[string]interface{}{}

	// putString stores a string-valued key unless the value is empty.
	putString := func(key, value string) {
		if value != "" {
			out[key] = value
		}
	}
	putString("_index", item.index)
	putString("_type", item.typ)
	putString("_id", item.id)
	putString("_routing", item.routing)
	putString("_version_type", item.versionType)

	if item.doc != nil {
		out["doc"] = item.doc
	}
	if len(item.fields) > 0 {
		out["fields"] = item.fields
	}
	// Negative versions (the constructor's -1) mean "no version given".
	if item.version >= 0 {
		out["_version"] = item.version
	}
	if item.fsc != nil {
		fetchSource, err := item.fsc.Source()
		if err != nil {
			return nil, err
		}
		out["_source"] = fetchSource
	}

	return out, nil
}