minio/vendor/gopkg.in/olivere/elastic.v5/termvectors.go
Aditya Manthramurthy a2a8d54bb6 Add access format support for Elasticsearch notification target (#4006)
This change adds `access` format support for notifications to an
Elasticsearch server, and it refactors `namespace` format support.

In the case of `access` format, for each event in Minio, a JSON
document is inserted into Elasticsearch with its timestamp set to the
event's timestamp, and with the ID generated automatically by
elasticsearch. No events are modified or deleted in this mode.

In the case of `namespace` format, for each event in Minio, a JSON
document keyed by the bucket and object name is updated in
Elasticsearch. In the case of an object being created or over-written
in Minio, a new document is inserted into, or an existing document is
updated in, the Elasticsearch index. If an object is deleted in Minio,
the corresponding document is deleted from the Elasticsearch index.

Additionally, this change upgrades Elasticsearch support to the 5.x
series. This is a breaking change, and users of previous elasticsearch
versions should upgrade.

Also updates documentation on Elasticsearch notification target usage
and has a link to an elasticsearch upgrade guide.

This is the last patch that finally resolves #3928.
2017-03-31 14:11:27 -07:00

461 lines
12 KiB
Go

// Copyright 2012-present Oliver Eilhard. All rights reserved.
// Use of this source code is governed by a MIT-license.
// See http://olivere.mit-license.org/license.txt for details.
package elastic
import (
"fmt"
"net/url"
"strings"
"golang.org/x/net/context"
"gopkg.in/olivere/elastic.v5/uritemplates"
)
// TermvectorsService retrieves information and statistics on the terms in
// the fields of a single document. The document is either one stored in an
// index or an artificial one supplied by the caller.
//
// Options are configured through the chainable setter methods and the
// request is sent with Do.
//
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-termvectors.html
// for documentation.
type TermvectorsService struct {
	client *Client // performs the HTTP request in Do
	pretty bool    // sent as pretty=1 when true

	// Path components of the request URL; id may be left empty.
	id, index, typ string

	dfs             *bool       // query parameter "dfs"
	doc             interface{} // body entry "doc"
	fieldStatistics *bool       // query parameter "field_statistics"
	fields          []string    // query parameter "fields", comma-joined

	filter           *TermvectorsFilterSettings // body entry "filter"
	perFieldAnalyzer map[string]string          // body entry "per_field_analyzer"

	offsets        *bool       // query parameter "offsets"
	parent         string      // query parameter "parent"
	payloads       *bool       // query parameter "payloads"
	positions      *bool       // query parameter "positions"
	preference     string      // query parameter "preference"
	realtime       *bool       // query parameter "realtime"
	routing        string      // query parameter "routing"
	termStatistics *bool       // query parameter "term_statistics"
	version        interface{} // query parameter "version"
	versionType    string      // query parameter "version_type"

	// Explicit request bodies. When set, Do sends them instead of the body
	// assembled from doc, perFieldAnalyzer and filter.
	bodyJson   interface{}
	bodyString string
}
// NewTermvectorsService returns a TermvectorsService bound to the given
// client. All other options start at their zero value.
func NewTermvectorsService(client *Client) *TermvectorsService {
	svc := new(TermvectorsService)
	svc.client = client
	return svc
}
// Index sets the name of the index in which the document resides. It fills
// the {index} segment of the request path and is required by Validate.
func (s *TermvectorsService) Index(index string) *TermvectorsService {
	s.index = index

	return s
}
// Type sets the type of the document. It fills the {type} segment of the
// request path and is required by Validate.
func (s *TermvectorsService) Type(typ string) *TermvectorsService {
	s.typ = typ

	return s
}
// Id sets the id of the document. When it is left empty, the request path
// is built without an {id} segment.
func (s *TermvectorsService) Id(id string) *TermvectorsService {
	s.id = id

	return s
}
// Dfs specifies whether distributed frequencies should be returned instead
// of shard frequencies. The "dfs" query parameter is only sent once this
// setter has been called.
func (s *TermvectorsService) Dfs(dfs bool) *TermvectorsService {
	s.dfs = new(bool)
	*s.dfs = dfs
	return s
}
// Doc sets the document to analyze. A non-nil value is sent as the "doc"
// entry of the generated request body, i.e. when neither BodyJson nor
// BodyString supplies the body.
func (s *TermvectorsService) Doc(doc interface{}) *TermvectorsService {
	s.doc = doc

	return s
}
// FieldStatistics specifies whether document count, sum of document
// frequencies and sum of total term frequencies should be returned. The
// "field_statistics" query parameter is only sent once this setter has been
// called.
func (s *TermvectorsService) FieldStatistics(fieldStatistics bool) *TermvectorsService {
	s.fieldStatistics = new(bool)
	*s.fieldStatistics = fieldStatistics
	return s
}
// Fields appends to the list of fields to return. Repeated calls
// accumulate; the names are sent comma-joined as the "fields" query
// parameter when at least one is present.
func (s *TermvectorsService) Fields(fields ...string) *TermvectorsService {
	current := s.fields
	if current == nil {
		// Start from an empty, non-nil slice on first use.
		current = []string{}
	}
	s.fields = append(current, fields...)
	return s
}
// Filter sets the terms filter settings. When non-nil, their Source is sent
// as the "filter" entry of the generated request body.
func (s *TermvectorsService) Filter(filter *TermvectorsFilterSettings) *TermvectorsService {
	s.filter = filter

	return s
}
// PerFieldAnalyzer allows specifying a different analyzer than the one at
// the field. A non-empty map is sent as the "per_field_analyzer" entry of
// the generated request body. The map is stored as given, not copied.
func (s *TermvectorsService) PerFieldAnalyzer(perFieldAnalyzer map[string]string) *TermvectorsService {
	s.perFieldAnalyzer = perFieldAnalyzer

	return s
}
// Offsets specifies whether term offsets should be returned. The "offsets"
// query parameter is only sent once this setter has been called.
func (s *TermvectorsService) Offsets(offsets bool) *TermvectorsService {
	s.offsets = new(bool)
	*s.offsets = offsets
	return s
}
// Parent sets the parent id of the document. A non-empty value is sent as
// the "parent" query parameter.
func (s *TermvectorsService) Parent(parent string) *TermvectorsService {
	s.parent = parent

	return s
}
// Payloads specifies whether term payloads should be returned. The
// "payloads" query parameter is only sent once this setter has been called.
func (s *TermvectorsService) Payloads(payloads bool) *TermvectorsService {
	s.payloads = new(bool)
	*s.payloads = payloads
	return s
}
// Positions specifies whether term positions should be returned. The
// "positions" query parameter is only sent once this setter has been
// called.
func (s *TermvectorsService) Positions(positions bool) *TermvectorsService {
	s.positions = new(bool)
	*s.positions = positions
	return s
}
// Preference specifies the node or shard the operation should be performed
// on (default: random). A non-empty value is sent as the "preference" query
// parameter.
func (s *TermvectorsService) Preference(preference string) *TermvectorsService {
	s.preference = preference

	return s
}
// Realtime specifies whether the request is real-time as opposed to
// near-real-time (default: true). The "realtime" query parameter is only
// sent once this setter has been called.
func (s *TermvectorsService) Realtime(realtime bool) *TermvectorsService {
	s.realtime = new(bool)
	*s.realtime = realtime
	return s
}
// Routing sets a specific routing value. A non-empty value is sent as the
// "routing" query parameter.
func (s *TermvectorsService) Routing(routing string) *TermvectorsService {
	s.routing = routing

	return s
}
// TermStatistics specifies whether total term frequency and document
// frequency should be returned. The "term_statistics" query parameter is
// only sent once this setter has been called.
func (s *TermvectorsService) TermStatistics(termStatistics bool) *TermvectorsService {
	s.termStatistics = new(bool)
	*s.termStatistics = termStatistics
	return s
}
// Version sets an explicit version number for concurrency control. A
// non-nil value is formatted with %v and sent as the "version" query
// parameter.
func (s *TermvectorsService) Version(version interface{}) *TermvectorsService {
	s.version = version

	return s
}
// VersionType sets the version type ("internal", "external",
// "external_gte", or "force"). A non-empty value is sent as the
// "version_type" query parameter; it is not validated here.
func (s *TermvectorsService) VersionType(versionType string) *TermvectorsService {
	s.versionType = versionType

	return s
}
// Pretty indicates that the JSON response should be indented and human
// readable. When true, pretty=1 is added to the query string.
func (s *TermvectorsService) Pretty(pretty bool) *TermvectorsService {
	s.pretty = pretty

	return s
}
// BodyJson defines the body parameters. See documentation. A non-nil value
// is sent as the request body and takes precedence over BodyString and over
// the body generated from Doc, PerFieldAnalyzer and Filter.
func (s *TermvectorsService) BodyJson(body interface{}) *TermvectorsService {
	s.bodyJson = body

	return s
}
// BodyString defines the body parameters as a string. See documentation. A
// non-empty string is sent as the request body when no BodyJson is set, and
// takes precedence over the body generated from Doc, PerFieldAnalyzer and
// Filter.
func (s *TermvectorsService) BodyString(body string) *TermvectorsService {
	s.bodyString = body

	return s
}
// buildURL builds the URL for the operation. It returns the expanded
// request path and the query string parameters, or the error reported while
// expanding the path template.
func (s *TermvectorsService) buildURL() (string, url.Values, error) {
	// The id segment is only part of the path when an id was provided.
	template := "/{index}/{type}/_termvectors"
	expansions := map[string]string{
		"index": s.index,
		"type":  s.typ,
	}
	if s.id != "" {
		template = "/{index}/{type}/{id}/_termvectors"
		expansions["id"] = s.id
	}
	path, err := uritemplates.Expand(template, expansions)
	if err != nil {
		return "", url.Values{}, err
	}

	params := url.Values{}
	if s.pretty {
		params.Set("pretty", "1")
	}
	if len(s.fields) > 0 {
		params.Set("fields", strings.Join(s.fields, ","))
	}
	if s.version != nil {
		params.Set("version", fmt.Sprintf("%v", s.version))
	}

	// Boolean options are sent only when explicitly set by the caller.
	boolOpts := []struct {
		name  string
		value *bool
	}{
		{"dfs", s.dfs},
		{"field_statistics", s.fieldStatistics},
		{"offsets", s.offsets},
		{"payloads", s.payloads},
		{"positions", s.positions},
		{"realtime", s.realtime},
		{"term_statistics", s.termStatistics},
	}
	for _, opt := range boolOpts {
		if opt.value != nil {
			params.Set(opt.name, fmt.Sprintf("%v", *opt.value))
		}
	}

	// String options are sent only when non-empty.
	stringOpts := []struct{ name, value string }{
		{"parent", s.parent},
		{"preference", s.preference},
		{"routing", s.routing},
		{"version_type", s.versionType},
	}
	for _, opt := range stringOpts {
		if opt.value != "" {
			params.Set(opt.name, opt.value)
		}
	}

	return path, params, nil
}
// Validate checks if the operation is valid. Index and Type must both be
// non-empty; otherwise an error naming every missing field is returned.
func (s *TermvectorsService) Validate() error {
	required := []struct{ label, value string }{
		{"Index", s.index},
		{"Type", s.typ},
	}

	var missing []string
	for _, field := range required {
		if field.value == "" {
			missing = append(missing, field.label)
		}
	}
	if len(missing) == 0 {
		return nil
	}
	return fmt.Errorf("missing required fields: %v", missing)
}
// Do executes the operation. It validates the service, builds the URL and
// the request body, performs a GET request through the client and decodes
// the response body into a TermvectorsResponse.
//
// The body is chosen in this order: BodyJson if non-nil, else BodyString if
// non-empty, else a map generated from Doc, PerFieldAnalyzer and Filter. If
// none of these yields any content, no body value is passed on.
func (s *TermvectorsService) Do(ctx context.Context) (*TermvectorsResponse, error) {
	// Check pre-conditions
	if err := s.Validate(); err != nil {
		return nil, err
	}

	// Get URL for request
	path, params, err := s.buildURL()
	if err != nil {
		return nil, err
	}

	// Setup HTTP request body
	var body interface{}
	switch {
	case s.bodyJson != nil:
		body = s.bodyJson
	case s.bodyString != "":
		body = s.bodyString
	default:
		generated := make(map[string]interface{})
		if s.doc != nil {
			generated["doc"] = s.doc
		}
		if len(s.perFieldAnalyzer) > 0 {
			generated["per_field_analyzer"] = s.perFieldAnalyzer
		}
		if s.filter != nil {
			filterSrc, err := s.filter.Source()
			if err != nil {
				return nil, err
			}
			generated["filter"] = filterSrc
		}
		// Leave body nil when there is nothing to send.
		if len(generated) > 0 {
			body = generated
		}
	}

	// Get HTTP response
	res, err := s.client.PerformRequest(ctx, "GET", path, params, body)
	if err != nil {
		return nil, err
	}

	// Return operation response
	resp := &TermvectorsResponse{}
	if err := s.client.decoder.Decode(res.Body, resp); err != nil {
		return nil, err
	}
	return resp, nil
}
// -- Filter settings --
// TermvectorsFilterSettings adds additional filters to a Termvectors
// request. It allows filtering terms based on their tf-idf scores.
// Every setting is optional and is only serialized by Source once its
// setter has been called.
// See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/docs-termvectors.html#_terms_filtering
// for more information.
type TermvectorsFilterSettings struct {
	maxNumTerms   *int64 // serialized as "max_num_terms"
	minTermFreq   *int64 // serialized as "min_term_freq"
	maxTermFreq   *int64 // serialized as "max_term_freq"
	minDocFreq    *int64 // serialized as "min_doc_freq"
	maxDocFreq    *int64 // serialized as "max_doc_freq"
	minWordLength *int64 // serialized as "min_word_length"
	maxWordLength *int64 // serialized as "max_word_length"
}

// NewTermvectorsFilterSettings creates a new TermvectorsFilterSettings
// with no setting specified.
func NewTermvectorsFilterSettings() *TermvectorsFilterSettings {
	return new(TermvectorsFilterSettings)
}

// MaxNumTerms specifies the maximum number of terms that must be returned
// per field.
func (fs *TermvectorsFilterSettings) MaxNumTerms(value int64) *TermvectorsFilterSettings {
	fs.maxNumTerms = new(int64)
	*fs.maxNumTerms = value
	return fs
}

// MinTermFreq ignores words with less than this frequency in the source doc.
func (fs *TermvectorsFilterSettings) MinTermFreq(value int64) *TermvectorsFilterSettings {
	fs.minTermFreq = new(int64)
	*fs.minTermFreq = value
	return fs
}

// MaxTermFreq ignores words with more than this frequency in the source doc.
func (fs *TermvectorsFilterSettings) MaxTermFreq(value int64) *TermvectorsFilterSettings {
	fs.maxTermFreq = new(int64)
	*fs.maxTermFreq = value
	return fs
}

// MinDocFreq ignores terms which do not occur in at least this many docs.
func (fs *TermvectorsFilterSettings) MinDocFreq(value int64) *TermvectorsFilterSettings {
	fs.minDocFreq = new(int64)
	*fs.minDocFreq = value
	return fs
}

// MaxDocFreq ignores terms which occur in more than this many docs.
func (fs *TermvectorsFilterSettings) MaxDocFreq(value int64) *TermvectorsFilterSettings {
	fs.maxDocFreq = new(int64)
	*fs.maxDocFreq = value
	return fs
}

// MinWordLength specifies the minimum word length below which words will be
// ignored.
func (fs *TermvectorsFilterSettings) MinWordLength(value int64) *TermvectorsFilterSettings {
	fs.minWordLength = new(int64)
	*fs.minWordLength = value
	return fs
}

// MaxWordLength specifies the maximum word length above which words will be
// ignored.
func (fs *TermvectorsFilterSettings) MaxWordLength(value int64) *TermvectorsFilterSettings {
	fs.maxWordLength = new(int64)
	*fs.maxWordLength = value
	return fs
}

// Source returns the JSON-serializable form of the filter settings: a
// map[string]interface{} holding an int64 entry for every setting that has
// been specified. The returned error is always nil.
func (fs *TermvectorsFilterSettings) Source() (interface{}, error) {
	entries := []struct {
		key   string
		value *int64
	}{
		{"max_num_terms", fs.maxNumTerms},
		{"min_term_freq", fs.minTermFreq},
		{"max_term_freq", fs.maxTermFreq},
		{"min_doc_freq", fs.minDocFreq},
		{"max_doc_freq", fs.maxDocFreq},
		{"min_word_length", fs.minWordLength},
		{"max_word_length", fs.maxWordLength},
	}

	source := make(map[string]interface{})
	for _, entry := range entries {
		// Unset settings are left out entirely.
		if entry.value != nil {
			source[entry.key] = *entry.value
		}
	}
	return source, nil
}
// -- Response types --
type (
	// TokenInfo maps the start_offset, end_offset, position and payload
	// keys of one entry in a term's "tokens" list.
	TokenInfo struct {
		StartOffset int64  `json:"start_offset"`
		EndOffset   int64  `json:"end_offset"`
		Position    int64  `json:"position"`
		Payload     string `json:"payload"`
	}

	// TermsInfo maps the per-term data of a field: doc_freq, score,
	// term_freq, ttf and the list of tokens.
	TermsInfo struct {
		DocFreq  int64       `json:"doc_freq"`
		Score    float64     `json:"score"`
		TermFreq int64       `json:"term_freq"`
		Ttf      int64       `json:"ttf"`
		Tokens   []TokenInfo `json:"tokens"`
	}

	// FieldStatistics maps the doc_count, sum_doc_freq and sum_ttf keys of
	// a field's "field_statistics" object.
	FieldStatistics struct {
		DocCount   int64 `json:"doc_count"`
		SumDocFreq int64 `json:"sum_doc_freq"`
		SumTtf     int64 `json:"sum_ttf"`
	}

	// TermVectorsFieldInfo maps one field's entry in "term_vectors": its
	// field statistics and its terms keyed by term.
	TermVectorsFieldInfo struct {
		FieldStatistics FieldStatistics      `json:"field_statistics"`
		Terms           map[string]TermsInfo `json:"terms"`
	}

	// TermvectorsResponse is the response of TermvectorsService.Do.
	// TermVectors is keyed by field name.
	TermvectorsResponse struct {
		Index       string                          `json:"_index"`
		Type        string                          `json:"_type"`
		Id          string                          `json:"_id,omitempty"`
		Version     int                             `json:"_version"`
		Found       bool                            `json:"found"`
		Took        int64                           `json:"took"`
		TermVectors map[string]TermVectorsFieldInfo `json:"term_vectors"`
	}
)