minio/pkg/s3select/evaluate.go
Harshavardhana 7e1661f4fa Performance improvements to SELECT API on certain query operations (#6752)
This improves the performance of certain queries dramatically,
such as 'count(*)' etc.

Without this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real	0m42.464s
user	0m0.071s
sys	0m0.010s
```

With this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real	0m17.603s
user	0m0.093s
sys	0m0.008s
```

Roughly a 2.4x speedup (42.5s down to 17.6s of wall-clock time). This PR avoids
a lot of type conversions and instead relies on raw sequences of data and
interprets them lazily.

```
benchcmp old new
benchmark                        old ns/op       new ns/op       delta
BenchmarkSQLAggregate_100K-4     551213          259782          -52.87%
BenchmarkSQLAggregate_1M-4       6981901985      2432413729      -65.16%
BenchmarkSQLAggregate_2M-4       13511978488     4536903552      -66.42%
BenchmarkSQLAggregate_10M-4      68427084908     23266283336     -66.00%

benchmark                        old allocs     new allocs     delta
BenchmarkSQLAggregate_100K-4     2366           485            -79.50%
BenchmarkSQLAggregate_1M-4       47455492       21462860       -54.77%
BenchmarkSQLAggregate_2M-4       95163637       43110771       -54.70%
BenchmarkSQLAggregate_10M-4      476959550      216906510      -54.52%

benchmark                        old bytes       new bytes      delta
BenchmarkSQLAggregate_100K-4     1233079         1086024        -11.93%
BenchmarkSQLAggregate_1M-4       2607984120      557038536      -78.64%
BenchmarkSQLAggregate_2M-4       5254103616      1128149168     -78.53%
BenchmarkSQLAggregate_10M-4      26443524872     5722715992     -78.36%
```
2018-11-14 15:55:10 -08:00

224 lines
6.8 KiB
Go

/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"strings"
"github.com/tidwall/gjson"
"github.com/xwb1989/sqlparser"
"github.com/minio/minio/pkg/s3select/format"
)
// stringOps applies the string function named by myFunc to the function's
// first argument and returns the result produced by applyStrFunc.
//
// Only myFunc.Exprs[0] is inspected:
//   - nested function call: the string function is applied to myReturnVal,
//     which the caller passes in as the already-evaluated inner call;
//   - column name: the string function is applied to that field of record,
//     looked up with gjson.GetBytes;
//   - SQL literal: the string function is applied to the literal's raw bytes.
//
// A nil myFunc, a call without arguments, or any other argument shape yields
// the empty string.
func stringOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
	// Indexing Exprs[0] unconditionally would panic for a call such as
	// UPPER() that carries no arguments, so bail out early instead.
	if myFunc == nil || len(myFunc.Exprs) == 0 {
		return ""
	}
	funcName := myFunc.Name.CompliantName()
	aliased, ok := myFunc.Exprs[0].(*sqlparser.AliasedExpr)
	if !ok {
		return ""
	}
	switch col := aliased.Expr.(type) {
	case *sqlparser.FuncExpr:
		// myReturnVal is the value produced by the recursive evaluation of
		// the nested call in evaluateFuncExpr.
		return applyStrFunc(gjson.Parse(myReturnVal), funcName)
	case *sqlparser.ColName:
		return applyStrFunc(gjson.GetBytes(record, col.Name.CompliantName()), funcName)
	case *sqlparser.SQLVal:
		return applyStrFunc(gjson.ParseBytes(col.Val), funcName)
	}
	return ""
}
// coalOps evaluates a COALESCE call described by myFunc against record.
//
// Each argument is resolved to a string: column names are looked up in record
// with gjson.GetBytes and SQL literals contribute their raw bytes. The
// resolved values are handed to processCoalNoIndex, which selects the result.
// As soon as an argument turns out to be a nested function call, myReturnVal
// (the caller-supplied result of evaluating that nested call) is returned
// as-is and the remaining arguments are not examined.
func coalOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
	candidates := make([]string, len(myFunc.Exprs))
	for pos := range myFunc.Exprs {
		aliased, ok := myFunc.Exprs[pos].(*sqlparser.AliasedExpr)
		if !ok {
			// Non-aliased expressions leave their slot as the empty string.
			continue
		}
		switch arg := aliased.Expr.(type) {
		case *sqlparser.FuncExpr:
			// The nested call was already evaluated by the caller.
			return myReturnVal
		case *sqlparser.ColName:
			field := gjson.GetBytes(record, arg.Name.CompliantName())
			candidates[pos] = field.String()
		case *sqlparser.SQLVal:
			candidates[pos] = string(arg.Val)
		}
	}
	return processCoalNoIndex(candidates)
}
// nullOps evaluates a NULLIF call described by myFunc against record.
//
// Arguments are resolved to strings: column names are looked up in record
// with gjson.GetBytes and SQL literals contribute their raw bytes. When the
// first two resolved values are equal the result is the empty string,
// otherwise it is the first value. As soon as an argument turns out to be a
// nested function call, myReturnVal (the caller-supplied result of evaluating
// that nested call) is returned unchanged.
func nullOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
	// Always keep the two slots that are compared below, but grow with the
	// argument list: a fixed size of 2 made a call with three or more
	// column/literal arguments panic with an index out of range.
	size := len(myFunc.Exprs)
	if size < 2 {
		size = 2
	}
	myArgs := make([]string, size)
	for i, expr := range myFunc.Exprs {
		switch tempArg := expr.(type) {
		case *sqlparser.AliasedExpr:
			switch col := tempArg.Expr.(type) {
			case *sqlparser.FuncExpr:
				// The nested call was already evaluated by the caller.
				return myReturnVal
			case *sqlparser.ColName:
				myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
			case *sqlparser.SQLVal:
				myArgs[i] = string(col.Val)
			}
		}
	}
	// Only the first two operands take part in the comparison; any extra
	// arguments are resolved but otherwise ignored.
	if myArgs[0] == myArgs[1] {
		return ""
	}
	return myArgs[0]
}
// isValidFunc reports whether index occurs in myList.
// A nil or empty myList never contains index.
func isValidFunc(myList []int, index int) bool {
	// len of a nil slice is 0, so no separate nil check is required.
	for pos := 0; pos < len(myList); pos++ {
		if myList[pos] == index {
			return true
		}
	}
	return false
}
// processCoalNoIndex returns the first entry of coalStore that is neither
// empty nor one of the placeholder strings "null" or "missing". When no such
// entry exists (including for an empty or nil slice) it returns "null".
func processCoalNoIndex(coalStore []string) string {
	for idx := range coalStore {
		switch candidate := coalStore[idx]; candidate {
		case "null", "missing", "":
			// Placeholder value, keep looking.
			continue
		default:
			return candidate
		}
	}
	return "null"
}
// evaluateFuncExpr evaluates the (possibly nested) function call myVal
// against record and returns the result as a string.
//
// A nil myVal ends the recursion and yields myReturnVal unchanged. Otherwise
// the first argument of myVal is inspected: if it is itself a function call it
// is evaluated recursively, and that result is passed as the "already
// evaluated" value to stringOps, nullOps or coalOps, chosen by the function
// name. Only the first argument is ever followed into a nested call.
//
// A call without arguments, or a function name that is neither a supported
// string function nor NULLIF/COALESCE, yields the empty string.
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, record []byte) string {
	if myVal == nil {
		return myReturnVal
	}
	if len(myVal.Exprs) == 0 {
		return ""
	}

	// Pick out the nested call in the leading argument, if there is one;
	// nested stays nil for columns, literals and anything else.
	var nested *sqlparser.FuncExpr
	if aliased, ok := myVal.Exprs[0].(*sqlparser.AliasedExpr); ok {
		if inner, isFunc := aliased.Expr.(*sqlparser.FuncExpr); isFunc {
			nested = inner
		}
	}

	name := myVal.Name.CompliantName()
	switch {
	case supportedString(name):
		return stringOps(myVal, record, evaluateFuncExpr(nested, myReturnVal, record))
	case strings.ToUpper(name) == "NULLIF":
		return nullOps(myVal, record, evaluateFuncExpr(nested, myReturnVal, record))
	case strings.ToUpper(name) == "COALESCE":
		return coalOps(myVal, record, evaluateFuncExpr(nested, myReturnVal, record))
	}
	return ""
}
// evaluateFuncErr validates the (possibly nested) function call myVal and
// returns the first problem found, or nil when the call is acceptable.
//
// It reports ErrUnsupportedSQLOperation when the function name is rejected by
// supportedFunc, ErrParseUnsupportedCallWithStar when any argument is a star
// expression, and whatever reader.ColNameErrs returns for a column-name
// argument. Nested function calls are checked recursively. A nil myVal is
// treated as valid.
func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
	if myVal == nil {
		return nil
	}
	if name := myVal.Name.CompliantName(); !supportedFunc(name) {
		return ErrUnsupportedSQLOperation
	}
	for _, arg := range myVal.Exprs {
		if _, isStar := arg.(*sqlparser.StarExpr); isStar {
			return ErrParseUnsupportedCallWithStar
		}
		aliased, ok := arg.(*sqlparser.AliasedExpr)
		if !ok {
			continue
		}
		var err error
		switch inner := aliased.Expr.(type) {
		case *sqlparser.FuncExpr:
			err = evaluateFuncErr(inner, reader)
		case *sqlparser.ColName:
			err = reader.ColNameErrs([]string{inner.Name.CompliantName()})
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// evaluateIsExpr evaluates an expression of the form "<operand> IS NULL" or
// "<operand> IS NOT NULL" against row.
//
// The operand may be a SQL literal, a function call (evaluated through
// evaluateFuncExpr) or a column name (looked up in row with gjson.GetBytes);
// any other operand kind resolves to the empty string. An operand counts as
// null when its resolved string is empty. The operator is matched
// case-insensitively; any other operator yields ErrUnsupportedSQLOperation.
// The alias parameter is not used by this function.
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []byte, alias string) (bool, error) {
	// Validate the operator first so unsupported operators never trigger
	// operand evaluation.
	var wantNull bool
	switch strings.ToLower(myFunc.Operator) {
	case "is null":
		wantNull = true
	case "is not null":
		wantNull = false
	default:
		return false, ErrUnsupportedSQLOperation
	}

	var operand string
	switch expr := myFunc.Expr.(type) {
	case *sqlparser.SQLVal:
		// Literal value.
		operand = string(expr.Val)
	case *sqlparser.FuncExpr:
		// Nested function call.
		operand = evaluateFuncExpr(expr, "", row)
	case *sqlparser.ColName:
		// Column reference.
		operand = gjson.GetBytes(row, expr.Name.CompliantName()).String()
	}

	if wantNull {
		return operand == "", nil
	}
	return operand != "", nil
}
// supportedString reports whether strFunc names one of the supported string
// functions: TRIM, SUBSTRING, CHAR_LENGTH, CHARACTER_LENGTH, LOWER or UPPER.
// The comparison is case-insensitive (strFunc is upper-cased first).
func supportedString(strFunc string) bool {
	// A constant switch avoids building a slice and scanning it on every call.
	switch strings.ToUpper(strFunc) {
	case "TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER":
		return true
	}
	return false
}
// supportedFunc reports whether strFunc names one of the supported functions:
// the string functions TRIM, SUBSTRING, CHAR_LENGTH, CHARACTER_LENGTH, LOWER
// and UPPER, plus COALESCE and NULLIF. The comparison is case-insensitive
// (strFunc is upper-cased first).
func supportedFunc(strFunc string) bool {
	// A constant switch avoids building a slice and scanning it on every call.
	switch strings.ToUpper(strFunc) {
	case "TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER",
		"COALESCE", "NULLIF":
		return true
	}
	return false
}