mint: Add more SQL tests (#9540)

This commit is contained in:
Anis Elleuch 2020-05-15 19:20:57 +01:00 committed by GitHub
parent d348ec0f6c
commit dfadf70a7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 710 additions and 268 deletions

View file

@ -0,0 +1,172 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# MinIO Python Library for Amazon S3 Compatible Cloud Storage,
# (C) 2015-2020 MinIO, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import io
from minio import Minio
from minio.select.options import (SelectObjectOptions, CSVInput,
RequestProgress, InputSerialization,
OutputSerialization, CSVOutput, JsonOutput)
from utils import *
def test_sql_api(test_name, client, bucket_name, input_data, sql_opts, expected_output):
""" Test if the passed SQL request has the output equal to the passed execpted one"""
object_name = generate_object_name()
got_output = b''
try:
bytes_content = io.BytesIO(input_data)
client.put_object(bucket_name, object_name, io.BytesIO(input_data), len(input_data))
data = client.select_object_content(bucket_name, object_name, sql_opts)
# Get the records
records = io.BytesIO()
for d in data.stream(10*1024):
records.write(d.encode('utf-8'))
got_output = records.getvalue()
except Exception as select_err:
if not isinstance(expected_output, Exception):
raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
else:
if isinstance(expected_output, Exception):
raise ValueError('Test {}: expected an exception, got {}'.format(test_name, got_output))
if got_output != expected_output:
raise ValueError('Test {}: data mismatch. Expected : {}, Received {}'.format(test_name, expected_output, got_output))
finally:
client.remove_object(bucket_name, object_name)
def test_csv_input_custom_quote_char(client, log_output):
# Get a unique bucket_name and object_name
log_output.args['bucket_name'] = bucket_name = generate_bucket_name()
tests = [
# Invalid quote character, should fail
('""', '"', b'col1,col2,col3\n', Exception()),
# UTF-8 quote character
('ع', '"', 'عcol1ع,عcol2ع,عcol3ع\n'.encode(), b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
# Only one field is quoted
('"', '"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
('"', '"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
('\'', '"', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
('', '"', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
('', '"', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
('', '"', b'"col1","col2","col3"\n', b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
('"', '"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
('"', '"', b'A",B\n', b'{"_1":"A\\"","_2":"B"}\n'),
('"', '"', b'A"",B\n', b'{"_1":"A\\"\\"","_2":"B"}\n'),
('"', '\\', b'A\\B,C\n', b'{"_1":"A\\\\B","_2":"C"}\n'),
('"', '"', b'"A""B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
('"', '\\', b'"A\\B","CD"\n', b'{"_1":"AB","_2":"CD"}\n'),
('"', '\\', b'"A\\,","CD"\n', b'{"_1":"A,","_2":"CD"}\n'),
('"', '\\', b'"A\\"B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
('"', '\\', b'"A\\""\n', b'{"_1":"A\\""}\n'),
('"', '\\', b'"A\\"\\"B"\n', b'{"_1":"A\\"\\"B"}\n'),
('"', '\\', b'"A\\"","\\"B"\n', b'{"_1":"A\\"","_2":"\\"B"}\n'),
]
client.make_bucket(bucket_name)
try:
for idx, (quote_char, escape_char, data, expected_output) in enumerate(tests):
sql_opts = SelectObjectOptions(
expression="select * from s3object",
input_serialization=InputSerialization(
compression_type="NONE",
csv=CSVInput(FileHeaderInfo="NONE",
RecordDelimiter="\n",
FieldDelimiter=",",
QuoteCharacter=quote_char,
QuoteEscapeCharacter=escape_char,
Comments="#",
AllowQuotedRecordDelimiter="FALSE",),
),
output_serialization=OutputSerialization(
json = JsonOutput(
RecordDelimiter="\n",
)
),
request_progress=RequestProgress(
enabled="False"
)
)
test_sql_api(f'test_{idx}', client, bucket_name, data, sql_opts, expected_output)
finally:
client.remove_bucket(bucket_name)
# Test passes
print(log_output.json_report())
def test_csv_output_custom_quote_char(client, log_output):
# Get a unique bucket_name and object_name
log_output.args['bucket_name'] = bucket_name = generate_bucket_name()
tests = [
# UTF-8 quote character
("''", "''", b'col1,col2,col3\n', Exception()),
("'", "'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
("", '"', b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
('"', '"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
('"', '"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
('"', '"', b'""""\n', b'""""\n'),
('"', '"', b'\n', b''),
("'", "\\", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
("'", "\\", b'col""1,col2,col3\n', b"'col\"\"1','col2','col3'\n"),
("'", "\\", b'col\'1,col2,col3\n', b"'col\\'1','col2','col3'\n"),
("'", "\\", b'"col\'1","col2","col3"\n', b"'col\\'1','col2','col3'\n"),
("'", "\\", b'col\'\n', b"'col\\''\n"),
# Two consecutive escaped quotes
("'", "\\", b'"a"""""\n', b"'a\"\"'\n"),
]
client.make_bucket(bucket_name)
try:
for idx, (quote_char, escape_char, input_data, expected_output) in enumerate(tests):
sql_opts = SelectObjectOptions(
expression="select * from s3object",
input_serialization=InputSerialization(
compression_type="NONE",
csv=CSVInput(FileHeaderInfo="NONE",
RecordDelimiter="\n",
FieldDelimiter=",",
QuoteCharacter='"',
QuoteEscapeCharacter='"',
Comments="#",
AllowQuotedRecordDelimiter="FALSE",),
),
output_serialization=OutputSerialization(
csv=CSVOutput(QuoteFields="ALWAYS",
RecordDelimiter="\n",
FieldDelimiter=",",
QuoteCharacter=quote_char,
QuoteEscapeCharacter=escape_char,)
),
request_progress=RequestProgress(
enabled="False"
)
)
test_sql_api(f'test_{idx}', client, bucket_name, input_data, sql_opts, expected_output)
finally:
client.remove_bucket(bucket_name)
# Test passes
print(log_output.json_report())

View file

@ -0,0 +1,399 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# MinIO Python Library for Amazon S3 Compatible Cloud Storage,
# (C) 2020 MinIO, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
from datetime import datetime
from minio import Minio
from minio.select.options import (SelectObjectOptions, CSVInput, JSONInput,
RequestProgress, InputSerialization,
OutputSerialization, CSVOutput, JsonOutput)
from utils import *
def test_sql_expressions_custom_input_output(client, input_bytes, sql_input, sql_output, tests, log_output):
bucket_name = generate_bucket_name()
object_name = generate_object_name()
log_output.args['total_tests'] = 0
log_output.args['total_success'] = 0
client.make_bucket(bucket_name)
try:
content = io.BytesIO(bytes(input_bytes, 'utf-8'))
client.put_object(bucket_name, object_name, content, len(input_bytes))
for idx, (test_name, select_expression, expected_output) in enumerate(tests):
if select_expression == '':
continue
try:
log_output.args['total_tests'] += 1
options = SelectObjectOptions(
expression=select_expression,
input_serialization=sql_input,
output_serialization=sql_output,
request_progress=RequestProgress(
enabled="False"
)
)
data = client.select_object_content(bucket_name, object_name, options)
# Get the records
records = io.BytesIO()
for d in data.stream(10*1024):
records.write(d.encode('utf-8'))
got_output = records.getvalue()
if got_output != expected_output:
if type(expected_output) == datetime:
# Attempt to parse the date which will throw an exception for any issue
datetime.strptime(got_output.decode("utf-8").strip(), '%Y-%m-%dT%H:%M:%S.%f%z')
else:
raise ValueError('Test {}: data mismatch. Expected : {}. Received: {}.'.format(idx+1, expected_output, got_output))
log_output.args['total_success'] += 1
except Exception as err:
continue ## TODO, raise instead
# raise Exception(err)
finally:
client.remove_object(bucket_name, object_name)
client.remove_bucket(bucket_name)
def test_sql_expressions(client, input_json_bytes, tests, log_output):
input_serialization = InputSerialization(
compression_type="NONE",
json=JSONInput(Type="DOCUMENT"),
)
output_serialization=OutputSerialization(
csv=CSVOutput(QuoteFields="ASNEEDED")
)
test_sql_expressions_custom_input_output(client, input_json_bytes,
input_serialization, output_serialization, tests, log_output)
def test_sql_operators(client, log_output):
json_testfile = """{"id": 1, "name": "John", "age": 3}
{"id": 2, "name": "Elliot", "age": 4}
{"id": 3, "name": "Yves", "age": 5}
{"id": 4, "name": null, "age": 0}
"""
tests = [
# Logical operators
("AND", "select * from S3Object s where s.id = 1 AND s.name = 'John'", b'1,John,3\n'),
("NOT", "select * from S3Object s where NOT s.id = 1", b'2,Elliot,4\n3,Yves,5\n4,,0\n'),
("OR", "select * from S3Object s where s.id = 1 OR s.id = 3", b'1,John,3\n3,Yves,5\n'),
# Comparison Operators
("<", "select * from S3Object s where s.age < 4", b'1,John,3\n4,,0\n'),
(">", "select * from S3Object s where s.age > 4", b'3,Yves,5\n'),
("<=", "select * from S3Object s where s.age <= 4", b'1,John,3\n2,Elliot,4\n4,,0\n'),
(">=", "select * from S3Object s where s.age >= 4", b'2,Elliot,4\n3,Yves,5\n'),
("=", "select * from S3Object s where s.age = 4", b'2,Elliot,4\n'),
("<>", "select * from S3Object s where s.age <> 4", b'1,John,3\n3,Yves,5\n4,,0\n'),
("!=", "select * from S3Object s where s.age != 4", b'1,John,3\n3,Yves,5\n4,,0\n'),
("BETWEEN", "select * from S3Object s where s.age BETWEEN 4 AND 5", b'2,Elliot,4\n3,Yves,5\n'),
("IN", "select * from S3Object s where s.age IN (3,5)", b'1,John,3\n3,Yves,5\n'),
# Pattern Matching Operators
("LIKE_", "select * from S3Object s where s.name LIKE '_ves'", b'3,Yves,5\n'),
("LIKE%", "select * from S3Object s where s.name LIKE 'Ell%t'", b'2,Elliot,4\n'),
# Unitary Operators
("NULL", "select * from S3Object s where s.name IS NULL", b'4,,0\n'),
("NOT_NULL", "select * from S3Object s where s.age IS NOT NULL", b'1,John,3\n2,Elliot,4\n3,Yves,5\n4,,0\n'),
# Math Operators
("+", "select * from S3Object s where s.age = 1+3 ", b'2,Elliot,4\n'),
("-", "select * from S3Object s where s.age = 5-1 ", b'2,Elliot,4\n'),
("*", "select * from S3Object s where s.age = 2*2 ", b'2,Elliot,4\n'),
("%", "select * from S3Object s where s.age = 10%6 ", b'2,Elliot,4\n'),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_operators_precedence(client, log_output):
json_testfile = """{"id": 1, "name": "Eric"}"""
tests = [
("-_1", "select -3*3 from S3Object", b'-9\n'),
("*", "select 10-3*2 from S3Object", b'4\n'),
("/", "select 13-10/5 from S3Object", b'11\n'),
("%", "select 13-10%5 from S3Object", b'13\n'),
("+", "select 1+1*3 from S3Object", b'4\n'),
("-_2", "select 1-1*3 from S3Object", b'-2\n'),
("=", "select * from S3Object as s where s.id = 13-12", b'1,Eric\n'),
("<>", "select * from S3Object as s where s.id <> 1-1", b'1,Eric\n'),
("NOT", "select * from S3Object where false OR NOT false", b'1,Eric\n'),
("AND", "select * from S3Object where true AND true OR false ", b'1,Eric\n'),
("OR", "select * from S3Object where false OR NOT false", b'1,Eric\n'),
("IN", "select * from S3Object as s where s.id <> -1 AND s.id IN (1,2,3)", b'1,Eric\n'),
("BETWEEN", "select * from S3Object as s where s.id <> -1 AND s.id BETWEEN -1 AND 3", b'1,Eric\n'),
("LIKE", "select * from S3Object as s where s.id <> -1 AND s.name LIKE 'E%'", b'1,Eric\n'),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_functions_agg_cond_conv(client, log_output):
json_testfile = """{"id": 1, "name": "John", "age": 3}
{"id": 2, "name": "Elliot", "age": 4}
{"id": 3, "name": "Yves", "age": 5}
{"id": 4, "name": "Christine", "age": null}
{"id": 5, "name": "Eric", "age": 0}
"""
tests = [
# Aggregate functions
("COUNT", "select count(*) from S3Object s", b'5\n'),
("AVG", "select avg(s.age) from S3Object s", b'3\n'),
("MAX", "select max(s.age) from S3Object s", b'5\n'),
("MIN", "select min(s.age) from S3Object s", b'0\n'),
("SUM", "select sum(s.age) from S3Object s", b'12\n'),
# Conditional functions
("COALESCE", "SELECT COALESCE(s.age, 99) FROM S3Object s", b'3\n4\n5\n99\n0\n'),
("NULLIF", "SELECT NULLIF(s.age, 0) FROM S3Object s", b'3\n4\n5\n\n\n'),
## Conversion functions
("CAST", "SELECT CAST(s.age AS FLOAT) FROM S3Object s", b'3.0\n4.0\n5.0\n\n0.0\n'),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_functions_date(client, log_output):
json_testfile = """
{"id": 1, "name": "John", "datez": "2017-01-02T03:04:05.006+07:30"}
"""
tests = [
# DATE_ADD
("DATE_ADD_1", "select DATE_ADD(year, 5, TO_TIMESTAMP(s.datez)) from S3Object as s", b'2022-01-02T03:04:05.006+07:30\n'),
("DATE_ADD_2", "select DATE_ADD(month, 1, TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017-02-02T03:04:05.006+07:30\n'),
("DATE_ADD_3", "select DATE_ADD(day, -1, TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017-01-01T03:04:05.006+07:30\n'),
("DATE_ADD_4", "select DATE_ADD(hour, 1, TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017-01-02T04:04:05.006+07:30\n'),
("DATE_ADD_5", "select DATE_ADD(minute, 5, TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017-01-02T03:09:05.006+07:30\n'),
("DATE_ADD_6", "select DATE_ADD(second, 5, TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017-01-02T03:04:10.006+07:30\n'),
# DATE_DIFF
("DATE_DIFF_1", "select DATE_DIFF(year, TO_TIMESTAMP(s.datez), TO_TIMESTAMP('2011-01-01T')) from S3Object as s", b'-6\n'),
("DATE_DIFF_2", "select DATE_DIFF(month, TO_TIMESTAMP(s.datez), TO_TIMESTAMP('2011T')) from S3Object as s", b'-72\n'),
("DATE_DIFF_3", "select DATE_DIFF(day, TO_TIMESTAMP(s.datez), TO_TIMESTAMP('2010-01-02T')) from S3Object as s", b'-2556\n'),
# EXTRACT
("EXTRACT_1", "select EXTRACT(year FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'2017\n'),
("EXTRACT_2", "select EXTRACT(month FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'1\n'),
("EXTRACT_3", "select EXTRACT(hour FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'3\n'),
("EXTRACT_4", "select EXTRACT(minute FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'4\n'),
("EXTRACT_5", "select EXTRACT(timezone_hour FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'7\n'),
("EXTRACT_6", "select EXTRACT(timezone_minute FROM TO_TIMESTAMP(s.datez)) from S3Object as s", b'30\n'),
# TO_STRING
("TO_STRING_1", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MMMM d, y') from S3Object as s", b'"January 2, 2017"\n'),
("TO_STRING_2", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MMM d, yyyy') from S3Object as s", b'"Jan 2, 2017"\n'),
("TO_STRING_3", "select TO_STRING(TO_TIMESTAMP(s.datez), 'M-d-yy') from S3Object as s", b'1-2-17\n'),
("TO_STRING_4", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MM-d-y') from S3Object as s", b'01-2-2017\n'),
("TO_STRING_5", "select TO_STRING(TO_TIMESTAMP(s.datez), 'MMMM d, y h:m a') from S3Object as s", b'"January 2, 2017 3:4 AM"\n'),
("TO_STRING_6", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssX') from S3Object as s", b'2017-01-02T3:4:05+0730\n'),
("TO_STRING_7", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssX') from S3Object as s", b'2017-01-02T3:4:05+0730\n'),
("TO_STRING_8", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssXXXX') from S3Object as s", b'2017-01-02T3:4:05+0730\n'),
("TO_STRING_9", "select TO_STRING(TO_TIMESTAMP(s.datez), 'y-MM-dd''T''H:m:ssXXXXX') from S3Object as s", b'2017-01-02T3:4:05+07:30\n'),
("TO_TIMESTAMP", "select TO_TIMESTAMP(s.datez) from S3Object as s", b'2017-01-02T03:04:05.006+07:30\n'),
("UTCNOW", "select UTCNOW() from S3Object", datetime(1,1,1)),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_functions_string(client, log_output):
json_testfile = """
{"id": 1, "name": "John"}
{"id": 2, "name": " \tfoobar\t "}
{"id": 3, "name": "1112211foobar22211122"}
"""
tests = [
# CHAR_LENGTH
("CHAR_LENGTH", "select CHAR_LENGTH(s.name) from S3Object as s", b'4\n24\n21\n'),
("CHARACTER_LENGTH", "select CHARACTER_LENGTH(s.name) from S3Object as s", b'4\n24\n21\n'),
# LOWER
("LOWER", "select LOWER(s.name) from S3Object as s where s.id= 1", b'john\n'),
# SUBSTRING
("SUBSTRING_1", "select SUBSTRING(s.name FROM 2) from S3Object as s where s.id = 1", b'ohn\n'),
("SUBSTRING_2", "select SUBSTRING(s.name FROM 2 FOR 2) from S3Object as s where s.id = 1", b'oh\n'),
("SUBSTRING_3", "select SUBSTRING(s.name FROM -1 FOR 2) from S3Object as s where s.id = 1", b'\n'),
# TRIM
("TRIM_1", "select TRIM(s.name) from S3Object as s where s.id = 2", b'\tfoobar\t\n'),
("TRIM_2", "select TRIM(LEADING FROM s.name) from S3Object as s where s.id = 2", b'\tfoobar\t \n'),
("TRIM_3", "select TRIM(TRAILING FROM s.name) from S3Object as s where s.id = 2", b' \tfoobar\t\n'),
("TRIM_4", "select TRIM(BOTH FROM s.name) from S3Object as s where s.id = 2", b'\tfoobar\t\n'),
("TRIM_5", "select TRIM(BOTH '12' FROM s.name) from S3Object as s where s.id = 3", b'foobar\n'),
# UPPER
("UPPER", "select UPPER(s.name) from S3Object as s where s.id= 1", b'JOHN\n'),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_datatypes(client, log_output):
json_testfile = """
{"name": "John"}
"""
tests = [
("bool", "select CAST('true' AS BOOL) from S3Object", b'true\n'),
("int", "select CAST('13' AS INT) from S3Object", b'13\n'),
("integer", "select CAST('13' AS INTEGER) from S3Object", b'13\n'),
("string", "select CAST(true AS STRING) from S3Object", b'true\n'),
("float", "select CAST('13.3' AS FLOAT) from S3Object", b'13.3\n'),
("decimal", "select CAST('14.3' AS FLOAT) from S3Object", b'14.3\n'),
("numeric", "select CAST('14.3' AS FLOAT) from S3Object", b'14.3\n'),
("timestamp", "select CAST('2007-04-05T14:30Z' AS TIMESTAMP) from S3Object", b'2007-04-05T14:30Z\n'),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_select(client, log_output):
json_testfile = """{"id": 1, "created": "June 27", "modified": "July 6" }
{"id": 2, "Created": "June 28", "Modified": "July 7", "Cast": "Random Date" }"""
tests = [
("select_1", "select * from S3Object", b'1,June 27,July 6\n2,June 28,July 7,Random Date\n'),
("select_2", "select * from S3Object s", b'1,June 27,July 6\n2,June 28,July 7,Random Date\n'),
("select_3", "select * from S3Object as s", b'1,June 27,July 6\n2,June 28,July 7,Random Date\n'),
("select_4", "select s.line from S3Object as s", b'\n\n'),
("select_5", 'select s."Created" from S3Object as s', b'\nJune 28\n'),
("select_5", 'select s."Cast" from S3Object as s', b'\nRandom Date\n'),
("where", 'select s.created from S3Object as s', b'June 27\nJune 28\n'),
("limit", 'select * from S3Object as s LIMIT 1', b'1,June 27,July 6\n'),
]
try:
test_sql_expressions(client, json_testfile, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_select_json(client, log_output):
json_testcontent = """{ "Rules": [ {"id": "1"}, {"expr": "y > x"}, {"id": "2", "expr": "z = DEBUG"} ]}
{ "created": "June 27", "modified": "July 6" }
"""
tests = [
("select_1", "SELECT id FROM S3Object[*].Rules[*].id", b'{"id":"1"}\n{}\n{"id":"2"}\n{}\n'),
("select_2", "SELECT id FROM S3Object[*].Rules[*].id WHERE id IS NOT MISSING", b'{"id":"1"}\n{"id":"2"}\n'),
("select_3", "SELECT d.created, d.modified FROM S3Object[*] d", b'{}\n{"created":"June 27","modified":"July 6"}\n'),
("select_4", "SELECT _1.created, _1.modified FROM S3Object[*]", b'{}\n{"created":"June 27","modified":"July 6"}\n'),
("select_5", "Select s.rules[1].expr from S3Object s", b'{"expr":"y > x"}\n{}\n'),
]
input_serialization = InputSerialization(json=JSONInput(Type="DOCUMENT"))
output_serialization = OutputSerialization(json=JsonOutput())
try:
test_sql_expressions_custom_input_output(client, json_testcontent,
input_serialization, output_serialization, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())
def test_sql_select_csv_no_header(client, log_output):
json_testcontent = """val1,val2,val3
val4,val5,val6
"""
tests = [
("select_1", "SELECT s._2 FROM S3Object as s", b'val2\nval5\n'),
]
input_serialization=InputSerialization(
csv=CSVInput(
FileHeaderInfo="NONE",
AllowQuotedRecordDelimiter="FALSE",
),
)
output_serialization=OutputSerialization(csv=CSVOutput())
try:
test_sql_expressions_custom_input_output(client, json_testcontent,
input_serialization, output_serialization, tests, log_output)
except Exception as select_err:
raise select_err
# raise ValueError('Test {} unexpectedly failed with: {}'.format(test_name, select_err))
# pass
# Test passes
print(log_output.json_report())

View file

@ -15,278 +15,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# from __future__ import division
# from __future__ import absolute_import
import os
import io
from sys import exit
import uuid
import inspect
import json
import time
import traceback
from minio import Minio
from minio.select.options import (SelectObjectOptions, CSVInput,
RequestProgress, InputSerialization,
OutputSerialization, CSVOutput, JsonOutput)
class LogOutput(object):
"""
LogOutput is the class for log output. It is required standard for all
SDK tests controlled by mint.
Here are its attributes:
'name': name of the SDK under test, e.g. 's3select'
'function': name of the method/api under test with its signature
The following python code can be used to
pull args information of a <method> and to
put together with the method name:
<method>.__name__+'('+', '.join(args_list)+')'
e.g. 'remove_object(bucket_name, object_name)'
'args': method/api arguments with their values, in
dictionary form: {'arg1': val1, 'arg2': val2, ...}
'duration': duration of the whole test in milliseconds,
defaults to 0
'alert': any extra information user is needed to be alerted about,
like whether this is a Blocker/Gateway/Server related
issue, etc., defaults to None
'message': descriptive error message, defaults to None
'error': stack-trace/exception message(only in case of failure),
actual low level exception/error thrown by the program,
defaults to None
'status': exit status, possible values are 'PASS', 'FAIL', 'NA',
defaults to 'PASS'
"""
PASS = 'PASS'
FAIL = 'FAIL'
NA = 'NA'
def __init__(self, meth, test_name):
self.__args_list = inspect.getargspec(meth).args[1:]
self.__name = 'minio-py:'+test_name
self.__function = meth.__name__+'('+', '.join(self.__args_list)+')'
self.__args = {}
self.__duration = 0
self.__alert = ''
self.__message = None
self.__error = None
self.__status = self.PASS
self.__start_time = time.time()
@property
def name(self): return self.__name
@property
def function(self): return self.__function
@property
def args(self): return self.__args
@name.setter
def name(self, val): self.__name = val
@function.setter
def function(self, val): self.__function = val
@args.setter
def args(self, val): self.__args = val
def json_report(self, err_msg='', alert='', status=''):
self.__args = {k: v for k, v in self.__args.items() if v and v != ''}
entry = {'name': self.__name,
'function': self.__function,
'args': self.__args,
'duration': int(round((time.time() - self.__start_time)*1000)),
'alert': str(alert),
'message': str(err_msg),
'error': traceback.format_exc() if err_msg and err_msg != '' else '',
'status': status if status and status != '' else
self.FAIL if err_msg and err_msg != '' else self.PASS
}
return json.dumps({k: v for k, v in entry.items() if v and v != ''})
def generate_bucket_name():
return "s3select-test-" + uuid.uuid4().__str__()
def test_csv_input_custom_quote_char(client, log_output):
# Get a unique bucket_name and object_name
log_output.args['bucket_name'] = bucket_name = generate_bucket_name()
tests = [
# Invalid quote character, should fail
('""', '"', b'col1,col2,col3\n', Exception()),
# UTF-8 quote character
('ع', '"', b'\xd8\xb9col1\xd8\xb9,\xd8\xb9col2\xd8\xb9,\xd8\xb9col3\xd8\xb9\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
# Only one field is quoted
('"', '"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
('"', '"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
('\'', '"', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
('', '"', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
('', '"', b'"col1",col2,col3\n', b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
('', '"', b'"col1","col2","col3"\n', b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
('"', '"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
('"', '"', b'A",B\n', b'{"_1":"A\\"","_2":"B"}\n'),
('"', '"', b'A"",B\n', b'{"_1":"A\\"\\"","_2":"B"}\n'),
('"', '\\', b'A\\B,C\n', b'{"_1":"A\\\\B","_2":"C"}\n'),
('"', '"', b'"A""B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
('"', '\\', b'"A\\B","CD"\n', b'{"_1":"AB","_2":"CD"}\n'),
('"', '\\', b'"A\\,","CD"\n', b'{"_1":"A,","_2":"CD"}\n'),
('"', '\\', b'"A\\"B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
('"', '\\', b'"A\\""\n', b'{"_1":"A\\""}\n'),
('"', '\\', b'"A\\"\\"B"\n', b'{"_1":"A\\"\\"B"}\n'),
('"', '\\', b'"A\\"","\\"B"\n', b'{"_1":"A\\"","_2":"\\"B"}\n'),
]
try:
client.make_bucket(bucket_name)
for idx, (quote_char, escape_char, object_content, expected_output) in enumerate(tests):
options = SelectObjectOptions(
expression="select * from s3object",
input_serialization=InputSerialization(
compression_type="NONE",
csv=CSVInput(FileHeaderInfo="NONE",
RecordDelimiter="\n",
FieldDelimiter=",",
QuoteCharacter=quote_char,
QuoteEscapeCharacter=escape_char,
Comments="#",
AllowQuotedRecordDelimiter="FALSE",),
),
output_serialization=OutputSerialization(
json = JsonOutput(
RecordDelimiter="\n",
)
),
request_progress=RequestProgress(
enabled="False"
)
)
got_output = b''
try:
got_output = exec_select(client, bucket_name, object_content, options, log_output)
except Exception as select_err:
if not isinstance(expected_output, Exception):
raise ValueError('Test {} unexpectedly failed with: {}'.format(idx+1, select_err))
else:
if isinstance(expected_output, Exception):
raise ValueError('Test {}: expected an exception, got {}'.format(idx+1, got_output))
if got_output != expected_output:
raise ValueError('Test {}: data mismatch. Expected : {}, Received {}'.format(idx+1, expected_output, got_output))
except Exception as err:
raise Exception(err)
finally:
try:
client.remove_bucket(bucket_name)
except Exception as err:
raise Exception(err)
# Test passes
print(log_output.json_report())
def test_csv_output_custom_quote_char(client, log_output):
# Get a unique bucket_name and object_name
log_output.args['bucket_name'] = bucket_name = generate_bucket_name()
tests = [
# UTF-8 quote character
("''", "''", b'col1,col2,col3\n', Exception()),
("'", "'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
("", '"', b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
('"', '"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
('"', '"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
('"', '"', b'""""\n', b'""""\n'),
('"', '"', b'\n', b''),
("'", "\\", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
("'", "\\", b'col""1,col2,col3\n', b"'col\"\"1','col2','col3'\n"),
("'", "\\", b'col\'1,col2,col3\n', b"'col\\'1','col2','col3'\n"),
("'", "\\", b'"col\'1","col2","col3"\n', b"'col\\'1','col2','col3'\n"),
("'", "\\", b'col\'\n', b"'col\\''\n"),
# Two consecutive escaped quotes
("'", "\\", b'"a"""""\n', b"'a\"\"'\n"),
]
try:
client.make_bucket(bucket_name)
for idx, (quote_char, escape_char, object_content, expected_output) in enumerate(tests):
options = SelectObjectOptions(
expression="select * from s3object",
input_serialization=InputSerialization(
compression_type="NONE",
csv=CSVInput(FileHeaderInfo="NONE",
RecordDelimiter="\n",
FieldDelimiter=",",
QuoteCharacter='"',
QuoteEscapeCharacter='"',
Comments="#",
AllowQuotedRecordDelimiter="FALSE",),
),
output_serialization=OutputSerialization(
csv=CSVOutput(QuoteFields="ALWAYS",
RecordDelimiter="\n",
FieldDelimiter=",",
QuoteCharacter=quote_char,
QuoteEscapeCharacter=escape_char,)
),
request_progress=RequestProgress(
enabled="False"
)
)
got_output = b''
try:
got_output = exec_select(client, bucket_name, object_content, options, log_output)
except Exception as select_err:
if not isinstance(expected_output, Exception):
raise ValueError('Test {} unexpectedly failed with: {}'.format(idx+1, select_err))
else:
if isinstance(expected_output, Exception):
raise ValueError('Test {}: expected an exception, got {}'.format(idx+1, got_output))
if got_output != expected_output:
raise ValueError('Test {}: data mismatch. Expected : {}. Received: {}.'.format(idx+1, expected_output, got_output))
except Exception as err:
raise Exception(err)
finally:
try:
client.remove_bucket(bucket_name)
except Exception as err:
raise Exception(err)
# Test passes
print(log_output.json_report())
def exec_select(client, bucket_name, object_content, options, log_output):
log_output.args['object_name'] = object_name = uuid.uuid4().__str__()
try:
bytes_content = io.BytesIO(object_content)
client.put_object(bucket_name, object_name, io.BytesIO(object_content), len(object_content))
data = client.select_object_content(bucket_name, object_name, options)
# Get the records
records = io.BytesIO()
for d in data.stream(10*1024):
records.write(d.encode('utf-8'))
return records.getvalue()
except Exception as err:
raise Exception(err)
finally:
try:
client.remove_object(bucket_name, object_name)
except Exception as err:
raise Exception(err)
from utils import LogOutput
from sql_ops import *
from csv import *
def main():
"""
@ -312,6 +47,33 @@ def main():
log_output = LogOutput(client.select_object_content, 'test_csv_output_quote_char')
test_csv_output_custom_quote_char(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_operators')
test_sql_operators(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_operators_precedence')
test_sql_operators_precedence(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_functions_agg_cond_conv')
test_sql_functions_agg_cond_conv(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_functions_date')
test_sql_functions_date(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_functions_string')
test_sql_functions_string(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_datatypes')
test_sql_datatypes(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_select')
test_sql_select(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_select_json')
test_sql_select_json(client, log_output)
log_output = LogOutput(client.select_object_content, 'test_sql_select_csv')
test_sql_select_csv_no_header(client, log_output)
except Exception as err:
print(log_output.json_report(err))
exit(1)
@ -319,3 +81,6 @@ def main():
if __name__ == "__main__":
# Execute only if run as a script
main()

View file

@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# MinIO Python Library for Amazon S3 Compatible Cloud Storage,
# (C) 2015-2020 MinIO, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid
import inspect
import json
import time
import traceback
class LogOutput(object):
"""
LogOutput is the class for log output. It is required standard for all
SDK tests controlled by mint.
Here are its attributes:
'name': name of the SDK under test, e.g. 's3select'
'function': name of the method/api under test with its signature
The following python code can be used to
pull args information of a <method> and to
put together with the method name:
<method>.__name__+'('+', '.join(args_list)+')'
e.g. 'remove_object(bucket_name, object_name)'
'args': method/api arguments with their values, in
dictionary form: {'arg1': val1, 'arg2': val2, ...}
'duration': duration of the whole test in milliseconds,
defaults to 0
'alert': any extra information user is needed to be alerted about,
like whether this is a Blocker/Gateway/Server related
issue, etc., defaults to None
'message': descriptive error message, defaults to None
'error': stack-trace/exception message(only in case of failure),
actual low level exception/error thrown by the program,
defaults to None
'status': exit status, possible values are 'PASS', 'FAIL', 'NA',
defaults to 'PASS'
"""
PASS = 'PASS'
FAIL = 'FAIL'
NA = 'NA'
def __init__(self, meth, test_name):
self.__args_list = inspect.getargspec(meth).args[1:]
self.__name = 's3select:'+test_name
self.__function = meth.__name__+'('+', '.join(self.__args_list)+')'
self.__args = {}
self.__duration = 0
self.__alert = ''
self.__message = None
self.__error = None
self.__status = self.PASS
self.__start_time = time.time()
@property
def name(self): return self.__name
@property
def function(self): return self.__function
@property
def args(self): return self.__args
@name.setter
def name(self, val): self.__name = val
@function.setter
def function(self, val): self.__function = val
@args.setter
def args(self, val): self.__args = val
def json_report(self, err_msg='', alert='', status=''):
self.__args = {k: v for k, v in self.__args.items() if v and v != ''}
entry = {'name': self.__name,
'function': self.__function,
'args': self.__args,
'duration': int(round((time.time() - self.__start_time)*1000)),
'alert': str(alert),
'message': str(err_msg),
'error': traceback.format_exc() if err_msg and err_msg != '' else '',
'status': status if status and status != '' else
self.FAIL if err_msg and err_msg != '' else self.PASS
}
return json.dumps({k: v for k, v in entry.items() if v and v != ''})
def generate_bucket_name():
return "s3select-test-" + str(uuid.uuid4())
def generate_object_name():
return str(uuid.uuid4())