minio/pkg/csvparser/writer.go
Anis Elleuch 9902c9baaa
sql: Add support of escape quote in CSV (#9231)
This commit modifies csv parser, a fork of golang csv
parser to support a custom quote escape character.

The quote escape character is used to escape the quote
character when a csv field contains a quote character
as part of data.
2020-04-01 15:39:34 -07:00

180 lines
4.9 KiB
Go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in https://golang.org/LICENSE
package csv
import (
"bufio"
"io"
"strings"
"unicode"
"unicode/utf8"
)
// A Writer writes records using CSV encoding.
//
// As returned by NewWriter, a Writer writes records terminated by a
// newline and uses ',' as the field delimiter. The exported fields can be
// changed to customize the details before the first call to Write or WriteAll.
//
// Comma is the field delimiter.
//
// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
//
// The writes of individual records are buffered.
// After all data has been written, the client should call the
// Flush method to guarantee all data has been forwarded to
// the underlying io.Writer. Any errors that occurred should
// be checked by calling the Error method.
type Writer struct {
Comma rune // Field delimiter (set to ',' by NewWriter)
Quote rune // Fields quote character
QuoteEscape rune
AlwaysQuote bool // True to quote all fields
UseCRLF bool // True to use \r\n as the line terminator
w *bufio.Writer
}
// NewWriter returns a new Writer that writes to w.
func NewWriter(w io.Writer) *Writer {
return &Writer{
Comma: ',',
Quote: '"',
QuoteEscape: '"',
w: bufio.NewWriter(w),
}
}
// Write writes a single CSV record to w along with any necessary quoting.
// A record is a slice of strings with each string being one field.
// Writes are buffered, so Flush must eventually be called to ensure
// that the record is written to the underlying io.Writer.
func (w *Writer) Write(record []string) error {
if !validDelim(w.Comma) {
return errInvalidDelim
}
for n, field := range record {
if n > 0 {
if _, err := w.w.WriteRune(w.Comma); err != nil {
return err
}
}
// If we don't have to have a quoted field then just
// write out the field and continue to the next field.
if !w.AlwaysQuote && !w.fieldNeedsQuotes(field) {
if _, err := w.w.WriteString(field); err != nil {
return err
}
continue
}
if _, err := w.w.WriteRune(w.Quote); err != nil {
return err
}
specialChars := "\r\n" + string(w.Quote)
for len(field) > 0 {
// Search for special characters.
i := strings.IndexAny(field, specialChars)
if i < 0 {
i = len(field)
}
// Copy verbatim everything before the special character.
if _, err := w.w.WriteString(field[:i]); err != nil {
return err
}
field = field[i:]
// Encode the special character.
if len(field) > 0 {
var err error
switch nextRune([]byte(field)) {
case w.Quote:
_, err = w.w.WriteRune(w.QuoteEscape)
if err != nil {
break
}
_, err = w.w.WriteRune(w.Quote)
case '\r':
if !w.UseCRLF {
err = w.w.WriteByte('\r')
}
case '\n':
if w.UseCRLF {
_, err = w.w.WriteString("\r\n")
} else {
err = w.w.WriteByte('\n')
}
}
field = field[1:]
if err != nil {
return err
}
}
}
if _, err := w.w.WriteRune(w.Quote); err != nil {
return err
}
}
var err error
if w.UseCRLF {
_, err = w.w.WriteString("\r\n")
} else {
err = w.w.WriteByte('\n')
}
return err
}
// Flush writes any buffered data to the underlying io.Writer.
// To check if an error occurred during the Flush, call Error.
func (w *Writer) Flush() {
w.w.Flush()
}
// Error reports any error that has occurred during a previous Write or Flush.
func (w *Writer) Error() error {
_, err := w.w.Write(nil)
return err
}
// WriteAll writes multiple CSV records to w using Write and then calls Flush,
// returning any error from the Flush.
func (w *Writer) WriteAll(records [][]string) error {
for _, record := range records {
err := w.Write(record)
if err != nil {
return err
}
}
return w.w.Flush()
}
// fieldNeedsQuotes reports whether our field must be enclosed in quotes.
// Fields with a Comma, fields with a quote or newline, and
// fields which start with a space must be enclosed in quotes.
// We used to quote empty strings, but we do not anymore (as of Go 1.4).
// The two representations should be equivalent, but Postgres distinguishes
// quoted vs non-quoted empty string during database imports, and it has
// an option to force the quoted behavior for non-quoted CSV but it has
// no option to force the non-quoted behavior for quoted CSV, making
// CSV with quoted empty strings strictly less useful.
// Not quoting the empty string also makes this package match the behavior
// of Microsoft Excel and Google Drive.
// For Postgres, quote the data terminating string `\.`.
func (w *Writer) fieldNeedsQuotes(field string) bool {
if field == "" {
return false
}
if field == `\.` || strings.ContainsAny(field, "\r\n"+string(w.Quote)+string(w.Comma)) {
return true
}
r1, _ := utf8.DecodeRuneInString(field)
return unicode.IsSpace(r1)
}