Merge branch 'dev/unmarshal' into dev/suyono.dev/unmarshal

This commit is contained in:
Suyono 2023-06-30 22:36:50 +10:00
commit 3be5acadaf
8 changed files with 588 additions and 60 deletions

View File

@ -1,48 +1,168 @@
package gocsvparser
import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
)
type defaultRecordHandler struct {
handlersByName map[string]reflect.StructField
outType reflect.Type
fieldsHandlers []FieldsHandler
fieldByTag map[string]columnFieldBinding
outType reflect.Type
columnNameMapped bool
}
type columnFieldBinding struct {
field reflect.StructField
index int
}
func newDefaultHandler() *defaultRecordHandler {
newHander := new(defaultRecordHandler)
newHander.handlersByName = make(map[string]reflect.StructField)
return newHander
newHandler := new(defaultRecordHandler)
newHandler.fieldByTag = make(map[string]columnFieldBinding)
newHandler.columnNameMapped = false
return newHandler
}
func (d *defaultRecordHandler) HandleRecord(v interface{}, record []string) error {
//TODO: implementation
func (d *defaultRecordHandler) HandleRecord(v any, record []string) error {
var (
err error
strField string
index int
ok bool
binding columnFieldBinding
val reflect.Value
)
if d.outType == nil {
err = d.detType(v)
if err != nil {
return fmt.Errorf("error HandleRecord: detType: %+v", err)
}
}
if !d.columnNameMapped {
for index, strField = range record {
if binding, ok = d.fieldByTag[strField]; ok {
binding.index = index
d.fieldByTag[strField] = binding
}
}
d.columnNameMapped = true
return HeaderRead
}
val = reflect.ValueOf(v)
if val.Kind() != reflect.Pointer {
return errors.New("error HandleRecord: v Kind() is not Pointer")
}
val = val.Elem()
switch val.Kind() {
case reflect.Struct:
for _, binding = range d.fieldByTag {
if binding.index < len(record) {
val, err = d.setValue(val, record[binding.index], binding.field)
if err != nil {
return fmt.Errorf("error HandleRecord: StructField SetValue: %+v", err)
}
}
}
case reflect.Map:
return errors.New("error HandleRecord: Map is not supported yet") //TODO: fix me
case reflect.Slice:
return errors.New("error HandleRecord: Slice is not supported yet") //TODO: fix me
}
return nil
}
func (d *defaultRecordHandler) FieldsHandlers() []FieldsHandler {
//TODO: implementation
return nil
func (d *defaultRecordHandler) setValue(val reflect.Value, strValue string, structFiled reflect.StructField) (reflect.Value, error) {
var (
fieldVal reflect.Value
f64 float64
i64 int64
i int
err error
b bool
)
fieldVal = val.FieldByIndex(structFiled.Index)
switch fieldVal.Type().Kind() {
case reflect.String:
fieldVal.SetString(strValue)
case reflect.Bool:
b, err = strconv.ParseBool(strValue)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseBool: %+v", err)
}
fieldVal.SetBool(b)
case reflect.Int64:
i64, err = strconv.ParseInt(strValue, 0, 64)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseInt 64: %+v", err)
}
fieldVal.SetInt(i64)
case reflect.Int32:
i64, err = strconv.ParseInt(strValue, 0, 32)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseInt 32: %+v", err)
}
fieldVal.SetInt(i64)
case reflect.Int16:
i64, err = strconv.ParseInt(strValue, 0, 16)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseInt 16: %+v", err)
}
fieldVal.SetInt(i64)
case reflect.Int8:
i64, err = strconv.ParseInt(strValue, 0, 8)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseInt 8: %+v", err)
}
fieldVal.SetInt(i64)
case reflect.Int:
i, err = strconv.Atoi(strValue)
if err != nil {
return reflect.Value{}, fmt.Errorf("strconv.Atoi: %+v", err)
}
fieldVal.SetInt(int64(i))
case reflect.Uint64, reflect.Uint32, reflect.Uint16, reflect.Uint8, reflect.Uint:
return reflect.Value{}, errors.New("unimplemented") //TODO: fix me
case reflect.Float32:
f64, err = strconv.ParseFloat(strValue, 32)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseFloat 32: %+v", err)
}
fieldVal.SetFloat(f64)
case reflect.Float64:
f64, err = strconv.ParseFloat(strValue, 64)
if err != nil {
return reflect.Value{}, fmt.Errorf("ParseFloat 64: %+v", err)
}
fieldVal.SetFloat(f64)
default:
return reflect.Value{}, errors.New("missing implementation") //TODO: fix me
}
return val, nil
}
func (d *defaultRecordHandler) SetFieldConfigs(configs []FieldsConfig) {
}
func (d *defaultRecordHandler) parseVal(v interface{}) error {
func (d *defaultRecordHandler) detType(v any) error {
typ := reflect.TypeOf(v)
if typ.Kind() == reflect.Pointer {
typ = typ.Elem()
if typ.Kind() == reflect.Struct {
d.outType = typ
return d.buildStructHandlers()
return d.mapStructTag()
} else if typ.Kind() == reflect.Map {
//TODO: implementation
} else if typ.Kind() == reflect.Slice {
@ -52,7 +172,7 @@ func (d *defaultRecordHandler) parseVal(v interface{}) error {
return fmt.Errorf("v should be pointer of Struct, Map, or Slice: %+v", typ)
}
func (d *defaultRecordHandler) buildStructHandlers() error {
func (d *defaultRecordHandler) mapStructTag() error {
//TODO: implementation
for _, field := range reflect.VisibleFields(d.outType) {
if csv, ok := field.Tag.Lookup(csvTag); ok {
@ -60,10 +180,12 @@ func (d *defaultRecordHandler) buildStructHandlers() error {
if len(s) == 0 {
return fmt.Errorf("invalid tag %+v", field.Tag)
}
if _, ok = d.handlersByName[s[0]]; ok {
if _, ok = d.fieldByTag[s[0]]; ok {
return fmt.Errorf("problem with the receiving struct, multiple field with tag %s", s[0])
}
d.handlersByName[s[0]] = field
d.fieldByTag[s[0]] = columnFieldBinding{
field: field,
}
} else if csvIndex, ok := field.Tag.Lookup(csvIndexTag); ok {
_ = csvIndex //TODO: process tag
}

38
example/example_test.go Normal file
View File

@ -0,0 +1,38 @@
package example
import (
"testing"
"github.com/budiuno/gocsvparser"
)
type mtcarsFlat struct {
Model string `csv:"model"`
MilesPerGalon float64 `csv:"mpg"`
Cylinder int `csv:"cyl"`
Displacement float64 `csv:"disp"`
Horsepower int `csv:"hp"`
DriveShaftRatio float64 `csv:"drat"`
Weight float64 `csv:"wt"`
QuerterMileTime float64 `csv:"qsec"`
VEngine bool `csv:"vs"`
AutoTransmission bool `csv:"am"`
Gear int `csv:"gear"`
Carburetors int `csv:"carb"`
}
func TestMtcars(t *testing.T) {
var (
cars []mtcarsFlat
err error
)
err = gocsvparser.Unmarshal(MtcarsCsv, &cars)
if err != nil {
t.Fatalf("unexpected error %+v", err)
}
for _, c := range cars {
t.Logf("%+v", c)
}
}

37
example/mtcars.go Normal file
View File

@ -0,0 +1,37 @@
package example
var (
MtcarsCsv = []byte(`model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1
Duster 360,14.3,8,360,245,3.21,3.57,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.19,20,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4
Merc 280C,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4
Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18,0,0,3,3
Cadillac Fleetwood,10.4,8,472,205,2.93,5.25,17.98,0,0,3,4
Lincoln Continental,10.4,8,460,215,3,5.424,17.82,0,0,3,4
Chrysler Imperial,14.7,8,440,230,3.23,5.345,17.42,0,0,3,4
Fiat 128,32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1
Honda Civic,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2
Toyota Corolla,33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1
Toyota Corona,21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1
Dodge Challenger,15.5,8,318,150,2.76,3.52,16.87,0,0,3,2
AMC Javelin,15.2,8,304,150,3.15,3.435,17.3,0,0,3,2
Camaro Z28,13.3,8,350,245,3.73,3.84,15.41,0,0,3,4
Pontiac Firebird,19.2,8,400,175,3.08,3.845,17.05,0,0,3,2
Fiat X1-9,27.3,4,79,66,4.08,1.935,18.9,1,1,4,1
Porsche 914-2,26,4,120.3,91,4.43,2.14,16.7,0,1,5,2
Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
Ford Pantera L,15.8,8,351,264,4.22,3.17,14.5,0,1,5,4
Ferrari Dino,19.7,6,145,175,3.62,2.77,15.5,0,1,5,6
Maserati Bora,15,8,301,335,3.54,3.57,14.6,0,1,5,8
Volvo 142E,21.4,4,121,109,4.11,2.78,18.6,1,1,4,2`)
)

25
ext_test.go Normal file
View File

@ -0,0 +1,25 @@
package gocsvparser_test
import (
"testing"
"github.com/budiuno/gocsvparser"
)
type testDirect struct {
}
func (t *testDirect) SetFieldConfigs(configs []gocsvparser.FieldsConfig) {
}
func (t *testDirect) HandleRecord(v any, records []string) error {
return nil
}
func TestExt(t *testing.T) {
// _, err := gocsvparser.NewUnmarshaler().WithRecordHandler(new(testDirect))
// if err != nil {
// t.Fatalf("unexpected error %v", err)
// }
}

178
options.go Normal file
View File

@ -0,0 +1,178 @@
package gocsvparser
import (
"encoding/csv"
"errors"
"fmt"
)
type csvOptionsType int
type CsvOption interface {
getType() csvOptionsType
}
const (
comma csvOptionsType = iota
comment
fieldsPerRecord
lazyQuotes
trimLeadingSpace
reuseRecord
useCrlf
columnHeader
recordHandler
csvReader
)
type csvReaderOption struct {
reader *csv.Reader
}
func CsvReader(reader *csv.Reader) csvReaderOption {
return csvReaderOption{
reader: reader,
}
}
func (csvReaderOption) getType() csvOptionsType {
return csvReader
}
type recordHandlerOption struct {
handlerType recordHandlerType
recordHandler RecordHandler
recordFieldsHandler RecordFieldsHandler
}
func RecordHandlerOption(handler any) (*recordHandlerOption, error) {
var rho *recordHandlerOption
if d, ok := handler.(RecordHandler); ok {
if d == nil {
return nil, errors.New("handler value is nil")
}
rho = &recordHandlerOption{
handlerType: direct,
recordHandler: d,
}
} else if s, ok := handler.(RecordFieldsHandler); ok {
if s == nil {
return nil, errors.New("handler value is nil")
}
rho = &recordHandlerOption{
handlerType: fieldsSpecific,
recordFieldsHandler: s,
}
} else {
return nil, fmt.Errorf("invalid handler type %T", handler)
}
return rho, nil
}
func (r *recordHandlerOption) getType() csvOptionsType {
return recordHandler
}
type columnHeaderOption struct {
header []string
}
func ColumnHeader(header ...string) columnHeaderOption {
columnHeader := columnHeaderOption{}
if len(header) > 0 {
columnHeader.header = make([]string, len(header))
copy(columnHeader.header, header)
}
return columnHeader
}
func (c columnHeaderOption) getType() csvOptionsType {
return columnHeader
}
type commaOption struct {
comma rune
}
func CommaOption(comma rune) commaOption {
return commaOption{
comma: comma,
}
}
func (c commaOption) getType() csvOptionsType {
return comma
}
type commentOption struct {
comment rune
}
func CommentOption(comment rune) commentOption {
return commentOption{
comment: comment,
}
}
func (c commentOption) getType() csvOptionsType {
return comment
}
type fieldsPerRecordOption struct {
fieldsPerRecord int
}
func FieldPerRecordOption(i int) fieldsPerRecordOption {
return fieldsPerRecordOption{
fieldsPerRecord: i,
}
}
func (f fieldsPerRecordOption) getType() csvOptionsType {
return fieldsPerRecord
}
type lazyQuotesOption struct {
lazyQuotes bool
}
func LazyQuotesOption(b bool) lazyQuotesOption {
return lazyQuotesOption{
lazyQuotes: b,
}
}
func (l lazyQuotesOption) getType() csvOptionsType {
return lazyQuotes
}
type trimLeadingSpaceOption struct {
trimLeadingSpace bool
}
func TrimLeadingSpaceOption(b bool) trimLeadingSpaceOption {
return trimLeadingSpaceOption{
trimLeadingSpace: b,
}
}
func (t trimLeadingSpaceOption) getType() csvOptionsType {
return trimLeadingSpace
}
type reuseRecordOption struct {
reuseRecord bool
}
func ReuseRecordOption(b bool) reuseRecordOption {
return reuseRecordOption{
reuseRecord: b,
}
}
func (r reuseRecordOption) getType() csvOptionsType {
return reuseRecord
}

View File

@ -1,2 +0,0 @@
# Proposal 1

View File

@ -1,7 +1,16 @@
package gocsvparser
import (
"bytes"
"encoding/csv"
"errors"
"fmt"
"io"
"reflect"
)
var (
HeaderRead = errors.New("column headers successfully read")
)
type FieldsConfig struct {
@ -26,64 +35,185 @@ type FieldsHandler interface {
NumFields() int
}
type RecordHandler interface {
type RecordFieldsHandler interface {
FieldsHandlers() []FieldsHandler
Out(v any) error
}
type RecordHandler interface {
// SetFieldConfigs is only effective if a Map is passed to HandleRecord
SetFieldConfigs(configs []FieldsConfig)
HandleRecord(v interface{}, record []string) error
HandleRecord(v any, record []string) error
}
type recordHandlerType int
const (
direct recordHandlerType = 1
fieldsSpecific recordHandlerType = 2
)
type Unmarshaler struct {
header []string
reader *csv.Reader
recordHandler RecordHandler
options []CsvOption
header []string
recordHandler RecordHandler
recordFieldHandler RecordFieldsHandler
handlerType recordHandlerType
csvReader *csv.Reader
}
func NewUnmarshaler() *Unmarshaler {
return &Unmarshaler{}
}
func (u *Unmarshaler) WithCsvReader(reader *csv.Reader) *Unmarshaler {
u.reader = reader
return u
}
func (u *Unmarshaler) WithHeader(header []string) *Unmarshaler {
if len(header) > 0 {
newHeader := make([]string, len(header))
copy(newHeader, header)
u.header = newHeader
func NewUnmarshaler(options ...CsvOption) *Unmarshaler {
unmarshaler := &Unmarshaler{
handlerType: direct,
options: []CsvOption{nil},
}
return u
if len(options) > 0 {
unmarshaler.options = append(unmarshaler.options, options...)
}
return unmarshaler
}
func (u *Unmarshaler) WithRecordHandler(handler RecordHandler) *Unmarshaler {
u.recordHandler = handler
return u
func (u *Unmarshaler) processOptions(options ...CsvOption) {
for _, option := range options {
if option == nil {
continue
}
switch option.getType() {
case csvReader:
if u.csvReader != nil {
continue
}
o := option.(csvReaderOption)
u.csvReader = o.reader
case comma:
o := option.(commaOption)
u.csvReader.Comma = o.comma
case comment:
o := option.(commentOption)
u.csvReader.Comment = o.comment
case fieldsPerRecord:
o := option.(fieldsPerRecordOption)
u.csvReader.FieldsPerRecord = o.fieldsPerRecord
case lazyQuotes:
o := option.(lazyQuotesOption)
u.csvReader.LazyQuotes = o.lazyQuotes
case trimLeadingSpace:
o := option.(trimLeadingSpaceOption)
u.csvReader.TrimLeadingSpace = o.trimLeadingSpace
case reuseRecord:
o := option.(reuseRecordOption)
u.csvReader.ReuseRecord = o.reuseRecord
case columnHeader:
headerOption := option.(columnHeaderOption)
u.header = headerOption.header
case recordHandler:
rho := option.(*recordHandlerOption)
u.handlerType = rho.handlerType
switch rho.handlerType {
case direct:
u.recordHandler = rho.recordHandler
case fieldsSpecific:
u.recordFieldHandler = rho.recordFieldsHandler
}
}
}
}
func (u *Unmarshaler) Unmarshal(data []byte, v interface{}) error {
func (u *Unmarshaler) Unmarshal(data []byte, v any, options ...CsvOption) error {
var (
typ reflect.Type
val, slice reflect.Value
record []string
err error
)
if len(options) > 0 {
u.options = append(u.options, options...)
}
u.options[0] = CsvReader(csv.NewReader(bytes.NewReader(data)))
u.processOptions(u.options...)
if u.handlerType == direct {
if u.recordHandler == nil {
u.recordHandler = newDefaultHandler()
}
slice, typ, err = u.detOutputType(v)
if err != nil {
return fmt.Errorf("error Unmarshal: detOutputType: %+v", err)
}
for {
record, err = u.csvReader.Read()
if err != nil {
if err == io.EOF {
break
}
return fmt.Errorf("error Unmarshal: csv.Reader.Read: %+v", err)
}
val, err = u.newElem(typ)
if err != nil {
return fmt.Errorf("error Unmarshal: newElem: %+v", err)
}
err = u.recordHandler.HandleRecord(val.Interface(), record)
if err != nil {
if err == HeaderRead {
continue
}
return fmt.Errorf("error Unmarshal: RecordHandler.HandleRecord: %+v", err)
}
slice.Set(reflect.Append(slice, val.Elem()))
}
} else {
}
//TODO: implementation
return nil
}
func Unmarshal(data []byte, v interface{}) error {
return NewUnmarshaler().Unmarshal(data, v)
func (u *Unmarshaler) detOutputType(v any) (reflect.Value, reflect.Type, error) {
var typ reflect.Type
if v == nil {
return reflect.Value{}, nil, errors.New("the output parameter is nil")
}
val := reflect.ValueOf(v)
if val.Kind() != reflect.Pointer {
return reflect.Value{}, nil, errors.New("invalid output parameter type")
}
val = val.Elem()
if val.Kind() != reflect.Slice {
return reflect.Value{}, nil, errors.New("invalid output parameter type")
}
typ = val.Type().Elem()
return val, typ, nil
}
// func Read(i interface{}) {
// var val reflect.Value
func Unmarshal(data []byte, v any, options ...CsvOption) error {
return NewUnmarshaler().Unmarshal(data, v, options...)
}
// val = reflect.ValueOf(i)
// if val.Kind() == reflect.Pointer {
// val = val.Elem()
func (u *Unmarshaler) newElem(typ reflect.Type) (reflect.Value, error) {
switch typ.Kind() {
case reflect.Struct:
return reflect.New(typ), nil
case reflect.Map:
//TODO: implementation
case reflect.Slice:
//TODO: implementation
}
// if val.Kind() == reflect.Slice {
// val = val.Elem()
// if val.Kind() == reflect.Struct {
// fields := reflect.VisibleFields(val.Type())
// }
// }
// }
// }
return reflect.Zero(typ), errors.New("invalid impelementation") //TODO: placeholder; update me
}

View File

@ -18,7 +18,7 @@ type anon struct {
}
func TestParse(t *testing.T) {
newDefaultHandler().parseVal(&Coba{})
newDefaultHandler().detType(&Coba{})
}
func TestRead(t *testing.T) {