Skip to content

Commit b0620c7

Browse files
authored
feat!: Add ability to override CSV File options (headers and delimiters) (#9144)
Users can now use the following options to change the destination behavior: 1. `skip_header`: default is `false`, which means the first row will be the column headers. 2. `delimiter`: default value is `,`, but the user can specify any single character. The underlying implementation for CSV and JSON is also being changed to Apache Arrow through the updated import of the `filetypes` library.
1 parent eedea97 commit b0620c7

8 files changed

Lines changed: 44 additions & 59 deletions

File tree

plugins/destination/file/client/client.go

Lines changed: 6 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,10 @@ package client
33
import (
44
"context"
55
"fmt"
6-
"io"
76
"os"
87

9-
"github.com/cloudquery/filetypes/csv"
10-
"github.com/cloudquery/filetypes/json"
11-
"github.com/cloudquery/filetypes/parquet"
8+
"github.com/cloudquery/filetypes"
129
"github.com/cloudquery/plugin-sdk/plugins/destination"
13-
"github.com/cloudquery/plugin-sdk/schema"
1410
"github.com/cloudquery/plugin-sdk/specs"
1511
"github.com/rs/zerolog"
1612
)
@@ -21,20 +17,7 @@ type Client struct {
2117
spec specs.Destination
2218
pluginSpec Spec
2319

24-
formatClient formatClient
25-
26-
// Embedded transformers
27-
schema.CQTypeTransformer
28-
reverseTransformer
29-
}
30-
31-
type reverseTransformer interface {
32-
ReverseTransformValues(table *schema.Table, values []any) (schema.CQTypes, error)
33-
}
34-
35-
type formatClient interface {
36-
Read(r io.Reader, table *schema.Table, sourceName string, res chan<- []any) error
37-
WriteTableBatch(w io.Writer, table *schema.Table, resources [][]any) error
20+
*filetypes.Client
3821
}
3922

4023
func New(ctx context.Context, logger zerolog.Logger, spec specs.Destination) (destination.Client, error) {
@@ -54,23 +37,11 @@ func New(ctx context.Context, logger zerolog.Logger, spec specs.Destination) (de
5437
}
5538
c.pluginSpec.SetDefaults()
5639

57-
var err error
58-
switch c.pluginSpec.Format {
59-
case FormatTypeCSV:
60-
c.formatClient, err = csv.NewClient()
61-
c.CQTypeTransformer = &csv.Transformer{}
62-
c.reverseTransformer = &csv.ReverseTransformer{}
63-
case FormatTypeJSON:
64-
c.formatClient, err = json.NewClient()
65-
c.CQTypeTransformer = &schema.DefaultTransformer{}
66-
c.reverseTransformer = &json.ReverseTransformer{}
67-
case FormatTypeParquet:
68-
c.formatClient, err = parquet.NewClient()
69-
c.CQTypeTransformer = &parquet.Transformer{}
70-
c.reverseTransformer = &parquet.ReverseTransformer{}
71-
default:
72-
return nil, fmt.Errorf("unknown format %q", c.pluginSpec.Format)
40+
filetypesClient, err := filetypes.NewClient(c.pluginSpec.FileSpec)
41+
if err != nil {
42+
return nil, fmt.Errorf("failed to create filetypes client: %w", err)
7343
}
44+
c.Client = filetypesClient
7445

7546
if err != nil {
7647
return nil, fmt.Errorf("failed to create filetype client: %w", err)

plugins/destination/file/client/client_test.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package client
33
import (
44
"testing"
55

6+
"github.com/cloudquery/filetypes"
67
"github.com/cloudquery/plugin-sdk/plugins/destination"
78
"github.com/cloudquery/plugin-sdk/specs"
89
)
@@ -22,8 +23,10 @@ func TestPluginCSV(t *testing.T) {
2223
},
2324
Spec{
2425
Directory: t.TempDir(),
25-
Format: FormatTypeCSV,
26-
NoRotate: true,
26+
FileSpec: &filetypes.FileSpec{
27+
Format: filetypes.FormatTypeCSV,
28+
},
29+
NoRotate: true,
2730
},
2831
destination.PluginTestSuiteTests{
2932
SkipOverwrite: true,
@@ -45,8 +48,10 @@ func TestPluginJSON(t *testing.T) {
4548
},
4649
Spec{
4750
Directory: t.TempDir(),
48-
Format: FormatTypeJSON,
49-
NoRotate: true,
51+
FileSpec: &filetypes.FileSpec{
52+
Format: filetypes.FormatTypeJSON,
53+
},
54+
NoRotate: true,
5055
},
5156
destination.PluginTestSuiteTests{
5257
SkipOverwrite: true,
@@ -68,8 +73,10 @@ func TestPluginParquet(t *testing.T) {
6873
},
6974
Spec{
7075
Directory: t.TempDir(),
71-
Format: FormatTypeParquet,
72-
NoRotate: true,
76+
FileSpec: &filetypes.FileSpec{
77+
Format: filetypes.FormatTypeParquet,
78+
},
79+
NoRotate: true,
7380
},
7481
destination.PluginTestSuiteTests{
7582
SkipOverwrite: true,

plugins/destination/file/client/read.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@ func (c *Client) Read(ctx context.Context, table *schema.Table, sourceName strin
1919
}
2020
defer f.Close()
2121

22-
return c.formatClient.Read(f, table, sourceName, res)
22+
return c.Client.Read(f, table, sourceName, res)
2323
}

plugins/destination/file/client/spec.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,14 @@ package client
22

33
import (
44
"fmt"
5-
)
6-
7-
type FormatType string
85

9-
const (
10-
FormatTypeCSV = "csv"
11-
FormatTypeJSON = "json"
12-
FormatTypeParquet = "parquet"
6+
"github.com/cloudquery/filetypes"
137
)
148

159
type Spec struct {
16-
Directory string `json:"directory,omitempty"`
17-
Format FormatType `json:"format,omitempty"`
18-
NoRotate bool `json:"no_rotate,omitempty"`
10+
*filetypes.FileSpec
11+
Directory string `json:"directory,omitempty"`
12+
NoRotate bool `json:"no_rotate,omitempty"`
1913
}
2014

2115
func (*Spec) SetDefaults() {}

plugins/destination/file/client/write.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ func (c *Client) WriteTableBatch(ctx context.Context, table *schema.Table, data
2020
}
2121
defer f.Close()
2222

23-
return c.formatClient.WriteTableBatch(f, table, data)
23+
return c.Client.WriteTableBatchFile(f, table, data)
2424
}

plugins/destination/file/go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module github.com/cloudquery/cloudquery/plugins/destination/file
33
go 1.19
44

55
require (
6-
github.com/cloudquery/filetypes v1.5.1
6+
github.com/cloudquery/filetypes v1.6.0
77
github.com/cloudquery/plugin-sdk v1.43.0
88
github.com/google/uuid v1.3.0
99
github.com/rs/zerolog v1.29.0
@@ -24,7 +24,7 @@ require (
2424
golang.org/x/tools v0.6.0 // indirect
2525
)
2626

27-
replace github.com/apache/arrow/go/v12 => github.com/cloudquery/arrow/go/v12 v12.0.0-20230306072451-b6560ef2e6c1
27+
replace github.com/apache/arrow/go/v12 => github.com/cloudquery/arrow/go/v12 v12.0.0-20230317130341-c648117570af
2828

2929
require (
3030
github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 // indirect

plugins/destination/file/go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,10 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR
155155
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
156156
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
157157
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
158-
github.com/cloudquery/arrow/go/v12 v12.0.0-20230306072451-b6560ef2e6c1 h1:z37B3z+FV7fMga4toXrY0eWR6tfX/x3qefSmfNsWTb8=
159-
github.com/cloudquery/arrow/go/v12 v12.0.0-20230306072451-b6560ef2e6c1/go.mod h1:d+tV/eHZZ7Dz7RPrFKtPK02tpr+c9/PEd/zm8mDS9Vg=
160-
github.com/cloudquery/filetypes v1.5.1 h1:/LtV7yqspVlTZ30+nL/OA7JRZ5SIlXiZeq5V4O6x950=
161-
github.com/cloudquery/filetypes v1.5.1/go.mod h1:7kCMZzIjNaHX/cfXHwepuFZNx1ra5AkR+aiKo7ujiPg=
158+
github.com/cloudquery/arrow/go/v12 v12.0.0-20230317130341-c648117570af h1:iK2UwRTmBl9+I41tASIttizlmiY7dH9KmKt7iOiwyOc=
159+
github.com/cloudquery/arrow/go/v12 v12.0.0-20230317130341-c648117570af/go.mod h1:d+tV/eHZZ7Dz7RPrFKtPK02tpr+c9/PEd/zm8mDS9Vg=
160+
github.com/cloudquery/filetypes v1.6.0 h1:f+p6345zgFVgFIDgkxm3Raz9RumYp4KHKF504I196/8=
161+
github.com/cloudquery/filetypes v1.6.0/go.mod h1:PEmKtraGq/7uHHCFtgY9MLONr+ii4q8Cj8uy+pa0Cuo=
162162
github.com/cloudquery/plugin-sdk v1.43.0 h1:vYycBgKdfDbrW7sp+r5ATxjTVVXU2AMc3x6cPs8DTns=
163163
github.com/cloudquery/plugin-sdk v1.43.0/go.mod h1:CIv+fgm6siZhReOuMGU/OCBPNRLNY4At2RkC31LRaS0=
164164
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=

website/pages/docs/plugins/destinations/file/overview.mdx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,16 @@ This is the (nested) spec used by the CSV destination Plugin.
3333

3434
If set to true, the plugin will write to one file per table.
3535
Otherwise, for every batch a new file will be created with a different `.<UUID>` suffix.
36+
37+
- `format_spec` (map [format_spec](#format_spec)) (optional)
38+
Optional parameters to change the format of the file.
39+
40+
### format_spec
41+
42+
- `delimiter` (string) (optional) (default: `,`)
43+
44+
Character that will be used as the delimiter if the format type is `csv`.
45+
46+
- `skip_header` (bool) (optional) (default: false)
47+
48+
Specifies whether to skip the header row (when format is `csv`). When `false`, the first line of a file is the column headers.

0 commit comments

Comments
 (0)