Skip to content

Commit dd6b275

Browse files
authored
feat(nested): Simplify constructing nested ClickHouse value (#11205)
Requires cloudquery/plugin-sdk#948 #### Benchmarks Performed by the following command in `typeconv/ch/values` dir: ```sh go test \ -test.run=BenchmarkFromArray \ -test.bench=BenchmarkFromArray \ -test.count 10 -test.benchmem -test.benchtime 10000x ``` <details><summary>Before this update</summary> ``` goos: darwin goarch: arm64 pkg: github.com/cloudquery/cloudquery/plugins/destination/clickhouse/typeconv/ch/values BenchmarkFromArray-10 10000 465316 ns/op 508005 B/op 7044 allocs/op BenchmarkFromArray-10 10000 442365 ns/op 508002 B/op 7044 allocs/op BenchmarkFromArray-10 10000 434197 ns/op 508006 B/op 7044 allocs/op BenchmarkFromArray-10 10000 430641 ns/op 508006 B/op 7044 allocs/op BenchmarkFromArray-10 10000 406410 ns/op 507999 B/op 7044 allocs/op BenchmarkFromArray-10 10000 429302 ns/op 508002 B/op 7044 allocs/op BenchmarkFromArray-10 10000 399611 ns/op 508005 B/op 7044 allocs/op BenchmarkFromArray-10 10000 425171 ns/op 508004 B/op 7044 allocs/op BenchmarkFromArray-10 10000 423713 ns/op 508007 B/op 7044 allocs/op BenchmarkFromArray-10 10000 413656 ns/op 508006 B/op 7044 allocs/op PASS ok github.com/cloudquery/cloudquery/plugins/destination/clickhouse/typeconv/ch/values 108.016s ``` </details> <details><summary>After this update</summary> ``` goos: darwin goarch: arm64 pkg: github.com/cloudquery/cloudquery/plugins/destination/clickhouse/typeconv/ch/values BenchmarkFromArray-10 10000 326041 ns/op 235184 B/op 4512 allocs/op BenchmarkFromArray-10 10000 310762 ns/op 235187 B/op 4512 allocs/op BenchmarkFromArray-10 10000 324254 ns/op 235182 B/op 4512 allocs/op BenchmarkFromArray-10 10000 320649 ns/op 235188 B/op 4512 allocs/op BenchmarkFromArray-10 10000 326467 ns/op 235183 B/op 4512 allocs/op BenchmarkFromArray-10 10000 310552 ns/op 235189 B/op 4512 allocs/op BenchmarkFromArray-10 10000 310553 ns/op 235188 B/op 4512 allocs/op BenchmarkFromArray-10 10000 313457 ns/op 235187 B/op 4512 allocs/op BenchmarkFromArray-10 10000 327252 ns/op 235185 B/op 4512 allocs/op BenchmarkFromArray-10 10000 312083 ns/op 235188 B/op 4512 allocs/op PASS ok github.com/cloudquery/cloudquery/plugins/destination/clickhouse/typeconv/ch/values 97.273s ``` </details>
1 parent dc00994 commit dd6b275

11 files changed

Lines changed: 84 additions & 51 deletions

File tree

plugins/destination/clickhouse/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ require (
66
github.com/ClickHouse/clickhouse-go/v2 v2.10.0
77
github.com/apache/arrow/go/v13 v13.0.0-20230601070034-e07e22c5580a
88
github.com/cloudquery/plugin-pb-go v1.0.8
9-
github.com/cloudquery/plugin-sdk/v3 v3.8.1
9+
github.com/cloudquery/plugin-sdk/v3 v3.10.3
1010
github.com/google/uuid v1.3.0
1111
github.com/rs/zerolog v1.29.1
1212
github.com/stretchr/testify v1.8.3

plugins/destination/clickhouse/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ github.com/cloudquery/plugin-pb-go v1.0.8 h1:wn3GXhcNItcP+6wUUZuzUFbvdL59liKBO37
5454
github.com/cloudquery/plugin-pb-go v1.0.8/go.mod h1:vAGA27psem7ZZNAY4a3S9TKuA/JDQWstjKcHPJX91Mc=
5555
github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3KF+gPGqm7U=
5656
github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug=
57-
github.com/cloudquery/plugin-sdk/v3 v3.8.1 h1:Rj+3zBLmscKSbG+JPLT5bzgv56oPwBHRSMGyJ1DSfyc=
58-
github.com/cloudquery/plugin-sdk/v3 v3.8.1/go.mod h1:8PHS8cMjWPeXrurnI30dyHwViK4HJUZLA6uys+F2fXQ=
57+
github.com/cloudquery/plugin-sdk/v3 v3.10.3 h1:aMofD3hHU4Dm+raxNgIOdSg+hGQrkTUTV2KXjxSwtqE=
58+
github.com/cloudquery/plugin-sdk/v3 v3.10.3/go.mod h1:P3zucEOH+IEhdM9FGD5q3WqciXIBOPCKw2kHZT4UrlQ=
5959
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
6060
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
6161
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=

plugins/destination/clickhouse/typeconv/arrow/values/string.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,21 @@ import (
55
)
66

77
func buildFromString(builder array.Builder, value any) error {
8+
return buildFromStringWithZero(builder, value, "")
9+
}
10+
11+
// buildFromStringWithZero will use builder.AppendEmptyValue if the v is "" or matches the passed empty value
12+
func buildFromStringWithZero(builder array.Builder, value any, zero string) error {
813
v, ok := unwrap[string](value)
914
if !ok {
1015
builder.AppendNull()
1116
return nil
1217
}
1318

14-
if len(v) > 0 {
15-
return builder.AppendValueFromString(v)
19+
if len(v) == 0 || v == zero {
20+
builder.AppendEmptyValue()
21+
return nil
1622
}
1723

18-
// binary types are handled separately, so here we have a builder that most likely can't handle empty string.
19-
// having empty string in CH means that this was an empty value
20-
builder.AppendEmptyValue()
21-
return nil
24+
return builder.AppendValueFromString(v)
2225
}

plugins/destination/clickhouse/typeconv/arrow/values/values.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,15 @@ func buildValue(builder array.Builder, value any) error {
6666
case *types.UUIDBuilder:
6767
buildPrimitive[uuid.UUID](builder, value)
6868

69-
case *types.JSONBuilder, *types.InetBuilder, *types.MACBuilder:
69+
case *types.InetBuilder:
70+
const zero = "0.0.0.0/0"
71+
return buildFromStringWithZero(builder, value, zero)
72+
73+
case *types.MACBuilder:
74+
const zero = "00:00:00:00:00:00"
75+
return buildFromStringWithZero(builder, value, zero)
76+
77+
case *array.ExtensionBuilder, *types.JSONBuilder:
7078
return buildFromString(builder, value)
7179

7280
case *array.StructBuilder:

plugins/destination/clickhouse/typeconv/ch/values/extension.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ func extensionValue(arr array.ExtensionArray) any {
1313
case *types.InetArray, *types.MACArray, *types.JSONArray:
1414
return valueStrData(arr)
1515
default:
16-
// we fallback here to string representation
16+
// we fall back here to string representation
1717
return valueStrData(arr)
1818
}
1919
}

plugins/destination/clickhouse/typeconv/ch/values/list.go

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ import (
55
"time"
66

77
"github.com/ClickHouse/clickhouse-go/v2/lib/column"
8-
"github.com/apache/arrow/go/v13/arrow"
98
"github.com/apache/arrow/go/v13/arrow/array"
10-
"github.com/apache/arrow/go/v13/arrow/memory"
119
"github.com/cloudquery/cloudquery/plugins/destination/clickhouse/typeconv/ch/types"
1210
)
1311

@@ -24,11 +22,7 @@ func listValue(arr array.ListLike) (any, error) {
2422
}
2523
valueType := col.ScanType()
2624

27-
sanitized, err := sanitizeNested(arr)
28-
if err != nil {
29-
return nil, err
30-
}
31-
arr = sanitized.(array.ListLike)
25+
arr = sanitizeNested(arr).(array.ListLike)
3226

3327
elems := make([]any, arr.Len())
3428
for i := 0; i < arr.Len(); i++ {
@@ -50,24 +44,3 @@ func listValue(arr array.ListLike) (any, error) {
5044

5145
return res.Interface(), nil
5246
}
53-
54-
// sanitizeNested will replace all null entries with empty ones as in CH nested types aren't nullable themselves
55-
// https://clickhouse.com/docs/en/sql-reference/data-types/nullable
56-
func sanitizeNested(arr arrow.Array) (arrow.Array, error) {
57-
if arr.NullN() == 0 {
58-
return arr, nil
59-
}
60-
61-
builder := array.NewBuilder(memory.DefaultAllocator, arr.DataType())
62-
for i := 0; i < arr.Len(); i++ {
63-
if arr.IsNull(i) {
64-
builder.AppendEmptyValue()
65-
continue
66-
}
67-
if err := builder.AppendValueFromString(arr.ValueStr(i)); err != nil {
68-
return nil, err
69-
}
70-
}
71-
72-
return builder.NewArray(), nil
73-
}

plugins/destination/clickhouse/typeconv/ch/values/map.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,7 @@ func mapValue(arr *array.Map) (any, error) {
3232
return nil, fmt.Errorf("unexpected reflect type for map: %q", valueType.String())
3333
}
3434

35-
sanitized, err := sanitizeNested(arr)
36-
if err != nil {
37-
return nil, err
38-
}
39-
40-
return makeMapSlice(valueType, sanitized.(*array.Map))
35+
return makeMapSlice(valueType, sanitizeNested(arr).(*array.Map))
4136
}
4237

4338
func makeMapSlice(mapType reflect.Type, arr *array.Map) (any, error) {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package values
2+
3+
import (
4+
"github.com/apache/arrow/go/v13/arrow"
5+
"github.com/apache/arrow/go/v13/arrow/array"
6+
"github.com/apache/arrow/go/v13/arrow/memory"
7+
)
8+
9+
// sanitizeNested will replace all null entries with empty ones as in CH nested types aren't nullable themselves
10+
// https://clickhouse.com/docs/en/sql-reference/data-types/nullable
11+
// array passed should be of arrow.NestedType
12+
func sanitizeNested(arr arrow.Array) arrow.Array {
13+
if arr.NullN() == 0 {
14+
return arr
15+
}
16+
17+
if _, ok := arr.DataType().(arrow.NestedType); !ok {
18+
// This can happen only if the parent is nested & we need to construct the empty value
19+
return array.MakeFromData(array.NewData(
20+
arr.DataType(), arr.Len(),
21+
append([]*memory.Buffer{nil}, arr.Data().Buffers()[1:]...), // the first elem is validity
22+
arr.Data().Children(),
23+
0, arr.Data().Offset(),
24+
))
25+
}
26+
27+
children := make([]arrow.ArrayData, len(arr.Data().Children()))
28+
for i, child := range arr.Data().Children() {
29+
children[i] = sanitizeNested(array.MakeFromData(child)).Data()
30+
}
31+
32+
return array.MakeFromData(array.NewData(
33+
arr.DataType(), arr.Len(),
34+
append([]*memory.Buffer{nil}, arr.Data().Buffers()[1:]...), // the first elem is validity
35+
children,
36+
0, arr.Data().Offset(),
37+
))
38+
}

plugins/destination/clickhouse/typeconv/ch/values/struct.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@ import (
99
)
1010

1111
func structValue(arr *array.Struct) (any, error) {
12-
sanitized, err := sanitizeNested(arr)
13-
if err != nil {
14-
return nil, err
15-
}
16-
arr = sanitized.(*array.Struct)
12+
arr = sanitizeNested(arr).(*array.Struct)
1713

1814
fields := arr.DataType().(*arrow.StructType).Fields()
1915
columns := make(map[string][]any, len(fields))

plugins/destination/clickhouse/typeconv/ch/values/struct_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func Test_structValue(t *testing.T) {
4747
elem = elems[1]
4848
require.NotNil(t, elem)
4949
require.Equal(t, map[string]any{
50-
"nullable_bool": (*bool)(nil),
50+
"nullable_bool": ptr(false),
5151
"non_nullable_bool": ptr(true),
5252
}, *elem)
5353

0 commit comments

Comments
 (0)