Skip to content

Commit 4f16d35

Browse files
author
Przemysław Stępień
authored
fix: Bump sjson library to fix accessing root arrays and complex path (#21022)
This PR fixes remove_column so that it can remove fields inside JSON arrays using the 'foo.#.bar' syntax. It also fixes access to complex paths that start with '#', for example '#.env.#.value'. #### Summary ⚠️ **If you're contributing to a plugin, please read this section of the [contribution guidelines](https://github.com/cloudquery/cloudquery/blob/main/CONTRIBUTING.md#open-core-vs-open-source) 🧑‍🎓 before submitting this PR** ⚠️
1 parent f329dfb commit 4f16d35

File tree

3 files changed

+131
-3
lines changed

3 files changed

+131
-3
lines changed

plugins/transformer/basic/client/recordupdater/record_updater_test.go

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,14 @@ import (
1515
"github.com/cloudquery/plugin-sdk/v4/types"
1616
"github.com/stretchr/testify/assert"
1717
"github.com/stretchr/testify/require"
18+
"github.com/tidwall/pretty"
1819
)
1920

21+
func sortJSON(jsonStr string) string {
22+
opts := pretty.Options{SortKeys: true}
23+
return string(pretty.Ugly(pretty.PrettyOptions([]byte(jsonStr), &opts)))
24+
}
25+
2026
func TestRemoveColumns(t *testing.T) {
2127
record := createTestRecord()
2228
updater := New(record)
@@ -540,3 +546,125 @@ func TestObfuscateDeeplyNestedColumnsWithGjsonSyntax(t *testing.T) {
540546
require.Contains(t, col3Val2, redactedByCQMessage, "Expected obfuscated values to contain redacted message")
541547
require.Equal(t, 2, strings.Count(col3Val2, redactedByCQMessage), "Expected 2 obfuscated values for the 2 nested2_object1 items in second row")
542548
}
549+
550+
func TestRemoveNestedColumnsWithGjsonSyntax(t *testing.T) {
551+
// Create test record with nested JSON structure
552+
md := arrow.NewMetadata([]string{schema.MetadataTableName}, []string{"testTable"})
553+
bld := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(
554+
[]arrow.Field{
555+
{Name: "col1", Type: arrow.BinaryTypes.String},
556+
{Name: "col2", Type: arrow.BinaryTypes.String},
557+
{Name: "col3", Type: types.NewJSONType()},
558+
},
559+
&md,
560+
))
561+
defer bld.Release()
562+
563+
bld.Field(0).(*array.StringBuilder).AppendValues([]string{"val1", "val2"}, nil)
564+
bld.Field(1).(*array.StringBuilder).AppendValues([]string{"val3", "val4"}, nil)
565+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"top_foo":[{"foo":"baz0","keep":"value0"},{"foo":"baz1","keep":"value1"},{"foo":"baz2","keep":"value2"}],"other":"data"}`))
566+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"top_foo":[{"foo":"baz3","keep":"value3"},{"foo":"baz4","keep":"value4"},{"foo":"baz5","keep":"value5"}],"other":"data"}`))
567+
568+
record := bld.NewRecord()
569+
updater := New(record)
570+
571+
// Test removal using gjson syntax with # for array elements
572+
updatedRecord, err := updater.RemoveColumns([]string{"col3.top_foo.#.foo"})
573+
require.NoError(t, err)
574+
575+
require.Equal(t, int64(3), updatedRecord.NumCols())
576+
require.Equal(t, int64(2), updatedRecord.NumRows())
577+
requireAllColsLenMatchRecordsLen(t, updatedRecord)
578+
579+
// Check that the nested foo values are removed but keep values remain
580+
expectedJSON1 := `{"top_foo":[{"keep":"value0"},{"keep":"value1"},{"keep":"value2"}],"other":"data"}`
581+
actualJSON1 := updatedRecord.Column(2).ValueStr(0)
582+
require.Equal(t, sortJSON(expectedJSON1), sortJSON(actualJSON1), "Expected foo fields to be removed from first row")
583+
584+
expectedJSON2 := `{"top_foo":[{"keep":"value3"},{"keep":"value4"},{"keep":"value5"}],"other":"data"}`
585+
actualJSON2 := updatedRecord.Column(2).ValueStr(1)
586+
require.Equal(t, sortJSON(expectedJSON2), sortJSON(actualJSON2), "Expected foo fields to be removed from second row")
587+
}
588+
589+
func TestRemoveDeeplyNestedColumnsWithGjsonSyntax(t *testing.T) {
590+
// Create test record with deeply nested JSON structure
591+
md := arrow.NewMetadata([]string{schema.MetadataTableName}, []string{"testTable"})
592+
bld := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(
593+
[]arrow.Field{
594+
{Name: "col1", Type: arrow.BinaryTypes.String},
595+
{Name: "col2", Type: arrow.BinaryTypes.String},
596+
{Name: "col3", Type: types.NewJSONType()},
597+
},
598+
&md,
599+
))
600+
defer bld.Release()
601+
602+
bld.Field(0).(*array.StringBuilder).AppendValues([]string{"val1", "val2"}, nil)
603+
bld.Field(1).(*array.StringBuilder).AppendValues([]string{"val3", "val4"}, nil)
604+
// First row: has 2 objects in object2 array, each with 2 nested2_object1 values = 4 total
605+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"nested2_object1":1,"keep":"a"},{"nested2_object1":2,"keep":"b"}]}},{"nested_object1":{"nested_object2":[{"nested2_object1":3,"keep":"c"},{"nested2_object1":4,"keep":"d"}]}}]}}`))
606+
// Second row: has 1 object in object2 array, with 2 nested2_object1 values = 2 total
607+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"nested2_object1":5,"keep":"e"},{"nested2_object1":6,"keep":"f"}]}}]}}`))
608+
609+
record := bld.NewRecord()
610+
updater := New(record)
611+
612+
// Test removal using gjson syntax with multiple # for nested arrays
613+
updatedRecord, err := updater.RemoveColumns([]string{"col3.object1.object2.#.nested_object1.nested_object2.#.nested2_object1"})
614+
require.NoError(t, err)
615+
616+
require.Equal(t, int64(3), updatedRecord.NumCols())
617+
require.Equal(t, int64(2), updatedRecord.NumRows())
618+
requireAllColsLenMatchRecordsLen(t, updatedRecord)
619+
620+
// Check first row: nested2_object1 values should be removed but keep values should remain
621+
expectedJSON1 := `{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"keep":"a"},{"keep":"b"}]}},{"nested_object1":{"nested_object2":[{"keep":"c"},{"keep":"d"}]}}]}}`
622+
actualJSON1 := updatedRecord.Column(2).ValueStr(0)
623+
require.Equal(t, sortJSON(expectedJSON1), sortJSON(actualJSON1), "Expected nested2_object1 fields to be removed from first row")
624+
625+
// Check second row: nested2_object1 values should be removed but keep values should remain
626+
expectedJSON2 := `{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"keep":"e"},{"keep":"f"}]}}]}}`
627+
actualJSON2 := updatedRecord.Column(2).ValueStr(1)
628+
require.Equal(t, sortJSON(expectedJSON2), sortJSON(actualJSON2), "Expected nested2_object1 fields to be removed from second row")
629+
}
630+
631+
func TestRemoveNestedArrayWithGjsonSyntax(t *testing.T) {
632+
// Create test record with nested array structure like user described
633+
md := arrow.NewMetadata([]string{schema.MetadataTableName}, []string{"testTable"})
634+
bld := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(
635+
[]arrow.Field{
636+
{Name: "col1", Type: arrow.BinaryTypes.String},
637+
{Name: "col2", Type: arrow.BinaryTypes.String},
638+
{Name: "col3", Type: types.NewJSONType()},
639+
},
640+
&md,
641+
))
642+
defer bld.Release()
643+
644+
bld.Field(0).(*array.StringBuilder).AppendValues([]string{"val1", "val2"}, nil)
645+
bld.Field(1).(*array.StringBuilder).AppendValues([]string{"val3", "val4"}, nil)
646+
// Test structure: [{"env": [{"name": "AWS_ACCESS_KEY_ID", "value": "test"}]}]
647+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`[{"env": [{"name": "AWS_ACCESS_KEY_ID", "value": "test"}, {"name": "AWS_SECRET_KEY", "value": "secret"}]}, {"env": [{"name": "DB_PASSWORD", "value": "password"}]}]`))
648+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`[{"env": [{"name": "API_KEY", "value": "api-key-value"}]}]`))
649+
650+
record := bld.NewRecord()
651+
updater := New(record)
652+
653+
// Test removal using gjson syntax: #.env.#.value (remove all "value" fields from nested env arrays)
654+
updatedRecord, err := updater.RemoveColumns([]string{"col3.#.env.#.value"})
655+
require.NoError(t, err)
656+
657+
require.Equal(t, int64(3), updatedRecord.NumCols())
658+
require.Equal(t, int64(2), updatedRecord.NumRows())
659+
requireAllColsLenMatchRecordsLen(t, updatedRecord)
660+
661+
// Check first row: "value" fields should be removed but "name" fields should remain
662+
expectedJSON1 := `[{"env": [{"name": "AWS_ACCESS_KEY_ID"}, {"name": "AWS_SECRET_KEY"}]}, {"env": [{"name": "DB_PASSWORD"}]}]`
663+
actualJSON1 := updatedRecord.Column(2).ValueStr(0)
664+
require.Equal(t, sortJSON(expectedJSON1), sortJSON(actualJSON1), "Expected value fields to be removed from first row")
665+
666+
// Check second row: "value" field should be removed but "name" field should remain
667+
expectedJSON2 := `[{"env": [{"name": "API_KEY"}]}]`
668+
actualJSON2 := updatedRecord.Column(2).ValueStr(1)
669+
require.Equal(t, sortJSON(expectedJSON2), sortJSON(actualJSON2), "Expected value fields to be removed from second row")
670+
}

plugins/transformer/basic/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,4 @@ require (
9898
// github.com/cloudquery/jsonschema @ cqmain
9999
replace github.com/invopop/jsonschema => github.com/cloudquery/jsonschema v0.0.0-20240220124159-92878faa2a66
100100

101-
replace github.com/tidwall/sjson => github.com/cloudquery/sjson v0.0.0-20250708134708-0065b237a60e
101+
replace github.com/tidwall/sjson => github.com/cloudquery/sjson v0.0.0-20250715101255-737185f49eb9

plugins/transformer/basic/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3K
6262
github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug=
6363
github.com/cloudquery/plugin-sdk/v4 v4.86.0 h1:iUE8ShyoG1zbeesNkKmM0RPyeYeQekC+h3PhnZz04QA=
6464
github.com/cloudquery/plugin-sdk/v4 v4.86.0/go.mod h1:31CkkksHcifSdRyT2TLPqFoS2wunHu1+fcdCinEO62o=
65-
github.com/cloudquery/sjson v0.0.0-20250708134708-0065b237a60e h1:qIbdJvSJOou66f/XRQgti3a4vsL3sMZIJinp7jejHio=
66-
github.com/cloudquery/sjson v0.0.0-20250708134708-0065b237a60e/go.mod h1:owSZeKAGP6udCIFuKgdQDQiUXj+L4X113HjyRnIqONk=
65+
github.com/cloudquery/sjson v0.0.0-20250715101255-737185f49eb9 h1:ZeAY9KSqwgY/gv4XtAn4VespxlLEesNzd9HWrrNJUso=
66+
github.com/cloudquery/sjson v0.0.0-20250715101255-737185f49eb9/go.mod h1:owSZeKAGP6udCIFuKgdQDQiUXj+L4X113HjyRnIqONk=
6767
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
6868
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
6969
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

0 commit comments

Comments
 (0)