@@ -15,8 +15,14 @@ import (
1515 "github.com/cloudquery/plugin-sdk/v4/types"
1616 "github.com/stretchr/testify/assert"
1717 "github.com/stretchr/testify/require"
18+ "github.com/tidwall/pretty"
1819)
1920
21+ func sortJSON (jsonStr string ) string {
22+ opts := pretty.Options {SortKeys : true }
23+ return string (pretty .Ugly (pretty .PrettyOptions ([]byte (jsonStr ), & opts )))
24+ }
25+
2026func TestRemoveColumns (t * testing.T ) {
2127 record := createTestRecord ()
2228 updater := New (record )
@@ -540,3 +546,125 @@ func TestObfuscateDeeplyNestedColumnsWithGjsonSyntax(t *testing.T) {
540546 require .Contains (t , col3Val2 , redactedByCQMessage , "Expected obfuscated values to contain redacted message" )
541547 require .Equal (t , 2 , strings .Count (col3Val2 , redactedByCQMessage ), "Expected 2 obfuscated values for the 2 nested2_object1 items in second row" )
542548}
549+
550+ func TestRemoveNestedColumnsWithGjsonSyntax (t * testing.T ) {
551+ // Create test record with nested JSON structure
552+ md := arrow .NewMetadata ([]string {schema .MetadataTableName }, []string {"testTable" })
553+ bld := array .NewRecordBuilder (memory .DefaultAllocator , arrow .NewSchema (
554+ []arrow.Field {
555+ {Name : "col1" , Type : arrow .BinaryTypes .String },
556+ {Name : "col2" , Type : arrow .BinaryTypes .String },
557+ {Name : "col3" , Type : types .NewJSONType ()},
558+ },
559+ & md ,
560+ ))
561+ defer bld .Release ()
562+
563+ bld .Field (0 ).(* array.StringBuilder ).AppendValues ([]string {"val1" , "val2" }, nil )
564+ bld .Field (1 ).(* array.StringBuilder ).AppendValues ([]string {"val3" , "val4" }, nil )
565+ bld .Field (2 ).(* types.JSONBuilder ).AppendBytes ([]byte (`{"top_foo":[{"foo":"baz0","keep":"value0"},{"foo":"baz1","keep":"value1"},{"foo":"baz2","keep":"value2"}],"other":"data"}` ))
566+ bld .Field (2 ).(* types.JSONBuilder ).AppendBytes ([]byte (`{"top_foo":[{"foo":"baz3","keep":"value3"},{"foo":"baz4","keep":"value4"},{"foo":"baz5","keep":"value5"}],"other":"data"}` ))
567+
568+ record := bld .NewRecord ()
569+ updater := New (record )
570+
571+ // Test removal using gjson syntax with # for array elements
572+ updatedRecord , err := updater .RemoveColumns ([]string {"col3.top_foo.#.foo" })
573+ require .NoError (t , err )
574+
575+ require .Equal (t , int64 (3 ), updatedRecord .NumCols ())
576+ require .Equal (t , int64 (2 ), updatedRecord .NumRows ())
577+ requireAllColsLenMatchRecordsLen (t , updatedRecord )
578+
579+ // Check that the nested foo values are removed but keep values remain
580+ expectedJSON1 := `{"top_foo":[{"keep":"value0"},{"keep":"value1"},{"keep":"value2"}],"other":"data"}`
581+ actualJSON1 := updatedRecord .Column (2 ).ValueStr (0 )
582+ require .Equal (t , sortJSON (expectedJSON1 ), sortJSON (actualJSON1 ), "Expected foo fields to be removed from first row" )
583+
584+ expectedJSON2 := `{"top_foo":[{"keep":"value3"},{"keep":"value4"},{"keep":"value5"}],"other":"data"}`
585+ actualJSON2 := updatedRecord .Column (2 ).ValueStr (1 )
586+ require .Equal (t , sortJSON (expectedJSON2 ), sortJSON (actualJSON2 ), "Expected foo fields to be removed from second row" )
587+ }
588+
589+ func TestRemoveDeeplyNestedColumnsWithGjsonSyntax (t * testing.T ) {
590+ // Create test record with deeply nested JSON structure
591+ md := arrow .NewMetadata ([]string {schema .MetadataTableName }, []string {"testTable" })
592+ bld := array .NewRecordBuilder (memory .DefaultAllocator , arrow .NewSchema (
593+ []arrow.Field {
594+ {Name : "col1" , Type : arrow .BinaryTypes .String },
595+ {Name : "col2" , Type : arrow .BinaryTypes .String },
596+ {Name : "col3" , Type : types .NewJSONType ()},
597+ },
598+ & md ,
599+ ))
600+ defer bld .Release ()
601+
602+ bld .Field (0 ).(* array.StringBuilder ).AppendValues ([]string {"val1" , "val2" }, nil )
603+ bld .Field (1 ).(* array.StringBuilder ).AppendValues ([]string {"val3" , "val4" }, nil )
604+ // First row: has 2 objects in object2 array, each with 2 nested2_object1 values = 4 total
605+ bld .Field (2 ).(* types.JSONBuilder ).AppendBytes ([]byte (`{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"nested2_object1":1,"keep":"a"},{"nested2_object1":2,"keep":"b"}]}},{"nested_object1":{"nested_object2":[{"nested2_object1":3,"keep":"c"},{"nested2_object1":4,"keep":"d"}]}}]}}` ))
606+ // Second row: has 1 object in object2 array, with 2 nested2_object1 values = 2 total
607+ bld .Field (2 ).(* types.JSONBuilder ).AppendBytes ([]byte (`{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"nested2_object1":5,"keep":"e"},{"nested2_object1":6,"keep":"f"}]}}]}}` ))
608+
609+ record := bld .NewRecord ()
610+ updater := New (record )
611+
612+ // Test removal using gjson syntax with multiple # for nested arrays
613+ updatedRecord , err := updater .RemoveColumns ([]string {"col3.object1.object2.#.nested_object1.nested_object2.#.nested2_object1" })
614+ require .NoError (t , err )
615+
616+ require .Equal (t , int64 (3 ), updatedRecord .NumCols ())
617+ require .Equal (t , int64 (2 ), updatedRecord .NumRows ())
618+ requireAllColsLenMatchRecordsLen (t , updatedRecord )
619+
620+ // Check first row: nested2_object1 values should be removed but keep values should remain
621+ expectedJSON1 := `{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"keep":"a"},{"keep":"b"}]}},{"nested_object1":{"nested_object2":[{"keep":"c"},{"keep":"d"}]}}]}}`
622+ actualJSON1 := updatedRecord .Column (2 ).ValueStr (0 )
623+ require .Equal (t , sortJSON (expectedJSON1 ), sortJSON (actualJSON1 ), "Expected nested2_object1 fields to be removed from first row" )
624+
625+ // Check second row: nested2_object1 values should be removed but keep values should remain
626+ expectedJSON2 := `{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"keep":"e"},{"keep":"f"}]}}]}}`
627+ actualJSON2 := updatedRecord .Column (2 ).ValueStr (1 )
628+ require .Equal (t , sortJSON (expectedJSON2 ), sortJSON (actualJSON2 ), "Expected nested2_object1 fields to be removed from second row" )
629+ }
630+
631+ func TestRemoveNestedArrayWithGjsonSyntax (t * testing.T ) {
632+ // Create test record with nested array structure like user described
633+ md := arrow .NewMetadata ([]string {schema .MetadataTableName }, []string {"testTable" })
634+ bld := array .NewRecordBuilder (memory .DefaultAllocator , arrow .NewSchema (
635+ []arrow.Field {
636+ {Name : "col1" , Type : arrow .BinaryTypes .String },
637+ {Name : "col2" , Type : arrow .BinaryTypes .String },
638+ {Name : "col3" , Type : types .NewJSONType ()},
639+ },
640+ & md ,
641+ ))
642+ defer bld .Release ()
643+
644+ bld .Field (0 ).(* array.StringBuilder ).AppendValues ([]string {"val1" , "val2" }, nil )
645+ bld .Field (1 ).(* array.StringBuilder ).AppendValues ([]string {"val3" , "val4" }, nil )
646+ // Test structure: [{"env": [{"name": "AWS_ACCESS_KEY_ID", "value": "test"}]}]
647+ bld .Field (2 ).(* types.JSONBuilder ).AppendBytes ([]byte (`[{"env": [{"name": "AWS_ACCESS_KEY_ID", "value": "test"}, {"name": "AWS_SECRET_KEY", "value": "secret"}]}, {"env": [{"name": "DB_PASSWORD", "value": "password"}]}]` ))
648+ bld .Field (2 ).(* types.JSONBuilder ).AppendBytes ([]byte (`[{"env": [{"name": "API_KEY", "value": "api-key-value"}]}]` ))
649+
650+ record := bld .NewRecord ()
651+ updater := New (record )
652+
653+ // Test removal using gjson syntax: #.env.#.value (remove all "value" fields from nested env arrays)
654+ updatedRecord , err := updater .RemoveColumns ([]string {"col3.#.env.#.value" })
655+ require .NoError (t , err )
656+
657+ require .Equal (t , int64 (3 ), updatedRecord .NumCols ())
658+ require .Equal (t , int64 (2 ), updatedRecord .NumRows ())
659+ requireAllColsLenMatchRecordsLen (t , updatedRecord )
660+
661+ // Check first row: "value" fields should be removed but "name" fields should remain
662+ expectedJSON1 := `[{"env": [{"name": "AWS_ACCESS_KEY_ID"}, {"name": "AWS_SECRET_KEY"}]}, {"env": [{"name": "DB_PASSWORD"}]}]`
663+ actualJSON1 := updatedRecord .Column (2 ).ValueStr (0 )
664+ require .Equal (t , sortJSON (expectedJSON1 ), sortJSON (actualJSON1 ), "Expected value fields to be removed from first row" )
665+
666+ // Check second row: "value" field should be removed but "name" field should remain
667+ expectedJSON2 := `[{"env": [{"name": "API_KEY"}]}]`
668+ actualJSON2 := updatedRecord .Column (2 ).ValueStr (1 )
669+ require .Equal (t , sortJSON (expectedJSON2 ), sortJSON (actualJSON2 ), "Expected value fields to be removed from second row" )
670+ }
0 commit comments