Skip to content

Commit 96d55cd

Browse files
author
Przemysław Stępień
authored
fix: Fix nested column obfuscation (#20981)
#### Summary

⚠️ **If you're contributing to a plugin, please read this section of the [contribution guidelines](https://github.com/cloudquery/cloudquery/blob/main/CONTRIBUTING.md#open-core-vs-open-source) 🧑‍🎓 before submitting this PR.** ⚠️

This PR fixes the obfuscation of nested columns.
1 parent 70f254a commit 96d55cd

File tree

4 files changed

+98
-9
lines changed

4 files changed

+98
-9
lines changed

plugins/transformer/basic/client/recordupdater/record_updater.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,9 @@ func (*RecordUpdater) obfuscateJSONColumns(column arrow.Array, jcs []jsonColumn)
433433
str := column.ValueStr(i)
434434
for _, jc := range jcs {
435435
val := gjson.Get(column.ValueStr(i), jc.columnPath)
436-
if val.Exists() && val.Type == gjson.String {
437-
if modified, err := sjson.Set(str, jc.columnPath, fmt.Sprintf("%s %x", redactedByCQMessage, sha256.Sum256([]byte(val.Str)))); err == nil {
436+
// TODO: Currently, nested types produce a single SHA hash shared by all matched array elements. Consider hashing each element separately.
437+
if val.Exists() {
438+
if modified, err := sjson.Set(str, jc.columnPath, fmt.Sprintf("%s %x", redactedByCQMessage, sha256.Sum256([]byte(val.Raw)))); err == nil {
438439
str = modified
439440
continue
440441
}

plugins/transformer/basic/client/recordupdater/record_updater_test.go

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package recordupdater
33
import (
44
"encoding/json"
55
"fmt"
6+
"strings"
67
"testing"
78
"time"
89

@@ -89,10 +90,10 @@ func TestObfuscateColumns(t *testing.T) {
8990
fmt.Sprintf("%s 528e5290f8ff0eb0325f0472b9c1a9ef4fac0b02ff6094b64d9382af4a10444b", redactedByCQMessage),
9091
updatedRecord.Column(0).(*array.String).Value(1))
9192
assert.Equal(t,
92-
fmt.Sprintf(`{"foo":{"bar":["%s ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb","%s 3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d","c"]},"hello":"world"}`, redactedByCQMessage, redactedByCQMessage),
93+
fmt.Sprintf(`{"foo":{"bar":["%s ac8d8342bbb2362d13f0a559a3621bb407011368895164b628a54f7fc33fc43c","%s c100f95c1913f9c72fc1f4ef0847e1e723ffe0bde0b36e5f36c13f81fe8c26ed","c"]},"hello":"world"}`, redactedByCQMessage, redactedByCQMessage),
9394
updatedRecord.Column(2).ValueStr(0))
9495
assert.Equal(t,
95-
fmt.Sprintf(`{"foo":{"bar":["%s 18ac3e7343f016890c510e93f935261169d9e3f565436429830faf0934f4f8e4","%s 3f79bb7b435b05321651daefd374cdc681dc06faa65e374e38337b88ca046dea","f"]}}`, redactedByCQMessage, redactedByCQMessage),
96+
fmt.Sprintf(`{"foo":{"bar":["%s 3fa5834dc920d385ca9b099c9fe55dcca163a6b256a261f8f147291b0e7cf633","%s 8c8656c5d114d7f8b2a412d2d5fd03accce3ed050624a0493734591a9666b110","f"]}}`, redactedByCQMessage, redactedByCQMessage),
9697
updatedRecord.Column(2).ValueStr(1))
9798
}
9899

@@ -121,10 +122,10 @@ func TestAutoObfuscateColumns(t *testing.T) {
121122
fmt.Sprintf("%s 528e5290f8ff0eb0325f0472b9c1a9ef4fac0b02ff6094b64d9382af4a10444b", redactedByCQMessage),
122123
updatedRecord.Column(0).(*array.String).Value(1))
123124
assert.Equal(t,
124-
fmt.Sprintf(`{"foo":{"bar":["%s ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb","%s 3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d","c"]},"hello":"world"}`, redactedByCQMessage, redactedByCQMessage),
125+
fmt.Sprintf(`{"foo":{"bar":["%s ac8d8342bbb2362d13f0a559a3621bb407011368895164b628a54f7fc33fc43c","%s c100f95c1913f9c72fc1f4ef0847e1e723ffe0bde0b36e5f36c13f81fe8c26ed","c"]},"hello":"world"}`, redactedByCQMessage, redactedByCQMessage),
125126
updatedRecord.Column(2).ValueStr(0))
126127
assert.Equal(t,
127-
fmt.Sprintf(`{"foo":{"bar":["%s 18ac3e7343f016890c510e93f935261169d9e3f565436429830faf0934f4f8e4","%s 3f79bb7b435b05321651daefd374cdc681dc06faa65e374e38337b88ca046dea","f"]}}`, redactedByCQMessage, redactedByCQMessage),
128+
fmt.Sprintf(`{"foo":{"bar":["%s 3fa5834dc920d385ca9b099c9fe55dcca163a6b256a261f8f147291b0e7cf633","%s 8c8656c5d114d7f8b2a412d2d5fd03accce3ed050624a0493734591a9666b110","f"]}}`, redactedByCQMessage, redactedByCQMessage),
128129
updatedRecord.Column(2).ValueStr(1))
129130
assert.Equal(t,
130131
fmt.Sprintf("%s cc1d9c865e8380c2d566dc724c66369051acfaa3e9e8f36ad6c67d7d9b8461a5", redactedByCQMessage),
@@ -453,3 +454,89 @@ func TestChangeCaseEntireJson(t *testing.T) {
453454
require.Equal(t, "val3", updatedRecord.Column(1).(*array.String).Value(0))
454455
require.Equal(t, "val4", updatedRecord.Column(1).(*array.String).Value(1))
455456
}
457+
458+
func TestObfuscateNestedColumnsWithGjsonSyntax(t *testing.T) {
459+
// Create test record with nested JSON structure
460+
md := arrow.NewMetadata([]string{schema.MetadataTableName}, []string{"testTable"})
461+
bld := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(
462+
[]arrow.Field{
463+
{Name: "col1", Type: arrow.BinaryTypes.String},
464+
{Name: "col2", Type: arrow.BinaryTypes.String},
465+
{Name: "col3", Type: types.NewJSONType()},
466+
},
467+
&md,
468+
))
469+
defer bld.Release()
470+
471+
bld.Field(0).(*array.StringBuilder).AppendValues([]string{"val1", "val2"}, nil)
472+
bld.Field(1).(*array.StringBuilder).AppendValues([]string{"val3", "val4"}, nil)
473+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"top_foo":[{"foo":"baz0"},{"foo":"baz1"},{"foo":"baz2"}]}`))
474+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"top_foo":[{"foo":"baz3"},{"foo":"baz4"},{"foo":"baz5"}]}`))
475+
476+
record := bld.NewRecord()
477+
updater := New(record)
478+
479+
// Test obfuscation using gjson syntax with # for array elements
480+
updatedRecord, err := updater.ObfuscateColumns([]string{"col3.top_foo.#.foo"})
481+
require.NoError(t, err)
482+
483+
require.Equal(t, int64(3), updatedRecord.NumCols())
484+
require.Equal(t, int64(2), updatedRecord.NumRows())
485+
requireAllColsLenMatchRecordsLen(t, updatedRecord)
486+
487+
// Check that the nested foo values are obfuscated
488+
col3Val := updatedRecord.Column(2).ValueStr(0)
489+
require.Contains(t, col3Val, redactedByCQMessage, "Expected obfuscated values to contain redacted message")
490+
require.Contains(t, col3Val, "top_foo", "Expected top_foo structure to be maintained")
491+
// Verify that all three "foo" values in the array are obfuscated
492+
require.Equal(t, 3, strings.Count(col3Val, redactedByCQMessage), "Expected 3 obfuscated values for the 3 foo items")
493+
494+
// Check second row as well
495+
col3Val2 := updatedRecord.Column(2).ValueStr(1)
496+
require.Contains(t, col3Val2, redactedByCQMessage, "Expected obfuscated values to contain redacted message")
497+
require.Equal(t, 3, strings.Count(col3Val2, redactedByCQMessage), "Expected 3 obfuscated values for the 3 foo items")
498+
}
499+
500+
func TestObfuscateDeeplyNestedColumnsWithGjsonSyntax(t *testing.T) {
501+
// Create test record with deeply nested JSON structure
502+
md := arrow.NewMetadata([]string{schema.MetadataTableName}, []string{"testTable"})
503+
bld := array.NewRecordBuilder(memory.DefaultAllocator, arrow.NewSchema(
504+
[]arrow.Field{
505+
{Name: "col1", Type: arrow.BinaryTypes.String},
506+
{Name: "col2", Type: arrow.BinaryTypes.String},
507+
{Name: "col3", Type: types.NewJSONType()},
508+
},
509+
&md,
510+
))
511+
defer bld.Release()
512+
513+
bld.Field(0).(*array.StringBuilder).AppendValues([]string{"val1", "val2"}, nil)
514+
bld.Field(1).(*array.StringBuilder).AppendValues([]string{"val3", "val4"}, nil)
515+
// First row: has 2 objects in object2 array, each with 2 nested2_object1 values = 4 total
516+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"nested2_object1":1},{"nested2_object1":2}]}},{"nested_object1":{"nested_object2":[{"nested2_object1":3},{"nested2_object1":4}]}}]}}`))
517+
// Second row: has 1 object in object2 array, with 2 nested2_object1 values = 2 total
518+
bld.Field(2).(*types.JSONBuilder).AppendBytes([]byte(`{"object1":{"object2":[{"nested_object1":{"nested_object2":[{"nested2_object1":5},{"nested2_object1":6}]}}]}}`))
519+
520+
record := bld.NewRecord()
521+
updater := New(record)
522+
523+
// Test obfuscation using gjson syntax with multiple # for nested arrays
524+
updatedRecord, err := updater.ObfuscateColumns([]string{"col3.object1.object2.#.nested_object1.nested_object2.#.nested2_object1"})
525+
require.NoError(t, err)
526+
527+
require.Equal(t, int64(3), updatedRecord.NumCols())
528+
require.Equal(t, int64(2), updatedRecord.NumRows())
529+
requireAllColsLenMatchRecordsLen(t, updatedRecord)
530+
531+
// Check first row: should have 4 obfuscated values
532+
col3Val := updatedRecord.Column(2).ValueStr(0)
533+
require.Contains(t, col3Val, redactedByCQMessage, "Expected obfuscated values to contain redacted message")
534+
require.Contains(t, col3Val, "object1", "Expected object1 structure to be maintained")
535+
require.Contains(t, col3Val, "nested_object1", "Expected nested_object1 structure to be maintained")
536+
require.Equal(t, 4, strings.Count(col3Val, redactedByCQMessage), "Expected 4 obfuscated values for the 4 nested2_object1 items in first row")
537+
538+
// Check second row: should have 2 obfuscated values
539+
col3Val2 := updatedRecord.Column(2).ValueStr(1)
540+
require.Contains(t, col3Val2, redactedByCQMessage, "Expected obfuscated values to contain redacted message")
541+
require.Equal(t, 2, strings.Count(col3Val2, redactedByCQMessage), "Expected 2 obfuscated values for the 2 nested2_object1 items in second row")
542+
}

plugins/transformer/basic/go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,5 @@ require (
9797

9898
// github.com/cloudquery/jsonschema @ cqmain
9999
replace github.com/invopop/jsonschema => github.com/cloudquery/jsonschema v0.0.0-20240220124159-92878faa2a66
100+
101+
replace github.com/tidwall/sjson => github.com/cloudquery/sjson v0.0.0-20250708134708-0065b237a60e

plugins/transformer/basic/go.sum

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ github.com/cloudquery/plugin-sdk/v2 v2.7.0 h1:hRXsdEiaOxJtsn/wZMFQC9/jPfU1MeMK3K
6262
github.com/cloudquery/plugin-sdk/v2 v2.7.0/go.mod h1:pAX6ojIW99b/Vg4CkhnsGkRIzNaVEceYMR+Bdit73ug=
6363
github.com/cloudquery/plugin-sdk/v4 v4.86.0 h1:iUE8ShyoG1zbeesNkKmM0RPyeYeQekC+h3PhnZz04QA=
6464
github.com/cloudquery/plugin-sdk/v4 v4.86.0/go.mod h1:31CkkksHcifSdRyT2TLPqFoS2wunHu1+fcdCinEO62o=
65+
github.com/cloudquery/sjson v0.0.0-20250708134708-0065b237a60e h1:qIbdJvSJOou66f/XRQgti3a4vsL3sMZIJinp7jejHio=
66+
github.com/cloudquery/sjson v0.0.0-20250708134708-0065b237a60e/go.mod h1:owSZeKAGP6udCIFuKgdQDQiUXj+L4X113HjyRnIqONk=
6567
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
6668
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
6769
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -158,15 +160,12 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf
158160
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
159161
github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw=
160162
github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
161-
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
162163
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
163164
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
164165
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
165166
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
166167
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
167168
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
168-
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
169-
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
170169
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
171170
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
172171
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=

0 commit comments

Comments
 (0)