-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Description
- Part of [EPIC] [Parquet] Implement Variant type support in Parquet #6736
- From [Variant] Improve getter API for
VariantListandVariantObject#7757 (comment)
Describe the bug
VariantObject::field_name(i:usize) returns the field name corresponding to its index. If index is out of bounds, the method should return None, but when the index is the length of the fields, it will return the first element. When the index is larger than the length of the fields, it starts to panic.
Steps to reproduce
Please use the following test:
arrow-rs/parquet-variant/src/variant/object.rs
Lines 205 to 256 in 7d3a25a
| #[test] | |
| fn test_variant_object_simple() { | |
| // Create metadata with field names: "age", "name", "active" (sorted) | |
| // Header: version=1, sorted=1, offset_size=1 (offset_size_minus_one=0) | |
| // So header byte = 00_0_1_0001 = 0x10 | |
| let metadata_bytes = vec![ | |
| 0b0001_0001, | |
| 3, // dictionary size | |
| 0, // "active" | |
| 6, // "age" | |
| 9, // "name" | |
| 13, | |
| b'a', | |
| b'c', | |
| b't', | |
| b'i', | |
| b'v', | |
| b'e', | |
| b'a', | |
| b'g', | |
| b'e', | |
| b'n', | |
| b'a', | |
| b'm', | |
| b'e', | |
| ]; | |
| let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap(); | |
| // Create object value data for: {"active": true, "age": 42, "name": "hello"} | |
| // Field IDs in sorted order: [0, 1, 2] (active, age, name) | |
| // Header: basic_type=2, field_offset_size_minus_one=0, field_id_size_minus_one=0, is_large=0 | |
| // value_header = 0000_00_00 = 0x00 | |
| // So header byte = (0x00 << 2) | 2 = 0x02 | |
| let object_value = vec![ | |
| 0x02, // header: basic_type=2, value_header=0x00 | |
| 3, // num_elements = 3 | |
| // Field IDs (1 byte each): active=0, age=1, name=2 | |
| 0, 1, 2, | |
| // Field offsets (1 byte each): 4 offsets total | |
| 0, // offset to first value (boolean true) | |
| 1, // offset to second value (int8) | |
| 3, // offset to third value (short string) | |
| 9, // end offset | |
| // Values: | |
| 0x04, // boolean true: primitive_header=1, basic_type=0 -> (1 << 2) | 0 = 0x04 | |
| 0x0C, | |
| 42, // int8: primitive_header=3, basic_type=0 -> (3 << 2) | 0 = 0x0C, then value 42 | |
| 0x15, b'h', b'e', b'l', b'l', | |
| b'o', // short string: length=5, basic_type=1 -> (5 << 2) | 1 = 0x15 | |
| ]; | |
| let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap(); |
This test builds up a VariantObject with three fields: {"active": true, "age": 42, "name": "hello"}
let res = variant_object.field_name(3); // 3 is out of bounds!
// the assertion below _will_ fail. We expect `res` to be `None`, but it returns the first field ("active").
assert_eq!(res.is_none());
// this call below will panic! This is because `field_name` internally calls
// `try_field_name`, which will err with the following:
// validation error after construction: InvalidArgumentError("Tried to extract byte(s) 302..303 from 18-byte buffer")
let res = variant_object.field_name(300); // 300 is definitely out of boundsNote, using the VariantBuilder to build the same object and then trying to access fields with an out of bounds index will err correctly:
Reproducible test
#[test]
fn test_foo() {
let mut variant_builder = VariantBuilder::new();
let mut obj_builder = variant_builder.new_object();
obj_builder.append_value("active", true);
obj_builder.append_value("age", 42);
obj_builder.append_value("name", "hello");
obj_builder.finish();
let (m, v) = variant_builder.finish();
let variant = Variant::try_new(&m, &v).unwrap();
let variant_obj = variant.as_object().unwrap();
let res = variant_obj.field(3);
assert!(res.is_none());
let res = variant_obj.field(300);
assert!(res.is_none());
}You can obviously add a oob check since we know the number of elements in a VariantObject, but it seems like there is a bug in the underlying try_field_name logic.
cc @scovich