-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Closed
Labels
bug: Something isn't working
Description
Describe the bug
Executing array_has_any(column_name, []) results in an error:
ArrowError(InvalidArgumentError("RowConverter column schema mismatch, expected Utf8 got Int64"), None)
To Reproduce
datafusion version "44.0"
use std::sync::Arc;
use datafusion::arrow::array::{Int32Array, StringArray, ArrayRef, ListArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::buffer::OffsetBuffer;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;
/// Minimal reproduction: registers a three-row in-memory table with a
/// `List<Utf8>` column named `taxonomy`, prints it, then runs a filter using
/// `array_has_any(taxonomy, [])` with an empty array literal.
///
/// # Errors
///
/// Propagates any error from building the batch, registering the table, or
/// planning/executing the queries.
#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Element field shared by the schema's list type and the list array itself.
    let item_field = Arc::new(Field::new("item", DataType::Utf8, true));
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("taxonomy", DataType::List(Arc::clone(&item_field)), false),
    ]));

    let ids = Arc::new(Int32Array::from(vec![1, 2, 3]));

    // Flattened list values; the offsets below slice them per row:
    //   id=1 -> [category_1, category_2], id=2 -> [category_3], id=3 -> [category_4]
    let categories = Arc::new(StringArray::from(vec![
        "category_1",
        "category_2",
        "category_3",
        "category_4",
    ]));
    let row_offsets = OffsetBuffer::new(vec![0, 2, 3, 4].into());
    let taxonomy = Arc::new(ListArray::new(item_field, row_offsets, categories, None));

    let batch = RecordBatch::try_new(schema.clone(), vec![ids, taxonomy])?;

    // Single partition holding the single batch.
    let table = MemTable::try_new(schema, vec![vec![batch]])?;
    ctx.register_table("test_table", Arc::new(table))?;

    println!("Original data:");
    ctx.table("test_table").await?.show().await?;

    // The problematic query: `array_has_any` with an empty array literal.
    println!("\nExecuting filter with empty array:");
    let query = "SELECT * FROM test_table WHERE array_has_any(taxonomy, [])";

    // Reported output on datafusion 44.0:
    // Error: ArrowError(InvalidArgumentError("RowConverter column schema mismatch, expected Utf8 got Int64"), None)
    let filtered = ctx.sql(query).await?;
    filtered.show().await?;

    Ok(())
}
Expected behavior
The error message is misleading: it does not indicate that an empty array may be an invalid or unsupported input. The behavior could be improved by either:
- Providing a more meaningful error message that explicitly states that an empty array is not a valid input.
- Handling the query more gracefully, ensuring that an empty array input does not cause a type mismatch in schema conversion.
Additional context
No response
Metadata
Metadata
Assignees
Labels
bug: Something isn't working