Skip to content

parquet::build_row_group_predicate hits stack overflow #419

@NGA-TRAN

Description

@NGA-TRAN

Describe the bug
While testing IOX, I hit a stack overflow in build_row_group_predicate with this SQL:
'"SELECT * from restaurant where system > 5.0 and 'tewsbury' != town and system < 7.0 and town = 'reading'"'

Here is the println! output of the RowGroupPredicateBuilder:

Some(
    RowGroupPredicateBuilder {
        parquet_schema: Schema {
            fields: [
                Field {
                    name: "count",
                    data_type: UInt64,
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
                Field {
                    name: "system",
                    data_type: Float64,
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
                Field {
                    name: "time",
                    data_type: Timestamp(
                        Nanosecond,
                        None,
                    ),
                    nullable: false,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
                Field {
                    name: "town",
                    data_type: Dictionary(
                        Int32,
                        Utf8,
                    ),
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
            ],
            metadata: {},
        },
        predicate_expr: BinaryExpr {
            left: BinaryExpr {
                left: BinaryExpr {
                    left: BinaryExpr {
                        left: BinaryExpr {
                            left: BinaryExpr {
                                left: BinaryExpr {
                                    left: BinaryExpr {
                                        left: Column {
                                            name: "system_max",
                                        },
                                        op: Gt,
                                        right: Literal {
                                            value: Float64(5),
                                        },
                                    },
                                    op: And,
                                    right: Literal {
                                        value: Boolean(true),
                                    },
                                },
                                op: And,
                                right: BinaryExpr {
                                    left: Column {
                                        name: "system_min",
                                    },
                                    op: Lt,
                                    right: Literal {
                                        value: Float64(7),
                                    },
                                },
                            },
                            op: And,
                            right: BinaryExpr {
                                left: BinaryExpr {
                                    left: TryCastExpr {
                                        expr: Column {
                                            name: "town_min",
                                        },
                                        cast_type: Utf8,
                                    },
                                    op: LtEq,
                                    right: Literal {
                                        value: Utf8("reading"),
                                    },
                                },
                                op: And,
                                right: BinaryExpr {
                                    left: Literal {
                                        value: Utf8("reading"),
                                    },
                                    op: LtEq,
                                    right: TryCastExpr {
                                        expr: Column {
                                            name: "town_max",
                                        },
                                        cast_type: Utf8,
                                    },
                                },
                            },
                        },
                        op: And,
                        right: BinaryExpr {
                            left: Column {
                                name: "system_max",
                            },
                            op: Gt,
                            right: Literal {
                                value: Float64(5),
                            },
                        },
                    },
                    op: And,
                    right: Literal {
                        value: Boolean(true),
                    },
                },
                op: And,
                right: BinaryExpr {
                    left: Column {
                        name: "system_min",
                    },
                    op: Lt,
                    right: Literal {
                        value: Float64(7),
                    },
                },
            },
            op: And,
            right: BinaryExpr {
                left: BinaryExpr {
                    left: TryCastExpr {
                        expr: Column {
                            name: "town_min",
                        },
                        cast_type: Utf8,
                    },
                    op: LtEq,
                    right: Literal {
                        value: Utf8("reading"),
                    },
                },
                op: And,
                right: BinaryExpr {
                    left: Literal {
                        value: Utf8("reading"),
                    },
                    op: LtEq,
                    right: TryCastExpr {
                        expr: Column {
                            name: "town_max",
                        },
                        cast_type: Utf8,
                    },
                },
            },
        },
        stat_column_req: [
            (
                "system",
                Max,
                Field {
                    name: "system_max",
                    data_type: Float64,
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
            ),
            (
                "system",
                Min,
                Field {
                    name: "system_min",
                    data_type: Float64,
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
            ),
            (
                "town",
                Min,
                Field {
                    name: "town_min",
                    data_type: Dictionary(
                        Int32,
                        Utf8,
                    ),
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
            ),
            (
                "town",
                Max,
                Field {
                    name: "town_max",
                    data_type: Dictionary(
                        Int32,
                        Utf8,
                    ),
                    nullable: true,
                    dict_id: 0,
                    dict_is_ordered: false,
                    metadata: None,
                },
            ),
        ],
    },
)

I think that once the fix in #409 is merged, the redundant expressions will go away and this predicate will shrink. Still, this is an example showing that we cannot yet handle a filter with many expressions.

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions