#!/usr/bin/env python3
"""
Test case for ClickHouse Iceberg partition bug with special characters.

This creates a minimal Parquet file with a partition column containing "/"
to demonstrate ClickHouse's inability to handle URL-encoded partition paths.

Requirements:
  pip install pandas pyarrow
"""

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from pathlib import Path

def create_test_data():
    """Build the five-row sample frame whose partition values contain '/'.

    Returns a DataFrame with the columns ``id``, ``value`` and
    ``partition_key`` (in that order).
    """
    # One tuple per row: (id, value, partition_key).
    rows = (
        (1, 100, 'prod/app1/service1'),
        (2, 200, 'prod/app1/service1'),
        (3, 300, 'prod/app2/service2'),
        (4, 400, 'dev/app1/service1'),
        (5, 500, 'dev/app1/service1'),
    )
    # Transpose the rows into per-column lists before handing them to pandas.
    ids, values, partition_keys = (list(column) for column in zip(*rows))
    return pd.DataFrame({
        'id': ids,
        'value': values,
        'partition_key': partition_keys,
    })

def main() -> None:
    """Write the test Parquet file and print the manual reproduction steps.

    Builds the sample frame with ``create_test_data()``, converts it to a
    PyArrow table and writes it to ``test-data-with-slash.parquet`` (a
    relative path, so it lands in the current working directory). It then
    prints a short summary followed by the shell/SQL commands for uploading
    the file, creating and filling the Iceberg table, and running the query.
    """
    # Create test data
    df = create_test_data()

    # Convert to PyArrow table
    table = pa.Table.from_pandas(df)

    # Write to Parquet file
    output_path = Path('test-data-with-slash.parquet')
    pq.write_table(table, output_path)

    # Derive the uploaded object's URI from the file actually written so the
    # printed commands cannot drift from output_path if it is ever renamed.
    s3_uri = f"s3://YOUR-BUCKET/test-data/{output_path.name}"

    print(f"✅ Created test Parquet file: {output_path}")
    print(f"📊 Rows: {len(df)}")
    print("📁 Partition column 'partition_key' contains '/' characters")
    print("\nSample data:")
    print(df)

    print("\n" + "=" * 60)
    print("Next steps:")
    print("=" * 60)
    print("\n1. Upload to S3:")
    print(f"   aws s3 cp {output_path} s3://YOUR-BUCKET/test-data/ --region eu-west-1")

    print("\n2. Create Iceberg table with ICE:")
    print("   ice create-table test_schema.partition_bug_test \\")
    print(f"     --schema-from-parquet {s3_uri} \\")
    print('     --partition \'[{"column":"partition_key","transform":"identity"}]\'')

    print("\n3. Insert data with ICE (creates hierarchical partitioning):")
    print("   ice insert test_schema.partition_bug_test \\")
    print(f"     {s3_uri}")

    print("\n4. Check S3 structure (you'll see URL-encoded paths):")
    print("   aws s3 ls s3://YOUR-BUCKET/iceberg/test_schema/partition_bug_test/data/ --recursive")
    print("   # Expected: partition_key=prod%2Fapp1%2Fservice1/...")

    print("\n5. Query with ClickHouse (THIS WILL FAIL):")
    print("   SELECT * FROM iceberg_catalog.`test_schema.partition_bug_test` LIMIT 5;")
    print("   # Error: Bad URI syntax: URI contains invalid characters")

# Generate the file and print the instructions only when run as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
