@@ -653,4 +653,159 @@ public void testIntersectRightSingleChildNode() throws IOException {
653653 }
654654 }
655655 }
656+
657+ // Following test replicates the http_logs dataset
658+ public void testHttpLogTimestampDistribution () throws IOException {
659+ try (Directory directory = newDirectory ()) {
660+ try (RandomIndexWriter iw = new RandomIndexWriter (random (), directory , new WhitespaceAnalyzer ())) {
661+ int dims = 1 ;
662+ // Sparse range: 100-199 (100 docs, one per value)
663+ for (int i = 100 ; i < 200 ; i ++) {
664+ Document doc = new Document ();
665+ doc .add (new LongPoint ("timestamp" , i ));
666+ doc .add (new NumericDocValuesField ("timestamp" , i ));
667+ iw .addDocument (doc );
668+ }
669+ // Dense range: 1000-1999 (5000 docs, 5 per value)
670+ for (int i = 0 ; i < 5000 ; i ++) {
671+ long value = 1000 + (i / 5 ); // Creates 5 docs per value from 1000-1999
672+ Document doc = new Document ();
673+ doc .add (new LongPoint ("timestamp" , value ));
674+ doc .add (new NumericDocValuesField ("timestamp" , value ));
675+ iw .addDocument (doc );
676+ }
677+ // 0-99 (100 docs)
678+ for (int i = 0 ; i < 100 ; i ++) {
679+ Document doc = new Document ();
680+ doc .add (new LongPoint ("timestamp" , i ));
681+ doc .add (new NumericDocValuesField ("timestamp" , i ));
682+ iw .addDocument (doc );
683+ }
684+ iw .flush ();
685+ iw .forceMerge (1 );
686+ try (IndexReader reader = iw .getReader ()) {
687+ IndexSearcher searcher = new IndexSearcher (reader );
688+ // Test sparse region
689+ testApproximateVsExactQuery (searcher , "timestamp" , 100 , 199 , 50 , dims );
690+ // Test dense region
691+ testApproximateVsExactQuery (searcher , "timestamp" , 1000 , 1500 , 100 , dims );
692+ // Test across regions
693+ testApproximateVsExactQuery (searcher , "timestamp" , 0 , 2000 , 200 , dims );
694+ }
695+ }
696+ }
697+ }
698+
// The following test replicates the nyc_taxis dataset
700+ public void testNycTaxiDataDistribution () throws IOException {
701+ try (Directory directory = newDirectory ()) {
702+ try (RandomIndexWriter iw = new RandomIndexWriter (random (), directory , new WhitespaceAnalyzer ())) {
703+ int dims = 1 ;
704+ // Create NYC taxi fare distribution with different ranges
705+ for (long fare = 250 ; fare <= 500 ; fare ++) {
706+ iw .addDocument (asList (new LongPoint ("fare_amount" , fare ), new NumericDocValuesField ("fare_amount" , fare )));
707+ }
708+ // Typical fares: 1000-3000 (dense, 5 docs per value)
709+ for (long fare = 1000 ; fare <= 3000 ; fare ++) {
710+ for (int dup = 0 ; dup < 5 ; dup ++) {
711+ iw .addDocument (asList (new LongPoint ("fare_amount" , fare ), new NumericDocValuesField ("fare_amount" , fare )));
712+ }
713+ }
714+ // High fares: 10000-20000 (sparse, 1 doc every 100)
715+ for (long fare = 10000 ; fare <= 20000 ; fare += 100 ) {
716+ iw .addDocument (asList (new LongPoint ("fare_amount" , fare ), new NumericDocValuesField ("fare_amount" , fare )));
717+ }
718+ iw .flush ();
719+ iw .forceMerge (1 );
720+ try (IndexReader reader = iw .getReader ()) {
721+ IndexSearcher searcher = new IndexSearcher (reader );
722+ // Test 1: Query for typical fare range
723+ testApproximateVsExactQuery (searcher , "fare_amount" , 1000 , 3000 , 100 , dims );
724+ // Test 2: Query for high fare range
725+ testApproximateVsExactQuery (searcher , "fare_amount" , 10000 , 20000 , 50 , dims );
726+ // Test 3: Query for low fares
727+ testApproximateVsExactQuery (searcher , "fare_amount" , 250 , 500 , 50 , dims );
728+ }
729+ }
730+ }
731+ }
732+
733+ private void testApproximateVsExactQuery (IndexSearcher searcher , String field , long lower , long upper , int size , int dims )
734+ throws IOException {
735+ // Test with approximate query
736+ ApproximatePointRangeQuery approxQuery = new ApproximatePointRangeQuery (
737+ field ,
738+ pack (lower ).bytes ,
739+ pack (upper ).bytes ,
740+ dims ,
741+ size ,
742+ null ,
743+ ApproximatePointRangeQuery .LONG_FORMAT
744+ );
745+ // Test with exact query
746+ Query exactQuery = LongPoint .newRangeQuery (field , lower , upper );
747+ TopDocs approxDocs = searcher .search (approxQuery , size );
748+ TopDocs exactDocs = searcher .search (exactQuery , size );
749+ // Verify approximate query returns correct number of results
750+ assertTrue ("Approximate query should return at most " + size + " docs" , approxDocs .scoreDocs .length <= size );
751+ // If exact query returns fewer docs than size, approximate should match
752+ if (exactDocs .totalHits .value () <= size ) {
753+ assertEquals (
754+ "When exact results fit in size, approximate should match exactly" ,
755+ exactDocs .totalHits .value (),
756+ approxDocs .totalHits .value ()
757+ );
758+ }
759+ // Test with sorting (ASC and DESC)
760+ Sort ascSort = new Sort (new SortField (field , SortField .Type .LONG ));
761+ Sort descSort = new Sort (new SortField (field , SortField .Type .LONG , true ));
762+ // Test ASC sort
763+ ApproximatePointRangeQuery approxQueryAsc = new ApproximatePointRangeQuery (
764+ field ,
765+ pack (lower ).bytes ,
766+ pack (upper ).bytes ,
767+ dims ,
768+ size ,
769+ SortOrder .ASC ,
770+ ApproximatePointRangeQuery .LONG_FORMAT
771+ );
772+ TopDocs approxDocsAsc = searcher .search (approxQueryAsc , size , ascSort );
773+ TopDocs exactDocsAsc = searcher .search (exactQuery , size , ascSort );
774+ // Verify results match
775+ for (int i = 0 ; i < size ; i ++) {
776+ assertEquals ("ASC sorted results should match at position " + i , exactDocsAsc .scoreDocs [i ].doc , approxDocsAsc .scoreDocs [i ].doc );
777+ }
778+ assertEquals ("Should return exactly size value documents" , size , approxDocsAsc .scoreDocs .length );
779+ assertEquals (
780+ "Should return exactly size value documents as regular query" ,
781+ exactDocsAsc .scoreDocs .length ,
782+ approxDocsAsc .scoreDocs .length
783+ );
784+ // Test DESC sort
785+ ApproximatePointRangeQuery approxQueryDesc = new ApproximatePointRangeQuery (
786+ field ,
787+ pack (lower ).bytes ,
788+ pack (upper ).bytes ,
789+ dims ,
790+ size ,
791+ SortOrder .DESC ,
792+ ApproximatePointRangeQuery .LONG_FORMAT
793+ );
794+ TopDocs approxDocsDesc = searcher .search (approxQueryDesc , size , descSort );
795+ TopDocs exactDocsDesc = searcher .search (exactQuery , size , descSort );
796+ // Verify the results match
797+ for (int i = 0 ; i < size ; i ++) {
798+ assertEquals (
799+ "DESC sorted results should match at position " + i ,
800+ exactDocsDesc .scoreDocs [i ].doc ,
801+ approxDocsDesc .scoreDocs [i ].doc
802+ );
803+ }
804+ assertEquals ("Should return exactly size value documents" , size , approxDocsAsc .scoreDocs .length );
805+ assertEquals (
806+ "Should return exactly size value documents as regular query" ,
807+ exactDocsAsc .scoreDocs .length ,
808+ approxDocsAsc .scoreDocs .length
809+ );
810+ }
656811}
0 commit comments