RediSearch/src/result_processor.c at master · RediSearch/RediSearch

History

2782 lines (2370 loc) · 98.8 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

/*
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#include "aggregate/aggregate.h"

#include "result_processor.h"

#include "query.h"

#include "extension.h"

#include <util/minmax_heap.h>

#include "ext/default.h"

#include "result_processor_rs.h"

#include "rlookup.h"

#include "rlookup_load_document.h"

#include "rmutil/rm_assert.h"

#include "util/timeout.h"

#include "util/arr.h"

#include "iterators_rs.h"

#include "rs_wall_clock.h"

#include <stdatomic.h>

#include <pthread.h>

#include <unistd.h>

#include <time.h>

#include "util/references.h"

#include "hybrid/hybrid_scoring.h"

#include "hybrid/hybrid_search_result.h"

#include "config.h"

#include "module.h"

#include "search_disk.h"

#include "debug_commands.h"

#include "search_result.h"

#include "redisearch.h"

#include "asm_state_machine.h"

#include "index_result.h"

#include "index_result_async_read.h"

// Maximum number of concurrent async disk reads.
// The same value is passed as the size argument to IndexResultAsyncRead_Init()
// and to SearchDisk_CreateAsyncReadPool() when the query-iterator processor is created.

#define MAX_ONGOING_READ_SIZE 16

// Timeout for async disk poll when iterator is at EOF (in milliseconds)

// When the iterator is exhausted, we wait for pending async reads to complete.
// While the iterator still has results, the poll is issued with a timeout of 0 instead.

#define ASYNC_POLL_TIMEOUT_AT_EOF_MS 1000

/*******************************************************************************************************************

* Base Result Processor - this processor is the topmost processor of every processing chain.

* It takes the raw index results from the index, and builds the search result to be sent

* downstream.

*******************************************************************************************************************/

/* Calls RedisSearchCtx_UnlockSpec() on `sctx` and then hands `result_status`
 * back unchanged, so callers can unlock and return in a single statement. */
static int UnlockSpec_and_ReturnRPResult(RedisSearchCtx *sctx, int result_status) {
  const int status = result_status;
  RedisSearchCtx_UnlockSpec(sctx);
  return status;
}

// State of the result processor that reads raw index results from a query
// iterator and turns them into search results.

typedef struct {

// Base processor. Kept as the first member: the Next/Free callbacks cast the
// ResultProcessor pointer they receive straight to RPQueryIterator *.

ResultProcessor base;

// Iterator the results are read from. It is freed and replaced by an empty
// iterator when revalidation reports VALIDATE_ABORTED.

QueryIterator *iterator;

// Search context; the spec, the timeout and the current time are read from it.

RedisSearchCtx *sctx;

uint32_t timeoutLimiter; // counter to limit number of calls to TimedOut_WithCounter()

uint32_t keySpaceVersion; // version of the Keyspace slot ranges used for filtering

const RedisModuleSlotRangeArray *querySlots; // Query slots info, may be used for filtering; released with rm_free() when the processor is freed

// Async disk I/O state. It is initialized and freed unconditionally, but only
// the async disk Next implementation reads and fills it.

IndexResultAsyncReadState async;

#ifdef ENABLE_ASSERT

bool firstRead; // Debug only: tracks if this is the first read for sync point testing

#endif

} RPQueryIterator;

/****
 * getDocumentMetadata - get the document metadata for the current document from the iterator.
 * If the document is deleted or expired, return false.
 * If the document is not deleted or expired, return true and set *dmd to the
 * document metadata (dmd must not be NULL).
 * @param spec The index spec
 * @param docs The document table
 * @param sctx The search context
 * @param it The query iterator
 * @param dmd The document metadata pointer to set
 * @return true if the document is not deleted or expired, false otherwise.
 */

static bool getDocumentMetadata(IndexSpec* spec, DocTable* docs, RedisSearchCtx *sctx, const QueryIterator *it, const RSDocumentMetadata **dmd) {
  // Resolves the metadata of the iterator's current document into *dmd and
  // reports whether the document can still be returned (not deleted/expired).
  if (spec->diskSpec) {
    // Disk index: allocate a fresh metadata object for the disk layer to fill.
    RSDocumentMetadata *diskDmd = rm_calloc(1, sizeof(*diskDmd));
    diskDmd->sortVector = RSSortingVector_Empty();
    diskDmd->ref_count = 1;

    // Start from checking the deleted-ids (in memory), then perform IO
    const bool foundDocument = !SearchDisk_DocIdDeleted(spec->diskSpec, it->current->docId) && SearchDisk_GetDocumentMetadata(spec->diskSpec, it->current->docId, diskDmd, &sctx->time.current);
    if (!foundDocument) {
      // Give back the reference taken above; nothing is handed to the caller.
      DMD_Return(diskDmd);
      return false;
    }
    *dmd = diskDmd;
  } else {
    // In-memory index: prefer the metadata already attached to the current
    // result, otherwise borrow it from the document table.
    if (it->current->dmd) {
      *dmd = it->current->dmd;
    } else {
      *dmd = DocTable_Borrow(docs, it->lastDocId);
    }
    if (!*dmd || ((*dmd)->flags & Document_Deleted) || DocTable_IsDocExpired(docs, *dmd, &sctx->time.current)) {
      DMD_Return(*dmd);
      return false;
    }
  }
  // Both the disk and the in-memory paths end here on success.
  return true;
}

/**
 * Refill the IndexResult buffer from the iterator.
 * Fills up to current capacity, doesn't grow the buffer.
 * Returns RS_RESULT_OK on success, RS_RESULT_TIMEDOUT on timeout.
 */

static int refillBufferUsingIterator(RPQueryIterator *self) {
  QueryIterator *iter = self->iterator;
  RedisSearchCtx *ctx = self->sctx;
  IndexSpec *spec = ctx->spec;

  // Keep reading while there is room in the buffer and the iterator has more
  // to give. An already exhausted iterator never enters the loop.
  while (!iter->atEOF && self->async.iteratorResultCount < self->async.poolSize) {
    if (TimedOut_WithCounter(&ctx->time.timeout, &self->timeoutLimiter) == TIMED_OUT) {
      return RS_RESULT_TIMEDOUT;
    }

    const IteratorStatus status = iter->Read(iter);
    if (status == ITERATOR_TIMEOUT) {
      return RS_RESULT_TIMEDOUT;
    }
    if (status == ITERATOR_EOF) {
      break;
    }

    // Deleted documents are dropped here (in-memory check, no IO)
    const t_docId currentId = iter->current->docId;
    if (SearchDisk_DocIdDeleted(spec->diskSpec, currentId)) {
      continue;
    }

    // The iterator reuses the same result pointer, so the buffered entry must
    // own a deep copy. It is freed once the async read completed and the
    // result was consumed.
    IndexResultNode *entry = rm_calloc(1, sizeof(*entry));
    entry->result = IndexResult_DeepCopy(iter->current);
    dllist_append(&self->async.iteratorResults, &entry->node);
    self->async.iteratorResultCount++;
  }

  return RS_RESULT_OK;
}

/**
 * Validate DMD against sharding/slot filters.
 * Returns true if DMD is valid, false if it should be skipped.
 */

static bool validateDmdSlot(const RPQueryIterator *self, const RSDocumentMetadata *dmd) {
  // Defensive check: if keyPtr is NULL (allocation failure in disk API), skip this document
  if (!dmd->keyPtr) {
    return false;
  }

  // Check trimming (sharding migration): the key's slot must fall inside the
  // slot range reported by RedisModule_ShardingGetSlotRange().
  if (isTrimming && RedisModule_ShardingGetKeySlot) {
    RedisModuleString *key = RedisModule_CreateString(NULL, dmd->keyPtr, sdslen(dmd->keyPtr));
    int slot = RedisModule_ShardingGetKeySlot(key);
    RedisModule_FreeString(NULL, key);
    int firstSlot, lastSlot;
    RedisModule_ShardingGetSlotRange(&firstSlot, &lastSlot);
    if (firstSlot > slot || lastSlot < slot) {
      return false;
    }
  }

  // Check query slots (internal command filtering). This is independent of the
  // trimming check above and only runs when the global key-space version
  // differs from the version stored in this processor.
  if (self->querySlots && (__atomic_load_n(&key_space_version, __ATOMIC_RELAXED) != self->keySpaceVersion)) {
    int slot = RedisModule_ClusterKeySlotC(dmd->keyPtr, sdslen(dmd->keyPtr));
    if (!SlotRangeArray_ContainsSlot(self->querySlots, slot)) {
      return false;
    }
  }

  return true;
}

/**
 * Set the search result data from a DMD and IndexResult.
 */

static void setSearchResult(ResultProcessor *base, SearchResult *res, RSIndexResult *indexResult,
                            const RSDocumentMetadata *dmd) {
  // The target result must not carry metadata from an earlier document.
  RS_LOG_ASSERT(SearchResult_GetDocumentMetadata(res) == NULL, "SearchResult already has associated document metadata");

  // Every result produced here counts towards the query's total.
  base->parent->totalResults += 1;

  // Populate the result; the score starts at 0.
  SearchResult_SetDocId(res, dmd->id);
  SearchResult_SetIndexResult(res, indexResult);
  SearchResult_SetScore(res, 0);
  SearchResult_SetDocumentMetadata(res, dmd);

  // Point the row at the document's sorting vector.
  RLookupRow_SetSortingVector(SearchResult_GetRowDataMut(res), &dmd->sortVector);
}

/**
 * Handle initial spec lock and iterator revalidation.
 * Returns true if the caller must validate the iterator's current result
 * instead of reading a new one (VALIDATE_MOVED case).
 *
 * For disk indexes, we skip the lock acquisition because:
 * 1. All in-memory structure accesses (terms Trie, suffix Trie, stats) happen
 *    during QAST_Iterate() which already runs under the read-lock.
 * 2. Disk iterators capture an implicit snapshot at creation time, ensuring
 *    consistency for disk reads without needing to hold the lock.
 * 3. This avoids blocking the main thread during disk IO operations.
 */

static bool handleSpecLockAndRevalidate(RPQueryIterator *self) {
  RedisSearchCtx *ctx = self->sctx;

  // Disk indexes need neither the lock nor revalidation.
  if (ctx->spec->diskSpec) {
    return false;
  }

  QueryIterator *iter = self->iterator;

  // Lock and revalidate only while the context flags are still unset.
  if (ctx->flags != RS_CTX_UNSET) {
    return false;
  }

  RedisSearchCtx_LockSpecRead(ctx);

  switch (iter->Revalidate(iter)) {
    case VALIDATE_ABORTED:
      // The iterator is unusable: swap it for an empty one.
      self->iterator->Free(self->iterator);
      self->iterator = NewEmptyIterator();
      return false;
    case VALIDATE_MOVED:
      // The caller should validate the current result, unless there is none.
      return !iter->atEOF;
    default:
      return false;
  }
}

/* Next implementation for sync disk and regular (in-memory) flow.
 *
 * Reads from the iterator until a document passes the metadata and slot checks,
 * then fills `res` from it and returns RS_RESULT_OK. Returns RS_RESULT_EOF when
 * the iterator is exhausted and RS_RESULT_TIMEDOUT on timeout; both of those
 * paths go through UnlockSpec_and_ReturnRPResult(). */
static int rpQueryItNext(ResultProcessor *base, SearchResult *res) {
  RPQueryIterator *self = (RPQueryIterator *)base;
  RedisSearchCtx *sctx = self->sctx;
  IndexSpec* spec = sctx->spec;
  const RSDocumentMetadata *dmd = NULL;

  // Handle spec lock and revalidation
  bool needToValidateCurrent = handleSpecLockAndRevalidate(self);
  // Fetch the iterator only after revalidation, which may have replaced it
  QueryIterator *it = self->iterator;

#ifdef ENABLE_ASSERT
  // Make sure MT is enabled and `workers > 0` - deadlock otherwise.
  if (self->firstRead) {
    self->firstRead = false;
    SyncPoint_Wait(SYNC_POINT_BEFORE_FIRST_READ);
  }
#endif

  while (1) {
    if (TimedOut_WithCounter(&sctx->time.timeout, &self->timeoutLimiter) == TIMED_OUT) {
      return UnlockSpec_and_ReturnRPResult(sctx, RS_RESULT_TIMEDOUT);
    }

    // Skip the read when revalidation left a current result to validate.
    if (!needToValidateCurrent) {
      IteratorStatus rc = it->Read(it);
      switch (rc) {
        case ITERATOR_EOF:
          return UnlockSpec_and_ReturnRPResult(sctx, RS_RESULT_EOF);
        case ITERATOR_TIMEOUT:
          return UnlockSpec_and_ReturnRPResult(sctx, RS_RESULT_TIMEDOUT);
        default:
          RS_ASSERT(rc == ITERATOR_OK);
      }
    }

    // validate current result only once
    needToValidateCurrent = false;

    // Get document metadata (either from disk or in-memory DocTable)
    if (!getDocumentMetadata(spec, &spec->docs, sctx, it, &dmd)) {
      continue;
    }

    // The document is filtered out by slot: give its metadata back and move on.
    if (!validateDmdSlot(self, dmd)) {
      DMD_Return(dmd);
      continue;
    }

    setSearchResult(base, res, it->current, dmd);
    return RS_RESULT_OK;
  }
}

/* Next implementation for async disk flow with two-level buffering.
 *
 * Each loop iteration refills the IndexResult buffer from the iterator, submits
 * buffered results to the async pool, and serves a ready result if there is one.
 * Otherwise it polls for completed reads. Returns RS_RESULT_OK with `res`
 * filled, RS_RESULT_EOF once iteration is complete, or RS_RESULT_TIMEDOUT.
 * The IndexResult handed out with a result is freed at the start of the next call. */
static int rpQueryItNext_AsyncDisk(ResultProcessor *base, SearchResult *res) {
  RPQueryIterator *self = (RPQueryIterator *)base;
  RedisSearchCtx *sctx = self->sctx;

  // Handle spec lock and revalidation.
  // The return value is not stored: validating the current result is not
  // needed on the async disk path.
  handleSpecLockAndRevalidate(self);
  // Fetch the iterator only after revalidation, which may have replaced it
  QueryIterator *it = self->iterator;

#ifdef ENABLE_ASSERT
  if (self->firstRead) {
    self->firstRead = false;
    SyncPoint_Wait(SYNC_POINT_BEFORE_FIRST_READ);
  }
#endif

  while (1) {
    if (TimedOut_WithCounter(&sctx->time.timeout, &self->timeoutLimiter) == TIMED_OUT) {
      return UnlockSpec_and_ReturnRPResult(sctx, RS_RESULT_TIMEDOUT);
    }

    // Free the previous deep-copied IndexResult if any
    // (it was consumed by the parent result processor in the previous call)
    if (self->async.lastReturnedIndexResult) {
      IndexResult_Free(self->async.lastReturnedIndexResult);
      self->async.lastReturnedIndexResult = NULL;
    }

    // Step 1: Refill IndexResult buffer if needed (cheap iterator reads)
    int refillResult = refillBufferUsingIterator(self);
    if (refillResult == RS_RESULT_TIMEDOUT) {
      return UnlockSpec_and_ReturnRPResult(sctx, RS_RESULT_TIMEDOUT);
    }

    // Step 1b: Submit any buffered results to async pool (keep pipeline full)
    IndexResultAsyncRead_RefillPool(&self->async);

    // Step 2: Try to serve a ready result if we have one
    RSIndexResult *indexResult = IndexResultAsyncRead_PopReadyResult(&self->async);
    if (indexResult) {
      RS_ASSERT(indexResult->dmd); // DMD should be populated

      if (!validateDmdSlot(self, indexResult->dmd)) {
        DMD_Return(indexResult->dmd);
        // Free the deep-copied IndexResult since we're not using it
        IndexResult_Free(indexResult);
        continue;
      }

      setSearchResult(base, res, indexResult, indexResult->dmd);
      // Track this IndexResult so we can free it on the next call
      self->async.lastReturnedIndexResult = indexResult;
      return RS_RESULT_OK;
    }

    // Step 3: No ready results - poll for more. Wait only when the iterator
    // is exhausted; otherwise poll with a timeout of 0.
    int timeout_ms = it->atEOF ? ASYNC_POLL_TIMEOUT_AT_EOF_MS : 0;
    const size_t pendingCount = IndexResultAsyncRead_Poll(&self->async, timeout_ms, &sctx->time.current);

    // Step 4: Check if we're completely done
    if (IndexResultAsyncRead_IsIterationComplete(&self->async, it->atEOF, pendingCount)) {
      return UnlockSpec_and_ReturnRPResult(sctx, RS_RESULT_EOF);
    }

    // Loop back to serve results (I/O for next batch is already running)
  }
}

/* Free callback of the query-iterator processor: frees the iterator, the
 * query slots array, the async read state and finally the processor itself. */
static void rpQueryItFree(ResultProcessor *iter) {
  RPQueryIterator *proc = (RPQueryIterator *)iter;
  QueryIterator *root = proc->iterator;

  root->Free(root);
  // querySlots is stored as a pointer to const, hence the cast for rm_free()
  rm_free((void *)proc->querySlots);
  IndexResultAsyncRead_Free(&proc->async);
  rm_free(iter);
}

/* Create the query-iterator result processor on top of `root`.
 *
 * `querySlots` and `keySpaceVersion` are stored for slot filtering, and `sctx`
 * is kept as the search context. The async disk Next implementation is
 * selected only when the spec is a disk spec, async IO is supported and
 * enabled, and the async read pool was created; in every other case the
 * regular Next implementation is used. */
ResultProcessor *RPQueryIterator_New(QueryIterator *root, const RedisModuleSlotRangeArray *querySlots, uint32_t keySpaceVersion, RedisSearchCtx *sctx) {
  RS_ASSERT(root != NULL);

  RPQueryIterator *proc = rm_calloc(1, sizeof(*proc));
  proc->base.type = RP_INDEX;
  proc->base.Free = rpQueryItFree;
  // Sync disk or regular in-memory flow, unless async IO is set up below
  proc->base.Next = rpQueryItNext;
  proc->iterator = root;
  proc->sctx = sctx;
  proc->querySlots = querySlots;
  proc->keySpaceVersion = keySpaceVersion;

  // Use REDISEARCH_UNINITIALIZED counter to skip timeout checks; the counter
  // is otherwise left at the zero it was allocated with.
  if (sctx->time.skipTimeoutChecks) {
    proc->timeoutLimiter = REDISEARCH_UNINITIALIZED;
  }

#ifdef ENABLE_ASSERT
  proc->firstRead = true;
#endif

  // Initialize async read state
  IndexResultAsyncRead_Init(&proc->async, MAX_ONGOING_READ_SIZE);

  const bool wantAsyncDisk = sctx->spec->diskSpec &&
                             SearchDisk_IsAsyncIOSupported() &&
                             SearchDisk_GetAsyncIOEnabled();
  if (wantAsyncDisk) {
    // Create async pool and setup async I/O
    RedisSearchDiskAsyncReadPool asyncPool =
        SearchDisk_CreateAsyncReadPool(sctx->spec->diskSpec, MAX_ONGOING_READ_SIZE);
    if (asyncPool) {
      // Async disk flow with buffering
      IndexResultAsyncRead_SetupAsyncPool(&proc->async, asyncPool);
      proc->base.Next = rpQueryItNext_AsyncDisk;
    }
  }

  return &proc->base;
}

/* Return the query iterator held by the root result processor, or NULL when
 * there is no root processor or it is not of type RP_INDEX. */
QueryIterator *QITR_GetRootFilter(QueryProcessingCtx *it) {
  ResultProcessor *root = it->rootProc;

  /* On coordinator, the root result processor will be a network result processor and we should ignore it */
  if (!root || root->type != RP_INDEX) {
    return NULL;
  }
  return ((RPQueryIterator *)root)->iterator;
}

/* Append `rp` to the processing chain of `it`.
 * The first processor pushed becomes both the root and the end of the chain.
 * Every later one takes the previous end as its upstream and becomes the new end. */
void QITR_PushRP(QueryProcessingCtx *it, ResultProcessor *rp) {
  const bool chainIsEmpty = !it->rootProc;

  rp->parent = it;
  rp->upstream = chainIsEmpty ? NULL : it->endProc;
  if (chainIsEmpty) {
    it->rootProc = rp;
  }
  it->endProc = rp;
}

/* Free every result processor in the chain of `qitr`, starting at the end
 * processor and following the upstream links. */
void QITR_FreeChain(QueryProcessingCtx *qitr) {
  ResultProcessor *rp = qitr->endProc;
  while (rp) {
    // Read the upstream link before Free() releases the processor.
    ResultProcessor *next = rp->upstream;
    rp->Free(rp);
    rp = next;
  }
}

/*******************************************************************************************************************

* Scoring Processor

* It takes results from upstream, and using a scoring function applies the score to each one.

* It may not be invoked if we are working in SORTBY mode (or later on in aggregations)

*******************************************************************************************************************/

// State of the scoring processor: applies a scoring function to each result
// it receives from upstream.

typedef struct {

ResultProcessor base;

// Scoring callback invoked for every result.

RSScoringFunction scorer;

// Optional; when set it is called with scorerCtx.extdata as the processor is freed.

RSFreeFunction scorerFree;

// Arguments handed to the scorer; copied by value from the caller's arguments at creation.

ScoringFunctionArgs scorerCtx;

// Optional; when set, each score is also written to this key in the result's row.

const RLookupKey *scoreKey;

} RPScorer;

/* Next implementation of the scorer: pull results from upstream until one is
 * not filtered out by the scoring function, and return it with its score set.
 * Any upstream status other than RS_RESULT_OK is returned as is. */
static int rpscoreNext(ResultProcessor *base, SearchResult *res) {
  RPScorer *scorer = (RPScorer *)base;

  for (;;) {
    const int status = base->upstream->Next(base->upstream, res);
    if (status != RS_RESULT_OK) {
      return status;
    }

    // Apply the scoring function
    SearchResult_SetScore(res, scorer->scorer(&scorer->scorerCtx, SearchResult_GetIndexResult(res), SearchResult_GetDocumentMetadata(res), base->parent->minScore));

    // Hand the explanation over to the result and allocate a fresh one for
    // the next document.
    if (scorer->scorerCtx.scrExp) {
      SearchResult_SetScoreExplain(res, (RSScoreExplain *)scorer->scorerCtx.scrExp);
      scorer->scorerCtx.scrExp = rm_calloc(1, sizeof(RSScoreExplain));
    }

    // The special score RS_SCORE_FILTEROUT means the scorer excluded this
    // result: undo the count added by the upstream processor and try the next.
    if (SearchResult_GetScore(res) == RS_SCORE_FILTEROUT) {
      base->parent->totalResults--;
      SearchResult_Clear(res);
      continue;
    }

    if (scorer->scoreKey) {
      RLookup_WriteOwnKey(scorer->scoreKey, SearchResult_GetRowDataMut(res), RSValue_NewNumber(SearchResult_GetScore(res)));
    }
    return status;
  }
}

/* Free callback of the scorer: runs the scorer's own free function on its
 * private data when one was supplied, then releases the explanation buffer
 * and the processor. */
static void rpscoreFree(ResultProcessor *rp) {
  RPScorer *scorer = (RPScorer *)rp;

  if (scorer->scorerFree != NULL) {
    scorer->scorerFree(scorer->scorerCtx.extdata);
  }

  rm_free(scorer->scorerCtx.scrExp);
  scorer->scorerCtx.scrExp = NULL;
  rm_free(scorer);
}

/* Create a scoring processor.
 * `funcs` supplies the scoring function and its free function, `fnargs` is
 * copied by value into the processor, and `rlk` (may be NULL) is the key the
 * score is additionally written to. */
ResultProcessor *RPScorer_New(const ExtScoringFunctionCtx *funcs,
                              const ScoringFunctionArgs *fnargs,
                              const RLookupKey *rlk) {
  RPScorer *scorer = rm_calloc(1, sizeof(*scorer));

  scorer->base.type = RP_SCORER;
  scorer->base.Next = rpscoreNext;
  scorer->base.Free = rpscoreFree;

  scorer->scorer = funcs->sf;
  scorer->scorerFree = funcs->ff;
  scorer->scorerCtx = *fnargs;
  scorer->scoreKey = rlk;

  return &scorer->base;
}

/*******************************************************************************************************************

* Additional Values Loader Result Processor

* It takes results from upstream (should be Index iterator or close; before any RP that need these field),

* and add their additional value to the right score field before sending them downstream.

*******************************************************************************************************************/

// State of the metrics loader processor. It holds nothing besides the base
// processor.

typedef struct {

ResultProcessor base;

} RPMetrics;

/* Next implementation of the metrics loader: fetch a result from upstream and
 * write every metric of its index result into the result's row as a number. */
static int rpMetricsNext(ResultProcessor *base, SearchResult *res) {
  const int status = base->upstream->Next(base->upstream, res);

  if (status == RS_RESULT_OK) {
    RSYieldableMetricSlice metrics = MetricsVec_AsSlice(&SearchResult_GetIndexResult(res)->metrics);
    for (size_t idx = 0; idx != metrics.len; ++idx) {
      RLookup_WriteOwnKey(metrics.data[idx].key, SearchResult_GetRowDataMut(res), RSValue_NewNumber(metrics.data[idx].value));
    }
  }

  return status;
}

/* Free callback of the metrics loader: only the processor itself is released. */
static void rpMetricsFree(ResultProcessor *rp) {
  RPMetrics *metrics = (RPMetrics *)rp;
  rm_free(metrics);
}

/* Create a metrics loader processor (type RP_METRICS). */
ResultProcessor *RPMetricsLoader_New() {
  RPMetrics *loader = rm_calloc(1, sizeof(*loader));

  loader->base.type = RP_METRICS;
  loader->base.Free = rpMetricsFree;
  loader->base.Next = rpMetricsNext;

  return &loader->base;
}

/*******************************************************************************************************************

* Sorting Processor

* This is where things become a bit complex...

* The sorter takes scored results from the scorer (or in the case of SORTBY, the raw results), and

* maintains a heap of the top N results.

* Since we need it to be thread safe, every result that's put on the heap is copied, including its

* index result tree.

* This means that from here down-stream, everything is thread safe, but we also need to properly

* free discarded results.

* The sorter is actually a reducer - it returns RS_RESULT_QUEUED until its upstream parent returns

* EOF. then it starts yielding results one by one by popping from the top of the heap.

* Note: We use a min-max heap to simplify maintaining a max heap where we can pop from the bottom

* while finding the top N results

*******************************************************************************************************************/

// Comparison callback used by the sorter's heap: compares two results (e1, e2),
// with udata carrying the private data of the compare function.

typedef int (*RPSorterCompareFunc)(const void *e1, const void *e2, const void *udata);

// State of the sorting processor.

typedef struct {

ResultProcessor base;

// The heap. We use a min-max heap here

mm_heap_t *pq;

// the compare function for the heap. We use it to test if a result needs to be added to the heap

RPSorterCompareFunc cmp;

// private data for the compare function

void *cmpCtx;

// pooled result - we recycle it to avoid allocations

SearchResult *pooledResult;

// Inputs of the by-field comparison; all three are passed to SearchResult_CmpByFields().

struct {

const RLookupKey **keys;

size_t nkeys;

uint64_t ascendMap;

} fieldcmp;

// Whether a timeout warning needs to be propagated down the downstream

bool timedOut;

} RPSorter;

/* Yield - pops the current top result from the heap into `r`.
 * Once the heap is empty it returns RS_RESULT_TIMEDOUT a single time if a
 * timeout was recorded (the flag is cleared on that return), and
 * RS_RESULT_EOF otherwise. */
static int rpsortNext_Yield(ResultProcessor *rp, SearchResult *r) {
  RPSorter *sorter = (RPSorter *)rp;
  SearchResult *top = mmh_pop_max(sorter->pq);

  if (!top) {
    const int status = sorter->timedOut ? RS_RESULT_TIMEDOUT : RS_RESULT_EOF;
    sorter->timedOut = false;
    return status;
  }

  // Move the popped result into the caller's result and drop the heap's shell.
  SearchResult_Override(r, top);
  rm_free(top);
  return RS_RESULT_OK;
}

/* Free callback of the sorter: releases the pooled result, the heap together
 * with whatever it still holds, and the processor itself. */
static void rpsortFree(ResultProcessor *rp) {
  RPSorter *sorter = (RPSorter *)rp;
  SearchResult *pooled = sorter->pooledResult;

  SearchResult_Destroy(pooled);
  rm_free(pooled);

  // calling mmh_free will free all the remaining results in the heap, if any
  mmh_free(sorter->pq);
  rm_free(rp);
}

// Internal status of the sorter's inner loop: the upstream result was queued in
// the heap and nothing is yielded yet. The value sits one past RS_RESULT_MAX.
// The replacement list is parenthesized so that the macro expands as a single
// value inside any surrounding expression.
#define RESULT_QUEUED (RS_RESULT_MAX + 1)

/* One accumulation step of the sorter.
 * Pulls a single result from upstream into the pooled result and either stores
 * it in the heap or discards it. Returns RESULT_QUEUED while accumulating.
 * On upstream EOF, or on timeout under TimeoutPolicy_Return, it switches the
 * processor to the yield phase and returns the first yielded status. Any other
 * upstream status is returned unchanged. */
static int rpsortNext_innerLoop(ResultProcessor *rp, SearchResult *r) {
  RPSorter *sorter = (RPSorter *)rp;

  // get the next result from upstream. `sorter->pooledResult` is expected to be empty and allocated.
  const int status = rp->upstream->Next(rp->upstream, sorter->pooledResult);

  if (status != RS_RESULT_OK) {
    const bool returnOnTimeout =
        status == RS_RESULT_TIMEDOUT && rp->parent->timeoutPolicy == TimeoutPolicy_Return;
    if (status != RS_RESULT_EOF && !returnOnTimeout) {
      // whoops!
      return status;
    }
    // Upstream is done (or timed out and we return what we have): remember
    // the timeout, stop accumulating and start yielding.
    if (status != RS_RESULT_EOF) {
      sorter->timedOut = true;
    }
    rp->Next = rpsortNext_Yield;
    return rpsortNext_Yield(rp, r);
  }

  if (sorter->pq->count < sorter->pq->size) {
    // The queue is not full - the result goes straight in. Its index-result
    // pointer is dropped before the heap takes it.
    SearchResult_SetIndexResult(sorter->pooledResult, NULL);
    mmh_insert(sorter->pq, sorter->pooledResult);
    if (SearchResult_GetScore(sorter->pooledResult) < rp->parent->minScore) {
      rp->parent->minScore = SearchResult_GetScore(sorter->pooledResult);
    }
    // the heap now owns that result, so allocate a new one for the next iteration
    sorter->pooledResult = rm_calloc(1, sizeof(*sorter->pooledResult));
    *sorter->pooledResult = SearchResult_New();
    return RESULT_QUEUED;
  }

  // The queue is full: compare against the current minimum.
  SearchResult *lowest = mmh_peek_min(sorter->pq);

  // update the min score. Irrelevant to SORTBY mode but hardly costs anything...
  if (SearchResult_GetScore(lowest) > rp->parent->minScore) {
    rp->parent->minScore = SearchResult_GetScore(lowest);
  }

  // if the new result ranks above the minimum, swap them; the evicted minimum
  // becomes the pooled result
  if (sorter->cmp(sorter->pooledResult, lowest, sorter->cmpCtx) > 0) {
    SearchResult_SetIndexResult(sorter->pooledResult, NULL);
    sorter->pooledResult = mmh_exchange_min(sorter->pq, sorter->pooledResult);
  }

  // clear the result in preparation for the next iteration
  SearchResult_Clear(sorter->pooledResult);
  return RESULT_QUEUED;
}

/* Accumulation phase of the sorter: keep running the inner loop until it stops
 * reporting RESULT_QUEUED. The parent's result limit is lifted for the
 * duration and restored before returning. */
static int rpsortNext_Accum(ResultProcessor *rp, SearchResult *r) {
  const uint32_t savedLimit = rp->parent->resultLimit;
  rp->parent->resultLimit = UINT32_MAX; // we want to accumulate all results

  int status;
  do {
    status = rpsortNext_innerLoop(rp, r);
  } while (status == RESULT_QUEUED);

  rp->parent->resultLimit = savedLimit; // restore the limit
  return status;
}

/* Compare results for the heap by score.
 * The lower score orders first. When neither score is lower than the other,
 * the result with the greater doc id orders first. Never returns 0. */
static inline int cmpByScore(const void *e1, const void *e2, const void *udata) {
  const SearchResult *lhs = e1;
  const SearchResult *rhs = e2;

  if (SearchResult_GetScore(lhs) < SearchResult_GetScore(rhs)) {
    return -1;
  }
  if (SearchResult_GetScore(lhs) > SearchResult_GetScore(rhs)) {
    return 1;
  }

  // Tie-break on the doc id
  if (SearchResult_GetDocId(lhs) > SearchResult_GetDocId(rhs)) {
    return -1;
  }
  return 1;
}

/* Compare results for the heap by sorting key.
 * The field comparison itself is delegated to SearchResult_CmpByFields (the
 * loop lives in Rust to avoid per-key FFI crossings for RLookupRow_Get). This
 * wrapper only resolves the query error object to report into, which stays
 * NULL when the sorter has no parent or the parent has no error object. */
static int cmpByFields(const void *e1, const void *e2, const void *udata) {
  const RPSorter *sorter = udata;
  const SearchResult *lhs = e1;
  const SearchResult *rhs = e2;

  QueryError *qerr = (sorter && sorter->base.parent) ? sorter->base.parent->err : NULL;

  return SearchResult_CmpByFields(sorter->fieldcmp.keys, sorter->fieldcmp.nkeys,
                                  lhs, rhs, sorter->fieldcmp.ascendMap, qerr);
}

/* Element destructor handed to the sorter's heap: destroys the search result
 * and frees its memory. A NULL element is ignored. */
static void srDtor(void *p) {
  if (p) {
    SearchResult_Destroy(p);
    rm_free(p);
  }
}

/* Create a sorter holding at most `maxresults` results.
 * With `nkeys` > 0 results are compared by the given keys (using `ascmap`),
 * otherwise they are compared by score. */
ResultProcessor *RPSorter_NewByFields(size_t maxresults, const RLookupKey **keys, size_t nkeys, uint64_t ascmap) {
  RPSorter *sorter = rm_calloc(1, sizeof(*sorter));

  sorter->base.type = RP_SORTER;
  sorter->base.Next = rpsortNext_Accum;
  sorter->base.Free = rpsortFree;

  sorter->fieldcmp.keys = keys;
  sorter->fieldcmp.nkeys = nkeys;
  sorter->fieldcmp.ascendMap = ascmap;

  if (nkeys) {
    sorter->cmp = cmpByFields;
  } else {
    sorter->cmp = cmpByScore;
  }
  // The sorter itself is the private data of the compare function
  sorter->cmpCtx = sorter;
  sorter->pq = mmh_init_with_size(maxresults, sorter->cmp, sorter->cmpCtx, srDtor);

  // Pre-allocate the result that upstream reads are written into
  sorter->pooledResult = rm_calloc(1, sizeof(*sorter->pooledResult));
  *sorter->pooledResult = SearchResult_New();

  return &sorter->base;
}

ResultProcessor *RPSorter_NewByScore(size_t maxresults) {

return RPSorter_NewByFields(maxresults, NULL, 0, 0);

}

/*******************************************************************************************************************

* Paging Processor

* The sorter builds a heap of size N, but the pager is responsible for taking result

* FIRST...FIRST+NUM from it.

* For example, if we want to get results 40-50, we build a heap of size 50 on the sorter, and

* the pager is responsible for discarding the first 40 results and returning just 10

* They are separated so that later on we can cache the sorter's heap, and continue paging it

* without re-executing the entire query

*******************************************************************************************************************/

// State of the paging processor.

typedef struct {

ResultProcessor base;

// Number of upstream results still to be skipped; counts down to 0 during the skip phase.

uint32_t offset;

// Number of results still to be returned; decremented for every result passed on.

uint32_t remaining;

} RPPager;

/* Second phase of the pager: pass upstream results through until the limit is
 * used up, then report RS_RESULT_EOF. */
static int rppagerNext_Limit(ResultProcessor *base, SearchResult *r) {
  RPPager *pager = (RPPager *)base;

  // If we've reached LIMIT:
  if (pager->remaining == 0) {
    return RS_RESULT_EOF;
  }

  const int status = base->upstream->Next(base->upstream, r);
  // Account for the result only if we got one.
  if (status == RS_RESULT_OK) {
    pager->remaining -= 1;
  }
  return status;
}

/* First phase of the pager: read and discard `offset` upstream results, then
 * switch to the limit phase and return its first result. A non-OK upstream
 * status during skipping is returned immediately. */
static int rppagerNext_Skip(ResultProcessor *base, SearchResult *r) {
  RPPager *pager = (RPPager *)base;

  // Save the previous limit, so that it will seem untouched to the downstream
  const uint32_t downstreamLimit = base->parent->resultLimit;

  // Currently a pager is never called more than offset+limit times.
  // We limit the entire pipeline to offset+limit (upstream and downstream).
  const uint32_t limit = MIN(pager->remaining, downstreamLimit);
  base->parent->resultLimit = pager->offset + limit;

  // Consume results until the offset is reached
  while (pager->offset > 0) {
    const int status = base->upstream->Next(base->upstream, r);
    if (status != RS_RESULT_OK) {
      return status;
    }
    base->parent->resultLimit -= 1;
    pager->offset -= 1;
    SearchResult_Clear(r);
  }

  base->parent->resultLimit = downstreamLimit;
  base->Next = rppagerNext_Limit; // switch to second phase
  return rppagerNext_Limit(base, r);
}

/* Free callback of the pager: the pager owns nothing besides its own allocation. */
static void rppagerFree(ResultProcessor *base) {
  RPPager *pager = (RPPager *)base;
  rm_free(pager);
}

/**
 * Create a new pager. The offset and limit are taken from the user request.
 *
 * The pager keeps both counters as 32-bit values. Requests larger than
 * UINT32_MAX are saturated to UINT32_MAX instead of being silently truncated
 * modulo 2^32, which could turn a huge LIMIT into a tiny (or empty) page.
 *
 * @param offset Number of leading results to discard.
 * @param limit  Maximum number of results to yield after the offset.
 * @return The new processor, starting in the skipping phase. It is released
 *         through its Free callback.
 */
ResultProcessor *RPPager_New(size_t offset, size_t limit) {
  RPPager *ret = rm_calloc(1, sizeof(*ret));

  // Saturate rather than wrap when narrowing size_t to uint32_t.
  ret->offset = offset > UINT32_MAX ? UINT32_MAX : (uint32_t)offset;
  ret->remaining = limit > UINT32_MAX ? UINT32_MAX : (uint32_t)limit;

  ret->base.type = RP_PAGER_LIMITER;
  ret->base.Next = rppagerNext_Skip;
  ret->base.Free = rppagerFree;
  return &ret->base;
}

////////////////////////////////////////////////////////////////////////////////

/// Value Loader ///

////////////////////////////////////////////////////////////////////////////////

// State of a loader result processor, which loads document values into the
// row data of each result it passes downstream.
typedef struct {
// Embedded processor header. It is the first member, and the callbacks cast
// the ResultProcessor pointer they receive straight to RPLoader.
ResultProcessor base;
// Lookup handed to the RLookup_LoadDocument* calls.
RLookup *lk;
// Options handed to the RLookup_LoadDocument* calls. `keys` is a private
// copy of the requested keys and is released in rploaderFreeInternal.
RLookupLoadOptions loadopts;
// True when no explicit key list was given, in which case
// RLookup_LoadDocumentAll is used instead of RLookup_LoadDocumentIndividual.
bool load_all;
// Error object that `loadopts.status` points at; cleared after a failed load
// and on free.
QueryError status;
} RPLoader;

/***

* isDocumentStillValid - check if the document is still valid for loading.

* @param self The loader

* @param r The search result

* @return true if the document is still valid, false otherwise.

static bool isDocumentStillValid(const RPLoader *self, SearchResult *r) {

if (self->loadopts.sctx->spec->diskSpec) {

// The Document_Deleted and Document_FailedToOpen flags are not used on disk and are not updated after we take the GIL, so we check the disk directly.

if (SearchDisk_DocIdDeleted(self->loadopts.sctx->spec->diskSpec, SearchResult_GetDocumentMetadata(r)->id)) {

SearchResult_SetFlags(r, SearchResult_GetFlags(r) | Result_ExpiredDoc);

return false;

}

} else {

if ((SearchResult_GetDocumentMetadata(r)->flags & Document_FailedToOpen) || (SearchResult_GetDocumentMetadata(r)->flags & Document_Deleted)) {

SearchResult_SetFlags(r, SearchResult_GetFlags(r) | Result_ExpiredDoc);

return false;

}

return true;

}

/*
 * Load the values of the document behind `r` into the result's row data.
 *
 * Nothing is loaded when the document is no longer valid; in that case
 * isDocumentStillValid has already marked the result as expired.
 *
 * A failed load is not reported to the caller: the row is kept as it was, the
 * document metadata is flagged Document_FailedToOpen, the result is flagged
 * Result_ExpiredDoc and the loader's error object is cleared.
 */
static void rpLoader_loadDocument(RPLoader *self, SearchResult *r) {
  // If the document was modified or deleted, we don't load it, and we need to mark
  // the result as expired.
  if (!isDocumentStillValid(self, r)) {
    return;
  }

  self->loadopts.dmd = SearchResult_GetDocumentMetadata(r);

  int ret;
  if (self->load_all) {
    ret = RLookup_LoadDocumentAll(self->lk, SearchResult_GetRowDataMut(r), &self->loadopts);
  } else {
    ret = RLookup_LoadDocumentIndividual(self->lk, SearchResult_GetRowDataMut(r), &self->loadopts);
  }

  // if loading the document has failed, we keep the row as it was.
  // Error code and message are ignored.
  if (ret != REDISMODULE_OK) {
    // mark the document as "failed to open" for later loaders or other threads (optimization)
    ((RSDocumentMetadata *)(SearchResult_GetDocumentMetadata(r)))->flags |= Document_FailedToOpen;

    // The result contains an expired document.
    SearchResult_SetFlags(r, SearchResult_GetFlags(r) | Result_ExpiredDoc);

    // Drop the error recorded by the failed load so it does not leak into the next one.
    QueryError_ClearError(&self->status);
  }
}

/*
 * Next callback of the plain loader: pull one result from upstream and load
 * its document.
 *
 * Non-OK upstream codes are returned unchanged and nothing is loaded. When a
 * result was obtained the function always returns RS_RESULT_OK, since
 * rpLoader_loadDocument does not report failures.
 */
static int rploaderNext(ResultProcessor *base, SearchResult *r) {
  const int status = base->upstream->Next(base->upstream, r);
  if (status == RS_RESULT_OK) {
    rpLoader_loadDocument((RPLoader *)base, r);
  }
  return status;
}

/*
 * Release what the loader owns, without freeing the loader object itself:
 * the error object and the private copy of the key list.
 */
static void rploaderFreeInternal(ResultProcessor *base) {
  RPLoader *loader = (RPLoader *)base;

  QueryError_ClearError(&loader->status);
  rm_free(loader->loadopts.keys);
}

/* Free callback of the plain loader: release owned resources, then the loader itself. */
static void rploaderFree(ResultProcessor *base) {
  // Owned members must go first, while `base` is still a live object.
  rploaderFreeInternal(base);
  rm_free(base);
}

/*
 * Initialise the loading-related members of `self`.
 *
 * With a non-empty key list the loader takes a private copy of `keys` and
 * loads only those. With nkeys == 0 it loads the whole document and enables
 * RLOOKUP_OPT_ALLLOADED on the lookup. In the latter case `loadopts.keys` and
 * `loadopts.nkeys` are left as they were.
 */
static void rploaderNew_setLoadOpts(RPLoader *self, RedisSearchCtx *sctx, RLookup *lk, const RLookupKey **keys, size_t nkeys, bool forceLoad) {
  self->lk = lk;

  self->loadopts.sctx = sctx;
  self->loadopts.dmd = NULL;
  self->loadopts.status = &self->status;
  self->loadopts.forceLoad = forceLoad;
  self->loadopts.forceString = 1; // used in `LOAD_ALLKEYS` mode.

  self->load_all = (nkeys == 0);
  if (self->load_all) {
    RLookup_EnableOptions(lk, RLOOKUP_OPT_ALLLOADED); // TODO: turn on only for HASH specs
    return;
  }

  // Keep our own copy of the key array; it is released in rploaderFreeInternal.
  const size_t keysBytes = sizeof(*keys) * nkeys;
  self->loadopts.keys = rm_malloc(keysBytes);
  memcpy(self->loadopts.keys, keys, keysBytes);
  self->loadopts.nkeys = nkeys;
}

/*
 * Allocate a plain (non-buffering) loader.
 *
 * The object is zero-allocated, its load options are filled in by
 * rploaderNew_setLoadOpts, and it is wired to rploaderNext / rploaderFree.
 * Returns the embedded ResultProcessor.
 */
static ResultProcessor *RPPlainLoader_New(RedisSearchCtx *sctx, RLookup *lk, const RLookupKey **keys, size_t nkeys, bool forceLoad) {
  RPLoader *loader = rm_calloc(1, sizeof(*loader));
  ResultProcessor *rp = &loader->base;

  rploaderNew_setLoadOpts(loader, sctx, lk, keys, nkeys, forceLoad);

  rp->type = RP_LOADER;
  rp->Next = rploaderNext;
  rp->Free = rploaderFree;
  return rp;
}

/*******************************************************************************************************************

* Safe Loader Results Processor

* This component should be added to the query's execution pipeline INSTEAD OF a loader, if a loader is needed.

* The RP has a few phases:

* 1. Buffering phase - the RP will buffer the results from the upstream.

* 2. Loading phase:

* a. Verify that the spec is unlocked, and lock the Redis keyspace.

* b. Load the needed data for each buffered result.

* c. Unlock the Redis keyspace.

* 3. Yielding phase - the RP will yield the buffered results.

*******************************************************************************************************************/

// Default size of one buffer block used by the safe loader.
// NOTE(review): presumably counted in SearchResult entries per block - confirm
// against the buffering code.
#define DEFAULT_BUFFER_BLOCK_SIZE 1024

typedef struct RPSafeLoader {

// Loading context

RPLoader base_loader;

// Buffer management

SearchResult **BufferBlocks;

size_t buffer_results_count;

// Results iterator

size_t curr_result_index;

// Last buffered result code. Used to know whether to return OK or EOF.

char last_buffered_rc;

// If true, the loader will become a plain loader after the buffer is empty.

// Used when changing the MT mode through a cursor execution session (e.g. FT.CURSOR READ)

bool becomePlainLoader;

// Search context

RedisSearchCtx *sctx;

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

result_processor.c

Latest commit

History

result_processor.c

File metadata and controls