Skip to content

Commit 357d356

Browse files
meiravgriGuyAv46
andauthored
MOD-8097, MOD-8114 Fix memory counting (#5204)
* expose openNumericKeysDict, requires a write/read argument. replaces all OpenNumericIndex calls. * fork gc -initialiize index pointer only once rename createIndex to createPopulateTermsInvIndex add function to tests/index_utils generalize FGCTest suit add inheriting suite for tags some cosmetics in cpp_range ExtTest.testDynamicLoading: log when assert fails (the test halts upon assertion and the log will be nerver pronted) in IndexTest and QueryTest use IndexSpec_RemoveFromGlobals instead of StrongRef_Release to ensure the spec is properly cleaned otherwise some structs like the global prefix trie nodes, potentially accessed by following tests, might hold a copy of the spec refrence that was already released and cuase a crash. * add inverted index total size to the numeric tree root update in gc and in addition add cpp_range test and cpp_forkgc test * FIX: decrease gc->stats.totalCollected by bytesAdded to not count bytes added by us remove reseting currNode->range->invertedIndexSize in applyNumIdx called upon each node recieved from the child process instead, we will count the memory released when trimming the empty leaves call IndexSpec_TotalMemUsage from RediSearch_MemUsage instead of code duplication add IndexSpec_collect_numeric_overhead that counts the memory of NumericRangeTree in a given spec call this function to add it to IndexSpec_TotalMemUsage update LLApiTest testInfo* accordingly * fixes * fix new cpp forkgc test * fix unsafe read * fix status check * numeric tree debug commands - if tree was not intizlied yet, reply as if it was an empty tree * fix reset range->invertedIndexSize in applyNumIdx when index is empty. it will be counted when the range is deleted * add initial inverted index size if numeric to test don't intialize the numeric inverted index in the child if it's empty * fix gc warning * little fix * changes rt->invertedIndexSize to rt->invertedIndexesSize fix IndexSpec_collect_numeric_overhead and add test fix sending NULL to FGC_sendFixed * CalculateNumericInvertedIndexMemory fix identation and break when failing * rename openNumericKeysDict variable write to initialize and change type to bool * change OPEN_INDEX_READ to DONT_CREATE_INDEX and OPEN_INDEX_WRITE to CREATE_INDEX add coments to tests * use Likst instead of list to support older python versopns * remove type from function args * fix --------- Co-authored-by: GuyAv46 <guy.avimor@gmail.com>
1 parent 75feca2 commit 357d356

24 files changed

Lines changed: 661 additions & 235 deletions

src/debug_commands.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -192,19 +192,24 @@ DEBUG_COMMAND(NumericIndexSummary) {
192192
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
193193
goto end;
194194
}
195-
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName);
196-
if (!rt) {
197-
RedisModule_ReplyWithError(sctx->redisCtx, "can not open numeric field");
198-
goto end;
195+
NumericRangeTree rt_info = {0};
196+
int root_max_depth = 0;
197+
198+
NumericRangeTree *rt = openNumericKeysDict(sctx->spec, keyName, DONT_CREATE_INDEX);
199+
// If we failed to open the numeric index, it was not initialized yet.
200+
// Else, we copy the data to a local variable.
201+
if (rt) {
202+
rt_info = *rt;
203+
root_max_depth = rt->root->maxDepth;
199204
}
200205

201206
START_POSTPONED_LEN_ARRAY(numIdxSum);
202-
REPLY_WITH_LONG_LONG("numRanges", rt->numRanges, ARRAY_LEN_VAR(numIdxSum));
203-
REPLY_WITH_LONG_LONG("numEntries", rt->numEntries, ARRAY_LEN_VAR(numIdxSum));
204-
REPLY_WITH_LONG_LONG("lastDocId", rt->lastDocId, ARRAY_LEN_VAR(numIdxSum));
205-
REPLY_WITH_LONG_LONG("revisionId", rt->revisionId, ARRAY_LEN_VAR(numIdxSum));
206-
REPLY_WITH_LONG_LONG("emptyLeaves", rt->emptyLeaves, ARRAY_LEN_VAR(numIdxSum));
207-
REPLY_WITH_LONG_LONG("RootMaxDepth", rt->root->maxDepth, ARRAY_LEN_VAR(numIdxSum));
207+
REPLY_WITH_LONG_LONG("numRanges", rt_info.numRanges, ARRAY_LEN_VAR(numIdxSum));
208+
REPLY_WITH_LONG_LONG("numEntries", rt_info.numEntries, ARRAY_LEN_VAR(numIdxSum));
209+
REPLY_WITH_LONG_LONG("lastDocId", rt_info.lastDocId, ARRAY_LEN_VAR(numIdxSum));
210+
REPLY_WITH_LONG_LONG("revisionId", rt_info.revisionId, ARRAY_LEN_VAR(numIdxSum));
211+
REPLY_WITH_LONG_LONG("emptyLeaves", rt_info.emptyLeaves, ARRAY_LEN_VAR(numIdxSum));
212+
REPLY_WITH_LONG_LONG("RootMaxDepth", root_max_depth, ARRAY_LEN_VAR(numIdxSum));
208213
END_POSTPONED_LEN_ARRAY(numIdxSum);
209214

210215
end:
@@ -227,11 +232,13 @@ DEBUG_COMMAND(DumpNumericIndex) {
227232
// It's a debug command... lets not waste time on string comparison.
228233
int with_headers = argc == 5 ? true : false;
229234

230-
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName);
235+
NumericRangeTree *rt = openNumericKeysDict(sctx->spec, keyName, DONT_CREATE_INDEX);
236+
// If we failed to open the numeric index, it was not initialized yet.
231237
if (!rt) {
232-
RedisModule_ReplyWithError(sctx->redisCtx, "can not open numeric field");
238+
RedisModule_ReplyWithEmptyArray(sctx->redisCtx);
233239
goto end;
234240
}
241+
235242
NumericRangeNode *currNode;
236243
NumericRangeTreeIterator *iter = NumericRangeTreeIterator_New(rt);
237244
size_t InvertedIndexNumber = 0;
@@ -345,6 +352,9 @@ InvertedIndexStats NumericRange_DebugReply(RedisModuleCtx *ctx, NumericRange *r)
345352
return ret;
346353
}
347354

355+
/**
356+
* It is safe to use @param n equals to NULL.
357+
*/
348358
InvertedIndexStats NumericRangeNode_DebugReply(RedisModuleCtx *ctx, NumericRangeNode *n) {
349359

350360
size_t len = 0;
@@ -375,6 +385,9 @@ InvertedIndexStats NumericRangeNode_DebugReply(RedisModuleCtx *ctx, NumericRange
375385
return invIdxStats;
376386
}
377387

388+
/**
389+
* It is safe to use @param rt with all fields initialized to 0, including a NULL root.
390+
*/
378391
void NumericRangeTree_DebugReply(RedisModuleCtx *ctx, NumericRangeTree *rt) {
379392

380393
size_t len = 0;
@@ -412,10 +425,12 @@ DEBUG_COMMAND(DumpNumericIndexTree) {
412425
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
413426
goto end;
414427
}
415-
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName);
428+
NumericRangeTree dummy_rt = {0};
429+
NumericRangeTree *rt = openNumericKeysDict(sctx->spec, keyName, DONT_CREATE_INDEX);
430+
// If we failed to open the numeric index, it was not initialized yet,
431+
// reply as if the tree is empty.
416432
if (!rt) {
417-
RedisModule_ReplyWithError(sctx->redisCtx, "can not open numeric field");
418-
goto end;
433+
rt = &dummy_rt;
419434
}
420435

421436
NumericRangeTree_DebugReply(sctx->redisCtx, rt);
@@ -677,6 +692,7 @@ DEBUG_COMMAND(GCWaitForAllJobs) {
677692
return REDISMODULE_OK;
678693
}
679694

695+
// GC_CLEAN_NUMERIC INDEX_NAME NUMERIC_FIELD_NAME
680696
DEBUG_COMMAND(GCCleanNumeric) {
681697

682698
if (argc != 4) {
@@ -688,9 +704,8 @@ DEBUG_COMMAND(GCCleanNumeric) {
688704
RedisModule_ReplyWithError(sctx->redisCtx, "Could not find given field in index spec");
689705
goto end;
690706
}
691-
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName);
707+
NumericRangeTree *rt = openNumericKeysDict(sctx->spec, keyName, DONT_CREATE_INDEX);
692708
if (!rt) {
693-
RedisModule_ReplyWithError(sctx->redisCtx, "can not open numeric field");
694709
goto end;
695710
}
696711

src/document.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "geometry/geometry_api.h"
2424
#include "aggregate/expr/expression.h"
2525
#include "rmutil/rm_assert.h"
26+
#include "redis_index.h"
2627

2728
// Memory pool for RSAddDocumentContext contexts
2829
static mempool_t *actxPool_g = NULL;
@@ -578,7 +579,7 @@ FIELD_BULK_INDEXER(geometryIndexer) {
578579

579580
FIELD_BULK_INDEXER(numericIndexer) {
580581
RedisModuleString *keyName = IndexSpec_GetFormattedKey(ctx->spec, fs, INDEXFLD_T_NUMERIC);
581-
NumericRangeTree *rt = OpenNumericIndex(ctx, keyName);
582+
NumericRangeTree *rt = openNumericKeysDict(ctx->spec, keyName, CREATE_INDEX);
582583
if (!rt) {
583584
QueryError_SetError(status, QUERY_EGENERIC, "Could not open numeric index for indexing");
584585
return -1;

src/fork_gc.c

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,13 @@ typedef enum {
4747
static void FGC_updateStats(ForkGC *gc, RedisSearchCtx *sctx,
4848
size_t recordsRemoved, size_t bytesCollected, size_t bytesAdded) {
4949
sctx->spec->stats.numRecords -= recordsRemoved;
50-
sctx->spec->stats.invertedSize += bytesAdded - bytesCollected;
50+
sctx->spec->stats.invertedSize += bytesAdded;
51+
sctx->spec->stats.invertedSize -= bytesCollected;
5152
gc->stats.totalCollected += bytesCollected;
53+
gc->stats.totalCollected -= bytesAdded;
5254
}
5355

56+
// Buff shouldn't be NULL.
5457
static void FGC_sendFixed(ForkGC *fgc, const void *buff, size_t len) {
5558
RS_LOG_ASSERT(len > 0, "buffer length cannot be 0");
5659
ssize_t size = write(fgc->pipefd[GC_WRITERFD], buff, len);
@@ -254,7 +257,8 @@ static bool FGC_childRepairInvidx(ForkGC *gc, RedisSearchCtx *sctx, InvertedInde
254257
if (array_len(blocklist) == idx->size) {
255258
// no empty block, there is no need to send the blocks array. Don't send
256259
// any new blocks.
257-
FGC_sendBuffer(gc, NULL, 0);
260+
size_t len = 0;
261+
FGC_SEND_VAR(gc, len);
258262
} else {
259263
FGC_sendBuffer(gc, blocklist, array_len(blocklist) * sizeof(*blocklist));
260264
}
@@ -292,7 +296,7 @@ static void FGC_childCollectTerms(ForkGC *gc, RedisSearchCtx *sctx) {
292296
while (TrieIterator_Next(iter, &rstr, &slen, NULL, &score, &dist)) {
293297
size_t termLen;
294298
char *term = runesToStr(rstr, slen, &termLen);
295-
InvertedIndex *idx = Redis_OpenInvertedIndex(sctx, term, strlen(term), 1, NULL);
299+
InvertedIndex *idx = Redis_OpenInvertedIndex(sctx, term, strlen(term), DONT_CREATE_INDEX, NULL);
296300
if (idx) {
297301
struct iovec iov = {.iov_base = (void *)term, termLen};
298302
FGC_childRepairInvidx(gc, sctx, idx, sendHeaderString, &iov, NULL);
@@ -422,7 +426,12 @@ static void FGC_childCollectNumeric(ForkGC *gc, RedisSearchCtx *sctx) {
422426

423427
for (int i = 0; i < array_len(numericFields); ++i) {
424428
RedisModuleString *keyName = IndexSpec_GetFormattedKey(sctx->spec, numericFields[i], INDEXFLD_T_NUMERIC);
425-
NumericRangeTree *rt = OpenNumericIndex(sctx, keyName);
429+
NumericRangeTree *rt = openNumericKeysDict(sctx->spec, keyName, DONT_CREATE_INDEX);
430+
431+
// No entries were added to the numeric field, hence the tree was not initialized
432+
if (!rt) {
433+
continue;
434+
}
426435

427436
NumericRangeTreeIterator *gcIterator = NumericRangeTreeIterator_New(rt);
428437

@@ -834,12 +843,6 @@ static void applyNumIdx(ForkGC *gc, RedisSearchCtx *sctx, NumGcInfo *ninfo) {
834843
currNode->range->invertedIndexSize += info->nbytesAdded;
835844
currNode->range->invertedIndexSize -= info->nbytesCollected;
836845

837-
// TODO: fix for NUMERIC similar to TAG fix PR#2269
838-
if (currNode->range->entries->numDocs == 0) {
839-
// NumericRangeTree_DeleteNode(rt, (currNode->range->minVal + currNode->range->maxVal) / 2);
840-
info->nbytesCollected += InvertedIndex_MemUsage(currNode->range->entries);
841-
currNode->range->invertedIndexSize = 0;
842-
}
843846
FGC_updateStats(gc, sctx, info->nentriesCollected, info->nbytesCollected, info->nbytesAdded);
844847

845848
resetCardinality(ninfo, currNode);
@@ -930,9 +933,12 @@ static FGCError FGC_parentHandleTerms(ForkGC *gc) {
930933
static FGCError FGC_parentHandleNumeric(ForkGC *gc) {
931934
size_t fieldNameLen;
932935
char *fieldName = NULL;
936+
const FieldSpec *fs = NULL;
937+
RedisModuleString *keyName = NULL;
933938
uint64_t rtUniqueId;
934939
NumericRangeTree *rt = NULL;
935940
FGCError status = recvNumericTagHeader(gc, &fieldName, &fieldNameLen, &rtUniqueId);
941+
bool initialized = false;
936942
if (status == FGC_DONE) {
937943
return FGC_DONE;
938944
}
@@ -959,8 +965,12 @@ static FGCError FGC_parentHandleNumeric(ForkGC *gc) {
959965

960966
RedisSearchCtx_LockSpecWrite(sctx);
961967

962-
RedisModuleString *keyName = IndexSpec_GetFormattedKeyByName(sctx->spec, fieldName, INDEXFLD_T_NUMERIC);
963-
rt = OpenNumericIndex(sctx, keyName);
968+
if (!initialized) {
969+
fs = IndexSpec_GetField(sctx->spec, fieldName, strlen(fieldName));
970+
keyName = IndexSpec_GetFormattedKey(sctx->spec, fs, fs->types);
971+
rt = openNumericKeysDict(sctx->spec, keyName, DONT_CREATE_INDEX);
972+
initialized = true;
973+
}
964974

965975
if (rt->uniqueId != rtUniqueId) {
966976
status = FGC_PARENT_ERROR;
@@ -974,6 +984,8 @@ static FGCError FGC_parentHandleNumeric(ForkGC *gc) {
974984

975985
applyNumIdx(gc, sctx, &ninfo);
976986
rt->numEntries -= ninfo.info.nentriesCollected;
987+
rt->invertedIndexesSize -= ninfo.info.nbytesCollected;
988+
rt->invertedIndexesSize += ninfo.info.nbytesAdded;
977989

978990
if (ninfo.node->range->entries->numDocs == 0) {
979991
rt->emptyLeaves++;
@@ -993,16 +1005,16 @@ static FGCError FGC_parentHandleNumeric(ForkGC *gc) {
9931005

9941006
rm_free(fieldName);
9951007

996-
if (rt && rt->emptyLeaves >= rt->numRanges / 2) {
1008+
if (status == FGC_COLLECTED && rt && gc->cleanNumericEmptyNodes) {
1009+
// We need to have a valid strong reference to the spec in order to dereference rt
9971010
StrongRef spec_ref = WeakRef_Promote(gc->index);
9981011
IndexSpec *sp = StrongRef_Get(spec_ref);
999-
if (!sp) {
1000-
return FGC_SPEC_DELETED;
1001-
}
1012+
if (!sp) return FGC_SPEC_DELETED;
10021013
RedisSearchCtx sctx = SEARCH_CTX_STATIC(gc->ctx, sp);
10031014
RedisSearchCtx_LockSpecWrite(&sctx);
1004-
if (gc->cleanNumericEmptyNodes) {
1015+
if (rt->emptyLeaves >= rt->numRanges / 2) { // TODO: count `numLeaves` in the tree and use it here
10051016
NRN_AddRv rv = NumericRangeTree_TrimEmptyLeaves(rt);
1017+
// rv.sz is the number of bytes added. Since we are cleaning empty leaves, it should be negative
10061018
FGC_updateStats(gc, &sctx, 0, -rv.sz, 0);
10071019
}
10081020
RedisSearchCtx_UnlockSpec(&sctx);

src/inverted_index.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ static inline size_t sizeof_InvertedIndex(IndexFlags flags) {
9595

9696
// Create a new inverted index object, with the given flag.
9797
// If initBlock is 1, we create the first block.
98-
// out parameter memsize must be not NULL, the total of allocated memory
98+
// out parameter memsize must be not NULL, the total of allocated memory
9999
// will be returned in it
100100
InvertedIndex *NewInvertedIndex(IndexFlags flags, int initBlock, size_t *memsize);
101101

src/numeric_index.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ NumericRangeTree *NewNumericRangeTree() {
387387

388388
// updated value since splitCard should be >NR_CARD_CHECK
389389
ret->root = NewLeafNode(2, 16);
390+
ret->invertedIndexesSize = ret->root->range->invertedIndexSize;
390391
ret->numEntries = 0;
391392
ret->numRanges = 1;
392393
ret->revisionId = 0;
@@ -415,6 +416,7 @@ NRN_AddRv NumericRangeTree_Add(NumericRangeTree *t, t_docId docId, double value,
415416
}
416417
t->numRanges += rv.numRanges;
417418
t->numEntries++;
419+
t->invertedIndexesSize += rv.sz;
418420

419421
return rv;
420422
}
@@ -494,6 +496,7 @@ NRN_AddRv NumericRangeTree_TrimEmptyLeaves(NumericRangeTree *t) {
494496
t->revisionId++;
495497
t->numRanges += rv.numRanges;
496498
t->emptyLeaves = 0;
499+
t->invertedIndexesSize += rv.sz;
497500
}
498501
return rv;
499502
}
@@ -574,13 +577,13 @@ RedisModuleString *fmtRedisNumericIndexKey(const RedisSearchCtx *ctx, const char
574577
field);
575578
}
576579

577-
static NumericRangeTree *openNumericKeysDict(IndexSpec* spec, RedisModuleString *keyName,
578-
int write) {
580+
NumericRangeTree *openNumericKeysDict(IndexSpec* spec, RedisModuleString *keyName,
581+
bool create_if_missing) {
579582
KeysDictValue *kdv = dictFetchValue(spec->keysDict, keyName);
580583
if (kdv) {
581584
return kdv->p;
582585
}
583-
if (!write) {
586+
if (!create_if_missing) {
584587
return NULL;
585588
}
586589
kdv = rm_calloc(1, sizeof(*kdv));
@@ -608,7 +611,7 @@ struct indexIterator *NewNumericFilterIterator(const RedisSearchCtx *ctx, const
608611

609612
t = RedisModule_ModuleTypeGetValue(key);
610613
} else {
611-
t = openNumericKeysDict(ctx->spec, s, 0);
614+
t = openNumericKeysDict(ctx->spec, s, DONT_CREATE_INDEX);
612615
}
613616

614617
if (!t) {
@@ -631,10 +634,6 @@ struct indexIterator *NewNumericFilterIterator(const RedisSearchCtx *ctx, const
631634
return it;
632635
}
633636

634-
NumericRangeTree *OpenNumericIndex(const RedisSearchCtx *ctx, RedisModuleString *keyName) {
635-
return openNumericKeysDict(ctx->spec, keyName, 1);
636-
}
637-
638637
void __numericIndex_memUsageCallback(NumericRangeNode *n, void *ctx) {
639638
unsigned long *sz = ctx;
640639
*sz += sizeof(NumericRangeNode);
@@ -820,7 +819,7 @@ void NumericRangeIterator_OnReopen(void *privdata) {
820819
IndexIterator *it = nu->it;
821820

822821
RedisModuleString *numField = IndexSpec_GetFormattedKeyByName(sp, nu->fieldName, INDEXFLD_T_NUMERIC);
823-
NumericRangeTree *rt = openNumericKeysDict(sp, numField, 0);
822+
NumericRangeTree *rt = openNumericKeysDict(sp, numField, DONT_CREATE_INDEX);
824823

825824
if (!rt || rt->revisionId != nu->lastRevId) {
826825
// The numeric tree was either completely deleted or a node was splitted or removed.

src/numeric_index.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ typedef struct {
7979
NumericRangeNode *root;
8080
size_t numRanges;
8181
size_t numEntries;
82+
size_t invertedIndexesSize;
83+
8284
t_docId lastDocId;
8385

8486
uint32_t revisionId;
@@ -137,7 +139,7 @@ void NumericRangeTree_Free(NumericRangeTree *t);
137139

138140
extern RedisModuleType *NumericIndexType;
139141

140-
NumericRangeTree *OpenNumericIndex(const RedisSearchCtx *ctx, RedisModuleString *keyName);
142+
NumericRangeTree *openNumericKeysDict(IndexSpec* spec, RedisModuleString *keyName, bool create_if_missing);
141143

142144
int NumericIndexType_Register(RedisModuleCtx *ctx);
143145
void *NumericIndexType_RdbLoad(RedisModuleIO *rdb, int encver);

src/redis_index.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ const char *Redis_SelectRandomTerm(const RedisSearchCtx *ctx, size_t *tlen);
3939
#define INVERTED_INDEX_ENCVER 1
4040
#define INVERTED_INDEX_NOFREQFLAG_VER 0
4141

42+
#define DONT_CREATE_INDEX false
43+
#define CREATE_INDEX true
44+
4245
typedef int (*ScanFunc)(RedisModuleCtx *ctx, RedisModuleString *keyName, void *opaque);
4346

4447
/* Scan the keyspace with MATCH for a prefix, and call ScanFunc for each key found */

src/redisearch_api.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -910,16 +910,7 @@ int RediSearch_IndexInfo(RSIndex* rm, RSIdxInfo *info) {
910910

911911
size_t RediSearch_MemUsage(RSIndex* rm) {
912912
IndexSpec *sp = __RefManager_Get_Object(rm);
913-
size_t res = 0;
914-
res += sp->docs.memsize;
915-
res += sp->docs.sortablesSize;
916-
res += TrieMap_MemUsage(sp->docs.dim.tm);
917-
res += IndexSpec_collect_text_overhead(sp);
918-
res += IndexSpec_collect_tags_overhead(sp);
919-
res += sp->stats.invertedSize;
920-
res += sp->stats.offsetVecsSize;
921-
res += sp->stats.termsSize;
922-
return res;
913+
return IndexSpec_TotalMemUsage(sp, 0, 0, 0);
923914
}
924915

925916
// Collect statistics of all the currently existing indexes

0 commit comments

Comments
 (0)