Skip to content

Abort has triggered in mh_point_holes_resize #11022

@ligurio

Description

@ligurio

Bug description

(gdb) frame 5
#5  0x00005a6f65473c94 in mh_point_holes_resize (h=0x5a6f675d0200, arg=0) at /home/sergeyb/sources/MRG/tarantool/src/lib/salad/mhash.h:495
495                     abort();
(gdb) 

/*
 * Move the live entries of hash table `h` into its shadow table
 * (`h->shadow`) and then replace `h` with the shadow.
 *
 * The scan starts at h->resize_position and runs up to h->n_buckets.
 * When MH_INCREMENTAL_RESIZE is enabled, at most h->batch bucket
 * indices are visited per call; the function then records where it
 * stopped and returns, to be continued by a later call.
 *
 * `arg` is passed through unchanged to put_slot().
 */
void
_mh(resize)(struct _mh(t) *h, mh_arg_t arg)
{
/* Destination table that receives the re-inserted nodes. */
struct _mh(t) *s = h->shadow;
/* Out-parameter for put_slot(); its value is not inspected here. */
int exist;
#if MH_INCREMENTAL_RESIZE
/* Per-call budget of bucket indices to visit. */
mh_int_t batch = h->batch;
#endif
for (mh_int_t i = h->resize_position; i < h->n_buckets; i++) {
#if MH_INCREMENTAL_RESIZE
/*
 * The budget is checked before the occupancy test, so empty
 * buckets consume it as well. Once exhausted, save the index
 * to resume from and leave the resize unfinished.
 */
if (batch-- == 0) {
h->resize_position = i;
return;
}
#endif
/* Skip buckets that hold no element. */
if (!mh_exist(h, i))
continue;
/*
 * Insert the node into the shadow table; the returned value is
 * used as the destination index for the payload copy below.
 */
mh_int_t n = put_slot(s, mh_node(h, i), &exist, arg);
s->p[n] = h->p[i];
}
/* The whole range has been scanned: release the old arrays. */
free(h->p);
free(h->b);
/*
 * Consistency check: the shadow table must report the same size as
 * the original. This is the abort() hit in the reported backtrace
 * (mhash.h:495).
 */
if (s->size != h->size)
abort();
/* Overwrite the original header with the shadow's contents. */
memcpy(h, s, sizeof(*h));
h->resize_cnt++;
/* Zero the shadow header now that `h` carries its former fields. */
memset(s, 0, sizeof(*s));
}

  • OS: Linux
  • OS Version: Ubuntu 22.04
  • Architecture: amd64

Tarantool 3.4.0-entrypoint-67-gb9265cd368
Target: Linux-x86_64-Debug
Build options: cmake . -DCMAKE_INSTALL_PREFIX=/usr/local -DENABLE_BACKTRACE=TRUE
Compiler: GNU-11.4.0
C_FLAGS: -fexceptions -funwind-tables -fasynchronous-unwind-tables -fno-common -msse2 -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong -fPIC -fmacro-prefix-map=/home/sergeyb/sources/MRG/tarantool=. -std=c11 -Wall -Wextra -Wno-gnu-alignof-expression -fno-gnu89-inline -Wno-cast-function-type -Werror -g -ggdb -O0
CXX_FLAGS: -fexceptions -funwind-tables -fasynchronous-unwind-tables -fno-common -msse2 -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong -fPIC -fmacro-prefix-map=/home/sergeyb/sources/MRG/tarantool=. -std=c++11 -Wall -Wextra -Wno-invalid-offsetof -Wno-gnu-alignof-expression -Wno-cast-function-type -Werror -g -ggdb -O0

Steps to reproduce

Apply the patch below, then run the test with the following parameters: ./build/src/tarantool test/fuzz/lua/test_engine.lua --test_duration=180 --workers=1500 --verbose

Patch
diff --git a/test/fuzz/lua/test_engine.lua b/test/fuzz/lua/test_engine.lua
index cafeccb5da..ae19981262 100644
--- a/test/fuzz/lua/test_engine.lua
+++ b/test/fuzz/lua/test_engine.lua
@@ -514,15 +514,15 @@ local function setup(engine_name, space_id_func, test_dir, verbose)
         readahead = 16320,
         slab_alloc_factor = math.random(1, 2),
         vinyl_bloom_fpr = math.random(50) / 100,
-        vinyl_cache = oneof({0, 2}) * 1024 * 1024,
+        vinyl_cache = oneof({0, 2}) * 1024,
         vinyl_max_tuple_size = math.random(0, 100000),
-        vinyl_memory = 800 * 1024 * 1024,
-        vinyl_page_size = math.random(1024, 2048),
-        vinyl_range_size = 128 * 1024,
+        vinyl_memory = 5 * 1024,
+        vinyl_page_size = 256,
+        vinyl_range_size = 256,
         vinyl_read_threads = math.random(2, 10),
-        vinyl_run_count_per_level = math.random(1, 10),
-        vinyl_run_size_ratio = math.random(2, 5),
-        vinyl_timeout = math.random(1, 5),
+        vinyl_run_count_per_level = math.random(1, 5),
+        vinyl_run_size_ratio = 1.5,
+        vinyl_timeout = math.random(1, 10),
         vinyl_write_threads = math.random(2, 10),
         wal_cleanup_delay = 14400,
         wal_dir_rescan_delay = math.random(1, 20),
@@ -991,14 +991,14 @@ local ops = {
         end,
         args = function(_) return end,
     },
-    TX_ROLLBACK = {
-        func = function()
-            if box.is_in_txn() then
-                box.rollback()
-            end
-        end,
-        args = function(_) return end,
-    },
+    -- TX_ROLLBACK = {
+    --     func = function()
+    --         if box.is_in_txn() then
+    --             box.rollback()
+    --         end
+    --     end,
+    --     args = function(_) return end,
+    -- },
 
     SNAPSHOT_OP = {
         func = box_snapshot,
@@ -1443,30 +1443,30 @@ local function run_test(num_workers, test_duration, test_dir,
         table.insert(workers, f)
     end
 
-    local errinj_f = fiber.new(function(test_duration)
-        log.info('Fault injection fiber has started.')
-        local max_errinj_in_parallel = 5
-        local start = os.clock()
-        while os.clock() - start <= test_duration do
-            toggle_random_errinj(errinj_set, max_errinj_in_parallel, space)
-            fiber.sleep(2)
-        end
-        disable_all_errinj(errinj_set, space)
-        log.info('Fault injection fiber has finished.')
-    end, arg_test_duration)
-    errinj_f:set_joinable(true)
-    errinj_f:name('ERRINJ')
+    -- local errinj_f = fiber.new(function(test_duration)
+    --     log.info('Fault injection fiber has started.')
+    --     local max_errinj_in_parallel = 5
+    --     local start = os.clock()
+    --     while os.clock() - start <= test_duration do
+    --         toggle_random_errinj(errinj_set, max_errinj_in_parallel, space)
+    --         fiber.sleep(2)
+    --     end
+    --     disable_all_errinj(errinj_set, space)
+    --     log.info('Fault injection fiber has finished.')
+    -- end, arg_test_duration)
+    -- errinj_f:set_joinable(true)
+    -- errinj_f:name('ERRINJ')
 
     -- Stop the fault injection fiber first so that worker fibers can exit
     -- without getting stuck on some random timeout injection.
-    local ok, res = fiber.join(errinj_f)
-    if not ok then
-        log.info('ERROR: %s', json.encode(res))
-    end
+    -- local ok, res = fiber.join(errinj_f)
+    -- if not ok then
+    --     log.info('ERROR: %s', json.encode(res))
+    -- end
 
     local error_messages = {}
     for _, fb in ipairs(workers) do
-        ok, res = fiber.join(fb)
+        local ok, res = fiber.join(fb)
         if not ok then
             log.info('ERROR: %s', json.encode(res))
         else

Actual behavior

SIGABRT is triggered

Test log:

<snipped>

2025-01-10 15:12:58.543 [2080667] main/111/vinyl.scheduler vy_log.c:1330 V> commit vylog transaction
2025-01-10 15:12:58.543 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003069.index
2025-01-10 15:12:58.543 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003069.run
2025-01-10 15:12:58.543 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003075.index
2025-01-10 15:12:58.544 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003075.run
2025-01-10 15:12:58.544 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003081.index
2025-01-10 15:12:58.544 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003081.run
2025-01-10 15:12:58.544 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003089.index
2025-01-10 15:12:58.544 [2080667] coio xlog.c:2219 I> removed ./512/1/00000000000000003089.run
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_scheduler.c:1672 V> 512/1: completed compacting range (-inf..inf)
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_log.c:1296 V> begin vylog transaction
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_log.c:1350 V> write vylog record: create_run{run_id=3095, dump_lsn=724, dump_count=1, }
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_log.c:1350 V> write vylog record: insert_slice{range_id=1, run_id=3095, slice_id=3097, }
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_log.c:1350 V> write vylog record: dump_lsm{dump_lsn=724, }
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_log.c:1330 V> commit vylog transaction
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_scheduler.c:1324 V> 512/0: dump completed
2025-01-10 15:12:58.544 [2080667] main/111/vinyl.scheduler vy_regulator.c:295 I> dumped 147808 bytes in 0.0 s, rate 24.4 MB/s
Aborted (core dumped)

Backtrace:

(gdb) bt
#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=129253573393216) at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=129253573393216) at ./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=129253573393216, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#3  0x0000758e30242476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x0000758e302287f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x00005a6f65473c94 in mh_point_holes_resize (h=0x5a6f675d0200, arg=0) at /home/sergeyb/sources/MRG/tarantool/src/lib/salad/mhash.h:495
#6  0x00005a6f654736a2 in mh_point_holes_del_resize (h=0x5a6f675d0200, x=370, arg=0) at /home/sergeyb/sources/MRG/tarantool/src/lib/salad/mhash.h:389
#7  0x00005a6f65473631 in mh_point_holes_del (h=0x5a6f675d0200, x=370, arg=0) at /home/sergeyb/sources/MRG/tarantool/src/lib/salad/mhash.h:350
#8  0x00005a6f6547c4ac in point_hole_storage_delete (object=0x758d07bbd3a8) at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tx.c:3333
#9  0x00005a6f6547cf61 in memtx_tx_clear_txn_read_lists (txn=0x758d0813d038) at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tx.c:3594
#10 0x00005a6f6547d21c in memtx_tx_clean_txn (txn=0x758d0813d038) at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tx.c:3621
#11 0x00005a6f65557112 in txn_free (txn=0x758d0813d038) at /home/sergeyb/sources/MRG/tarantool/src/box/txn.c:481
#12 0x00005a6f6555a3df in txn_commit_impl (txn=0x758d0813d038, wait_mode=TXN_COMMIT_WAIT_MODE_COMPLETE) at /home/sergeyb/sources/MRG/tarantool/src/box/txn.c:1228
#13 0x00005a6f6555aa9c in box_txn_commit_ex (wait_mode=TXN_COMMIT_WAIT_MODE_COMPLETE) at /home/sergeyb/sources/MRG/tarantool/src/box/txn.c:1355
#14 0x00005a6f65674560 in lbox_commit (L=0x41f0ef80) at /home/sergeyb/sources/MRG/tarantool/src/box/lua/init.c:535
#15 0x00005a6f657685d7 in lj_BC_FUNCC () at buildvm_x86.dasc:811
#16 0x00005a6f6577614c in lua_pcall (L=0x41f0ef80, nargs=4, nresults=-1, errfunc=0) at /home/sergeyb/sources/MRG/tarantool/third_party/luajit/src/lj_api.c:1173
#17 0x00005a6f656d3956 in luaT_call (L=0x41f0ef80, nargs=4, nreturns=-1) at /home/sergeyb/sources/MRG/tarantool/src/lua/utils.c:708
#18 0x00005a6f656c8640 in lua_fiber_run_f (ap=0x758e27aa1368) at /home/sergeyb/sources/MRG/tarantool/src/lua/fiber.c:452
#19 0x00005a6f6541b1be in fiber_cxx_invoke(fiber_func, typedef __va_list_tag __va_list_tag *) (f=0x5a6f656c8583 <lua_fiber_run_f>, ap=0x758e27aa1368) at /home/sergeyb/sources/MRG/tarantool/src/lib/core/fiber.h:1324
#20 0x00005a6f65705255 in fiber_loop (data=0x0) at /home/sergeyb/sources/MRG/tarantool/src/lib/core/fiber.c:1167
#21 0x00005a6f65b5959e in coro_init () at /home/sergeyb/sources/MRG/tarantool/third_party/coro/coro.c:108
(gdb) 

Archive with tarantool binary, coredump, vinyl files: https://drive.google.com/file/d/1dJEeIKBBEztrNaElq2R9PTrbufFqD3Hy/view?usp=sharing

Reproduced again with a slightly different backtrace (log and coredump: gh-11022-1.zip):

#1  __pthread_kill_internal (signo=6, threadid=139468431719232) at ./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=139468431719232, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#3  0x00007ed885242476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x00007ed8852287f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x000060a0c7ca0c94 in mh_point_holes_resize (h=0x60a0c9826200, arg=0) at /home/sergeyb/sources/MRG/tarantool/src/lib/salad/mhash.h:495
#6  0x000060a0c7ca042e in mh_point_holes_put (h=0x60a0c9826200, node=0x7ed7d0f809c8, ret=0x7ed7d0f809d8, arg=0) at /home/sergeyb/sources/MRG/tarantool/src/lib/salad/mhash.h:315  
#7  0x000060a0c7ca9110 in point_hole_storage_new (index=0x60a0c99e5510, key=0x7ed880e28031 "\315\002", key_len=11, txn=0x7ed7b8949038) at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tx.c:3281
#8  0x000060a0c7ca95e7 in memtx_tx_track_point_slow (txn=0x7ed7b8949038, index=0x60a0c99e5510, key=0x7ed880e28031 "\315\002") at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tx.c:3358
#9  0x000060a0c7c75a7e in memtx_tx_track_point (txn=0x7ed7b8949038, space=0x60a0c985b8d0, index=0x60a0c99e5510, key=0x7ed880e28031 "\315\002") at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tx.h:277
#10 0x000060a0c7c8e81c in memtx_tree_index_get_internal<true> (base=0x60a0c99e5510, key=0x7ed880e28031 "\315\002", part_count=2, result=0x7ed7d0f80bf8) at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_tree.cc:1399
#11 0x000060a0c7cb92db in memtx_index_get (index=0x60a0c99e5510, key=0x7ed880e28031 "\315\002", part_count=2, result=0x7ed7d0f80bf8) at /home/sergeyb/sources/MRG/tarantool/src/box/memtx_engine.cc:2257
#12 0x000060a0c7cc1d14 in index_get (index=0x60a0c99e5510, key=0x7ed880e28031 "\315\002", part_count=2, result=0x7ed7d0f80bf8) at /home/sergeyb/sources/MRG/tarantool/src/box/index.h:928
#13 0x000060a0c7cc2e81 in sysview_index_get (base=0x60a0c99e68f0, key=0x7ed880e28031 "\315\002", part_count=2, result=0x41307480) at /home/sergeyb/sources/MRG/tarantool/src/box/sysview.c:167
#14 0x000060a0c7c65c18 in index_get (index=0x60a0c99e68f0, key=0x7ed880e28031 "\315\002", part_count=2, result=0x41307480) at /home/sergeyb/sources/MRG/tarantool/src/box/index.h:928
#15 0x000060a0c7c68d4f in box_index_get (space_id=289, index_id=2, key=0x7ed880e28031 "\315\002", key_end=0x7ed880e2803c "rue,\"parts\":[{\"fieldno\":2,\"sort_order\":\"asc\",\"type\":\"unsigned\",\"exclude_null\":false,\"is_nullable\":false},{\"fieldno\":3,\"sort_order\":\"asc\",\"type\":\"uuid\",\"exclude_null\":false,\"is_nullable\":false},{\"field"..., result=0x41307480) at /home/sergeyb/sources/MRG/tarantool/src/box/index.cc:384
#16 0x000060a0f2042bce in ?? ()

Expected behavior

No SIGABRT is triggered.

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions