Skip to content

tarantool has crashed in vy_mem_wait_pinned #9995

@ligurio

Description

@ligurio

Bug description

tarantool crashed

  • OS: Linux
  • OS Version: 22.04
  • Architecture: amd64

commit b32b20c

Tarantool 3.2.0-entrypoint-21-g9aa1000a6d
Target: Linux-x86_64-Debug
Build options: cmake . -DCMAKE_INSTALL_PREFIX=/usr/local -DENABLE_BACKTRACE=TRUE
Compiler: GNU-11.4.0
C_FLAGS: -fexceptions -funwind-tables -fasynchronous-unwind-tables -fno-common -msse2 -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong -fPIC -fmacro-prefix-map=/home/sergeyb/sources/MRG/tarantool=. -std=c11 -Wall -Wextra -Wno-gnu-alignof-expression -fno-gnu89-inline -Wno-cast-function-type -Werror -g -ggdb -O0
CXX_FLAGS: -fexceptions -funwind-tables -fasynchronous-unwind-tables -fno-common -msse2 -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong -fPIC -fmacro-prefix-map=/home/sergeyb/sources/MRG/tarantool=. -std=c++11 -Wall -Wextra -Wno-invalid-offsetof -Wno-gnu-alignof-expression -Wno-cast-function-type -Werror -g -ggdb -O0

Steps to reproduce

Use reproducer script:

vinyl.lua
--[[ Тест для vinyl позволяет случайным образом выставлять error injections,
генерировать операции с индексами, генерировать операции с данными, настройки
vinyl в box.cfg. Все случайные операции и настройки зависят от seed, который
генерируется в самом начале теста.
]]

local TEST_DURATION = 10*60 -- Seconds.
local NUM_SP = 5
local NUM_TUPLES = 1000
local SEED = 10000
local N_OPS_IN_TX = 10

--

local fiber = require('fiber')
local log = require('log')
local math = require('math')

local seed = SEED or os.time()
seed = seed or math.randomseed(seed)
log.info(string.format("RANDOM SEED %d", seed))

local function trace(event, line)
    local s = debug.getinfo(2).short_src
    if s == 'vinyl.lua' then
        log.info(s .. ":" .. line)
    end
end

-- https://www.lua.org/pil/23.2.html
-- debug.sethook(trace, "l")

local function dict_keys(t)
    assert(next(t) ~= nil)
    local keys = {}
    for k, _ in pairs(t) do
        table.insert(keys, k)
    end
	return keys
end

local function random_elem(t)
    assert(type(t) == 'table')
    assert(next(t) ~= nil)

    local n = #t
    local idx = math.random(1, n)
    return t[idx]
end

local errinj_set = {
    ['ERRINJ_VY_GC'] = 'boolean',
    ['ERRINJ_VY_INDEX_DUMP'] = 'double',
    ['ERRINJ_VY_LOG_FLUSH'] = 'boolean',
    ['ERRINJ_VY_LOG_FLUSH_DELAY'] = 'boolean',
    ['ERRINJ_VY_READ_PAGE'] = 'boolean',
    ['ERRINJ_VY_READ_PAGE_DELAY'] = 'boolean',
    ['ERRINJ_VY_READ_PAGE_TIMEOUT'] = 'double',
    ['ERRINJ_VY_RUN_DISCARD'] = 'boolean',
    ['ERRINJ_VY_RUN_WRITE'] = 'boolean',
    ['ERRINJ_VY_RUN_WRITE_DELAY'] = 'boolean',
    ['ERRINJ_VY_SCHED_TIMEOUT'] = 'double',
    ['ERRINJ_VY_SQUASH_TIMEOUT'] = 'double',
    ['ERRINJ_VY_TASK_COMPLETE'] = 'boolean',
}

local TX_COMMIT = 0
local TX_ROLLBACK = 1
local TX_NOOP = 2

-- Forward declaration.
local function generate_dml()
end

local function generate_noop()
end

local function generate_tx(space)
    log.info("GENERATE_TX")
    if box.is_in_txn() then
        local tx_op = math.random(0, TX_NOOP * 2)
        if tx_op == TX_COMMIT then
            box.rollback()
        elseif tx_op == TX_ROLLBACK then
            box.commit()
        else
            -- None
        end
    else
        box.begin()
            for i = 1, N_OPS_IN_TX do
                generate_dml(space)
            end
        box.commit()
    end
end

local function generate_read(space)
    log.info("DML: READ")
    box.snapshot()

    -- Do fullscan.
    -- sk:select{}

    space:get(1)
    space:select(1)
    space:select({10}, { iterator = 'ge' })
    space:select({10}, { iterator = 'le' })
    space:select(math.random(count), { iterator = box.index.LE, limit = 10 })
end

local function generate_delete(space)
    log.info("DML: DELETE")
    space:delete({1, 1})
end

local function generate_insert(space)
    log.info("DML: NSERT")

    space:insert(1, math.random(100))
    -- local key = math.random(0, MAX_KEY)
    -- space:insert({key, data})

    pcall(space.insert, space, {math.random(MAX_KEY), math.random(MAX_VAL),
                        math.random(MAX_VAL), math.random(MAX_VAL), PADDING})
end

local function generate_upsert(space)
    log.info("DML: UPSERT")
    space:upsert({1, math.random(100)})
end

local function generate_update(space)
    log.info("DML: UPDATE")
    space:update({1, math.random(100)})
end

local function generate_replace(space)
    log.info("DML: REPLACE")
    local k = math.random(0, 1000)
    space:replace({k, math.random(100)})
end

local function init_space(space)
    log.info('CREATING TUPLES')
    for j = 1, NUM_TUPLES do
        box.begin()
            for i = 1, N_OPS_IN_TX do
                generate_insert(space)
            end
        box.commit()
    end

    --[[
    local dump_watermark = 7000000
    while box.stat.vinyl().memory.level0 < dump_watermark do
        generate_insert(space)
    end
    ]]
    log.info('snapshot')
    box.snapshot()
end

local function setup(spaces)
    log.info("SETUP")
    -- TODO: https://www.tarantool.io/en/doc/2.3/reference/configuration/
    box.cfg{
        -- listen = 3301,
        memtx_memory = 1024*1024,
        vinyl_cache = math.random(0, 10),
        vinyl_bloom_fpr = math.random(0, 0.5),
        vinyl_max_tuple_size = math.random(0, 100000),
        vinyl_memory = 256*1024,
        -- vinyl_page_size = math.random(1, 10),
        -- vinyl_range_size = math.random(1, 10),
        vinyl_run_size_ratio = math.random(2, 5),
        vinyl_run_count_per_level = math.random(1, 10),
        vinyl_read_threads = math.random(2, 5),
        vinyl_write_threads = math.random(2, 5),
        vinyl_timeout = math.random(1, 5),
        checkpoint_interval = 0,
        wal_mode = 'write',
    }
    log.info('FINISH BOX.CFG')

    for i = 1, NUM_SP do
        log.info(i)
        local space = box.schema.space.create('test' .. i, { engine = 'vinyl' })
        space:create_index('pk', { type = 'tree', parts = {{1, 'uint'}},
                           run_count_per_level = 100,
                           page_size = 128,
                           range_size = 1024 })
        space:create_index('secondary', { unique = false, parts = { 2, 'unsigned' }})
        -- init_space(space)
        spaces[i] = space
    end
    log.info('FINISH SETUP')
end

local function cleanup()
   log.info("CLEANUP")
   os.execute('rm -rf *.snap *.xlog *.vylog')
end

local function teardown(spaces)
   log.info("TEARDOWN")
   for i = 1, NUM_SP do
       spaces[i]:drop()
   end
   cleanup()
end

local dml_ops = {
    ['INSERT_OP'] = generate_insert,
    ['DELETE_OP'] = generate_delete,
    ['REPLACE_OP'] = generate_replace,
    ['UPSERT_OP'] = generate_upsert,
    ['UPDATE_OP'] = generate_update,
    ['NO_OP'] = generate_noop,
}

local function generate_dml(space)
    log.info("GENERATE DML")
    local op_name = random_elem(dict_keys(dml_ops))
    log.info(op_name)
	local fn = dml_ops[op_name]
	assert(type(fn) == 'function')
	local ok, err = pcall(fn, space)
	if ok ~= true then
        log.info('ERROR: ' .. err)
	end
end

local function index_create(space)
    log.info("INDEX_CREATE")
    space:create_index('i')
    space.index.i:alter({ bloom_fpr = 0.0 })
    for i = 1, 100000 do
        i = i + 1
        space:insert({ i, '' })
        space.index.i:alter({ page_size = i })
    end
end

local function index_drop(space)
    log.info("INDEX_DROP")
    if space.index.i ~= nil then
        space.index.i:drop()
    end
end

local function index_alter(space)
    log.info("INDEX_ALTER")
    -- TODO
end

local function index_compact(space)
    log.info("INDEX_COMPACT")
    if space.index.pk ~= nil then
        space.index.pk:compact()
    end
    if space.index.sk ~= nil then
        space.index.sk:compact()
    end
    -- fiber.create(function() space.index.sk:select() end)
    -- space.index.sk:alter{parts = {2, 'number'}}
    -- box.space.stock_reserved.index.primary:select({}, {limit=100})
end

local function index_noop(space)
    log.info("INDEX_NOOP")
end

local ddl_ops = {
    INDEX_CREATE = index_create,
    INDEX_DROP = index_drop,
    INDEX_ALTER = index_alter,
    INDEX_COMPACT = index_compact,
    INDEX_NO_OP = index_noop,
}

local function generate_ddl(space)
    log.info("GENERATE DDL")
    local op_name = random_elem(dict_keys(ddl_ops))
    log.info(op_name)
	local fn = ddl_ops[op_name]
	assert(type(fn) == 'function')
	local ok, err = pcall(fn, space)
	if ok ~= true then
        log.info('ERROR: ' .. err)
	end
end

local function set_err_injection()
    log.info("SET RANDOM ERROR INJECTIONS")
    local errinj_name = random_elem(dict_keys(errinj_set))
	local t = errinj_set[errinj_name]

    local errinj_val_enable = true
    local errinj_val_disable = false
    if t == 'double' then
        errinj_val_enable = math.random(0, 10)
        errinj_val_disable = 0
    end
    local pause_time = math.random(1, 20)

    log.info(string.format("SET %s -> %s", errinj_name, tostring(errinj_val_enable)))
    local ok, err
    ok, err = pcall(box.error.injection.set, errinj_name, errinj_val_enable)
    if ok ~= true then
        log.info(err)
    end
    fiber.sleep(pause_time)
    log.info(string.format("SET %s -> %s", errinj_name, tostring(errinj_val_disable)))
    ok, err = pcall(box.error.injection.set, errinj_name, errinj_val_disable)
    if ok ~= true then
        log.info(err)
    end
end

-- https://www.tarantool.io/en/doc/latest/reference/reference_lua/box_stat/vinyl/
local function print_stat(spaces)
    log.info("PRINT STATISTICS")
    local stat = box.stat.vinyl()
    log.info(string.format('transactions: %d, tx memory: %d',
                    stat.tx.transactions, stat.memory.tx))
    for i = 1, NUM_SP do
        stat = spaces[i].index.secondary:stat()
        log.info(string.format('memory rows %d bytes %d',
                    stat.memory.rows, stat.memory.bytes))
    end
end

local function main()
    local spaces = {}

    cleanup()
    setup(spaces)

    local f
    for i = 1, NUM_SP do
        f = fiber.create(function()
            log.info('START DML ' .. i)
            while true do generate_dml(spaces[i]); fiber.yield() end
        end)
        f:name('DML_' .. i)
    end

    for i = 1, NUM_SP do
        f = fiber.create(function()
            log.info('START TX ' .. i)
            while true do
                local ok, err = pcall(generate_tx, spaces[i])
                if ok ~= true then
                    log.info('TX: ' .. err)
                end
                fiber.yield()
            end
        end)
        f:name('TX_' .. i)
    end

    for i = 1, NUM_SP do
        f = fiber.create(function()
            log.info('START DDL ' .. i)
            while true do generate_ddl(spaces[i]); fiber.yield() end
        end)
        f:name('DDL_' .. i)
    end

    f = fiber.create(function()
        while true do
            local ok, err = pcall(box.snapshot)
            if ok ~= true then
                log.info('BOX SNAPSHOT: ' .. err)
            end; fiber.sleep(5)
        end
    end)
    f:name('snapshots')

    f = fiber.create(function()
        while true do set_err_injection(); fiber.sleep(5) end
    end)
    f:name('ERRINJ')

    f = fiber.create(function()
        while true do print_stat(spaces); fiber.sleep(5) end
    end)
    f:name('STATS')

    local start = os.clock()
    while os.clock() - start < TEST_DURATION do
        local n = math.random(1, NUM_SP)
        generate_dml(spaces[n])
        generate_tx(spaces[n])
        generate_ddl(spaces[n])
    end

    teardown(spaces)
end

main()

require('console').start()

How to run:

Just type tarantool vinyl.lua.

Actual behavior

reproduced only once and seed number is lost, sorry

Part of the log:

<snipped>

2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> GENERATE DDL                                                                             
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_NO_OP                                                                              
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_NOOP                                                                               
2024-05-07 18:45:03.809 [1428303] main/119/TX_1/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/123/TX_5/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/120/TX_2/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/122/TX_4/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/121/TX_3/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> GENERATE DDL                                                                             
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_NO_OP                                                                              
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_NOOP                                                                               
2024-05-07 18:45:03.809 [1428303] main/121/TX_3/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/122/TX_4/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/119/TX_1/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/123/TX_5/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/120/TX_2/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> GENERATE DDL                                                                             
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_DROP                                                                               
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_DROP                                                                               
2024-05-07 18:45:03.809 [1428303] main/120/TX_2/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/123/TX_5/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/121/TX_3/vinyl I> GENERATE_TX     
2024-05-07 18:45:03.809 [1428303] main/121/TX_3/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/122/TX_4/vinyl I> GENERATE_TX                                                                               
2024-05-07 18:45:03.809 [1428303] main/119/TX_1/vinyl I> GENERATE_TX     
2024-05-07 18:45:03.809 [1428303] main/120/TX_2/vinyl I> GENERATE_TX     
2024-05-07 18:45:03.809 [1428303] main/119/TX_1/vinyl I> GENERATE_TX   
2024-05-07 18:45:03.809 [1428303] main/123/TX_5/vinyl I> GENERATE_TX     
2024-05-07 18:45:03.809 [1428303] main/121/TX_3/vinyl I> GENERATE_TX     
2024-05-07 18:45:03.809 [1428303] main/122/TX_4/vinyl I> GENERATE_TX     
2024-05-07 18:45:03.809 [1428303] main/108/vinyl.scheduler I> 512/2: dump completed
2024-05-07 18:45:03.809 [1428303] main/108/vinyl.scheduler I> dumped 590152 bytes in 0.0 s, rate 102.2 MB/s
2024-05-07 18:45:03.809 [1428303] main/126/DDL_3/vinyl I> ERROR: Transaction has been aborted by conflict
2024-05-07 18:45:03.809 [1428303] main/118/DML_5/vinyl I> ERROR: Transaction has been aborted by conflict                                          
2024-05-07 18:45:03.809 [1428303] main/104/vinyl.lua I> dumping 295056 bytes, expected rate 10.0 MB/s, ETA 0.0 s, write rate (avg/max) 0.1/0.1 MB/s
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> GENERATE DDL   
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_COMPACT  
2024-05-07 18:45:03.809 [1428303] main/128/DDL_5/vinyl I> INDEX_COMPACT
2024-05-07 18:45:03.809 [1428303] main/122/TX_4/vinyl I> GENERATE_TX
2024-05-07 18:45:03.809 [1428303] main/121/TX_3/vinyl I> GENERATE_TX
2024-05-07 18:45:03.809 [1428303] main/120/TX_2/vinyl I> GENERATE_TX
2024-05-07 18:45:03.809 [1428303] main/119/TX_1/vinyl I> GENERATE_TX
2024-05-07 18:45:03.809 [1428303] main/123/TX_5/vinyl I> GENERATE_TX
Segmentation fault             
  code: 128                    
  addr: (nil)                   
  context: 0x5a0e77c5d800
  siginfo: 0x5a0e77c5d930  
  rax      0x2323232323232323 2531906049332683555                                                                                                  
  rbx      0x5a0e7623d8cb     99018158102731                                                                                                       
  rcx      0x5a0e77cd1dc0     99018185973184                                                                                                       
  rdx      0x75dea3410ea8     129599082139304                            
  rsi      0x75de9dc80c80     129598990322816                            
  rdi      0x5a0e77ec9360     99018188034912                           
  rsp      0x75de9dc80ce0     129598990322912                            
  rbp      0x75de9dc80d30     129598990322992                            
  r8       0x0                0                                          
  r9       0x75dea6896c50     129599137213520                            
  r10      0x75de9dd80fd0     129598991372240                            
  r11      0x0                0                                          
  r12      0x0                0                                                                                                                    
  r13      0x0                0                                          
  r14      0x0                0                                          
  r15      0x0                0                                          
  rip      0x5a0e762494a9     99018158150825                        
  eflags   0x10246            66118
  cs       0x33               51
  gs       0x0                0
  fs       0x0                0
  cr2      0x0                0
  err      0x0                0
  oldmask  0x0                0
  trapno   0xd                13
Current time: 1715096703
Please file a bug at https://github.com/tarantool/tarantool/issues                                                                                 
Attempting backtrace... Note: since the server has already crashed,                                                                                
this may fail as well                                                                                                                              
#1  0x5a0e76230e6d in crash_collect+256                                                                                                            
#2  0x5a0e762318a9 in crash_signal_cb+100                                
#3  0x75dea5c42520 in __sigaction+80                                     
#4  0x5a0e762494a9 in fiber_cond_wait_timeout+168                      
#5  0x5a0e76249654 in fiber_cond_wait+40                                 
#6  0x5a0e760528e0 in vy_mem_wait_pinned+32                              
#7  0x5a0e76057309 in vy_task_dump_new+742                               
#8  0x5a0e76059036 in vy_scheduler_peek_dump+243                         
#9  0x5a0e76059214 in vy_schedule+51                                     
#10 0x5a0e760597da in vy_scheduler_f+220                                 
#11 0x5a0e75f6c187 in fiber_cxx_invoke(int (*)(__va_list_tag*), __va_list_tag*)+34                                                                 
#12 0x5a0e7623d9a6 in fiber_loop+219                                     
#13 0x5a0e76630436 in coro_init+120                                      
Aborted (core dumped)   

Expected behavior

no crashes

Backtrace

gdb -c core.tarantool.1000.cab22df69b8e4a23b2e81e7328171f40.1434252.1715097530000000 tarantool (both files in attached archive)

core.zip

(gdb) bt
#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=132002158705344) at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=132002158705344) at ./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=132002158705344, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#3  0x0000780e25042476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x0000780e250287f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x000058e998b92952 in crash_signal_cb (signo=11, siginfo=0x58e99ae92930, context=0x58e99ae92800)
    at /home/sergeyb/sources/MRG/tarantool/src/lib/core/crash.c:203
#6  <signal handler called>
#7  0x000058e998baa4a9 in rlist_add_tail (item=0x780e22810ea8, head=0x58e99aef94b0)
    at /home/sergeyb/sources/MRG/tarantool/src/lib/small/include/small/rlist.h:91
#8  fiber_cond_wait_timeout (c=0x58e99aef94b0, timeout=3153600000) at /home/sergeyb/sources/MRG/tarantool/src/lib/core/fiber_cond.c:106
#9  0x000058e998baa654 in fiber_cond_wait (c=0x58e99aef94b0) at /home/sergeyb/sources/MRG/tarantool/src/lib/core/fiber_cond.c:121
#10 0x000058e9989b38e0 in vy_mem_wait_pinned (mem=0x58e99aef9370) at /home/sergeyb/sources/MRG/tarantool/src/box/vy_mem.h:250
#11 0x000058e9989b8309 in vy_task_dump_new (scheduler=0x58e99aec1cb0, worker=0x58e99b08bb80, lsm=0x58e99b0d3900, p_task=0x780e22080ee0)
    at /home/sergeyb/sources/MRG/tarantool/src/box/vy_scheduler.c:1394
#12 0x000058e9989ba036 in vy_scheduler_peek_dump (scheduler=0x58e99aec1cb0, ptask=0x780e22080ee0)
    at /home/sergeyb/sources/MRG/tarantool/src/box/vy_scheduler.c:1899
#13 0x000058e9989ba214 in vy_schedule (scheduler=0x58e99aec1cb0, ptask=0x780e22080ee0)
    at /home/sergeyb/sources/MRG/tarantool/src/box/vy_scheduler.c:1982
#14 0x000058e9989ba7da in vy_scheduler_f (va=0x780e22810ef0) at /home/sergeyb/sources/MRG/tarantool/src/box/vy_scheduler.c:2092
#15 0x000058e9988cd187 in fiber_cxx_invoke(fiber_func, typedef __va_list_tag __va_list_tag *) (f=0x58e9989ba6fe <vy_scheduler_f>, 
    ap=0x780e22810ef0) at /home/sergeyb/sources/MRG/tarantool/src/lib/core/fiber.h:1311
#16 0x000058e998b9e9a6 in fiber_loop (data=0x0) at /home/sergeyb/sources/MRG/tarantool/src/lib/core/fiber.c:1159
#17 0x000058e998f91436 in coro_init () at /home/sergeyb/sources/MRG/tarantool/third_party/coro/coro.c:108
(gdb) 

Metadata

Metadata

Assignees

Labels

2.11Target is 2.11 and all newer release/master branches3.1Target is 3.1 and all newer release/master branchesbugSomething isn't workingcrashfuzzingvinyl

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions