Skip to content

Test test_global_overcommit is flaky #85972

@Avogar

Description

@Avogar

https://play.clickhouse.com/play?user=play#CldJVEgKICAgICd0ZXN0X2dsb2JhbF9vdmVyY29tbWl0JyBBUyBuYW1lX3N1YnN0ciwKICAgIDkwIEFTIGludGVydmFsX2RheXMsCiAgICAoJ1N0YXRlbGVzcyB0ZXN0cyAoYXNhbiknLCAnU3RhdGVsZXNzIHRlc3RzIChhZGRyZXNzKScsICdTdGF0ZWxlc3MgdGVzdHMgKGFkZHJlc3MsIGFjdGlvbnMpJykgQVMgYmFja3BvcnRfYW5kX3JlbGVhc2Vfc3BlY2lmaWNfY2hlY2tzClNFTEVDVAogICAgdG9TdGFydE9mRGF5KGNoZWNrX3N0YXJ0X3RpbWUpIEFTIGQsCiAgICBjb3VudCgpLAogICAgZ3JvdXBVbmlxQXJyYXkocHVsbF9yZXF1ZXN0X251bWJlcikgQVMgcHJzLAogICAgYW55KHJlcG9ydF91cmwpCkZST00gY2hlY2tzCldIRVJFICgobm93KCkgLSB0b0ludGVydmFsRGF5KGludGVydmFsX2RheXMpKSA8PSBjaGVja19zdGFydF90aW1lKSBBTkQgKHB1bGxfcmVxdWVzdF9udW1iZXIgTk9UIElOICgKICAgIFNFTEVDVCBwdWxsX3JlcXVlc3RfbnVtYmVyIEFTIHBybgogICAgRlJPTSBjaGVja3MKICAgIFdIRVJFIChwcm4gIT0gMCkgQU5EICgobm93KCkgLSB0b0ludGVydmFsRGF5KGludGVydmFsX2RheXMpKSA8PSBjaGVja19zdGFydF90aW1lKSBBTkQgKGNoZWNrX25hbWUgSU4gKGJhY2twb3J0X2FuZF9yZWxlYXNlX3NwZWNpZmljX2NoZWNrcykpCikpIEFORCAocG9zaXRpb24odGVzdF9uYW1lLCBuYW1lX3N1YnN0cikgPiAwKSBBTkQgKHRlc3Rfc3RhdHVzIElOICgnRkFJTCcsICdFUlJPUicsICdGTEFLWScpKQpHUk9VUCBCWSBkCk9SREVSIEJZIGQgREVTQwo=

For example: https://s3.amazonaws.com/clickhouse-test-reports/json.html?PR=85917&sha=57e917a32e697c305365478698d2c38c0399a20c&name_0=PR&name_1=Integration%20tests%20%28arm_binary%2C%20distributed%20plan%2C%201%2F4%29&name_1=Integration%20tests%20%28arm_binary%2C%20distributed%20plan%2C%201%2F4%29

    def test_global_overcommit():
        # NOTE: another option is to increase waiting time.
        if (
            node.is_built_with_thread_sanitizer()
            or node.is_built_with_address_sanitizer()
            or node.is_built_with_memory_sanitizer()
        ):
            pytest.skip("doesn't fit in memory limits")
    
        node.query("CREATE USER IF NOT EXISTS A")
        node.query("GRANT ALL ON *.* TO A")
        node.query("CREATE USER IF NOT EXISTS B")
        node.query("GRANT ALL ON *.* TO B")
    
        responses_A = list()
        responses_B = list()
        for i in range(50):
            responses_A.append(node.get_query_request(GLOBAL_TEST_QUERY_A, user="A"))
            responses_B.append(node.get_query_request(GLOBAL_TEST_QUERY_B, user="B"))
    
        overcommited_killed = False
        for response in responses_A:
            _, err = response.get_answer_and_error()
            if "MEMORY_LIMIT_EXCEEDED" in err:
                overcommited_killed = True
        finished = False
        for response in responses_B:
            _, err = response.get_answer_and_error()
            if err == "":
                finished = True
    
        assert overcommited_killed, "no overcommited task was killed"
        assert finished, "all tasks are killed"
    
>       node.query("DROP USER IF EXISTS A")

test_global_overcommit_tracker/test.py:63: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
helpers/cluster.py:4149: in query
    return self.client.query(
helpers/client.py:39: in wrap
    return func(self, *args, **kwargs)
helpers/client.py:79: in query
    ).get_answer()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <helpers.client.CommandRequest object at 0xff3fa23f1630>

    def get_answer(self):
        self.process.wait(timeout=DEFAULT_QUERY_TIMEOUT)
        self.stdout_file.seek(0)
        self.stderr_file.seek(0)
    
        stdout = self.stdout_file.read().decode("utf-8", errors="replace")
        stderr = self.stderr_file.read().decode("utf-8", errors="replace")
    
        if (
            self.timer is not None
            and not self.process_finished_before_timeout
            and not self.ignore_error
        ):
            logging.debug(f"Timed out. Last stdout:{stdout}, stderr:{stderr}")
            raise QueryTimeoutExceedException("Client timed out!")
    
        if (
            self.process.returncode != 0 or self.remove_trash_from_stderr(stderr)
        ) and not self.ignore_error:
>           raise QueryRuntimeException(
                "Client failed! Return code: {}, stderr: {}".format(
                    self.process.returncode, stderr
                ),
                self.process.returncode,
                stderr,
            )
E           helpers.client.QueryRuntimeException: Client failed! Return code: 209, stderr: Code: 209. DB::NetException: Timeout exceeded while reading from socket (peer: 172.16.1.2:9000, local: 172.16.1.1:64932, 300000 ms): (172.16.1.2:9000, local address: 172.16.1.1:64932). (SOCKET_TIMEOUT), Stack trace (when copying this message, always include the lines below):
E           
E           0. ./ci/tmp/build/./base/poco/Foundation/src/Exception.cpp:28: Poco::Exception::Exception(String const&, int) @ 0x000000001c28dcb8
E           1. ./ci/tmp/build/./src/Common/Exception.cpp:128: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e34c7ac
E           2. DB::Exception::Exception(String&&, int, String, bool) @ 0x0000000008dc9658
E           3. ./src/Common/NetException.h:26: DB::NetException::NetException<String, String, long>(int, FormatStringHelperImpl<std::type_identity<String>::type, std::type_identity<String>::type, std::type_identity<long>::type>, String&&, String&&, long&&) @ 0x000000000e508314
E           4. ./ci/tmp/build/./src/IO/ReadBufferFromPocoSocket.cpp:87: DB::ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char*, unsigned long) @ 0x000000000e507830
E           5. ./ci/tmp/build/./src/IO/ReadBufferFromPocoSocket.cpp:107: DB::ReadBufferFromPocoSocketBase::nextImpl() @ 0x000000000e507be8
E           6. ./ci/tmp/build/./src/IO/ReadBuffer.cpp:96: DB::ReadBuffer::next() @ 0x000000000e413270
E           7. ./src/IO/ReadBuffer.h:81: DB::Connection::receiveHello(Poco::Timespan const&) @ 0x0000000016cf61a8
E           8. ./ci/tmp/build/./src/Client/Connection.cpp:264: DB::Connection::connect(DB::ConnectionTimeouts const&) @ 0x0000000016cf45c4
E           9. ./ci/tmp/build/./src/Client/Connection.cpp:665: DB::Connection::getServerVersion(DB::ConnectionTimeouts const&, String&, unsigned long&, unsigned long&, unsigned long&, unsigned long&) @ 0x0000000016cf9b90
E           10. ./ci/tmp/build/./programs/client/Client.cpp:485: DB::Client::connect() @ 0x000000000e559f94
E           11. ./ci/tmp/build/./programs/client/Client.cpp:377: DB::Client::main(std::vector<String, std::allocator<String>> const&) @ 0x000000000e55916c
E           12. ./ci/tmp/build/./base/poco/Util/src/Application.cpp:315: Poco::Util::Application::run() @ 0x000000001c373b7c
E           13. ./ci/tmp/build/./programs/client/Client.cpp:1143: mainEntryClickHouseClient(int, char**) @ 0x000000000e562504
E           14. ./ci/tmp/build/./programs/main.cpp:381: main @ 0x0000000008dc303c
E           15. ? @ 0x00000000000273fc
E           16. ? @ 0x00000000000274cc

Metadata

Metadata

Assignees

Labels

flaky test (flaky test found by CI)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions