Skip to content

Test test_global_overcommit is flaky #85972

@Avogar

Description

@Avogar

https://play.clickhouse.com/play?user=play#CldJVEgKICAgICd0ZXN0X2dsb2JhbF9vdmVyY29tbWl0JyBBUyBuYW1lX3N1YnN0ciwKICAgIDkwIEFTIGludGVydmFsX2RheXMsCiAgICAoJ1N0YXRlbGVzcyB0ZXN0cyAoYXNhbiknLCAnU3RhdGVsZXNzIHRlc3RzIChhZGRyZXNzKScsICdTdGF0ZWxlc3MgdGVzdHMgKGFkZHJlc3MsIGFjdGlvbnMpJykgQVMgYmFja3BvcnRfYW5kX3JlbGVhc2Vfc3BlY2lmaWNfY2hlY2tzClNFTEVDVAogICAgdG9TdGFydE9mRGF5KGNoZWNrX3N0YXJ0X3RpbWUpIEFTIGQsCiAgICBjb3VudCgpLAogICAgZ3JvdXBVbmlxQXJyYXkocHVsbF9yZXF1ZXN0X251bWJlcikgQVMgcHJzLAogICAgYW55KHJlcG9ydF91cmwpCkZST00gY2hlY2tzCldIRVJFICgobm93KCkgLSB0b0ludGVydmFsRGF5KGludGVydmFsX2RheXMpKSA8PSBjaGVja19zdGFydF90aW1lKSBBTkQgKHB1bGxfcmVxdWVzdF9udW1iZXIgTk9UIElOICgKICAgIFNFTEVDVCBwdWxsX3JlcXVlc3RfbnVtYmVyIEFTIHBybgogICAgRlJPTSBjaGVja3MKICAgIFdIRVJFIChwcm4gIT0gMCkgQU5EICgobm93KCkgLSB0b0ludGVydmFsRGF5KGludGVydmFsX2RheXMpKSA8PSBjaGVja19zdGFydF90aW1lKSBBTkQgKGNoZWNrX25hbWUgSU4gKGJhY2twb3J0X2FuZF9yZWxlYXNlX3NwZWNpZmljX2NoZWNrcykpCikpIEFORCAocG9zaXRpb24odGVzdF9uYW1lLCBuYW1lX3N1YnN0cikgPiAwKSBBTkQgKHRlc3Rfc3RhdHVzIElOICgnRkFJTCcsICdFUlJPUicsICdGTEFLWScpKQpHUk9VUCBCWSBkCk9SREVSIEJZIGQgREVTQwo=

For example: https://s3.amazonaws.com/clickhouse-test-reports/json.html?PR=85917&sha=57e917a32e697c305365478698d2c38c0399a20c&name_0=PR&name_1=Integration%20tests%20%28arm_binary%2C%20distributed%20plan%2C%201%2F4%29&name_1=Integration%20tests%20%28arm_binary%2C%20distributed%20plan%2C%201%2F4%29

    def test_global_overcommit():
        # NOTE: another option is to increase waiting time.
        if (
            node.is_built_with_thread_sanitizer()
            or node.is_built_with_address_sanitizer()
            or node.is_built_with_memory_sanitizer()
        ):
            pytest.skip("doesn't fit in memory limits")
    
        node.query("CREATE USER IF NOT EXISTS A")
        node.query("GRANT ALL ON *.* TO A")
        node.query("CREATE USER IF NOT EXISTS B")
        node.query("GRANT ALL ON *.* TO B")
    
        responses_A = list()
        responses_B = list()
        for i in range(50):
            responses_A.append(node.get_query_request(GLOBAL_TEST_QUERY_A, user="A"))
            responses_B.append(node.get_query_request(GLOBAL_TEST_QUERY_B, user="B"))
    
        overcommited_killed = False
        for response in responses_A:
            _, err = response.get_answer_and_error()
            if "MEMORY_LIMIT_EXCEEDED" in err:
                overcommited_killed = True
        finished = False
        for response in responses_B:
            _, err = response.get_answer_and_error()
            if err == "":
                finished = True
    
        assert overcommited_killed, "no overcommited task was killed"
        assert finished, "all tasks are killed"
    
>       node.query("DROP USER IF EXISTS A")

test_global_overcommit_tracker/test.py:63: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
helpers/cluster.py:4149: in query
    return self.client.query(
helpers/client.py:39: in wrap
    return func(self, *args, **kwargs)
helpers/client.py:79: in query
    ).get_answer()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <helpers.client.CommandRequest object at 0xff3fa23f1630>

    def get_answer(self):
        self.process.wait(timeout=DEFAULT_QUERY_TIMEOUT)
        self.stdout_file.seek(0)
        self.stderr_file.seek(0)
    
        stdout = self.stdout_file.read().decode("utf-8", errors="replace")
        stderr = self.stderr_file.read().decode("utf-8", errors="replace")
    
        if (
            self.timer is not None
            and not self.process_finished_before_timeout
            and not self.ignore_error
        ):
            logging.debug(f"Timed out. Last stdout:{stdout}, stderr:{stderr}")
            raise QueryTimeoutExceedException("Client timed out!")
    
        if (
            self.process.returncode != 0 or self.remove_trash_from_stderr(stderr)
        ) and not self.ignore_error:
>           raise QueryRuntimeException(
                "Client failed! Return code: {}, stderr: {}".format(
                    self.process.returncode, stderr
                ),
                self.process.returncode,
                stderr,
            )
E           helpers.client.QueryRuntimeException: Client failed! Return code: 209, stderr: Code: 209. DB::NetException: Timeout exceeded while reading from socket (peer: 172.16.1.2:9000, local: 172.16.1.1:64932, 300000 ms): (172.16.1.2:9000, local address: 172.16.1.1:64932). (SOCKET_TIMEOUT), Stack trace (when copying this message, always include the lines below):
E           
E           0. ./ci/tmp/build/./base/poco/Foundation/src/Exception.cpp:28: Poco::Exception::Exception(String const&, int) @ 0x000000001c28dcb8
E           1. ./ci/tmp/build/./src/Common/Exception.cpp:128: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e34c7ac
E           2. DB::Exception::Exception(String&&, int, String, bool) @ 0x0000000008dc9658
E           3. ./src/Common/NetException.h:26: DB::NetException::NetException<String, String, long>(int, FormatStringHelperImpl<std::type_identity<String>::type, std::type_identity<String>::type, std::type_identity<long>::type>, String&&, String&&, long&&) @ 0x000000000e508314
E           4. ./ci/tmp/build/./src/IO/ReadBufferFromPocoSocket.cpp:87: DB::ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char*, unsigned long) @ 0x000000000e507830
E           5. ./ci/tmp/build/./src/IO/ReadBufferFromPocoSocket.cpp:107: DB::ReadBufferFromPocoSocketBase::nextImpl() @ 0x000000000e507be8
E           6. ./ci/tmp/build/./src/IO/ReadBuffer.cpp:96: DB::ReadBuffer::next() @ 0x000000000e413270
E           7. ./src/IO/ReadBuffer.h:81: DB::Connection::receiveHello(Poco::Timespan const&) @ 0x0000000016cf61a8
E           8. ./ci/tmp/build/./src/Client/Connection.cpp:264: DB::Connection::connect(DB::ConnectionTimeouts const&) @ 0x0000000016cf45c4
E           9. ./ci/tmp/build/./src/Client/Connection.cpp:665: DB::Connection::getServerVersion(DB::ConnectionTimeouts const&, String&, unsigned long&, unsigned long&, unsigned long&, unsigned long&) @ 0x0000000016cf9b90
E           10. ./ci/tmp/build/./programs/client/Client.cpp:485: DB::Client::connect() @ 0x000000000e559f94
E           11. ./ci/tmp/build/./programs/client/Client.cpp:377: DB::Client::main(std::vector<String, std::allocator<String>> const&) @ 0x000000000e55916c
E           12. ./ci/tmp/build/./base/poco/Util/src/Application.cpp:315: Poco::Util::Application::run() @ 0x000000001c373b7c
E           13. ./ci/tmp/build/./programs/client/Client.cpp:1143: mainEntryClickHouseClient(int, char**) @ 0x000000000e562504
E           14. ./ci/tmp/build/./programs/main.cpp:381: main @ 0x0000000008dc303c
E           15. ? @ 0x00000000000273fc
E           16. ? @ 0x00000000000274cc

Metadata

Metadata

Assignees

Labels

flaky test — flaky test found by CI

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions