Commit 992f0df

Merge pull request #11149 from filimonov/kafka_reschedule
Fixed reschedule issue in Kafka
2 parents: 3399e57 + c73b837

4 files changed: 64 additions & 9 deletions


src/Storages/Kafka/KafkaBlockInputStream.h

Lines changed: 1 addition & 0 deletions

@@ -25,6 +25,7 @@ class KafkaBlockInputStream : public IBlockInputStream
     void readSuffixImpl() override;

     void commit();
+    bool isStalled() const { return buffer->isStalled(); }

 private:
     StorageKafka & storage;

src/Storages/Kafka/ReadBufferFromKafkaConsumer.h

Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@ class ReadBufferFromKafkaConsumer : public ReadBuffer

     bool hasMorePolledMessages() const;
     bool polledDataUnusable() const { return (was_stopped || rebalance_happened); }
+    bool isStalled() const { return stalled; }

     void storeLastReadMessageOffset();
     void resetToLastCommitted(const char * msg);
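Both header changes only expose an existing `stalled` flag upward: `KafkaBlockInputStream::isStalled()` delegates to the buffer's `isStalled()`. The consumer's .cpp, where the flag is actually maintained, is not part of this commit, so the following is a minimal hypothetical sketch of the idea — the names `StallAwareConsumer` and `pollBatch` are invented for illustration, not ClickHouse API. A poll that yields nothing within its timeout marks the buffer stalled, so callers can back off instead of spinning.

```cpp
#include <chrono>
#include <string>
#include <vector>

// Hypothetical sketch, not the real ReadBufferFromKafkaConsumer: a poll that
// returns no messages within the timeout sets `stalled`, which isStalled()
// then exposes to the block stream above it.
class StallAwareConsumer
{
public:
    bool isStalled() const { return stalled; }

    bool poll()
    {
        auto messages = pollBatch(poll_timeout);
        if (messages.empty())
        {
            stalled = true; // nothing arrived in time: report a stall upward
            return false;
        }
        stalled = false;
        // ... hand the polled messages to the decoding buffer ...
        return true;
    }

private:
    // Stand-in for the actual librdkafka/cppkafka poll call.
    std::vector<std::string> pollBatch(std::chrono::milliseconds /*timeout*/) { return {}; }

    std::chrono::milliseconds poll_timeout{5000};
    bool stalled = false;
};

int main()
{
    StallAwareConsumer consumer;
    consumer.poll();                     // the stand-in poll yields nothing...
    return consumer.isStalled() ? 0 : 1; // ...so the buffer reports a stall
}
```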

src/Storages/Kafka/StorageKafka.cpp

Lines changed: 23 additions & 8 deletions

@@ -51,6 +51,7 @@ namespace
 {
     const auto RESCHEDULE_MS = 500;
     const auto CLEANUP_TIMEOUT_MS = 3000;
+    const auto MAX_THREAD_WORK_DURATION_MS = 60000; // once per minute leave do reschedule (we can't lock threads in pool forever)

     /// Configuration prefix
     const String CONFIG_PREFIX = "kafka";
@@ -386,6 +387,8 @@ void StorageKafka::threadFunc()
     size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size();
     if (dependencies_count)
     {
+        auto start_time = std::chrono::steady_clock::now();
+
         // Keep streaming as long as there are attached views and streaming is not cancelled
         while (!stream_cancelled && num_created_consumers > 0)
         {
@@ -394,9 +397,21 @@ void StorageKafka::threadFunc()

             LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count);

-            // Reschedule if not limited
-            if (!streamToViews())
+            // Exit the loop & reschedule if some stream stalled
+            auto some_stream_is_stalled = streamToViews();
+            if (some_stream_is_stalled)
+            {
+                LOG_TRACE(log, "Stream(s) stalled. Reschedule.");
+                break;
+            }
+
+            auto ts = std::chrono::steady_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts-start_time);
+            if (duration.count() > MAX_THREAD_WORK_DURATION_MS)
+            {
+                LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule.");
                 break;
+            }
         }
     }
 }
@@ -459,15 +474,15 @@ bool StorageKafka::streamToViews()
     // It will be cancelled on underlying layer (kafka buffer)
     std::atomic<bool> stub = {false};
     copyData(*in, *block_io.out, &stub);
+
+    bool some_stream_is_stalled = false;
     for (auto & stream : streams)
+    {
+        some_stream_is_stalled = some_stream_is_stalled || stream->as<KafkaBlockInputStream>()->isStalled();
         stream->as<KafkaBlockInputStream>()->commit();
+    }

-    // Check whether the limits were applied during query execution
-    bool limits_applied = false;
-    const BlockStreamProfileInfo & info = in->getProfileInfo();
-    limits_applied = info.hasAppliedLimit();
-
-    return limits_applied;
+    return some_stream_is_stalled;
 }

 void registerStorageKafka(StorageFactory & factory)
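Taken together, the two break conditions implement a cooperative pattern for background tasks in a shared thread pool: keep streaming while data keeps arriving, but give the thread back either when a consumer stalls or after roughly a minute of continuous work, letting the scheduler re-run the task later. Below is a hedged, self-contained distillation of that loop; `streamOnce()` is a stand-in for `streamToViews()` that merely simulates work, not ClickHouse API.

```cpp
#include <chrono>
#include <iostream>
#include <thread>

// Distilled from the StorageKafka::threadFunc() change above: keep streaming
// while there is work, but exit the loop (so the task gets rescheduled) either
// when a stream stalls or once the thread has held its pool slot longer than
// MAX_THREAD_WORK_DURATION_MS.
constexpr auto MAX_THREAD_WORK_DURATION_MS = 60000;

bool streamOnce()
{
    static int calls = 0;
    std::this_thread::sleep_for(std::chrono::milliseconds(10)); // pretend to stream one block
    return ++calls >= 4; // true means "some stream stalled"
}

int main()
{
    const auto start_time = std::chrono::steady_clock::now();
    while (true)
    {
        if (streamOnce())
        {
            std::cout << "Stream(s) stalled. Reschedule.\n";
            break;
        }
        const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now() - start_time);
        if (elapsed.count() > MAX_THREAD_WORK_DURATION_MS)
        {
            std::cout << "Thread work duration limit exceeded. Reschedule.\n";
            break;
        }
    }
}
```

Before this change, the loop keyed off whether query limits had been applied during streaming, which — per the test added below — made the task break out and reschedule (a RESCHEDULE_MS = 500 ms pause) between blocks even when more messages were already queued.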

tests/integration/test_storage_kafka/test.py

Lines changed: 39 additions & 1 deletion

@@ -1017,7 +1017,10 @@ def test_kafka_flush_by_block_size(kafka_cluster):

     time.sleep(1)

-    result = instance.query('SELECT count() FROM test.view')
+    # TODO: due to https://github.com/ClickHouse/ClickHouse/issues/11216
+    # second flush happens earlier than expected, so we have 2 parts here instead of one
+    # flush by block size works correctly, so the feature checked by the test is working correctly
+    result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'")
     # print(result)

     # kafka_cluster.open_bash_shell('instance')
@@ -1390,6 +1393,41 @@ def produce():
     assert TSV(result) == TSV('{0}\t{0}\t{0}'.format(i[0]-1)), 'Missing data!'


+
+@pytest.mark.timeout(120)
+def test_bad_reschedule(kafka_cluster):
+    messages = [json.dumps({'key': j+1, 'value': j+1}) for j in range(20000)]
+    kafka_produce('test_bad_reschedule', messages)
+
+    instance.query('''
+        CREATE TABLE test.kafka (key UInt64, value UInt64)
+            ENGINE = Kafka
+            SETTINGS kafka_broker_list = 'kafka1:19092',
+                     kafka_topic_list = 'test_bad_reschedule',
+                     kafka_group_name = 'test_bad_reschedule',
+                     kafka_format = 'JSONEachRow',
+                     kafka_max_block_size = 1000;
+
+        CREATE MATERIALIZED VIEW test.destination Engine=Log AS
+        SELECT
+            key,
+            now() as consume_ts,
+            value,
+            _topic,
+            _key,
+            _offset,
+            _partition,
+            _timestamp
+        FROM test.kafka;
+        ''')
+
+    while int(instance.query("SELECT count() FROM test.destination")) < 20000:
+        print("Waiting for consume")
+        time.sleep(1)
+
+    assert int(instance.query("SELECT max(consume_ts) - min(consume_ts) FROM test.destination")) < 8
+
+
 @pytest.mark.timeout(1200)
 def test_kafka_duplicates_when_commit_failed(kafka_cluster):
     messages = [json.dumps({'key': j+1, 'value': 'x' * 300}) for j in range(22)]
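The `< 8` second bound in the new test's assertion follows from arithmetic over constants visible in this commit. A small illustration (not part of the commit) of why the pre-fix behaviour trips it:

```cpp
#include <iostream>

// Worst-case pre-fix timing for test_bad_reschedule: 20000 messages consumed
// in blocks of kafka_max_block_size = 1000 gives ~20 blocks. If each block is
// followed by a full RESCHEDULE_MS = 500 ms reschedule pause, consumption is
// stretched over at least 10 s, so the spread of consume_ts fails the < 8 s
// assertion before the fix and passes comfortably after it.
int main()
{
    const int messages = 20000;
    const int block_size = 1000;
    const double reschedule_s = 0.5;
    std::cout << "worst-case reschedule overhead: "
              << (messages / block_size) * reschedule_s << " s\n"; // prints 10 s
}
```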
