ClickHouse
diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt
Lines changed: 2 additions & 1 deletion b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt
Lines changed: 2 additions & 1 deletion
diff --git a/docs/en/interfaces/third-party/client-libraries.md
Lines changed: 1 addition & 0 deletions b/docs/en/interfaces/third-party/client-libraries.md
Lines changed: 1 addition & 0 deletions
diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h
Lines changed: 4 additions & 0 deletions b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h
Lines changed: 4 additions & 0 deletions
diff --git a/src/Core/Settings.cpp
Lines changed: 8 additions & 0 deletions b/src/Core/Settings.cpp
Lines changed: 8 additions & 0 deletions
diff --git a/src/Core/SettingsChangesHistory.cpp
Lines changed: 1 addition & 0 deletions b/src/Core/SettingsChangesHistory.cpp
Lines changed: 1 addition & 0 deletions
diff --git a/src/Dictionaries/CacheDictionary.cpp
Lines changed: 1 addition & 3 deletions b/src/Dictionaries/CacheDictionary.cpp
Lines changed: 1 addition & 3 deletions
diff --git a/src/Disks/DiskObjectStorage/RegisterDiskCache.cpp
Lines changed: 5 additions & 3 deletions b/src/Disks/DiskObjectStorage/RegisterDiskCache.cpp
Lines changed: 5 additions & 3 deletions
diff --git a/src/Functions/HighlightImpl.h
Lines changed: 206 additions & 0 deletions b/src/Functions/HighlightImpl.h
Lines changed: 206 additions & 0 deletions
@@ -1,4 +1,4 @@
-personal_ws-1.1 en 3602
+personal_ws-1.1 en 3603
 AArch
 ABIs
 ACLs
@@ -1529,6 +1529,7 @@ bcrypt's
 bech
 benchmarked
 benchmarking
+beeterty
 bfloat
 bigrams
 binlog
 
@@ -33,6 +33,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn't done
 - [glushkovds/php-clickhouse-schema-builder](https://packagist.org/packages/glushkovds/php-clickhouse-schema-builder)
 - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
 - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
+- [beeterty/clickhouse-php-client](https://github.com/beeterty-technologies/clickhouse-php-client)
 ### Go {#go}
 - [clickhouse](https://github.com/kshvakov/clickhouse/)
 - [go-clickhouse](https://github.com/roistat/go-clickhouse)
 
@@ -169,6 +169,10 @@ class AggregateFunctionNullBase : public IAggregateFunctionHelper<Derived>
 
     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
     {
+        if constexpr (result_is_nullable)  /// Flag handling is compiled in only when `result_is_nullable` is true.
+            if (getFlag(rhs))
+                setFlag(place);  /// Carry the flag of `rhs` over to `place` before the nested states are merged.
+
         nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, is_cancelled, arena);
     }
 
 
@@ -5983,6 +5983,14 @@ Sets the maximum number of matches for a single regular expression per row. Use
 
 Possible values:
 
+- Positive integer.
+)", 0) \
+    \
+    DECLARE(UInt64, highlight_max_matches_per_row, 10000, R"(
+Sets the maximum number of highlight matches per row in the [highlight](/sql-reference/functions/string-search-functions#highlight) function. Use it to protect against excessive memory usage when highlighting highly repetitive patterns in large texts.
+
+Possible values:
+
 - Positive integer.
 )", 0) \
     \
 
@@ -47,6 +47,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
             {"enable_materialized_cte", false, false, "New setting"},
             {"finalize_projection_parts_synchronously", false, false, "New setting to finalize projection parts synchronously during INSERT to reduce peak memory usage."},
             {"s3_propagate_credentials_to_other_storages", false, false, "New setting"},
+            {"highlight_max_matches_per_row", 10000, 10000, "New setting to limit the number of highlight matches per row to protect against excessive memory usage."},
         });
         addSettingsChanges(settings_changes_history, "26.3",
         {
 
@@ -291,9 +291,7 @@ ColumnUInt8::Ptr CacheDictionary<dictionary_key_type>::hasKeys(const Columns & k
     FetchResult result_of_fetch_from_storage;
 
     {
-        /// Write lock on storage
-        const ProfiledExclusiveLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs, ProfileEvents::DictCacheLockWriteHoldNs};
-
+        const ProfiledSharedLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs, ProfileEvents::DictCacheLockReadHoldNs};
         result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request, /*default_mask*/ nullptr);
     }
 
 
@@ -112,16 +112,18 @@ std::pair<FileCachePtr, FileCacheSettings> getCache(
     return std::pair(cache, file_cache_settings);
 }
 
-void registerDiskCache(DiskFactory & factory, bool /* global_skip_access_check */)
+void registerDiskCache(DiskFactory & factory, bool global_skip_access_check)
 {
-    auto creator = [](const String & name,
+    auto creator = [global_skip_access_check](const String & name,
                     const Poco::Util::AbstractConfiguration & config,
                     const String & config_prefix,
                     ContextPtr context,
                     const DisksMap & map,
                     bool attach,
                     bool custom_disk) -> DiskPtr
     {
+        const bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
+
         auto disk_name = config.getString(config_prefix + ".disk", "");
         if (disk_name.empty())
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk Cache requires `disk` field in config");
@@ -143,7 +145,7 @@ void registerDiskCache(DiskFactory & factory, bool /* global_skip_access_check *
                 disk_name, name);
 
         auto cached_disk_object_storage = std::dynamic_pointer_cast<DiskObjectStorage>(disk)->wrapWithCache(cache, cache_settings, name);
-        cached_disk_object_storage->startupImpl();
+        cached_disk_object_storage->startup(skip_access_check);
 
         LOG_INFO(
             getLogger("DiskCache"),
 
@@ -0,0 +1,206 @@
+#pragma once
+
+#include <algorithm>
+#include <vector>
+
+#include <base/types.h>
+#include <Common/Exception.h>
+#include <Common/Volnitsky.h>
+#include <Columns/ColumnString.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LIMIT_EXCEEDED;
+}
+
+struct HighlightImpl
+{
+    static constexpr size_t DEFAULT_MAX_MATCHES_PER_ROW = 10000;
+
+    /// Half-open byte range [begin, end) within a single haystack row.
+    struct Interval
+    {
+        size_t begin;
+        size_t end;
+    };
+
+    /// Sorts `intervals` and collapses them in place into disjoint ranges.
+    /// Ranges that overlap or merely touch, e.g. [0,5) and [5,10), are fused into one.
+    static void mergeIntervals(std::vector<Interval> & intervals)
+    {
+        if (intervals.size() < 2)
+            return;
+
+        /// Ascending by start; for equal starts the longer range goes first.
+        std::sort(intervals.begin(), intervals.end(), [](const Interval & lhs, const Interval & rhs)
+        {
+            if (lhs.begin != rhs.begin)
+                return lhs.begin < rhs.begin;
+            return lhs.end > rhs.end;
+        });
+
+        /// `last` is the index of the merged range currently being extended.
+        size_t last = 0;
+        for (size_t next = 1; next < intervals.size(); ++next)
+        {
+            const Interval candidate = intervals[next];
+            if (candidate.begin > intervals[last].end)
+            {
+                ++last;
+                intervals[last] = candidate;
+                continue;
+            }
+            intervals[last].end = std::max(intervals[last].end, candidate.end);
+        }
+        intervals.resize(last + 1);
+    }
+
+    /// A `VolnitskyCaseInsensitive` searcher paired with the byte length of its needle.
+    struct NeedleSearcher
+    {
+        VolnitskyCaseInsensitive searcher;
+        size_t needle_size;
+    };
+
+    /// Copies each haystack row into the result column, wrapping every merged match range in
+    /// `open_tag` / `close_tag`. Empty needles are skipped; rows without matches are copied as-is.
+    /// Throws LIMIT_EXCEEDED when a row collects more than `max_matches_per_row` raw matches.
+    static void execute(
+        const ColumnString::Chars & haystack_data,
+        const ColumnString::Offsets & haystack_offsets,
+        const std::vector<std::string_view> & needles,
+        const String & open_tag,
+        const String & close_tag,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets,
+        size_t input_rows_count,
+        UInt64 max_matches_per_row = DEFAULT_MAX_MATCHES_PER_ROW)
+    {
+        /// Reserve the input size plus one tag pair per row; rows with more matches
+        /// simply make the buffer grow later.
+        res_data.reserve(haystack_data.size() + input_rows_count * (open_tag.size() + close_tag.size()));
+        res_offsets.resize(input_rows_count);
+
+        /// One searcher per non-empty needle, built once and reused for every row.
+        /// The haystack size hint (third constructor argument) is passed as 0.
+        std::vector<NeedleSearcher> searchers;
+        searchers.reserve(needles.size());
+        for (const auto & needle : needles)
+        {
+            if (needle.empty())
+                continue;
+            searchers.push_back({VolnitskyCaseInsensitive(needle.data(), needle.size(), 0), needle.size()});
+        }
+
+        /// Scratch list of match ranges; cleared per row so its capacity is reused.
+        std::vector<Interval> intervals;
+        intervals.reserve(64);
+
+        ColumnString::Offset res_offset = 0;
+        ColumnString::Offset row_begin = 0;
+
+        for (size_t row = 0; row < input_rows_count; ++row)
+        {
+            const ColumnString::Offset row_end = haystack_offsets[row];
+            const size_t row_size = row_end - row_begin;
+
+            /// An empty row contributes no bytes; only its offset is recorded below.
+            if (row_size != 0)
+            {
+                const UInt8 * row_data = &haystack_data[row_begin];
+
+                /// Step 1: collect raw matches of all needles.
+                intervals.clear();
+                findAllMatches(row_data, row_size, searchers, intervals, max_matches_per_row);
+
+                if (!intervals.empty())
+                {
+                    /// Step 2: fuse overlapping ranges. Step 3: emit the tagged row.
+                    mergeIntervals(intervals);
+                    buildOutput(row_data, row_size, intervals, open_tag, close_tag, res_data, res_offset);
+                }
+                else
+                {
+                    /// Nothing matched: the row is copied verbatim.
+                    append(res_data, res_offset, row_data, row_size);
+                }
+            }
+
+            res_offsets[row] = res_offset;
+            row_begin = row_end;
+        }
+    }
+
+private:
+    /// Appends to `intervals` one range per occurrence of each needle in the haystack.
+    /// Scanning resumes one byte after every hit, so overlapping occurrences are all recorded.
+    /// Throws LIMIT_EXCEEDED once `intervals` holds more than `max_matches_per_row` entries.
+    static void findAllMatches(
+        const UInt8 * haystack,
+        size_t haystack_size,
+        const std::vector<NeedleSearcher> & searchers,
+        std::vector<Interval> & intervals,
+        UInt64 max_matches_per_row)
+    {
+        const UInt8 * const haystack_end = haystack + haystack_size;
+
+        for (const NeedleSearcher & entry : searchers)
+        {
+            for (const UInt8 * cursor = haystack; cursor < haystack_end;)
+            {
+                /// A result equal to `haystack_end` ends the scan for this needle.
+                const UInt8 * hit = entry.searcher.search(cursor, haystack_end - cursor);
+                if (hit == haystack_end)
+                    break;
+
+                const size_t start = hit - haystack;
+                intervals.push_back({start, start + entry.needle_size});
+                cursor = hit + 1;
+
+                if (intervals.size() > max_matches_per_row)
+                    throw Exception(
+                        ErrorCodes::LIMIT_EXCEEDED,
+                        "Too many highlight matches per row: {}, max: {}. "
+                        "You can increase this limit with the `highlight_max_matches_per_row` setting",
+                        intervals.size(), max_matches_per_row);
+            }
+        }
+    }
+
+    /// Emits one result row: bytes outside `intervals` are copied unchanged and each interval
+    /// is written between `open_tag` and `close_tag`. `intervals` is walked in the given order.
+    static void buildOutput(
+        const UInt8 * haystack,
+        size_t haystack_size,
+        const std::vector<Interval> & intervals,
+        const String & open_tag,
+        const String & close_tag,
+        ColumnString::Chars & res_data,
+        ColumnString::Offset & res_offset)
+    {
+        const auto emit = [&res_data, &res_offset](const void * src, size_t size)
+        {
+            append(res_data, res_offset, src, size);
+        };
+
+        size_t copied_up_to = 0;
+        for (const Interval & range : intervals)
+        {
+            /// Plain text between the previous range and this one.
+            if (copied_up_to < range.begin)
+                emit(haystack + copied_up_to, range.begin - copied_up_to);
+
+            if (!open_tag.empty())
+                emit(open_tag.data(), open_tag.size());
+
+            /// The matched bytes are taken from the haystack itself.
+            emit(haystack + range.begin, range.end - range.begin);
+
+            if (!close_tag.empty())
+                emit(close_tag.data(), close_tag.size());
+
+            copied_up_to = range.end;
+        }
+
+        /// Plain text after the final range.
+        if (copied_up_to < haystack_size)
+            emit(haystack + copied_up_to, haystack_size - copied_up_to);
+    }
+
+    /// Grows `data` by `size` bytes, copies `src` to index `offset` and advances `offset`.
+    /// NOTE(review): relies on `offset == data.size()` on entry — confirm callers pass an empty `res_data`.
+    static inline void append(
+        ColumnString::Chars & data,
+        ColumnString::Offset & offset,
+        const void * src,
+        size_t size)
+    {
+        const size_t grown_size = data.size() + size;
+        data.resize(grown_size);
+        memcpy(&data[offset], src, size);
+        offset += size;
+    }
+};
+
+}
Original file line number	Diff line number	Diff line change
`@@ -169,6 +169,10 @@ class AggregateFunctionNullBase : public IAggregateFunctionHelper<Derived>`
`169`	`169`
`170`	`170`	`void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override`
`171`	`171`	`{`
	`172`	`+ if constexpr (result_is_nullable)`
	`173`	`+ if (getFlag(rhs))`
	`174`	`+ setFlag(place);`
	`175`	`+`
`172`	`176`	`nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, is_cancelled, arena);`
`173`	`177`	`}`
`174`	`178`
Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()`
`47`	`47`	`{"enable_materialized_cte", false, false, "New setting"},`
`48`	`48`	`{"finalize_projection_parts_synchronously", false, false, "New setting to finalize projection parts synchronously during INSERT to reduce peak memory usage."},`
`49`	`49`	`{"s3_propagate_credentials_to_other_storages", false, false, "New setting"},`
	`50`	`+ {"highlight_max_matches_per_row", 10000, 10000, "New setting to limit the number of highlight matches per row to protect against excessive memory usage."},`
`50`	`51`	`});`
`51`	`52`	`addSettingsChanges(settings_changes_history, "26.3",`
`52`	`53`	`{`
Original file line number	Diff line number	Diff line change
`@@ -291,9 +291,7 @@ ColumnUInt8::Ptr CacheDictionary<dictionary_key_type>::hasKeys(const Columns & k`
`291`	`291`	`FetchResult result_of_fetch_from_storage;`
`292`	`292`
`293`	`293`	`{`
`294`		`- /// Write lock on storage`
`295`		`- const ProfiledExclusiveLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs, ProfileEvents::DictCacheLockWriteHoldNs};`
`296`		`-`
	`294`	`+ const ProfiledSharedLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs, ProfileEvents::DictCacheLockReadHoldNs};`
`297`	`295`	`result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request, /*default_mask*/ nullptr);`
`298`	`296`	`}`
`299`	`297`