Conversation
|
This PR has a migration; here is the generated SQL -- start migrations
-- forward migration generic_metrics : 0010_counters_aggregate_table
Local op: CREATE TABLE IF NOT EXISTS generic_metric_counters_aggregated_local (org_id UInt64, project_id UInt64, metric_id UInt64, granularity UInt8, timestamp DateTime CODEC (DoubleDelta), retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), value AggregateFunction(sum, Float64), use_case_id LowCardinality(String)) ENGINE ReplicatedAggregatingMergeTree('/clickhouse/tables/generic_metrics_counters/{shard}/default/generic_metric_counters_aggregated_local', '{replica}') PRIMARY KEY (org_id, project_id, metric_id, granularity, timestamp) ORDER BY (org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.indexed_value, tags.raw_value, retention_days, use_case_id) PARTITION BY (retention_days, toMonday(timestamp)) TTL timestamp + toIntervalDay(retention_days) SETTINGS index_granularity=2048;
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD COLUMN IF NOT EXISTS _indexed_tags_hash Array(UInt64) MATERIALIZED arrayMap((k, v) -> cityHash64(concat(toString(k), '=', toString(v))), tags.key, tags.indexed_value);
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD COLUMN IF NOT EXISTS _raw_tags_hash Array(UInt64) MATERIALIZED arrayMap((k, v) -> cityHash64(concat(toString(k), '=', v)), tags.key, tags.raw_value);
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD INDEX IF NOT EXISTS bf_indexed_tags_hash _indexed_tags_hash TYPE bloom_filter() GRANULARITY 1;
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD INDEX IF NOT EXISTS bf_raw_tags_hash _raw_tags_hash TYPE bloom_filter() GRANULARITY 1;
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD INDEX IF NOT EXISTS bf_tags_key_hash tags.key TYPE bloom_filter() GRANULARITY 1;
Distributed op: CREATE TABLE IF NOT EXISTS generic_metric_counters_aggregated_dist (org_id UInt64, project_id UInt64, metric_id UInt64, granularity UInt8, timestamp DateTime CODEC (DoubleDelta), retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), value AggregateFunction(sum, Float64), use_case_id LowCardinality(String)) ENGINE Distributed(cluster_one_sh, default, generic_metric_counters_aggregated_local);
-- end forward migration generic_metrics : 0010_counters_aggregate_table
-- backward migration generic_metrics : 0010_counters_aggregate_table
Distributed op: DROP TABLE IF EXISTS generic_metric_counters_aggregated_dist;
Local op: DROP TABLE IF EXISTS generic_metric_counters_aggregated_local;
-- end backward migration generic_metrics : 0010_counters_aggregate_table
-- forward migration generic_metrics : 0011_counters_raw_table
Local op: CREATE TABLE IF NOT EXISTS generic_metric_counters_raw_local (use_case_id LowCardinality(String), org_id UInt64, project_id UInt64, metric_id UInt64, timestamp DateTime, retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), set_values Array(UInt64), count_value Float64, distribution_values Array(Float64), metric_type LowCardinality(String), materialization_version UInt8, timeseries_id UInt32, partition UInt16, offset UInt64, granularities Array(UInt8)) ENGINE ReplicatedMergeTree('/clickhouse/tables/generic_metrics_counters/{shard}/default/generic_metric_counters_raw_local', '{replica}') ORDER BY (use_case_id, org_id, project_id, metric_id, timestamp) PARTITION BY (toStartOfInterval(timestamp, toIntervalDay(3))) TTL timestamp + toIntervalDay(7);
Distributed op: CREATE TABLE IF NOT EXISTS generic_metric_counters_raw_dist (use_case_id LowCardinality(String), org_id UInt64, project_id UInt64, metric_id UInt64, timestamp DateTime, retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), set_values Array(UInt64), count_value Float64, distribution_values Array(Float64), metric_type LowCardinality(String), materialization_version UInt8, timeseries_id UInt32, partition UInt16, offset UInt64, granularities Array(UInt8)) ENGINE Distributed(cluster_one_sh, default, generic_metric_counters_raw_local, cityHash64(timeseries_id));
-- end forward migration generic_metrics : 0011_counters_raw_table
-- backward migration generic_metrics : 0011_counters_raw_table
Distributed op: DROP TABLE IF EXISTS generic_metric_counters_raw_dist;
Local op: DROP TABLE IF EXISTS generic_metric_counters_raw_local;
-- end backward migration generic_metrics : 0011_counters_raw_table
-- forward migration generic_metrics : 0012_counters_mv
Local op: CREATE MATERIALIZED VIEW IF NOT EXISTS generic_metric_counters_aggregation_mv TO generic_metric_counters_aggregated_local (org_id UInt64, project_id UInt64, metric_id UInt64, granularity UInt8, timestamp DateTime CODEC (DoubleDelta), retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), value AggregateFunction(sum, Float64), use_case_id LowCardinality(String)) AS
SELECT
use_case_id,
org_id,
project_id,
metric_id,
arrayJoin(granularities) as granularity,
tags.key,
tags.indexed_value,
tags.raw_value,
toDateTime(multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1) *
intDiv(toUnixTimestamp(timestamp),
multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1))) as timestamp,
retention_days,
sumState(count_value) as value
FROM generic_metric_counters_raw_local
WHERE materialization_version = 1
AND metric_type = 'counter'
GROUP BY
use_case_id,
org_id,
project_id,
metric_id,
tags.key,
tags.indexed_value,
tags.raw_value,
timestamp,
granularity,
retention_days
;
-- end forward migration generic_metrics : 0012_counters_mv
-- backward migration generic_metrics : 0012_counters_mv
Local op: DROP TABLE IF EXISTS generic_metric_counters_aggregation_mv;
-- end backward migration generic_metrics : 0012_counters_mv |
Codecov Report — Base: 92.15% // Head: 92.21% // Increases project coverage by +0.06%.
Additional details and impacted files@@ Coverage Diff @@
## master #3612 +/- ##
==========================================
+ Coverage 92.15% 92.21% +0.06%
==========================================
Files 744 751 +7
Lines 34436 34629 +193
==========================================
+ Hits 31734 31933 +199
+ Misses 2702 2696 -6
Help us with your feedback. Take ten seconds to tell us how you rate us. Have a feature suggestion? Share it here. ☔ View full report at Codecov. |
There was a problem hiding this comment.
This mostly looks good to me, except the storage set key. I do remember @fpacifici saying at one point that I had constructed indexes wrong on some table[s], so we should make sure that mistake isn't repeated here
snuba/snuba_migrations/generic_metrics/0011_counters_raw_table.py
Outdated
Show resolved
Hide resolved
| operations.CreateTable( | ||
| storage_set=self.storage_set_key, | ||
| table_name=self.local_table_name, | ||
| engine=table_engines.AggregatingMergeTree( |
There was a problem hiding this comment.
Have you considered using a SummingMergeTree here instead? And what would be the pros and cons?
There was a problem hiding this comment.
I haven't considered using the SummingMergeTree. I used the AggregatingMergeTree based on what is being done by the existing counters table of the release health metrics dataset.
Let me know if you have a preference on which one to use (with possibly the reasoning behind it). I will see at that point if I need to change something or not
|
PR reverted: bcddfbf |
Context
The generic metrics platform does not have the counters dataset. It wasn't created because there was no need for the dataset. Now there is a need for counters on the generic metrics platform.
Additional information
The dataset is very similar to
generic_metrics_sets or generic_metrics_distributions. The primary difference is how it populates the count.
Testing
Validated that the migration was run successfully and the tables were created
generic_metrics
[X] 0001_sets_aggregate_table
[X] 0002_sets_raw_table
[X] 0003_sets_mv
[X] 0004_sets_raw_add_granularities
[X] 0005_sets_replace_mv
[X] 0006_sets_raw_add_granularities_dist_table
[X] 0007_distributions_aggregate_table
[X] 0008_distributions_raw_table
[X] 0009_distributions_mv
[X] 0010_counters_aggregate_table
[X] 0011_counters_raw_table
[X] 0012_counters_mv
6749ecd45d3d :) show create table generic_metric_counters_aggregated_local
SHOW CREATE TABLE generic_metric_counters_aggregated_local
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.generic_metric_counters_aggregated_local (
`org_id` UInt64, `project_id` UInt64, `metric_id` UInt64, `granularity` UInt8, `timestamp` DateTime CODEC(DoubleDelta), `retention_days` UInt16, `tags.key` Array(UInt64), `tags.indexed_value` Array(UInt64), `tags.raw_value` Array(String), `value` AggregateFunction(sum, Float64), `use_case_id` LowCardinality(String), `_indexed_tags_hash` Array(UInt64) MATERIALIZED arrayMap((k, v) -> cityHash64(concat(toString(k), '=', toString(v))), tags.key, tags.indexed_value), `_raw_tags_hash` Array(UInt64) MATERIALIZED arrayMap((k, v) -> cityHash64(concat(toString(k), '=', v)), tags.key, tags.raw_value), INDEX bf_indexed_tags_hash _indexed_tags_hash TYPE bloom_filter() GRANULARITY 1, INDEX bf_raw_tags_hash _raw_tags_hash TYPE bloom_filter() GRANULARITY 1, INDEX bf_tags_key_hash tags.key TYPE bloom_filter() GRANULARITY 1) ENGINE = AggregatingMergeTree() PARTITION BY (retention_days, toMonday(timestamp)) PRIMARY KEY (org_id, project_id, metric_id, granularity, timestamp) ORDER BY (org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.indexed_value, tags.raw_value, retention_days, use_case_id) TTL timestamp + toIntervalDay(retention_days) SETTINGS index_granularity = 2048 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.006 sec.
6749ecd45d3d :) show create table generic_metric_counters_raw_local
SHOW CREATE TABLE generic_metric_counters_raw_local
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.generic_metric_counters_raw_local (
`use_case_id` LowCardinality(String), `org_id` UInt64, `project_id` UInt64, `metric_id` UInt64, `timestamp` DateTime, `retention_days` UInt16, `tags.key` Array(UInt64), `tags.indexed_value` Array(UInt64), `tags.raw_value` Array(String), `set_values` Array(UInt64), `count_value` Float64, `distribution_values` Array(Float64), `metric_type` LowCardinality(String), `materialization_version` UInt8, `timeseries_id` UInt32, `partition` UInt16, `offset` UInt64, `granularities` Array(UInt8)) ENGINE = MergeTree() PARTITION BY toStartOfInterval(timestamp, toIntervalDay(3)) ORDER BY (use_case_id, org_id, project_id, metric_id, timestamp) TTL timestamp + toIntervalDay(7) SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.005 sec.
6749ecd45d3d :) show create table generic_metric_counters_aggregation_mv
SHOW CREATE TABLE generic_metric_counters_aggregation_mv
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE MATERIALIZED VIEW default.generic_metric_counters_aggregation_mv TO default.generic_metric_counters_aggregated_local (
`org_id` UInt64, `project_id` UInt64, `metric_id` UInt64, `granularity` UInt8, `timestamp` DateTime CODEC(DoubleDelta), `retention_days` UInt16, `tags.key` Array(UInt64), `tags.indexed_value` Array(UInt64), `tags.raw_value` Array(String), `value` AggregateFunction(sum, Float64), `use_case_id` LowCardinality(String)) AS SELECT use_case_id, org_id, project_id, metric_id, arrayJoin(granularities) AS granularity, tags.key, tags.indexed_value, tags.raw_value, toDateTime(multiIf(granularity = 0, 10, granularity = 1, 60, granularity = 2, 3600, granularity = 3, 86400, -1) * intDiv(toUnixTimestamp(timestamp), multiIf(granularity = 0, 10, granularity = 1, 60, granularity = 2, 3600, granularity = 3, 86400, -1))) AS timestamp, retention_days, sumState(count_value) AS value FROM default.generic_metric_counters_raw_local WHERE (materialization_version = 1) AND (metric_type = 'counter') GROUP BY use_case_id, org_id, project_id, metric_id, tags.key, tags.indexed_value, tags.raw_value, timestamp, granularity, retention_days │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.003 sec.