feat(gen-metrics-counters) Introduce new storages for counters #3679
Merged
feat(gen-metrics-counters) Introduce new storages for counters #3679
Conversation
|
This PR has a migration; here is the generated SQL -- start migrations
-- forward migration generic_metrics : 0010_counters_aggregate_table
Local op: CREATE TABLE IF NOT EXISTS generic_metric_counters_aggregated_local (org_id UInt64, project_id UInt64, metric_id UInt64, granularity UInt8, timestamp DateTime CODEC (DoubleDelta), retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), value AggregateFunction(sum, Float64), use_case_id LowCardinality(String)) ENGINE ReplicatedAggregatingMergeTree('/clickhouse/tables/generic_metrics_counters/{shard}/default/generic_metric_counters_aggregated_local', '{replica}') PRIMARY KEY (org_id, project_id, metric_id, granularity, timestamp) ORDER BY (org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.indexed_value, tags.raw_value, retention_days, use_case_id) PARTITION BY (retention_days, toMonday(timestamp)) TTL timestamp + toIntervalDay(retention_days) SETTINGS index_granularity=2048;
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD COLUMN IF NOT EXISTS _indexed_tags_hash Array(UInt64) MATERIALIZED arrayMap((k, v) -> cityHash64(concat(toString(k), '=', toString(v))), tags.key, tags.indexed_value);
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD COLUMN IF NOT EXISTS _raw_tags_hash Array(UInt64) MATERIALIZED arrayMap((k, v) -> cityHash64(concat(toString(k), '=', v)), tags.key, tags.raw_value);
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD INDEX IF NOT EXISTS bf_indexed_tags_hash _indexed_tags_hash TYPE bloom_filter() GRANULARITY 1;
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD INDEX IF NOT EXISTS bf_raw_tags_hash _raw_tags_hash TYPE bloom_filter() GRANULARITY 1;
Local op: ALTER TABLE generic_metric_counters_aggregated_local ADD INDEX IF NOT EXISTS bf_tags_key_hash tags.key TYPE bloom_filter() GRANULARITY 1;
Distributed op: CREATE TABLE IF NOT EXISTS generic_metric_counters_aggregated_dist (org_id UInt64, project_id UInt64, metric_id UInt64, granularity UInt8, timestamp DateTime CODEC (DoubleDelta), retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), value AggregateFunction(sum, Float64), use_case_id LowCardinality(String)) ENGINE Distributed(cluster_one_sh, default, generic_metric_counters_aggregated_local);
-- end forward migration generic_metrics : 0010_counters_aggregate_table
-- backward migration generic_metrics : 0010_counters_aggregate_table
Distributed op: DROP TABLE IF EXISTS generic_metric_counters_aggregated_dist;
Local op: DROP TABLE IF EXISTS generic_metric_counters_aggregated_local;
-- end backward migration generic_metrics : 0010_counters_aggregate_table
-- forward migration generic_metrics : 0011_counters_raw_table
Local op: CREATE TABLE IF NOT EXISTS generic_metric_counters_raw_local (use_case_id LowCardinality(String), org_id UInt64, project_id UInt64, metric_id UInt64, timestamp DateTime, retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), set_values Array(UInt64), count_value Float64, distribution_values Array(Float64), metric_type LowCardinality(String), materialization_version UInt8, timeseries_id UInt32, partition UInt16, offset UInt64, granularities Array(UInt8)) ENGINE ReplicatedMergeTree('/clickhouse/tables/generic_metrics_counters/{shard}/default/generic_metric_counters_raw_local', '{replica}') ORDER BY (use_case_id, org_id, project_id, metric_id, timestamp) PARTITION BY (toStartOfInterval(timestamp, toIntervalDay(3))) TTL timestamp + toIntervalDay(7);
Distributed op: CREATE TABLE IF NOT EXISTS generic_metric_counters_raw_dist (use_case_id LowCardinality(String), org_id UInt64, project_id UInt64, metric_id UInt64, timestamp DateTime, retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), set_values Array(UInt64), count_value Float64, distribution_values Array(Float64), metric_type LowCardinality(String), materialization_version UInt8, timeseries_id UInt32, partition UInt16, offset UInt64, granularities Array(UInt8)) ENGINE Distributed(cluster_one_sh, default, generic_metric_counters_raw_local, cityHash64(timeseries_id));
-- end forward migration generic_metrics : 0011_counters_raw_table
-- backward migration generic_metrics : 0011_counters_raw_table
Distributed op: DROP TABLE IF EXISTS generic_metric_counters_raw_dist;
Local op: DROP TABLE IF EXISTS generic_metric_counters_raw_local;
-- end backward migration generic_metrics : 0011_counters_raw_table
-- forward migration generic_metrics : 0012_counters_mv
Local op: CREATE MATERIALIZED VIEW IF NOT EXISTS generic_metric_counters_aggregation_mv TO generic_metric_counters_aggregated_local (org_id UInt64, project_id UInt64, metric_id UInt64, granularity UInt8, timestamp DateTime CODEC (DoubleDelta), retention_days UInt16, tags Nested(key UInt64, indexed_value UInt64, raw_value String), value AggregateFunction(sum, Float64), use_case_id LowCardinality(String)) AS
SELECT
use_case_id,
org_id,
project_id,
metric_id,
arrayJoin(granularities) as granularity,
tags.key,
tags.indexed_value,
tags.raw_value,
toDateTime(multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1) *
intDiv(toUnixTimestamp(timestamp),
multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1))) as timestamp,
retention_days,
sumState(count_value) as value
FROM generic_metric_counters_raw_local
WHERE materialization_version = 1
AND metric_type = 'counter'
GROUP BY
use_case_id,
org_id,
project_id,
metric_id,
tags.key,
tags.indexed_value,
tags.raw_value,
timestamp,
granularity,
retention_days
;
-- end forward migration generic_metrics : 0012_counters_mv
-- backward migration generic_metrics : 0012_counters_mv
Local op: DROP TABLE IF EXISTS generic_metric_counters_aggregation_mv;
-- end backward migration generic_metrics : 0012_counters_mv |
Codecov Report — Base: 92.15% // Head: 90.48% // Decreases project coverage by -1.68%.
Additional details and impacted files@@ Coverage Diff @@
## master #3679 +/- ##
==========================================
- Coverage 92.15% 90.48% -1.68%
==========================================
Files 744 757 +13
Lines 34436 35904 +1468
==========================================
+ Hits 31734 32487 +753
- Misses 2702 3417 +715
Help us with your feedback. Take ten seconds to tell us how you rate us. Have a feature suggestion? Share it here. ☔ View full report at Codecov. |
onewland
approved these changes
Jan 31, 2023
Merged
5 tasks
andriisoldatenko
pushed a commit
to getsentry/sentry
that referenced
this pull request
Feb 28, 2023
#42939) This PR implements the prioritize-by-project bias. In detail: we run a celery task every 24 hours at 8:00 AM (UTC, randomly selected) for every org (we call it the *prioritise by project snuba query*) and all projects inside this org, and for a given combination of org and projects we run an adjustment model to recalculate sample rates if necessary. Then we cache the sample rate in the redis cluster `SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER`, using this pattern for the key: `f"ds::o:{org_id}:p:{project_id}:prioritise_projects"`. When relay fetches the `projectconfig` endpoint we run the `generate_rules` function to generate all dynamic sampling biases, and we check whether we have an adjusted sample rate for this project in the cache; if so, we apply it as a **uniform bias**, otherwise we use the default one. The *prioritise by project snuba query* is a cross-org snuba query that utilizes a new generic counter metric, `c:transactions/count_per_root_project@none`, which was introduced in [relay]( getsentry/relay#1734). TODO: - [x] Provision infrastructure to run clickhouse clusters for the counters tables. This is primarily dependent on ops - [x] Start running the snuba consumers to read and write to the counters table. SnS can work on this - [x] Add unit tests - [x] Update the snuba query using the new metric - [x] Hide behind a feature flag related PRs: - Implement the new metric in relay: getsentry/relay#1734 - Add org generic counters [TET-695] getsentry/snuba#3708 - Introduce new storages for counters in snuba getsentry/snuba#3679 - Add feature flag: https://github.com/getsentry/getsentry/pull/9323 - Add cross-organization methods for the string indexer #45076 #45076 [TET-695]: https://getsentry.atlassian.net/browse/TET-695?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ --------- Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com> Co-authored-by: Nar Saynorath <nar.saynorath@sentry.io>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
This is a repeat of #3657, but now the production cluster is ready with all tables defined. This PR will only be merged once https://github.com/getsentry/ops/pull/5975 is merged. That ensures the health check will pass, since the storage set will be defined and the tables will already have been created on the cluster.