oxidecomputer · karencfv · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/clickhouse-admin/types/testutils/replica-server-config.xml b/clickhouse-admin/types/testutils/replica-server-config.xml
@@ -14,6 +14,38 @@
     <profiles>
         <default>
             <load_balancing>random</load_balancing>
+            <!--
+                Omit logs for fast queries. As of this writing, the vast
+                majority of clickhouse queries are INSERTS that succeed in
+                <5ms, and aren't operationally interesting. From a test rack:
+                    SELECT
+                        roundToExp2(greatest(query_duration_ms, 1)) AS bucket_start,
+                        bucket_start * 2 AS bucket_end,
+                        count() AS count,
+                        round((100 * count()) / (
+                            SELECT count()
+                            FROM system.query_log
+                        ), 4) AS pct
+                    FROM system.query_log
+                    GROUP BY bucket_start, bucket_end
+                    ORDER BY bucket_start ASC
+                        ┌─bucket_start─┬─bucket_end─┬───count─┬─────pct─┐
+                     1. │            1 │          2 │ 3590120 │ 62.6491 │
+                     2. │            2 │          4 │ 1206074 │ 21.0465 │
+                     3. │            4 │          8 │  298972 │  5.2172 │
+                     4. │            8 │         16 │  109739 │   1.915 │
+                     5. │           16 │         32 │  114881 │  2.0047 │
+                     6. │           32 │         64 │  121448 │  2.1193 │
+                     7. │           64 │        128 │  130456 │  2.2765 │
+                     8. │          128 │        256 │   87336 │  1.5241 │
+                     9. │          256 │        512 │   57767 │  1.0081 │
+                    10. │          512 │       1024 │   12327 │  0.2151 │
+                    11. │         1024 │       2048 │    1341 │  0.0234 │
+                    12. │         2048 │       4096 │      56 │   0.001 │
+                    13. │         4096 │       8192 │       1 │       0 │
+                        └──────────────┴────────────┴─────────┴─────────┘
+            -->
+            <log_queries_min_query_duration_ms>5</log_queries_min_query_duration_ms>
         </default>
 
     </profiles>
@@ -49,6 +81,31 @@
         <flush_interval_milliseconds>10000</flush_interval_milliseconds>
     </query_log>
 
+    <!--
+        Mask long queries generated by oximeter.
+
+        As of this writing, oximeter builds queries like
+
+            SELECT * FROM oximeter.measurements_* WHERE timeseries_key IN (...)
+
+        where the IN clause contains thousands of elements. A single oximeter
+        query can generate multiple queries of this form. We observe that
+        these queries comprise ~80% of the `system.query_log` table, which is
+        itself one of the largest tables in terms of compressed disk use.
+        This block truncates this long query pattern, since it's not
+        operationally useful and consumes a significant amount of disk.
+
+        Note: this rule will become irrelevant if we change the metrics
+        data model in clickhouse.
+    -->
+    <query_masking_rules>
+        <rule>
+            <name>truncate large timeseries_key IN clauses</name>
+            <regexp>(\btimeseries_key\s+IN\s*\()[^)]{120,}\)</regexp>
+            <replace>\1...)</replace>
+        </rule>
+    </query_masking_rules>
+
     <metric_log>
         <database>system</database>
         <table>metric_log</table>

diff --git a/clickhouse-admin/types/versions/src/impls/config.rs b/clickhouse-admin/types/versions/src/impls/config.rs
@@ -79,14 +79,46 @@ impl ReplicaConfig {
         let format_schema_path = data_path.clone().join("format_schemas");
         let backup_path = data_path.clone().join("backup");
         format!(
-            "<!-- generation:{generation} -->
+            r#"<!-- generation:{generation} -->
 <clickhouse>
 {logger}
     <path>{data_path}</path>
 
     <profiles>
         <default>
             <load_balancing>random</load_balancing>
+            <!--
+                Omit logs for fast queries. As of this writing, the vast
+                majority of clickhouse queries are INSERTS that succeed in
+                <5ms, and aren't operationally interesting. From a test rack:
+                    SELECT
+                        roundToExp2(greatest(query_duration_ms, 1)) AS bucket_start,
+                        bucket_start * 2 AS bucket_end,
+                        count() AS count,
+                        round((100 * count()) / (
+                            SELECT count()
+                            FROM system.query_log
+                        ), 4) AS pct
+                    FROM system.query_log
+                    GROUP BY bucket_start, bucket_end
+                    ORDER BY bucket_start ASC
+                        ┌─bucket_start─┬─bucket_end─┬───count─┬─────pct─┐
+                     1. │            1 │          2 │ 3590120 │ 62.6491 │
+                     2. │            2 │          4 │ 1206074 │ 21.0465 │
+                     3. │            4 │          8 │  298972 │  5.2172 │
+                     4. │            8 │         16 │  109739 │   1.915 │
+                     5. │           16 │         32 │  114881 │  2.0047 │
+                     6. │           32 │         64 │  121448 │  2.1193 │
+                     7. │           64 │        128 │  130456 │  2.2765 │
+                     8. │          128 │        256 │   87336 │  1.5241 │
+                     9. │          256 │        512 │   57767 │  1.0081 │
+                    10. │          512 │       1024 │   12327 │  0.2151 │
+                    11. │         1024 │       2048 │    1341 │  0.0234 │
+                    12. │         2048 │       4096 │      56 │   0.001 │
+                    13. │         4096 │       8192 │       1 │       0 │
+                        └──────────────┴────────────┴─────────┴─────────┘
+            -->
+            <log_queries_min_query_duration_ms>5</log_queries_min_query_duration_ms>
         </default>
 
     </profiles>
@@ -122,6 +154,31 @@ impl ReplicaConfig {
         <flush_interval_milliseconds>10000</flush_interval_milliseconds>
     </query_log>
 
+    <!--
+        Mask long queries generated by oximeter.
+
+        As of this writing, oximeter builds queries like
+
+            SELECT * FROM oximeter.measurements_* WHERE timeseries_key IN (...)
+
+        where the IN clause contains thousands of elements. A single oximeter
+        query can generate multiple queries of this form. We observe that
+        these queries comprise ~80% of the `system.query_log` table, which is
+        itself one of the largest tables in terms of compressed disk use.
+        This block truncates this long query pattern, since it's not
+        operationally useful and consumes a significant amount of disk.
+
+        Note: this rule will become irrelevant if we change the metrics
+        data model in clickhouse.
+    -->
+    <query_masking_rules>
+        <rule>
+            <name>truncate large timeseries_key IN clauses</name>
+            <regexp>(\btimeseries_key\s+IN\s*\()[^)]{{120,}}\)</regexp>
+            <replace>\1...)</replace>
+        </rule>
+    </query_masking_rules>
+
     <metric_log>
         <database>system</database>
         <table>metric_log</table>
@@ -199,7 +256,7 @@ impl ReplicaConfig {
 {keepers}
 
 </clickhouse>
-"
+"#
         )
     }
 }