Skip to content

Commit 3df7430

Browse files
committed
fix(turbo-kv): drop token count from log, show ratio+MB saved (layer-agnostic)
1 parent dc6af72 commit 3df7430

1 file changed

Lines changed: 13 additions & 7 deletions

File tree

LocalPackages/mlx-swift/Source/Cmlx/mlx/mlx/core/moe_stream_op.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -115,18 +115,24 @@ class LoadSSDExpert : public Primitive {
115115
<< count << " chunks | avg "
116116
<< std::setprecision(3) << avg_ms_per_chunk << " ms/chunk";
117117

118-
// Append TurboKV window stats if active
119-
uint64_t tkv_tokens = g_turbo_tokens.exchange(0);
118+
// Append TurboKV window stats if active.
119+
// Token count is NOT shown — it is multiplied by num_kv_layers
120+
// and would be misleading. The ratio is layer-agnostic because the
121+
// layer factor cancels in orig/packed; bytes saved is the total across layers.
122+
uint64_t tkv_tokens = g_turbo_tokens.exchange(0); // reset only
120123
uint64_t tkv_orig = g_turbo_bytes_orig.exchange(0);
121124
uint64_t tkv_packed = g_turbo_bytes_packed.exchange(0);
122-
if (tkv_tokens > 0 && tkv_packed > 0) {
123-
double ratio = (tkv_orig > 0) ? (double)tkv_orig / tkv_packed : 0.0;
124-
std::cerr << std::fixed << std::setprecision(0)
125-
<< " | 🗜 TurboKV " << tkv_tokens << "t "
126-
<< std::setprecision(1) << ratio << "x";
125+
(void)tkv_tokens;
126+
if (tkv_packed > 0 && tkv_orig > tkv_packed) {
127+
double ratio = (double)tkv_orig / tkv_packed;
128+
double saved_mb = (tkv_orig - tkv_packed) / 1048576.0;
129+
std::cerr << std::fixed << std::setprecision(1)
130+
<< " | \U0001f5dc TurboKV " << ratio << "x"
131+
<< " (" << std::setprecision(0) << saved_mb << "MB saved)";
127132
}
128133
std::cerr << std::endl;
129134

135+
130136
}
131137
}
132138
}

0 commit comments

Comments
 (0)