File tree Expand file tree Collapse file tree
LocalPackages/mlx-swift/Source/Cmlx/mlx/mlx/core Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -115,18 +115,24 @@ class LoadSSDExpert : public Primitive {
115115 << count << " chunks | avg "
116116 << std::setprecision (3 ) << avg_ms_per_chunk << " ms/chunk" ;
117117
118- // Append TurboKV window stats if active
119- uint64_t tkv_tokens = g_turbo_tokens.exchange (0 );
118+ // Append TurboKV window stats if active.
119+ // Token count is NOT shown — it is multiplied by num_kv_layers
120+ // and would be misleading. The ratio is layer-agnostic because the
121+ // layer factor cancels in orig/packed; bytes saved is the total over all layers.
122+ uint64_t tkv_tokens = g_turbo_tokens.exchange (0 ); // reset only
120123 uint64_t tkv_orig = g_turbo_bytes_orig.exchange (0 );
121124 uint64_t tkv_packed = g_turbo_bytes_packed.exchange (0 );
122- if (tkv_tokens > 0 && tkv_packed > 0 ) {
123- double ratio = (tkv_orig > 0 ) ? (double )tkv_orig / tkv_packed : 0.0 ;
124- std::cerr << std::fixed << std::setprecision (0 )
125- << " | 🗜 TurboKV " << tkv_tokens << " t "
126- << std::setprecision (1 ) << ratio << " x" ;
125+ (void )tkv_tokens;
126+ if (tkv_packed > 0 && tkv_orig > tkv_packed) {
127+ double ratio = (double )tkv_orig / tkv_packed;
128+ double saved_mb = (tkv_orig - tkv_packed) / 1048576.0 ;
129+ std::cerr << std::fixed << std::setprecision (1 )
130+ << " | \U0001f5dc TurboKV " << ratio << " x"
131+ << " (" << std::setprecision (0 ) << saved_mb << " MB saved)" ;
127132 }
128133 std::cerr << std::endl;
129134
135+
130136 }
131137 }
132138 }
You can’t perform that action at this time.
0 commit comments