@@ -183,6 +183,7 @@ def main():
183183 server_proc .wait (timeout = 20 )
184184 time .sleep (3 ) # Let OS reclaim memory before next config
185185
186+ # ── Write markdown report ──
186187 with open (args .out , "w" ) as f :
187188 f .write (f"### `{ args .model } ` — Context & Memory Profile\n \n " )
188189 f .write (f"Context depths tested: { args .contexts } \n \n " )
@@ -195,6 +196,148 @@ def main():
195196 f .write (f"> **GPU Memory Allocated**: Total memory requested by the GPU — includes data swapped to SSD. This shows the TRUE memory demand and reveals TurboQuant compression benefits even when Active RAM is saturated.\n " )
196197
197198 print (f"\n Done. Matrix saved to { args .out } " )
199+
200+ # ── Console visualization ──
201+ if results :
202+ print_visualization (results , args .model , baseline_alloc )
203+
204+
205+ # ══════════════════════════════════════════════════════════════════════════════
206+ # Console Visualization
207+ # ══════════════════════════════════════════════════════════════════════════════
208+
209+ # ANSI color codes
class C:
    """ANSI SGR escape sequences used to style console output.

    Each attribute is a complete escape sequence: ESC (``\\033``) immediately
    followed by ``[`` and the SGR parameter, terminated by ``m``.  There must
    be no whitespace between ESC and ``[``; otherwise the terminal does not
    recognise the sequence and prints the remainder literally.
    """

    # Attribute reset / text weight
    RESET = "\033[0m"
    BOLD = "\033[1m"
    DIM = "\033[2m"
    # Foreground
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"
    # Background
    BG_BLUE = "\033[44m"
    BG_MAG = "\033[45m"
225+
# Bar colour for each benchmark configuration, looked up by the "config"
# label of a result row.  Labels without an entry fall back to no colour
# at the call sites (they use ``CONFIG_COLORS.get(name, "")``).
CONFIG_COLORS = dict(
    (
        ("Dense/Vanilla", C.BLUE),
        ("SSD Stream", C.CYAN),
        ("TurboQuant", C.MAGENTA),
        ("SSD + TurboQuant", C.GREEN),
    )
)
232+
def bar(value, max_val, width=30, fill="█", empty="░", color=""):
    """Render ``value`` as a fixed-width horizontal bar scaled to ``max_val``.

    Args:
        value: Quantity to draw.
        max_val: Quantity that corresponds to a completely filled bar.
        width: Total number of cells in the bar.
        fill: Character used for the filled portion.
        empty: Character used for the unfilled portion.
        color: Escape sequence emitted before the filled portion ("" for none).

    Returns:
        A string of ``color`` + filled cells + dimmed empty cells + reset.
        The visible part is always exactly ``width`` cells wide.
    """
    if max_val <= 0:
        # No usable scale: draw an empty bar instead of dividing by zero.
        filled = 0
    else:
        filled = int(round(value / max_val * width))
        # Clamp at both ends.  Without the lower bound a negative value
        # yields a negative count, and ``width - filled`` then overflows
        # the bar with extra empty cells.
        filled = max(0, min(filled, width))
    # Segments are concatenated with no separators so the filled and empty
    # runs form one continuous bar of exactly ``width`` cells.
    return f"{color}{fill * filled}{C.DIM}{empty * (width - filled)}{C.RESET}"
240+
def _metric_value(row, key):
    """Return ``row[key]`` as a float, or ``None`` if it is missing or non-numeric (e.g. "N/A")."""
    try:
        return float(row[key])
    except (KeyError, TypeError, ValueError):
        return None


def _print_metric_section(results, ctx_sizes, title, key, unit, pick_best,
                          rule_width, value_width=6, decimals=1):
    """Print one bar-chart section (header plus one group of rows per context size).

    ``pick_best`` is ``max`` or ``min`` and selects the starred value inside
    each context group.  Rows whose metric is unavailable are printed with an
    empty bar and "N/A" instead of being converted with ``float``.
    """
    print(f"\n{C.BOLD} {title}{C.RESET}")
    print(f"{C.DIM} {'─' * rule_width}{C.RESET}")

    # All bars in a section share one scale so groups are visually comparable.
    measured = [v for v in (_metric_value(r, key) for r in results) if v is not None]
    scale = max(measured) if measured else 1

    for ctx in ctx_sizes:
        print(f"\n{C.BOLD}{C.WHITE} {ctx:,} tokens{C.RESET}")
        rows = [(r, _metric_value(r, key)) for r in results if r["context"] == ctx]
        numeric = [v for _, v in rows if v is not None]
        # Computed once per group (not once per row).  A star is only
        # meaningful when at least two measured values compete.
        best = pick_best(numeric) if len(numeric) > 1 else None

        for row, value in rows:
            color = CONFIG_COLORS.get(row["config"], "")
            label = f" {row['config']:<20} "
            if value is None:
                gauge = bar(0, scale, width=28, color=color)
                print(f"{label}{gauge} {C.DIM}{'N/A':>{value_width}}{C.RESET} {unit}")
                continue
            gauge = bar(value, scale, width=28, color=color)
            val_str = f"{C.BOLD}{value:>{value_width}.{decimals}f}{C.RESET} {unit}"
            crown = f" {C.YELLOW}★{C.RESET}" if best is not None and value == best else ""
            print(f"{label}{gauge} {val_str}{crown}")


def print_visualization(results, model_name, baseline_alloc):
    """Print a coloured console summary of the benchmark results.

    Output consists of a banner, three bar-chart sections (generation speed,
    time to first token, GPU memory allocated) grouped by context size, and a
    ranking of configurations by average tokens/sec.

    Args:
        results: Iterable of dict rows.  The keys read here are "context",
            "config", "tps", "ttft" and "gpu_alloc".  The three metric
            values may be numbers, numeric strings, or "N/A"; unavailable
            values are shown as "N/A" and excluded from scaling, best-value
            stars and averages.
        model_name: Model identifier shown in the banner.
        baseline_alloc: Baseline GPU allocation shown in the banner,
            formatted with one decimal place and a "GB" suffix.
    """
    W = 72  # box width

    print()
    print(f"{C.BOLD}{C.CYAN}{'═' * W}{C.RESET}")
    print(f"{C.BOLD}{C.CYAN}{' BENCHMARK RESULTS':^{W}}{C.RESET}")
    print(f"{C.BOLD}{C.CYAN}{'═' * W}{C.RESET}")
    print(f"{C.DIM} Model: {model_name} | Baseline GPU: {baseline_alloc:.1f} GB{C.RESET}")
    print(f"{C.CYAN}{'─' * W}{C.RESET}")

    # Group results by context size
    ctx_sizes = sorted({r["context"] for r in results})

    # ── 1) Generation Speed (TPS) ──
    _print_metric_section(
        results, ctx_sizes,
        title="⚡ Generation Speed (tokens/sec) — higher is better",
        key="tps", unit="tok/s", pick_best=max,
        rule_width=W - 4, value_width=6, decimals=1,
    )

    # ── 2) Time to First Token (TTFT) ──
    _print_metric_section(
        results, ctx_sizes,
        title="⏱ Time to First Token (seconds) — lower is better",
        key="ttft", unit="s", pick_best=min,
        rule_width=W - 4, value_width=7, decimals=2,
    )

    # ── 3) GPU Memory Demand ──
    _print_metric_section(
        results, ctx_sizes,
        title="💾 GPU Memory Allocated (GB) — lower is better",
        key="gpu_alloc", unit="GB", pick_best=min,
        rule_width=W - 4, value_width=6, decimals=1,
    )

    # ── 4) Summary scoreboard ──
    print(f"\n{C.CYAN}{'─' * W}{C.RESET}")
    print(f"{C.BOLD} 🏆 Configuration Ranking (by avg TPS across all contexts){C.RESET}")
    print(f"{C.DIM} {'─' * (W - 4)}{C.RESET}")

    # First-seen order (instead of set iteration order) keeps ties stable
    # from run to run; the sort below is stable.
    config_names = list(dict.fromkeys(r["config"] for r in results))

    scoreboard = []
    for cfg_name in config_names:
        cfg_rows = [r for r in results if r["config"] == cfg_name]
        tps_vals = [v for v in (_metric_value(r, "tps") for r in cfg_rows) if v is not None]
        gpu_vals = [v for v in (_metric_value(r, "gpu_alloc") for r in cfg_rows) if v is not None]
        avg_tps = sum(tps_vals) / len(tps_vals) if tps_vals else 0
        avg_gpu = sum(gpu_vals) / max(1, len(gpu_vals))
        scoreboard.append((cfg_name, avg_tps, avg_gpu))

    scoreboard.sort(key=lambda entry: entry[1], reverse=True)
    medals = ["🥇", "🥈", "🥉", " "]

    for i, (cfg_name, avg_tps, avg_gpu) in enumerate(scoreboard):
        medal = medals[min(i, 3)]  # everything past third place gets the blank slot
        color = CONFIG_COLORS.get(cfg_name, "")
        print(
            f" {medal} {color}{C.BOLD}{cfg_name:<22}{C.RESET}"
            f" avg {avg_tps:>5.1f} tok/s | avg {avg_gpu:>5.1f} GB GPU"
        )

    print(f"\n{C.CYAN}{'═' * W}{C.RESET}")
    print()
339+
198340
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
343+
0 commit comments