@@ -8,7 +8,64 @@ echo " Aegis-AI MLX Profiling Benchmark Suite "
88echo " =============================================="
99echo " "
1010
11- PS3=" Select a model to benchmark (1-7): "
# Ask which benchmark suite to run. The answer is kept in suite_opt:
#   "2"           -> prompt-cache regression test
#   "3" or empty  -> quit immediately
#   anything else -> falls through to Test 1 later in the script
echo " Select Benchmark Suite:"
echo " 1) Test 1: Automated Context & Memory Profile (TPS & RAM matrix)"
echo " 2) Test 2: Prompt Cache & Sliding Window Regression Test"
echo " 3) Quit"
# -r keeps backslashes literal. "|| true" means EOF on stdin simply leaves
# suite_opt empty (handled as "quit" below) instead of tripping errexit.
suite_opt=""
read -r -p " Option (1-3): " suite_opt || true

# Compare against the bare values: padding inside the quotes would make the
# comparison (and the emptiness check) impossible to satisfy.
if [[ "$suite_opt" == "3" || -z "$suite_opt" ]]; then
  echo " Exiting."
  exit 0
fi
21+
# Quick sanity check: locate the SwiftLM release binary before doing any work.
# Candidates are probed in order; the arch-specific build directory wins over
# the generic release directory. On success BIN holds the path to launch.
# Paths are relative to the current working directory.
BIN=""
for candidate in \
  ".build/arm64-apple-macosx/release/SwiftLM" \
  ".build/release/SwiftLM"; do
  if [[ -f "$candidate" ]]; then
    BIN="$candidate"
    break
  fi
done

# No candidate exists: tell the user how to build it and stop.
if [[ -z "$BIN" ]]; then
  echo " ⚠️ SwiftLM release binary not found! Please compile the project by running ./build.sh first."
  exit 1
fi
31+
if [[ "$suite_opt" == "2" ]]; then
  # Test 2: launch a SwiftLM server in the background, send four chat
  # completion requests (big, short, big, big) and require every response to
  # parse as JSON with a choices[0].message.content field.
  # Exit status: 0 when all four requests pass, 1 otherwise.
  REGRESSION_PORT=5431
  REGRESSION_URL="http://127.0.0.1:${REGRESSION_PORT}"
  PROMPT_FILE="/tmp/big_prompt.json"
  SERVER_PID=""

  # Stop the server started by this block (if any) and reap it.
  # Safe to call more than once.
  stop_regression_server() {
    if [[ -n "$SERVER_PID" ]]; then
      kill "$SERVER_PID" 2> /dev/null
      wait "$SERVER_PID" 2> /dev/null
      SERVER_PID=""
    fi
  }
  # Make sure the background server never outlives the script, including on
  # Ctrl-C or an early failure exit.
  trap stop_regression_server EXIT
  trap 'exit 130' INT TERM

  # run_chat_request LABEL MAX_TIME DATA
  #   LABEL    - text shown in the "=== ... ===" banner
  #   MAX_TIME - curl --max-time in seconds
  #   DATA     - value for curl -d (inline JSON, or @file)
  # Prints "OK: <content>" on success. Returns non-zero if the HTTP request
  # fails or the response lacks choices[0].message.content.
  run_chat_request() {
    local label=$1 max_time=$2 data=$3
    local response
    echo " === ${label} ==="
    # stderr is left alone so curl's own diagnostics are visible instead of
    # being fed to the JSON parser.
    response=$(curl -sS --fail --max-time "$max_time" \
      "${REGRESSION_URL}/v1/chat/completions" \
      -H "Content-Type: application/json" \
      -d "$data") || return 1
    printf '%s' "$response" \
      | python3 -c "import sys,json;d=json.load(sys.stdin);print('OK:',d['choices'][0]['message']['content'])"
  }

  echo " "
  echo " => Starting Prompt Cache Regression Test"
  echo " Generating ${PROMPT_FILE} (approx 5K tokens)..."
  if ! python3 -c 'import json,sys; open(sys.argv[1], "w").write(json.dumps({"messages": [{"role": "user", "content": "apple "*4500}], "max_tokens": 30}))' "$PROMPT_FILE"; then
    echo " Failed to generate ${PROMPT_FILE}."
    exit 1
  fi

  echo " Starting Server in background..."
  # Best effort: clear any stale instance that may still hold the port.
  killall SwiftLM 2> /dev/null || true
  mkdir -p tmp
  "$BIN" --model mlx-community/gemma-4-26b-a4b-it-4bit --port "$REGRESSION_PORT" --turbo-kv --stream-experts --ctx-size 16384 > ./tmp/regression_server.log 2>&1 &
  SERVER_PID=$!

  echo " Waiting for server to be ready on port 5431 (this may take a minute if downloading)..."
  server_ready=0
  for attempt in {1..300}; do
    if curl -s "${REGRESSION_URL}/health" > /dev/null; then
      server_ready=1
      break
    fi
    # No point polling further once the server process has exited.
    if ! kill -0 "$SERVER_PID" 2> /dev/null; then
      break
    fi
    sleep 1
  done

  if [[ "$server_ready" -ne 1 ]]; then
    echo " "
    echo " Server did not become ready. See ./tmp/regression_server.log for details."
    exit 1
  fi

  echo " "
  echo " Server is up! Running 4-request sliding window validation..."

  # The chain stops at the first failing request, as the original && chain did.
  status=0
  if run_chat_request "Req 1 (Big 5537t)" 120 "@${PROMPT_FILE}" \
    && run_chat_request "Req 2 (Short 18t)" 60 '{"messages":[{"role":"user","content":"What is today?"}],"max_tokens":30}' \
    && run_chat_request "Req 3 (Big 5537t)" 120 "@${PROMPT_FILE}" \
    && run_chat_request "Req 4 (Big Full Cache Hit)" 120 "@${PROMPT_FILE}"; then
    echo " === ALL 4 PASSED ==="
  else
    status=1
    echo " === REGRESSION TEST FAILED (see ./tmp/regression_server.log) ==="
  fi

  echo " "
  echo " Cleaning up..."
  stop_regression_server
  # Propagate the outcome so callers and CI can tell pass from fail.
  exit "$status"
fi
65+
66+ # Fallback to Test 1 for anything else
67+ echo " "
68+ PS3=" Select a model to profile (1-7): "
1269options=(
1370 " gemma-4-26b-a4b-it-4bit"
1471 " gemma-4-2b-a4b-it-4bit"
@@ -50,12 +107,6 @@ echo ""
50107echo " => Starting benchmark for $MODEL with contexts: $CONTEXTS "
51108echo " "
52109
53- # Quick sanity check
54- if [ ! -f " .build/arm64-apple-macosx/release/SwiftLM" ] && [ ! -f " .build/release/SwiftLM" ]; then
55- echo " ⚠️ SwiftLM release binary not found! Please compile the project by running ./build.sh first."
56- exit 1
57- fi
58-
59110python3 -u scripts/profiling/profile_runner.py \
60111 --model " $MODEL " \
61112 --contexts " $CONTEXTS " \
0 commit comments