@@ -990,12 +990,26 @@ func handleChatStreaming(
             }
             cont.yield("data: [DONE]\n\n")
             cont.finish()
-            // llama-server style generation log
+            // llama-server style: log full response JSON on one line
             let dur = Date().timeIntervalSince(genStart)
             let tokPerSec = dur > 0 ? Double(completionTokenCount) / dur : 0
-            let preview = String(fullText.prefix(120)).replacingOccurrences(of: "\n", with: " ")
-            let suffix = fullText.count > 120 ? "..." : ""
-            print("[mlx-server] prompt=\(promptTokenCount)t, gen=\(completionTokenCount)t, speed=\(String(format: "%.2f", tokPerSec))t/s [stream] | \(preview)\(suffix)")
+            let logResp: [String: Any] = [
+                "choices": [[
+                    "index": 0,
+                    "message": ["role": "assistant", "content": fullText],
+                    "finish_reason": reason
+                ]],
+                "usage": [
+                    "prompt_tokens": promptTokenCount,
+                    "completion_tokens": completionTokenCount,
+                    "total_tokens": promptTokenCount + completionTokenCount
+                ],
+                "timings": ["predicted_per_second": tokPerSec]
+            ]
+            if let logData = try? JSONSerialization.data(withJSONObject: logResp),
+               let logStr = String(data: logData, encoding: .utf8) {
+                print("srv log_server_r: response: \(logStr)")
+            }
         }
     }
 }
@@ -1053,10 +1067,7 @@ func handleChatNonStreaming(
     await stats.requestFinished(tokens: completionTokenCount, duration: duration)
     await semaphore.signal()
 
-    // ── llama-server style generation log ──
-    let tokPerSec = duration > 0 ? Double(completionTokenCount) / duration : 0
-    let outputPreview = fullText.prefix(120).replacingOccurrences(of: "\n", with: " ")
-    print("[mlx-server] prompt=\(promptTokenCount)t, gen=\(completionTokenCount)t, speed=\(String(format: "%.2f", tokPerSec))t/s | \(outputPreview)\(fullText.count > 120 ? "..." : "")")
+
 
     // ── Apply stop sequences to final text ──
     var finishReason: String
@@ -1104,6 +1115,10 @@ func handleChatNonStreaming(
         usage: TokenUsage(promptTokens: promptTokenCount, completionTokens: completionTokenCount, totalTokens: totalTokens)
     )
     let encoded = try JSONEncoder().encode(resp)
+    // llama-server style: log full response JSON on one line
+    if let responseStr = String(data: encoded, encoding: .utf8) {
+        print("srv log_server_r: response: \(responseStr)")
+    }
     return Response(
         status: .ok,
         headers: jsonHeaders(),
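For reference, a minimal standalone sketch of the logging approach the streaming path adopts above. `JSONSerialization` with its default (non-pretty-printed) options emits compact JSON, which is what keeps the whole response payload on a single `srv log_server_r:` line; the values below are placeholders, not the handler's real state.

```swift
import Foundation

// Placeholder values standing in for the streaming handler's real state.
let fullText = "Hello!"
let promptTokenCount = 12
let completionTokenCount = 3
let tokPerSec = 41.7

let logResp: [String: Any] = [
    "choices": [[
        "index": 0,
        "message": ["role": "assistant", "content": fullText],
        "finish_reason": "stop"
    ]],
    "usage": [
        "prompt_tokens": promptTokenCount,
        "completion_tokens": completionTokenCount,
        "total_tokens": promptTokenCount + completionTokenCount
    ],
    "timings": ["predicted_per_second": tokPerSec]
]

// Default JSONSerialization options produce compact JSON (no .prettyPrinted),
// so the entire response serializes onto one log line.
if let data = try? JSONSerialization.data(withJSONObject: logResp),
   let line = String(data: data, encoding: .utf8) {
    print("srv log_server_r: response: \(line)")
}
```

The non-streaming path skips the hand-built dictionary and simply re-uses the `JSONEncoder`-encoded response body it already has, which likewise comes out as a single compact line.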