@@ -1028,7 +1028,7 @@ describe("openai transport stream", () => {
10281028 }
10291029 } ) ;
10301030
1031- it ( "parses JSON chat completions returned to streaming requests " , async ( ) => {
1031+ it ( "streams OpenAI-compatible non-streaming JSON completions as a fallback " , async ( ) => {
10321032 let capturedStreamFlag : unknown ;
10331033 const server = createServer ( ( req , res ) => {
10341034 let body = "" ;
@@ -1193,7 +1193,61 @@ describe("openai transport stream", () => {
11931193 }
11941194 } ) ;
11951195
1196- it ( "preserves reasoning tokens without double-counting them" , ( ) => {
// Regression test: a loopback server answers every request with an HTML page
// (status 200, content-type text/html). The error event emitted by the
// OpenAI-completions transport stream is expected to mention that HTML was
// returned, hint that baseUrl should include the provider API path, and echo
// the configured base URL.
it("adds a base URL hint when OpenAI-compatible streaming returns HTML", async () => {
  const server = createServer((_req, res) => {
    res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
    res.end("<html><body>not an API endpoint</body></html>");
  });

  // Reject on a listen failure so the test fails immediately instead of
  // hanging until the runner's timeout.
  await new Promise<void>((resolve, reject) => {
    server.once("error", reject);
    server.listen(0, "127.0.0.1", () => {
      server.off("error", reject);
      resolve();
    });
  });

  try {
    const address = server.address();
    if (!address || typeof address === "string") {
      throw new Error("Missing loopback server address");
    }

    // Single source of truth for the URL: used both to configure the model
    // and to assert that the error message echoes it back.
    const baseUrl = `http://127.0.0.1:${address.port}`;

    const model = {
      id: "deepseek-v4-flash",
      name: "DeepSeek V4 Flash",
      api: "openai-completions",
      provider: "spanagent",
      baseUrl,
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128_000,
      maxTokens: 4096,
    } satisfies Model<"openai-completions">;

    const stream = createOpenAICompletionsTransportStreamFn()(
      model,
      {
        systemPrompt: "system",
        messages: [{ role: "user", content: "Reply ok", timestamp: Date.now() }],
        tools: [],
      } as never,
      { apiKey: "test-key" } as never,
    );

    // Drain the whole stream; keep the message of the last error event seen.
    let errorMessage = "";
    for await (const event of stream as AsyncIterable<{
      type: string;
      error?: { errorMessage?: string };
    }>) {
      if (event.type === "error") {
        errorMessage = event.error?.errorMessage ?? "";
      }
    }

    expect(errorMessage).toContain("returned HTML instead of an API response");
    expect(errorMessage).toContain("baseUrl includes the provider API path, such as /v1");
    expect(errorMessage).toContain(baseUrl);
  } finally {
    // Always release the loopback port, even when an assertion above throws.
    await new Promise<void>((resolve, reject) => {
      server.close((error) => (error ? reject(error) : resolve()));
    });
  }
});
1249+
1250+ it ( "does not double-count reasoning tokens and clamps uncached prompt usage at zero" , ( ) => {
11971251 const model = {
11981252 id : "gpt-5" ,
11991253 name : "GPT-5" ,
0 commit comments