@@ -20,7 +20,9 @@ import {
2020 type ToolProgressDetailMode ,
2121} from "openclaw/plugin-sdk/agent-harness-runtime" ;
2222import { emitTrustedDiagnosticEvent } from "openclaw/plugin-sdk/diagnostic-runtime" ;
23+ import { generatedImageAssetFromBase64 } from "openclaw/plugin-sdk/image-generation" ;
2324import type { AssistantMessage , Usage } from "openclaw/plugin-sdk/llm" ;
25+ import { saveMediaBuffer } from "openclaw/plugin-sdk/media-store" ;
2426import { resolveCodexLocalRuntimeAttribution } from "./local-runtime-attribution.js" ;
2527import {
2628 readCodexNotificationThreadId ,
@@ -106,6 +108,10 @@ const CODEX_PROMPT_TOTAL_INPUT_KEYS = [
106108
107109const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20 ;
108110const TOOL_TRANSCRIPT_OUTPUT_MAX_CHARS = 12_000 ;
// Subdirectory argument handed to saveMediaBuffer when persisting images decoded
// from raw image generation response items.
111+ const GENERATED_IMAGE_MEDIA_SUBDIR = "tool-image-generation" ;
// Number of bytes in one megabyte (1024 * 1024); used to turn the
// megabyte-denominated media cap into a byte count.
112+ const BYTES_PER_MB = 1024 * 1024 ;
113+ // Match OpenClaw's default image media cap for generated image tool outputs.
// Fallback byte cap (6 * BYTES_PER_MB) used when the config carries no positive,
// finite agents.defaults.mediaMaxMb value.
114+ const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * BYTES_PER_MB ;
109115const TRANSCRIPT_PROGRESS_SUPPRESSED_TOOL_NAMES = new Set ( [
110116 "message" ,
111117 "messages" ,
@@ -168,6 +174,8 @@ export class CodexAppServerEventProjector {
168174 private readonly transcriptToolProgressCallIds = new Set < string > ( ) ;
169175 private lastNativeToolError : EmbeddedRunAttemptResult [ "lastToolError" ] ;
170176 private readonly nativeGeneratedMediaUrls = new Set < string > ( ) ;
177+ private readonly nativeGeneratedMediaItemIds = new Set < string > ( ) ;
178+ private readonly nativeGeneratedMediaUrlsByItemId = new Map < string , string > ( ) ;
171179 private readonly diagnosticToolStartedAtByItem = new Map < string , number > ( ) ;
172180 private readonly afterToolCallObservedItemIds = new Set < string > ( ) ;
173181 private assistantStarted = false ;
@@ -252,7 +260,7 @@ export class CodexAppServerEventProjector {
252260 await this . handleTurnCompleted ( params ) ;
253261 break ;
254262 case "rawResponseItem/completed" :
255- this . handleRawResponseItemCompleted ( params ) ;
263+ await this . handleRawResponseItemCompleted ( params ) ;
256264 break ;
257265 case "error" :
258266 if ( readBooleanAlias ( params , [ "willRetry" , "will_retry" ] ) === true ) {
@@ -331,6 +339,7 @@ export class CodexAppServerEventProjector {
331339 const hadPotentialSideEffects =
332340 toolTelemetry . didSendViaMessagingTool ||
333341 ( toolTelemetry . successfulCronAdds ?? 0 ) > 0 ||
342+ this . nativeGeneratedMediaItemIds . size > 0 ||
334343 this . sideEffectingToolItemIds . size > 0 ||
335344 this . sideEffectingDynamicToolCallIds . size > 0 ;
336345 return {
@@ -812,9 +821,13 @@ export class CodexAppServerEventProjector {
812821 } ) ;
813822 }
814823
815- private handleRawResponseItemCompleted ( params : JsonObject ) : void {
824+ private async handleRawResponseItemCompleted ( params : JsonObject ) : Promise < void > {
816825 const item = isJsonObject ( params . item ) ? params . item : undefined ;
817- if ( ! item || readString ( item , "role" ) !== "assistant" ) {
826+ if ( ! item ) {
827+ return ;
828+ }
829+ await this . recordRawGeneratedImageMedia ( item ) ;
830+ if ( readString ( item , "role" ) !== "assistant" ) {
818831 return ;
819832 }
820833 const text = extractRawAssistantText ( item ) ;
@@ -839,10 +852,73 @@ export class CodexAppServerEventProjector {
839852 }
840853 const savedPath = readItemString ( item , "savedPath" ) ?. trim ( ) ;
841854 if ( savedPath ) {
842- this . nativeGeneratedMediaUrls . add ( savedPath ) ;
855+ this . recordNativeGeneratedMediaUrl ( {
856+ itemId : item . id ,
857+ mediaUrl : savedPath ,
858+ } ) ;
859+ }
860+ }
861+
/**
 * Persists the image carried by a raw `image_generation_call` response item and
 * records the saved path as a native generated media URL.
 *
 * Items of any other `type`, or without a non-empty string `result`, are ignored.
 * The item id is registered in `nativeGeneratedMediaItemIds` as soon as a result
 * is seen, i.e. even when the image is later rejected for size or fails to save.
 *
 * Failures are best-effort: an oversized payload or a failed save is logged as a
 * warning and never thrown to the caller.
 *
 * @param item Raw response item; `type`, `result`, `id` and
 *   `revised_prompt`/`revisedPrompt` are the only fields read here.
 */
private async recordRawGeneratedImageMedia(item: JsonObject): Promise<void> {
  if (readString(item, "type") !== "image_generation_call") {
    return;
  }
  const result = readString(item, "result");
  if (!result) {
    return;
  }
  // Items without an id get a synthetic one derived from how many items were seen so far.
  const itemId = readString(item, "id") ?? `raw-image-${this.nativeGeneratedMediaItemIds.size}`;
  this.nativeGeneratedMediaItemIds.add(itemId);
  if (this.nativeGeneratedMediaUrlsByItemId.has(itemId)) {
    // A media URL is already recorded for this item (repeated completion, or a
    // saved path recorded earlier). recordNativeGeneratedMediaUrl keeps the first
    // URL only, so decoding and saving again would just leave an orphaned file.
    return;
  }
  const maxBytes = resolveGeneratedImageMaxBytes(this.params.config);
  // Cheap length-based check so oversized payloads are rejected before decoding.
  const estimatedDecodedBytes = estimateBase64DecodedBytes(result);
  if (estimatedDecodedBytes !== undefined && estimatedDecodedBytes > maxBytes) {
    embeddedAgentLog.warn("codex app-server raw image generation result exceeds media limit", {
      itemId,
      estimatedDecodedBytes,
      maxBytes,
    });
    return;
  }
  const asset = generatedImageAssetFromBase64({
    base64: result,
    index: this.nativeGeneratedMediaItemIds.size,
    // Accept both snake_case and camelCase spellings of the revised prompt field.
    revisedPrompt: readString(item, "revised_prompt") ?? readString(item, "revisedPrompt"),
    fileNamePrefix: "codex-image-generation",
    sniffMimeType: true,
  });
  if (!asset) {
    return;
  }
  try {
    const saved = await saveMediaBuffer(
      asset.buffer,
      asset.mimeType,
      GENERATED_IMAGE_MEDIA_SUBDIR,
      maxBytes,
      asset.fileName,
    );
    this.recordNativeGeneratedMediaUrl({
      itemId,
      mediaUrl: saved.path,
    });
  } catch (error) {
    // Saving is best-effort: log and continue so event handling is not interrupted.
    embeddedAgentLog.warn("codex app-server raw image generation result save failed", {
      itemId,
      error,
    });
  }
}
845911
/**
 * Registers a generated media URL for an item, keeping only the first URL seen
 * per item id.
 *
 * The item id is always added to `nativeGeneratedMediaItemIds`. The URL is stored
 * in `nativeGeneratedMediaUrlsByItemId` and `nativeGeneratedMediaUrls` only when
 * the item has no URL recorded yet; later URLs for the same item are ignored.
 *
 * @param params.itemId Identifier of the item that produced the media.
 * @param params.mediaUrl Location of the generated media.
 */
private recordNativeGeneratedMediaUrl(params: { itemId: string; mediaUrl: string }): void {
  const { itemId, mediaUrl } = params;
  // Both outcomes mark the item as seen, so do that up front.
  this.nativeGeneratedMediaItemIds.add(itemId);
  const alreadyRecorded = this.nativeGeneratedMediaUrlsByItemId.has(itemId);
  if (!alreadyRecorded) {
    this.nativeGeneratedMediaUrlsByItemId.set(itemId, mediaUrl);
    this.nativeGeneratedMediaUrls.add(mediaUrl);
  }
}
921+
846922 private buildToolMediaUrls ( toolTelemetry : CodexAppServerToolTelemetry ) : string [ ] | undefined {
847923 const mediaUrls = new Set (
848924 toolTelemetry . toolMediaUrls ?. map ( ( url ) => url . trim ( ) ) . filter ( Boolean ) ?? [ ] ,
@@ -1583,6 +1659,39 @@ function readString(record: JsonObject, key: string): string | undefined {
15831659 return typeof value === "string" ? value : undefined ;
15841660}
15851661
/**
 * Estimates the decoded byte length of a base64 string without decoding it.
 *
 * Space, tab, line feed and carriage return are skipped. Up to two trailing `=`
 * characters (ignoring that whitespace) are treated as padding and subtracted
 * from the estimate.
 *
 * @param base64 Base64 text, possibly containing whitespace.
 * @returns The estimated byte count (never negative), or `undefined` when the
 *   input contains no non-whitespace characters.
 */
function estimateBase64DecodedBytes(base64: string): number | undefined {
  const PAD_CODE = 0x3d; // "="
  let significantUnits = 0;
  // Last two significant code units, newest first; collected while scanning backwards.
  const tail: number[] = [];
  for (let pos = base64.length - 1; pos >= 0; pos -= 1) {
    const unit = base64.charCodeAt(pos);
    if (isBase64WhitespaceCode(unit)) {
      continue;
    }
    significantUnits += 1;
    if (tail.length < 2) {
      tail.push(unit);
    }
  }
  if (significantUnits === 0) {
    return undefined;
  }
  let padCount = 0;
  if (tail[0] === PAD_CODE) {
    padCount = tail[1] === PAD_CODE ? 2 : 1;
  }
  // Every 4 base64 characters encode 3 bytes; padding characters encode none.
  const unpaddedEstimate = Math.floor((significantUnits * 3) / 4);
  return Math.max(0, unpaddedEstimate - padCount);
}

/**
 * Reports whether a UTF-16 code unit is one of the whitespace characters skipped
 * when measuring base64 text: tab, line feed, carriage return or space.
 *
 * @param code UTF-16 code unit to classify.
 */
function isBase64WhitespaceCode(code: number): boolean {
  switch (code) {
    case 0x09: // tab
    case 0x0a: // line feed
    case 0x0d: // carriage return
    case 0x20: // space
      return true;
    default:
      return false;
  }
}
1686+
/**
 * Resolves the byte cap applied to generated image payloads.
 *
 * Reads `agents.defaults.mediaMaxMb` from the config. A positive, finite number
 * is converted to bytes and rounded down; anything else falls back to
 * `DEFAULT_GENERATED_IMAGE_MAX_BYTES`.
 *
 * @param config Run configuration; may be absent or lack the media setting.
 * @returns The maximum allowed size in bytes.
 */
function resolveGeneratedImageMaxBytes(config: EmbeddedRunAttemptParams["config"]): number {
  const mediaMaxMb = config?.agents?.defaults?.mediaMaxMb;
  // Missing, non-numeric, NaN/Infinity, zero and negative values all use the default cap.
  if (typeof mediaMaxMb !== "number" || !Number.isFinite(mediaMaxMb) || mediaMaxMb <= 0) {
    return DEFAULT_GENERATED_IMAGE_MAX_BYTES;
  }
  return Math.floor(mediaMaxMb * BYTES_PER_MB);
}
1694+
15861695function normalizeNonEmptyString ( value : unknown ) : string | undefined {
15871696 if ( typeof value !== "string" ) {
15881697 return undefined ;
0 commit comments