@@ -160,6 +160,11 @@ type Toolset struct {
160160
161161 supervisor * lifecycle.Supervisor
162162
163+ // mediaDir is the toolset-scoped temp dir holding spooled media
164+ // payloads. Created lazily on first spool, removed by Stop.
165+ mediaMu sync.Mutex
166+ mediaDir string
167+
163168 mu sync.Mutex
164169
165170 // Cached tools and prompts, invalidated via MCP notifications and
@@ -426,6 +431,7 @@ func (ts *Toolset) Start(ctx context.Context) error {
426431// Stop tears the supervisor down. Idempotent.
427432func (ts * Toolset ) Stop (ctx context.Context ) error {
428433 slog .DebugContext (ctx , "Stopping MCP toolset" , "server" , ts .logID )
434+ defer ts .cleanupMediaDir ()
429435 if ts .supervisor == nil {
430436 return nil
431437 }
@@ -694,7 +700,7 @@ func (ts *Toolset) callTool(ctx context.Context, toolCall tools.ToolCall) (*tool
694700 return nil , fmt .Errorf ("failed to call tool: %w" , err )
695701 }
696702
697- result := processMCPContent (resp )
703+ result := ts . processMCPContent (resp )
698704 slog .DebugContext (ctx , "MCP tool call completed" , "tool" , toolCall .Function .Name , "output_length" , len (result .Output ))
699705 slog .DebugContext (ctx , result .Output )
700706 return result , nil
@@ -714,7 +720,13 @@ func isInitNotificationSendError(err error) bool {
714720 return false
715721}
716722
717- func processMCPContent (toolResult * mcp.CallToolResult ) * tools.ToolCallResult {
723+ const maxInlineMediaBytes = 256 * 1024
724+
725+ // writeMediaFile is a package-level indirection so tests can simulate
726+ // disk failures without manipulating the filesystem.
727+ var writeMediaFile = defaultWriteMediaFile
728+
729+ func (ts * Toolset ) processMCPContent (toolResult * mcp.CallToolResult ) * tools.ToolCallResult {
718730 var text strings.Builder
719731 var images , audios []tools.MediaContent
720732
@@ -723,9 +735,9 @@ func processMCPContent(toolResult *mcp.CallToolResult) *tools.ToolCallResult {
723735 case * mcp.TextContent :
724736 text .WriteString (c .Text )
725737 case * mcp.ImageContent :
726- images = append (images , encodeMedia (c .Data , c .MIMEType ))
738+ images = append (images , ts . encodeMedia (c .Data , c .MIMEType ))
727739 case * mcp.AudioContent :
728- audios = append (audios , encodeMedia (c .Data , c .MIMEType ))
740+ audios = append (audios , ts . encodeMedia (c .Data , c .MIMEType ))
729741 case * mcp.ResourceLink :
730742 if c .Name != "" {
731743 // Escape ] in name and ) in URI to prevent broken markdown links.
@@ -760,12 +772,94 @@ func processMCPContent(toolResult *mcp.CallToolResult) *tools.ToolCallResult {
760772 }
761773}
762774
763- // encodeMedia re-encodes raw bytes (as decoded by the MCP SDK) back to base64
764- // for our internal MediaContent representation.
765- func encodeMedia (data []byte , mimeType string ) tools.MediaContent {
766- return tools.MediaContent {
767- Data : base64 .StdEncoding .EncodeToString (data ),
768- MimeType : mimeType ,
775+ // encodeMedia keeps small payloads inline and spools larger ones to disk so the
776+ // session and TUI do not retain duplicate base64 copies. Spooled files live
777+ // under a toolset-scoped temp directory removed by Stop.
778+ func (ts * Toolset ) encodeMedia (data []byte , mimeType string ) tools.MediaContent {
779+ media := tools.MediaContent {MimeType : mimeType }
780+ if len (data ) <= maxInlineMediaBytes {
781+ media .Data = base64 .StdEncoding .EncodeToString (data )
782+ return media
783+ }
784+
785+ dir , err := ts .ensureMediaDir ()
786+ if err == nil {
787+ var path string
788+ path , err = writeMediaFile (dir , data , mimeType )
789+ if err == nil {
790+ media .FilePath = path
791+ return media
792+ }
793+ }
794+ slog .Warn ("failed to spool MCP media to disk" , "mime_type" , mimeType , "bytes" , len (data ), "error" , err )
795+ media .Data = base64 .StdEncoding .EncodeToString (data )
796+ return media
797+ }
798+
799+ // ensureMediaDir lazily creates the toolset-scoped temp dir for spooled
800+ // media payloads. The directory is removed by Stop.
801+ func (ts * Toolset ) ensureMediaDir () (string , error ) {
802+ ts .mediaMu .Lock ()
803+ defer ts .mediaMu .Unlock ()
804+ if ts .mediaDir != "" {
805+ return ts .mediaDir , nil
806+ }
807+ dir , err := os .MkdirTemp ("" , "docker-agent-mcp-media-*" )
808+ if err != nil {
809+ return "" , err
810+ }
811+ ts .mediaDir = dir
812+ return dir , nil
813+ }
814+
815+ // cleanupMediaDir removes the toolset-scoped media spool directory, if any.
816+ func (ts * Toolset ) cleanupMediaDir () {
817+ ts .mediaMu .Lock ()
818+ dir := ts .mediaDir
819+ ts .mediaDir = ""
820+ ts .mediaMu .Unlock ()
821+ if dir == "" {
822+ return
823+ }
824+ if err := os .RemoveAll (dir ); err != nil {
825+ slog .Warn ("failed to remove MCP media spool directory" , "dir" , dir , "error" , err )
826+ }
827+ }
828+
829+ func defaultWriteMediaFile (dir string , data []byte , mimeType string ) (string , error ) {
830+ f , err := os .CreateTemp (dir , "media-*" + mediaExtension (mimeType ))
831+ if err != nil {
832+ return "" , err
833+ }
834+ path := f .Name ()
835+ if _ , err := f .Write (data ); err != nil {
836+ _ = f .Close ()
837+ _ = os .Remove (path )
838+ return "" , err
839+ }
840+ if err := f .Close (); err != nil {
841+ _ = os .Remove (path )
842+ return "" , err
843+ }
844+ return path , nil
845+ }
846+
847+ func mediaExtension (mimeType string ) string {
848+ switch mimeType {
849+ case "image/png" :
850+ return ".png"
851+ case "image/jpeg" :
852+ return ".jpg"
853+ case "image/gif" :
854+ return ".gif"
855+ case "image/webp" :
856+ return ".webp"
857+ case "audio/wav" , "audio/wave" , "audio/x-wav" :
858+ return ".wav"
859+ case "audio/mpeg" , "audio/mp3" :
860+ return ".mp3"
861+ default :
862+ return ".bin"
769863 }
770864}
771865
0 commit comments