Skip to content

Commit 6e74d77

Browse files
committed
fix(cycles): split media understanding runtime contracts
1 parent 0f77fdf commit 6e74d77

8 files changed

Lines changed: 115 additions & 69 deletions

File tree

src/media-understanding/active-model.types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
export type ActiveMediaModel = {
2+
provider: string;
3+
model?: string;
4+
};

src/media-understanding/apply.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
normalizeLowercaseStringOrEmpty,
1515
normalizeOptionalLowercaseString,
1616
} from "../shared/string-coerce.js";
17+
import type { ActiveMediaModel } from "./active-model.types.js";
1718
import { resolveAttachmentKind } from "./attachments.js";
1819
import { runWithConcurrency } from "./concurrency.js";
1920
import { DEFAULT_ECHO_TRANSCRIPT_FORMAT, sendTranscriptEcho } from "./echo-transcript.js";
@@ -24,7 +25,6 @@ import {
2425
} from "./format.js";
2526
import { resolveConcurrency } from "./resolve.js";
2627
import {
27-
type ActiveMediaModel,
2828
buildProviderRegistry,
2929
createMediaAttachmentCache,
3030
normalizeMediaAttachments,

src/media-understanding/audio-preflight.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
import type { MsgContext } from "../auto-reply/templating.js";
22
import type { OpenClawConfig } from "../config/types.js";
33
import { logVerbose, shouldLogVerbose } from "../globals.js";
4+
import type { ActiveMediaModel } from "./active-model.types.js";
45
import { isAudioAttachment } from "./attachments.js";
56
import { runAudioTranscription } from "./audio-transcription-runner.js";
6-
import {
7-
type ActiveMediaModel,
8-
normalizeMediaAttachments,
9-
resolveMediaAttachmentLocalRoots,
10-
} from "./runner.js";
7+
import { normalizeMediaAttachments, resolveMediaAttachmentLocalRoots } from "./runner.js";
118
import type { MediaUnderstandingProvider } from "./types.js";
129

1310
/**

src/media-understanding/audio-transcription-runner.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { MsgContext } from "../auto-reply/templating.js";
22
import type { OpenClawConfig } from "../config/types.js";
3+
import type { ActiveMediaModel } from "./active-model.types.js";
34
import {
4-
type ActiveMediaModel,
55
buildProviderRegistry,
66
createMediaAttachmentCache,
77
normalizeMediaAttachments,

src/media-understanding/runner.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import { getDefaultMediaLocalRoots } from "../media/local-roots.js";
2626
import { runExec } from "../process/exec.js";
2727
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
2828
import { normalizeOptionalString } from "../shared/string-coerce.js";
29+
import type { ActiveMediaModel } from "./active-model.types.js";
2930
import { MediaAttachmentCache, selectAttachments } from "./attachments.js";
3031
import { resolveAutoMediaKeyProviders, resolveDefaultMediaModel } from "./defaults.js";
3132
import { isMediaUnderstandingSkipError } from "./errors.js";
@@ -52,11 +53,7 @@ import type {
5253
MediaUnderstandingProvider,
5354
} from "./types.js";
5455
export { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
55-
56-
export type ActiveMediaModel = {
57-
provider: string;
58-
model?: string;
59-
};
56+
export type { ActiveMediaModel } from "./active-model.types.js";
6057

6158
type ProviderRegistry = Map<string, MediaUnderstandingProvider>;
6259

src/media-understanding/runtime-types.ts

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import type { OpenClawConfig } from "../config/types.js";
2+
import type { ActiveMediaModel } from "./active-model.types.js";
3+
import type { MediaUnderstandingOutput, MediaUnderstandingProvider } from "./types.js";
4+
5+
export type RunMediaUnderstandingFileParams = {
6+
capability: "image" | "audio" | "video";
7+
filePath: string;
8+
cfg: OpenClawConfig;
9+
agentDir?: string;
10+
mime?: string;
11+
activeModel?: ActiveMediaModel;
12+
};
13+
14+
export type RunMediaUnderstandingFileResult = {
15+
text: string | undefined;
16+
provider?: string;
17+
model?: string;
18+
output?: MediaUnderstandingOutput;
19+
};
20+
21+
export type DescribeImageFileParams = {
22+
filePath: string;
23+
cfg: OpenClawConfig;
24+
agentDir?: string;
25+
mime?: string;
26+
activeModel?: ActiveMediaModel;
27+
};
28+
29+
export type DescribeImageFileWithModelParams = {
30+
filePath: string;
31+
cfg: OpenClawConfig;
32+
agentDir?: string;
33+
mime?: string;
34+
provider: string;
35+
model: string;
36+
prompt: string;
37+
maxTokens?: number;
38+
timeoutMs?: number;
39+
};
40+
41+
export type DescribeImageFileWithModelResult = Awaited<
42+
ReturnType<NonNullable<MediaUnderstandingProvider["describeImage"]>>
43+
>;
44+
45+
export type DescribeVideoFileParams = {
46+
filePath: string;
47+
cfg: OpenClawConfig;
48+
agentDir?: string;
49+
mime?: string;
50+
activeModel?: ActiveMediaModel;
51+
};
52+
53+
export type TranscribeAudioFileParams = {
54+
filePath: string;
55+
cfg: OpenClawConfig;
56+
agentDir?: string;
57+
mime?: string;
58+
activeModel?: ActiveMediaModel;
59+
language?: string;
60+
prompt?: string;
61+
};
62+
63+
export type MediaUnderstandingRuntime = {
64+
runMediaUnderstandingFile: (
65+
params: RunMediaUnderstandingFileParams,
66+
) => Promise<RunMediaUnderstandingFileResult>;
67+
describeImageFile: (params: DescribeImageFileParams) => Promise<RunMediaUnderstandingFileResult>;
68+
describeImageFileWithModel: (
69+
params: DescribeImageFileWithModelParams,
70+
) => Promise<DescribeImageFileWithModelResult>;
71+
describeVideoFile: (params: DescribeVideoFileParams) => Promise<RunMediaUnderstandingFileResult>;
72+
transcribeAudioFile: (params: TranscribeAudioFileParams) => Promise<{ text: string | undefined }>;
73+
};

src/media-understanding/runtime.ts

Lines changed: 26 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,28 @@
11
import fs from "node:fs/promises";
22
import path from "node:path";
3-
import type { OpenClawConfig } from "../config/types.js";
43
import { normalizeMediaProviderId } from "./provider-registry.js";
54
import {
65
buildProviderRegistry,
76
createMediaAttachmentCache,
87
normalizeMediaAttachments,
98
runCapability,
10-
type ActiveMediaModel,
119
} from "./runner.js";
10+
import type {
11+
DescribeImageFileParams,
12+
DescribeImageFileWithModelParams,
13+
DescribeVideoFileParams,
14+
RunMediaUnderstandingFileParams,
15+
RunMediaUnderstandingFileResult,
16+
TranscribeAudioFileParams,
17+
} from "./runtime-types.js";
18+
export type {
19+
DescribeImageFileParams,
20+
DescribeImageFileWithModelParams,
21+
DescribeVideoFileParams,
22+
RunMediaUnderstandingFileParams,
23+
RunMediaUnderstandingFileResult,
24+
TranscribeAudioFileParams,
25+
} from "./runtime-types.js";
1226

1327
type MediaUnderstandingCapability = "image" | "audio" | "video";
1428
type MediaUnderstandingOutput = Awaited<ReturnType<typeof runCapability>>["outputs"][number];
@@ -19,22 +33,6 @@ const KIND_BY_CAPABILITY: Record<MediaUnderstandingCapability, MediaUnderstandin
1933
video: "video.description",
2034
};
2135

22-
export type RunMediaUnderstandingFileParams = {
23-
capability: MediaUnderstandingCapability;
24-
filePath: string;
25-
cfg: OpenClawConfig;
26-
agentDir?: string;
27-
mime?: string;
28-
activeModel?: ActiveMediaModel;
29-
};
30-
31-
export type RunMediaUnderstandingFileResult = {
32-
text: string | undefined;
33-
provider?: string;
34-
model?: string;
35-
output?: MediaUnderstandingOutput;
36-
};
37-
3836
function buildFileContext(params: { filePath: string; mime?: string }) {
3937
return {
4038
MediaPath: params.filePath,
@@ -92,27 +90,13 @@ export async function runMediaUnderstandingFile(
9290
}
9391
}
9492

95-
export async function describeImageFile(params: {
96-
filePath: string;
97-
cfg: OpenClawConfig;
98-
agentDir?: string;
99-
mime?: string;
100-
activeModel?: ActiveMediaModel;
101-
}): Promise<RunMediaUnderstandingFileResult> {
93+
export async function describeImageFile(
94+
params: DescribeImageFileParams,
95+
): Promise<RunMediaUnderstandingFileResult> {
10296
return await runMediaUnderstandingFile({ ...params, capability: "image" });
10397
}
10498

105-
export async function describeImageFileWithModel(params: {
106-
filePath: string;
107-
cfg: OpenClawConfig;
108-
agentDir?: string;
109-
mime?: string;
110-
provider: string;
111-
model: string;
112-
prompt: string;
113-
maxTokens?: number;
114-
timeoutMs?: number;
115-
}) {
99+
export async function describeImageFileWithModel(params: DescribeImageFileWithModelParams) {
116100
const timeoutMs = params.timeoutMs ?? 30_000;
117101
const providerRegistry = buildProviderRegistry(undefined, params.cfg);
118102
const provider = providerRegistry.get(normalizeMediaProviderId(params.provider));
@@ -134,25 +118,15 @@ export async function describeImageFileWithModel(params: {
134118
});
135119
}
136120

137-
export async function describeVideoFile(params: {
138-
filePath: string;
139-
cfg: OpenClawConfig;
140-
agentDir?: string;
141-
mime?: string;
142-
activeModel?: ActiveMediaModel;
143-
}): Promise<RunMediaUnderstandingFileResult> {
121+
export async function describeVideoFile(
122+
params: DescribeVideoFileParams,
123+
): Promise<RunMediaUnderstandingFileResult> {
144124
return await runMediaUnderstandingFile({ ...params, capability: "video" });
145125
}
146126

147-
export async function transcribeAudioFile(params: {
148-
filePath: string;
149-
cfg: OpenClawConfig;
150-
agentDir?: string;
151-
mime?: string;
152-
activeModel?: ActiveMediaModel;
153-
language?: string;
154-
prompt?: string;
155-
}): Promise<{ text: string | undefined }> {
127+
export async function transcribeAudioFile(
128+
params: TranscribeAudioFileParams,
129+
): Promise<{ text: string | undefined }> {
156130
const cfg =
157131
params.language || params.prompt
158132
? {

src/plugins/runtime/types-core.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import type {
44
} from "../../agents/pi-embedded-runtime.types.js";
55
import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
66
import type { LogLevel } from "../../logging/levels.js";
7+
import type { MediaUnderstandingRuntime } from "../../media-understanding/runtime-types.js";
78

89
export type { HeartbeatRunResult };
910

@@ -91,11 +92,11 @@ export type PluginRuntimeCore = {
9192
listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
9293
};
9394
mediaUnderstanding: {
94-
runFile: typeof import("../../media-understanding/runtime.js").runMediaUnderstandingFile;
95-
describeImageFile: typeof import("../../media-understanding/runtime.js").describeImageFile;
96-
describeImageFileWithModel: typeof import("../../media-understanding/runtime.js").describeImageFileWithModel;
97-
describeVideoFile: typeof import("../../media-understanding/runtime.js").describeVideoFile;
98-
transcribeAudioFile: typeof import("../../media-understanding/runtime.js").transcribeAudioFile;
95+
runFile: MediaUnderstandingRuntime["runMediaUnderstandingFile"];
96+
describeImageFile: MediaUnderstandingRuntime["describeImageFile"];
97+
describeImageFileWithModel: MediaUnderstandingRuntime["describeImageFileWithModel"];
98+
describeVideoFile: MediaUnderstandingRuntime["describeVideoFile"];
99+
transcribeAudioFile: MediaUnderstandingRuntime["transcribeAudioFile"];
99100
};
100101
imageGeneration: {
101102
generate: (

0 commit comments

Comments
 (0)