Skip to content

Commit 0a4fb0e

Browse files
committed
feat: implement Programmatic Tool Calling (PTC) with QuickJS sandbox
Adds a code_execution tool that enables AI models to execute multiple tool calls in a single inference round-trip by running JavaScript code in a sandboxed QuickJS environment.

Key features:
- QuickJS-emscripten runtime with Asyncify for async host functions
- Tool bridge exposing all Mux tools under the mux.* namespace
- Static analysis: syntax validation, forbidden patterns, unavailable globals
- TypeScript type generation from Zod schemas for model guidance
- Real-time streaming of nested tool calls to the UI
- Partial results on failure for debuggability
- Resource limits: 64MB memory, 5-minute timeout

UI components:
- CodeExecutionToolCall with collapsible code/console sections
- NestedToolRenderer routing to specialized tool components
- ConsoleOutput for log/warn/error display
- Storybook stories for all states

Gated behind the PROGRAMMATIC_TOOL_CALLING experiment flag, with an optional PROGRAMMATIC_TOOL_CALLING_EXCLUSIVE mode for PTC-only tool availability.

136 tests passing across runtime, static analysis, type generation, streaming aggregator, and tool execution.
1 parent 87a009a commit 0a4fb0e

24 files changed

+1659
-25
lines changed

src/browser/components/Messages/ToolMessage.tsx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { WebFetchToolCall } from "../tools/WebFetchToolCall";
1313
import { BashBackgroundListToolCall } from "../tools/BashBackgroundListToolCall";
1414
import { BashBackgroundTerminateToolCall } from "../tools/BashBackgroundTerminateToolCall";
1515
import { BashOutputToolCall } from "../tools/BashOutputToolCall";
16+
import { CodeExecutionToolCall } from "../tools/CodeExecutionToolCall";
1617
import type {
1718
BashToolArgs,
1819
BashToolResult,
@@ -136,6 +137,15 @@ function isBashOutputTool(toolName: string, args: unknown): args is BashOutputTo
136137
return TOOL_DEFINITIONS.bash_output.schema.safeParse(args).success;
137138
}
138139

140+
/** Arguments of a `code_execution` tool call: the source code string to execute. */
interface CodeExecutionToolArgs {
  code: string;
}

/**
 * Type guard for `code_execution` tool messages.
 *
 * Returns true only when the tool name is exactly "code_execution" and `args`
 * validates against the `code_execution` schema in TOOL_DEFINITIONS. The name
 * check comes first, so the schema is never run for other tools.
 */
function isCodeExecutionTool(toolName: string, args: unknown): args is CodeExecutionToolArgs {
  return (
    toolName === "code_execution" &&
    TOOL_DEFINITIONS.code_execution.schema.safeParse(args).success
  );
}
148+
139149
export const ToolMessage: React.FC<ToolMessageProps> = ({
140150
message,
141151
className,
@@ -329,6 +339,19 @@ export const ToolMessage: React.FC<ToolMessageProps> = ({
329339
);
330340
}
331341

342+
if (isCodeExecutionTool(message.toolName, message.args)) {
343+
return (
344+
<div className={className}>
345+
<CodeExecutionToolCall
346+
args={message.args}
347+
result={message.result as Parameters<typeof CodeExecutionToolCall>[0]["result"]}
348+
status={message.status}
349+
nestedCalls={message.nestedCalls}
350+
/>
351+
</div>
352+
);
353+
}
354+
332355
// Fallback to generic tool call
333356
return (
334357
<div className={className}>
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import React, { useState, useMemo } from "react";
2+
import { CodeIcon, TerminalIcon, CheckCircleIcon, XCircleIcon } from "lucide-react";
3+
import { DetailContent } from "./shared/ToolPrimitives";
4+
import { getStatusDisplay, type ToolStatus } from "./shared/toolUtils";
5+
import { HighlightedCode } from "./shared/HighlightedCode";
6+
import { ConsoleOutputDisplay } from "./shared/ConsoleOutput";
7+
import { NestedToolsContainer } from "./shared/NestedToolsContainer";
8+
import type { CodeExecutionResult, NestedToolCall } from "./shared/codeExecutionTypes";
9+
import { cn } from "@/common/lib/utils";
10+
11+
/** Props accepted by the CodeExecutionToolCall component. */
interface CodeExecutionToolCallProps {
  /** Tool arguments; `code` is shown in the collapsible code section. */
  args: { code: string };
  /** Execution result, once one is available. */
  result?: CodeExecutionResult;
  /** Status of the tool call; the component treats a missing value as "pending". */
  status?: ToolStatus;
  /** Nested tool calls received via streaming; rendered between the console and the result. */
  nestedCalls?: NestedToolCall[];
}
18+
19+
// Threshold for auto-collapsing long results (characters)
const LONG_RESULT_THRESHOLD = 200;

/**
 * Renders a `code_execution` tool call as a bordered fieldset containing:
 *  - a collapsible view of the executed code,
 *  - a collapsible console-output section,
 *  - the nested tool calls (when any were provided), and
 *  - once the call has completed or failed, a collapsible result / error section.
 *
 * The result section starts expanded for short results and collapsed for results
 * longer than LONG_RESULT_THRESHOLD characters, until the user toggles it.
 */
export const CodeExecutionToolCall: React.FC<CodeExecutionToolCallProps> = ({
  args,
  result,
  status = "pending",
  nestedCalls,
}) => {
  const [codeExpanded, setCodeExpanded] = useState(false);
  const [consoleExpanded, setConsoleExpanded] = useState(false);
  // `null` means the user has not toggled the result section yet. Keeping only the
  // override in state (rather than seeding useState with !isLongResult) matters because
  // the result usually arrives after the first render: useState reads its initial value
  // only on mount, when no result exists yet, so long results were never auto-collapsed.
  const [resultExpandedOverride, setResultExpandedOverride] = useState<boolean | null>(null);

  // Format result for display: strings are shown verbatim, everything else as pretty JSON.
  // Failed results and successful results without a return value yield null.
  const formattedResult = useMemo(() => {
    if (!result?.success || result.result === undefined) return null;
    return typeof result.result === "string"
      ? result.result
      : JSON.stringify(result.result, null, 2);
  }, [result]);

  // Short results are expanded by default, long ones collapsed; an explicit user toggle wins.
  const isLongResult = formattedResult ? formattedResult.length > LONG_RESULT_THRESHOLD : false;
  const resultExpanded = resultExpandedOverride ?? !isLongResult;

  // Nested calls come solely from the `nestedCalls` prop; without it nothing is rendered.
  const toolCalls = nestedCalls ?? [];
  const consoleOutput = result?.consoleOutput ?? [];
  const hasToolCalls = toolCalls.length > 0;
  const isComplete = status === "completed" || status === "failed";

  return (
    <fieldset className="flex flex-col gap-3 rounded-lg border border-dashed border-white/20 px-3 pt-2 pb-3">
      {/* Legend title with status - sits on the border */}
      <legend className="flex items-center gap-2 px-2">
        <span className="text-foreground text-sm font-medium">Code Execution</span>
        <span className="text-muted text-xs">{getStatusDisplay(status)}</span>
      </legend>

      {/* Code - collapsible toggle */}
      <div>
        <button
          type="button"
          aria-expanded={codeExpanded}
          onClick={() => setCodeExpanded((open) => !open)}
          className="text-muted hover:text-foreground flex items-center gap-1.5 text-xs transition-colors"
        >
          <span
            className={cn(
              "text-[10px] transition-transform duration-150",
              codeExpanded && "rotate-90"
            )}
          >
            ▶
          </span>
          <CodeIcon className="h-3 w-3" />
          <span>Show code</span>
        </button>
        {codeExpanded && (
          <div className="mt-2 rounded border border-white/10 bg-black/20 p-2">
            <HighlightedCode language="javascript" code={args.code} />
          </div>
        )}
      </div>

      {/* Console Output - collapsible toggle */}
      <div>
        <button
          type="button"
          aria-expanded={consoleExpanded}
          onClick={() => setConsoleExpanded((open) => !open)}
          className="text-muted hover:text-foreground flex items-center gap-1.5 text-xs transition-colors"
        >
          <span
            className={cn(
              "text-[10px] transition-transform duration-150",
              consoleExpanded && "rotate-90"
            )}
          >
            ▶
          </span>
          <TerminalIcon className="h-3 w-3" />
          <span>Console output</span>
          {consoleOutput.length > 0 && <span className="text-muted">({consoleOutput.length})</span>}
        </button>
        {consoleExpanded && (
          <div className="mt-2 rounded border border-white/10 bg-black/20 p-2">
            {consoleOutput.length > 0 ? (
              <ConsoleOutputDisplay output={consoleOutput} />
            ) : (
              <span className="text-muted text-xs italic">No output</span>
            )}
          </div>
        )}
      </div>

      {/* Nested tool calls - stream in the middle */}
      {hasToolCalls && <NestedToolsContainer calls={toolCalls} />}

      {/* Result/Error - shown when complete */}
      {isComplete && result && (
        <div>
          <button
            type="button"
            aria-expanded={resultExpanded}
            onClick={() => setResultExpandedOverride(!resultExpanded)}
            className={cn(
              "flex items-center gap-1.5 text-xs transition-colors",
              result.success
                ? "text-green-400 hover:text-green-300"
                : "text-red-400 hover:text-red-300"
            )}
          >
            <span
              className={cn(
                "text-[10px] transition-transform duration-150",
                resultExpanded && "rotate-90"
              )}
            >
              ▶
            </span>
            {result.success ? (
              <CheckCircleIcon className="h-3 w-3" />
            ) : (
              <XCircleIcon className="h-3 w-3" />
            )}
            <span>{result.success ? "Result" : "Error"}</span>
          </button>
          {resultExpanded &&
            (result.success ? (
              formattedResult ? (
                <DetailContent className="mt-2 p-2">{formattedResult}</DetailContent>
              ) : (
                <div className="text-muted mt-2 text-xs italic">(no return value)</div>
              )
            ) : (
              <DetailContent className="mt-2 border border-red-500/30 bg-red-500/10 p-2 text-red-400">
                {result.error}
              </DetailContent>
            ))}
        </div>
      )}
    </fieldset>
  );
};
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import React from "react";
2+
import type { ConsoleRecord } from "./codeExecutionTypes";
3+
4+
/** Props accepted by the ConsoleOutputDisplay component. */
interface ConsoleOutputDisplayProps {
  /** Console records to render, one row per record, in array order. */
  output: ConsoleRecord[];
}
7+
8+
// Use CSS variables from globals.css
const levelStyles: Record<string, React.CSSProperties> = {
  log: { color: "var(--color-muted-foreground)" },
  warn: { color: "var(--color-warning, #f59e0b)" },
  error: { color: "var(--color-error, #ef4444)" },
};

/**
 * Converts one console argument to display text without ever throwing.
 *
 * Strings are returned verbatim; other primitives and functions go through String().
 * Objects and arrays are JSON-encoded. JSON.stringify throws on circular structures and
 * BigInt members and can return undefined (e.g. a toJSON() that yields undefined), so
 * both cases fall back to the object's type tag such as "[object Object]".
 */
function formatConsoleArg(arg: unknown): string {
  if (typeof arg === "string") return arg;
  if (arg === null || typeof arg !== "object") {
    // null, undefined, number, boolean, bigint, symbol, function
    return String(arg);
  }
  try {
    const encoded: string | undefined = JSON.stringify(arg);
    return encoded ?? Object.prototype.toString.call(arg);
  } catch {
    return Object.prototype.toString.call(arg);
  }
}

/**
 * Renders console records as monospace rows of the form `[level] arg arg ...`,
 * colored by level. Levels without an entry in `levelStyles` get no inline color.
 */
export const ConsoleOutputDisplay: React.FC<ConsoleOutputDisplayProps> = ({ output }) => {
  return (
    <div className="space-y-0.5 font-mono text-[11px]">
      {output.map((record, i) => (
        <div key={i} className="flex gap-2" style={levelStyles[record.level]}>
          <span className="opacity-60">[{record.level}]</span>
          <span>
            {record.args.map((arg, j) => (
              <span key={j}>
                {formatConsoleArg(arg)}
                {j < record.args.length - 1 ? " " : ""}
              </span>
            ))}
          </span>
        </div>
      ))}
    </div>
  );
};
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import React from "react";
2+
import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions";
3+
import type { ToolStatus } from "./toolUtils";
4+
import { GenericToolCall } from "../GenericToolCall";
5+
import { BashToolCall } from "../BashToolCall";
6+
import { FileEditToolCall } from "../FileEditToolCall";
7+
import { FileReadToolCall } from "../FileReadToolCall";
8+
import { WebFetchToolCall } from "../WebFetchToolCall";
9+
import type {
10+
BashToolArgs,
11+
BashToolResult,
12+
FileReadToolArgs,
13+
FileReadToolResult,
14+
FileEditReplaceStringToolArgs,
15+
FileEditReplaceStringToolResult,
16+
FileEditInsertToolArgs,
17+
FileEditInsertToolResult,
18+
WebFetchToolArgs,
19+
WebFetchToolResult,
20+
} from "@/common/types/tools";
21+
22+
/** Props accepted by the NestedToolRenderer component. */
interface NestedToolRendererProps {
  /** Tool name, with or without the "mux." prefix. */
  toolName: string;
  /** Raw tool input; validated against the tool's schema before a specialized component is used. */
  input: unknown;
  /** Raw tool output, if any; passed through to the chosen component as its result. */
  output?: unknown;
  /** Status forwarded to the chosen component. */
  status: ToolStatus;
}
28+
29+
/**
 * Returns the tool name without a leading "mux." prefix.
 * The PTC bridge exposes tools as mux.bash, mux.file_read, etc.
 * Only one leading prefix is removed; names without it are returned unchanged.
 */
function normalizeToolName(toolName: string): string {
  const prefix = "mux.";
  if (!toolName.startsWith(prefix)) {
    return toolName;
  }
  return toolName.substring(prefix.length);
}
36+
37+
// Type guards - reuse schemas from TOOL_DEFINITIONS for validation
38+
/** True when the (prefix-stripped) tool name is "bash" and `args` passes the bash schema. */
function isBashTool(toolName: string, args: unknown): args is BashToolArgs {
  return (
    normalizeToolName(toolName) === "bash" && TOOL_DEFINITIONS.bash.schema.safeParse(args).success
  );
}
42+
43+
/** True when the (prefix-stripped) tool name is "file_read" and `args` passes the file_read schema. */
function isFileReadTool(toolName: string, args: unknown): args is FileReadToolArgs {
  return (
    normalizeToolName(toolName) === "file_read" &&
    TOOL_DEFINITIONS.file_read.schema.safeParse(args).success
  );
}
47+
48+
/**
 * True when the (prefix-stripped) tool name is "file_edit_replace_string" and `args`
 * passes the file_edit_replace_string schema.
 */
function isFileEditReplaceStringTool(
  toolName: string,
  args: unknown
): args is FileEditReplaceStringToolArgs {
  return (
    normalizeToolName(toolName) === "file_edit_replace_string" &&
    TOOL_DEFINITIONS.file_edit_replace_string.schema.safeParse(args).success
  );
}
55+
56+
/**
 * True when the (prefix-stripped) tool name is "file_edit_insert" and `args`
 * passes the file_edit_insert schema.
 */
function isFileEditInsertTool(toolName: string, args: unknown): args is FileEditInsertToolArgs {
  return (
    normalizeToolName(toolName) === "file_edit_insert" &&
    TOOL_DEFINITIONS.file_edit_insert.schema.safeParse(args).success
  );
}
60+
61+
/** True when the (prefix-stripped) tool name is "web_fetch" and `args` passes the web_fetch schema. */
function isWebFetchTool(toolName: string, args: unknown): args is WebFetchToolArgs {
  return (
    normalizeToolName(toolName) === "web_fetch" &&
    TOOL_DEFINITIONS.web_fetch.schema.safeParse(args).success
  );
}
65+
66+
/**
 * Picks the component used to display a nested PTC tool call.
 *
 * Each guard checks the tool name (ignoring a "mux." prefix) and validates `input`
 * against that tool's schema. The first match renders the specialized component with
 * `output` cast to the matching result type. Anything else, including a known tool
 * whose input fails validation, is shown with GenericToolCall.
 */
export const NestedToolRenderer: React.FC<NestedToolRendererProps> = ({
  toolName,
  input,
  output,
  status,
}) => {
  // bash
  if (isBashTool(toolName, input)) {
    const bashResult = output as BashToolResult | undefined;
    return <BashToolCall args={input} result={bashResult} status={status} />;
  }

  // file_read
  if (isFileReadTool(toolName, input)) {
    const readResult = output as FileReadToolResult | undefined;
    return <FileReadToolCall args={input} result={readResult} status={status} />;
  }

  // file_edit_replace_string
  if (isFileEditReplaceStringTool(toolName, input)) {
    const replaceResult = output as FileEditReplaceStringToolResult | undefined;
    return (
      <FileEditToolCall
        toolName="file_edit_replace_string"
        args={input}
        result={replaceResult}
        status={status}
      />
    );
  }

  // file_edit_insert
  if (isFileEditInsertTool(toolName, input)) {
    const insertResult = output as FileEditInsertToolResult | undefined;
    return (
      <FileEditToolCall
        toolName="file_edit_insert"
        args={input}
        result={insertResult}
        status={status}
      />
    );
  }

  // web_fetch
  if (isWebFetchTool(toolName, input)) {
    const fetchResult = output as WebFetchToolResult | undefined;
    return <WebFetchToolCall args={input} result={fetchResult} status={status} />;
  }

  // No specialized component matched: display generically under the prefix-stripped name.
  const displayName = normalizeToolName(toolName);
  return <GenericToolCall toolName={displayName} args={input} result={output} status={status} />;
};

0 commit comments

Comments
 (0)