Unverified 提交 ed3e1350 authored 作者: Will Chen's avatar Will Chen 提交者: GitHub

web crawl/clone (#2101)

<!-- CURSOR_SUMMARY --> > [!NOTE] > Introduces website crawling for cloning workflows and updates tool I/O to support multimodal follow-ups while keeping tool results text-only for model compatibility. > > - Adds `web_crawl` tool (consent preview, XML marker, Dyad Engine call requiring Dyad Pro API key) that returns HTML/markdown/screenshot and appends cloning instructions plus screenshot and content snippets via `appendUserMessage` > - Updates tool system: new `ToolResult` types, `UserMessageContentPart`, and conversion to AI SDK V3 (`convertToolResultForAiSdk`); tool `execute` now returns `ToolResult` > - Enhances streaming: handler tracks `pendingUserMessages` and injects them between steps via `prepareStep` to deliver images/content to the model > - Registers `web_crawl` in `TOOL_DEFINITIONS`; e2e snapshot updated to include the new tool > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 0205e38c2f067a1c2307b17a1397e4dcf3ce422d. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Adds a web_crawl agent tool to crawl a URL and return page content and a screenshot for cloning. Tool results are structured text, and the screenshot is injected as a follow-up user image for multimodal models. - New Features - Added web_crawl tool with consent preview and XML marker; calls Dyad Engine /tools/web-crawl. - Returns markdown and HTML plus a screenshot URL, with clear placeholder image instructions for replication. - Introduced ToolResult (text-only) and convertToolResultForAiSdk to LanguageModelV3; updated tool execution to use it. - Added appendUserMessage and step injection to include images after tool results. - Registered the tool in TOOL_DEFINITIONS with default consent set to ask; requires a Dyad Pro API key in settings. 
<sup>Written for commit 0205e38c2f067a1c2307b17a1397e4dcf3ce422d. Summary will update on new commits.</sup> <!-- End of auto-generated description by cubic. -->
上级 ba6ff5ca
...@@ -303,6 +303,27 @@ ...@@ -303,6 +303,27 @@
"$schema": "http://json-schema.org/draft-07/schema#" "$schema": "http://json-schema.org/draft-07/schema#"
} }
} }
},
{
"type": "function",
"function": {
"name": "web_crawl",
"description": "\nYou can crawl a website so you can clone it.\n\n### When You MUST Trigger a Crawl\nTrigger a crawl ONLY if BOTH conditions are true:\n\n1. The user's message shows intent to CLONE / COPY / REPLICATE / RECREATE / DUPLICATE / MIMIC a website.\n - Keywords include: clone, copy, replicate, recreate, duplicate, mimic, build the same, make the same.\n\n2. The user's message contains a URL or something that appears to be a domain name.\n - e.g. \"example.com\", \"https://example.com\"\n - Do not require 'http://' or 'https://'.\n",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "URL to crawl"
}
},
"required": [
"url"
],
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
}
} }
], ],
"tool_choice": "auto", "tool_choice": "auto",
......
...@@ -45,6 +45,7 @@ import { ...@@ -45,6 +45,7 @@ import {
parsePartialJson, parsePartialJson,
escapeXmlAttr, escapeXmlAttr,
escapeXmlContent, escapeXmlContent,
UserMessageContentPart,
} from "./tools/types"; } from "./tools/types";
import { TOOL_DEFINITIONS } from "./tool_definitions"; import { TOOL_DEFINITIONS } from "./tool_definitions";
import { parseAiMessagesJson } from "@/ipc/utils/ai_messages_utils"; import { parseAiMessagesJson } from "@/ipc/utils/ai_messages_utils";
...@@ -140,6 +141,9 @@ export async function handleLocalAgentStream( ...@@ -140,6 +141,9 @@ export async function handleLocalAgentStream(
let fullResponse = ""; let fullResponse = "";
let streamingPreview = ""; // Temporary preview for current tool, not persisted let streamingPreview = ""; // Temporary preview for current tool, not persisted
// Track pending user messages to inject after tool results
const pendingUserMessages: UserMessageContentPart[][] = [];
try { try {
// Get model client // Get model client
const { modelClient } = await getModelClient( const { modelClient } = await getModelClient(
...@@ -185,6 +189,9 @@ export async function handleLocalAgentStream( ...@@ -185,6 +189,9 @@ export async function handleLocalAgentStream(
inputPreview: params.inputPreview, inputPreview: params.inputPreview,
}); });
}, },
appendUserMessage: (content: UserMessageContentPart[]) => {
pendingUserMessages.push(content);
},
}; };
// Build tool set (agent tools + MCP tools) // Build tool set (agent tools + MCP tools)
...@@ -219,6 +226,29 @@ export async function handleLocalAgentStream( ...@@ -219,6 +226,29 @@ export async function handleLocalAgentStream(
tools: allTools, tools: allTools,
stopWhen: stepCountIs(25), // Allow multiple tool call rounds stopWhen: stepCountIs(25), // Allow multiple tool call rounds
abortSignal: abortController.signal, abortSignal: abortController.signal,
// Inject pending user messages (e.g., images from web_crawl) between steps.
// Tools queue content through `appendUserMessage`; this callback drains that
// queue by appending one user message per queued entry to the step's messages.
// NOTE(review): the queue is emptied after a single injection. If the SDK
// rebuilds `messages` for every step from its own history, the injected
// content would only be visible to the step that follows the tool call —
// confirm against the AI SDK `prepareStep` contract.
prepareStep: ({ messages, ...rest }) => {
// Nothing queued: return undefined so the step options are left untouched.
if (pendingUserMessages.length === 0) {
return undefined;
}
// Build user messages from pending content
const newMessages = [...messages];
for (const content of pendingUserMessages) {
newMessages.push({
role: "user" as const,
content: content.map((part) => {
if (part.type === "text") {
return { type: "text" as const, text: part.text };
}
// part.type === "image-url"
// `new URL` throws a TypeError when `part.url` is not an absolute URL.
return { type: "image" as const, image: new URL(part.url) };
}),
});
}
// Clear pending messages after injection
pendingUserMessages.length = 0;
return { messages: newMessages, ...rest };
},
onFinish: async (response) => { onFinish: async (response) => {
const totalTokens = response.usage?.totalTokens; const totalTokens = response.usage?.totalTokens;
const inputTokens = response.usage?.inputTokens; const inputTokens = response.usage?.inputTokens;
......
...@@ -20,11 +20,14 @@ import { addIntegrationTool } from "./tools/add_integration"; ...@@ -20,11 +20,14 @@ import { addIntegrationTool } from "./tools/add_integration";
import { readLogsTool } from "./tools/read_logs"; import { readLogsTool } from "./tools/read_logs";
import { editFileTool } from "./tools/edit_file"; import { editFileTool } from "./tools/edit_file";
import { webSearchTool } from "./tools/web_search"; import { webSearchTool } from "./tools/web_search";
import { webCrawlTool } from "./tools/web_crawl";
import type { LanguageModelV3ToolResultOutput } from "@ai-sdk/provider";
import { import {
escapeXmlAttr, escapeXmlAttr,
escapeXmlContent, escapeXmlContent,
type ToolDefinition, type ToolDefinition,
type AgentContext, type AgentContext,
type ToolResult,
} from "./tools/types"; } from "./tools/types";
import type { AgentToolConsent } from "@/ipc/ipc_types"; import type { AgentToolConsent } from "@/ipc/ipc_types";
import { getSupabaseClientCode } from "@/supabase_admin/supabase_context"; import { getSupabaseClientCode } from "@/supabase_admin/supabase_context";
...@@ -45,6 +48,7 @@ export const TOOL_DEFINITIONS: readonly ToolDefinition[] = [ ...@@ -45,6 +48,7 @@ export const TOOL_DEFINITIONS: readonly ToolDefinition[] = [
addIntegrationTool, addIntegrationTool,
readLogsTool, readLogsTool,
webSearchTool, webSearchTool,
webCrawlTool,
]; ];
// ============================================================================ // ============================================================================
// Agent Tool Name Type (derived from TOOL_DEFINITIONS) // Agent Tool Name Type (derived from TOOL_DEFINITIONS)
...@@ -227,6 +231,18 @@ async function processArgPlaceholders<T extends Record<string, any>>( ...@@ -227,6 +231,18 @@ async function processArgPlaceholders<T extends Record<string, any>>(
return processValue(args) as T; return processValue(args) as T;
} }
/**
 * Wrap a tool's result in the output shape the AI SDK expects.
 *
 * Only string results are supported; they are returned as a `text` output.
 *
 * @param result - Value returned by a tool's `execute` function.
 * @returns A `{ type: "text", value }` tool result output.
 * @throws Error when `result` is not a string at runtime.
 */
function convertToolResultForAiSdk(
  result: ToolResult,
): LanguageModelV3ToolResultOutput {
  // Reject anything that is not a string up front.
  if (typeof result !== "string") {
    throw new Error(`Unsupported tool result type: ${typeof result}`);
  }
  return { type: "text", value: result };
}
/** /**
* Build ToolSet for AI SDK from tool definitions * Build ToolSet for AI SDK from tool definitions
*/ */
...@@ -255,7 +271,8 @@ export function buildAgentToolSet(ctx: AgentContext) { ...@@ -255,7 +271,8 @@ export function buildAgentToolSet(ctx: AgentContext) {
throw new Error(`User denied permission for ${tool.name}`); throw new Error(`User denied permission for ${tool.name}`);
} }
return await tool.execute(processedArgs, ctx); const result = await tool.execute(processedArgs, ctx);
return convertToolResultForAiSdk(result);
} catch (error) { } catch (error) {
const errorMessage = const errorMessage =
error instanceof Error ? error.message : String(error); error instanceof Error ? error.message : String(error);
......
...@@ -47,6 +47,12 @@ export interface AgentContext { ...@@ -47,6 +47,12 @@ export interface AgentContext {
toolDescription?: string | null; toolDescription?: string | null;
inputPreview?: string | null; inputPreview?: string | null;
}) => Promise<boolean>; }) => Promise<boolean>;
/**
* Append a user message to be sent after the tool result.
* Use this when the tool needs to provide non-text content (like images)
* that models don't support in tool result messages.
*/
appendUserMessage: (content: UserMessageContentPart[]) => void;
} }
// ============================================================================ // ============================================================================
...@@ -73,6 +79,23 @@ export function parsePartialJson<T extends Record<string, unknown>>( ...@@ -73,6 +79,23 @@ export function parsePartialJson<T extends Record<string, unknown>>(
} }
} }
// ============================================================================
// Tool Result Types
// ============================================================================
/**
* Content part of a follow-up user message.
*
* A part is either plain text (`type: "text"`) or an image referenced by URL
* (`type: "image-url"`). These parts can be appended as follow-up user
* messages after tool results.
*/
export type UserMessageContentPart =
| { type: "text"; text: string }
| { type: "image-url"; url: string };
/**
* Value returned by a tool's `execute` function.
*
* Currently this is always a plain string; no structured variant is defined.
*/
export type ToolResult = string;
// ============================================================================ // ============================================================================
// Tool Definition Interface // Tool Definition Interface
// ============================================================================ // ============================================================================
...@@ -82,7 +105,7 @@ export interface ToolDefinition<T = any> { ...@@ -82,7 +105,7 @@ export interface ToolDefinition<T = any> {
readonly description: string; readonly description: string;
readonly inputSchema: z.ZodType<T>; readonly inputSchema: z.ZodType<T>;
readonly defaultConsent: AgentToolConsent; readonly defaultConsent: AgentToolConsent;
execute: (args: T, ctx: AgentContext) => Promise<string>; execute: (args: T, ctx: AgentContext) => Promise<ToolResult>;
/** /**
* If defined, returns whether the tool should be available in the current context. * If defined, returns whether the tool should be available in the current context.
......
import { z } from "zod";
import log from "electron-log";
import { ToolDefinition, escapeXmlContent } from "./types";
import { readSettings } from "@/main/settings";
// Logger scoped to "web_crawl" for this tool's messages.
const logger = log.scope("web_crawl");
// Base URL of the Dyad Engine API. The DYAD_ENGINE_URL environment variable
// takes precedence over the default whenever it is defined.
const DYAD_ENGINE_URL =
process.env.DYAD_ENGINE_URL ?? "https://engine.dyad.sh/v1";
// Input schema for the tool: a single `url` string. No URL-format validation
// is applied here.
const webCrawlSchema = z.object({
url: z.string().describe("URL to crawl"),
});
// Expected shape of the crawl endpoint's JSON response. Only `rootUrl` is
// required by the schema; `html`, `markdown` and `screenshot` are optional
// here and are checked for presence by the tool's `execute` function.
const webCrawlResponseSchema = z.object({
rootUrl: z.string(),
html: z.string().optional(),
markdown: z.string().optional(),
screenshot: z.string().optional(),
});
// Tool description used as `webCrawlTool.description`. It tells the model when
// to trigger a crawl: clone intent AND a URL or domain in the user's message.
// This text is a runtime prompt; edits change model behavior.
const DESCRIPTION = `
You can crawl a website so you can clone it.
### When You MUST Trigger a Crawl
Trigger a crawl ONLY if BOTH conditions are true:
1. The user's message shows intent to CLONE / COPY / REPLICATE / RECREATE / DUPLICATE / MIMIC a website.
- Keywords include: clone, copy, replicate, recreate, duplicate, mimic, build the same, make the same.
2. The user's message contains a URL or something that appears to be a domain name.
- e.g. "example.com", "https://example.com"
- Do not require 'http://' or 'https://'.
`;
// Instructions sent as the first text part of the follow-up user message after
// a crawl. They tell the model to replicate the site from the HTML, markdown
// and screenshot, and to replace every image with a local placeholder SVG.
// This text is a runtime prompt; edits change model behavior.
const CLONE_INSTRUCTIONS = `
Replicate the website from the provided HTML, markdown, and screenshot.
**Use the screenshot as your primary visual reference** to understand the layout, colors, typography, and overall design of the website. The screenshot shows exactly how the page should look.
**IMPORTANT: Image Handling**
- Do NOT use or reference real external image URLs.
- Instead, create a file named "placeholder.svg" at "/public/assets/placeholder.svg".
- The file must be included in the output as its own code block.
- The SVG should be a simple neutral gray rectangle, like:
\`\`\`svg
<svg width="400" height="300" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="#e2e2e2"/>
</svg>
\`\`\`
**When generating code:**
- Replace all \`<img src="...">\` with: \`<img src="/assets/placeholder.svg" alt="placeholder" />\`
- If using Next.js Image component: \`<Image src="/assets/placeholder.svg" alt="placeholder" width={400} height={300} />\`
Always include the placeholder.svg file in your output file tree.
`;
/**
 * Ask the Dyad Engine to crawl `url` and return the validated response.
 *
 * The API key is read from `providerSettings.auto.apiKey.value` in the app
 * settings and sent as a bearer token.
 *
 * @param url - Address to crawl; forwarded to the engine as given.
 * @returns The response body parsed with `webCrawlResponseSchema`.
 * @throws Error when no API key is configured, or when the engine responds
 *   with a non-OK status (the message includes status and response text).
 *   Schema validation failures from `parse` also propagate.
 */
async function callWebCrawl(
  url: string,
): Promise<z.infer<typeof webCrawlResponseSchema>> {
  const apiKey = readSettings().providerSettings?.auto?.apiKey?.value;
  if (!apiKey) {
    throw new Error("Dyad Pro API key is required for web_crawl tool");
  }

  const endpoint = `${DYAD_ENGINE_URL}/tools/web-crawl`;
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
  };
  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify({ url }),
  });

  // Success path first: validate the JSON body and hand it back.
  if (response.ok) {
    const payload: unknown = await response.json();
    return webCrawlResponseSchema.parse(payload);
  }

  // Failure path: include the engine's response text to help with debugging.
  const errorText = await response.text();
  throw new Error(
    `Web crawl failed: ${response.status} ${response.statusText} - ${errorText}`,
  );
}
/**
 * Agent tool definition for `web_crawl`.
 *
 * `execute` crawls the given URL through `callWebCrawl` and requires the
 * response to contain markdown, HTML and a screenshot. It queues a follow-up
 * user message (clone instructions, the screenshot as an image URL, and
 * truncated markdown/HTML snippets) through `ctx.appendUserMessage`, then
 * returns a short text result.
 */
export const webCrawlTool: ToolDefinition<z.infer<typeof webCrawlSchema>> = {
  name: "web_crawl",
  description: DESCRIPTION,
  inputSchema: webCrawlSchema,
  defaultConsent: "ask",

  // Text shown to the user when asking for consent to run the crawl.
  getConsentPreview: (args) => `Crawl URL: "${args.url}"`,

  // XML marker for the call; the closing tag is added only once the
  // arguments are complete.
  buildXml: (args, isComplete) => {
    if (!args.url) return undefined;
    const openTag = `<dyad-web-crawl>${escapeXmlContent(args.url)}`;
    return isComplete ? `${openTag}</dyad-web-crawl>` : openTag;
  },

  execute: async (args, ctx) => {
    logger.log(`Executing web crawl: ${args.url}`);

    const crawl = await callWebCrawl(args.url);
    if (!crawl) {
      throw new Error("Web crawl returned no results");
    }

    // All three artifacts are required for the cloning workflow.
    const { markdown, html, screenshot } = crawl;
    if (!markdown) {
      throw new Error("No content available from web crawl");
    }
    if (!html) {
      throw new Error("No HTML available from web crawl");
    }
    if (!screenshot) {
      throw new Error("No screenshot available from web crawl");
    }

    logger.log(`Web crawl completed for URL: ${args.url}`);

    const markdownSnippet = formatSnippet(
      "Markdown snapshot:",
      markdown,
      "markdown",
    );
    const htmlSnippet = formatSnippet("HTML snapshot:", html, "html");

    // The screenshot and page content are queued as a follow-up user message
    // rather than returned in the tool result, which stays plain text.
    ctx.appendUserMessage([
      { type: "text", text: CLONE_INSTRUCTIONS },
      { type: "image-url", url: screenshot },
      { type: "text", text: markdownSnippet },
      { type: "text", text: htmlSnippet },
    ]);

    return "Web crawl completed.";
  },
};
/** Maximum number of UTF-16 code units of snippet text kept before truncation. */
const MAX_TEXT_SNIPPET_LENGTH = 16_000;

/**
 * Format a snippet as a labelled, fenced code block, truncating long values.
 *
 * The heading always ends in exactly one colon: trailing colons (and any
 * whitespace after them) already present on `label` are dropped first, so
 * both `"HTML snapshot"` and `"HTML snapshot:"` produce `"HTML snapshot:"`.
 *
 * @param label - Heading placed above the code fence.
 * @param value - Snippet text; truncated to `MAX_TEXT_SNIPPET_LENGTH`.
 * @param lang - Language tag placed after the opening fence.
 * @returns The heading followed by the fenced, possibly truncated, snippet.
 */
export function formatSnippet(
  label: string,
  value: string,
  lang: string,
): string {
  // Callers may pass a label that already ends in ":"; avoid emitting "::".
  const heading = label.replace(/:+\s*$/, "");
  return `${heading}:\n\`\`\`${lang}\n${truncateText(value)}\n\`\`\``;
}

/**
 * Cut `value` down to at most `MAX_TEXT_SNIPPET_LENGTH` UTF-16 code units and
 * append a truncation marker. Values within the limit are returned unchanged.
 */
function truncateText(value: string): string {
  if (value.length <= MAX_TEXT_SNIPPET_LENGTH) return value;
  let end = MAX_TEXT_SNIPPET_LENGTH;
  // Do not cut a surrogate pair in half: if the last kept code unit is a high
  // surrogate, drop it so the output never ends in a lone surrogate.
  const lastUnit = value.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }
  return `${value.slice(0, end)}\n<!-- truncated -->`;
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论