web crawl/clone (#2101)

> [!NOTE]
> Introduces website crawling for cloning workflows and updates tool I/O to support multimodal follow-ups while keeping tool results text-only for model compatibility.
>
> - Adds `web_crawl` tool (consent preview, XML marker, Dyad Engine call requiring Dyad Pro API key) that returns HTML/markdown/screenshot and appends cloning instructions plus screenshot and content snippets via `appendUserMessage`
> - Updates tool system: new `ToolResult` types, `UserMessageContentPart`, and conversion to AI SDK V3 (`convertToolResultForAiSdk`); tool `execute` now returns `ToolResult`
> - Enhances streaming: handler tracks `pendingUserMessages` and injects them between steps via `prepareStep` to deliver images/content to the model
> - Registers `web_crawl` in `TOOL_DEFINITIONS`; e2e snapshot updated to include the new tool
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 0205e38c2f067a1c2307b17a1397e4dcf3ce422d. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>

---

## Summary by cubic

Adds a web_crawl agent tool to crawl a URL and return page content and a screenshot for cloning. Tool results are structured text, and the screenshot is injected as a follow-up user image for multimodal models.

- New Features
  - Added web_crawl tool with consent preview and XML marker; calls Dyad Engine /tools/web-crawl.
  - Returns markdown and HTML plus a screenshot URL, with clear placeholder image instructions for replication.
  - Introduced ToolResult (text-only) and convertToolResultForAiSdk to LanguageModelV3; updated tool execution to use it.
  - Added appendUserMessage and step injection to include images after tool results.
  - Registered the tool in TOOL_DEFINITIONS with default consent set to ask; requires a Dyad Pro API key in settings.

<sup>Written for commit 0205e38c2f067a1c2307b17a1397e4dcf3ce422d. Summary will update on new commits.</sup>

web crawl/clone (#2101)
ed3e1350 · Will Chen · GitHub · ba6ff5ca · ed3e1350 · ed3e1350
--- a/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
+++ b/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
@@ -303,6 +303,27 @@
            "$schema": "http://json-schema.org/draft-07/schema#"
          }
        }
+      },
+      {
+        "type": "function",
+        "function": {
+          "name": "web_crawl",
+          "description": "\nYou can crawl a website so you can clone it.\n\n### When You MUST Trigger a Crawl\nTrigger a crawl ONLY if BOTH conditions are true:\n\n1. The user's message shows intent to CLONE / COPY / REPLICATE / RECREATE / DUPLICATE / MIMIC a website.\n   - Keywords include: clone, copy, replicate, recreate, duplicate, mimic, build the same, make the same.\n\n2. The user's message contains a URL or something that appears to be a domain name.\n   - e.g. \"example.com\", \"https://example.com\"\n   - Do not require 'http://' or 'https://'.\n",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "url": {
+                "type": "string",
+                "description": "URL to crawl"
+              }
+            },
+            "required": [
+              "url"
+            ],
+            "additionalProperties": false,
+            "$schema": "http://json-schema.org/draft-07/schema#"
+          }
+        }
      }
    ],
    "tool_choice": "auto",

--- a/src/pro/main/ipc/handlers/local_agent/local_agent_handler.ts
+++ b/src/pro/main/ipc/handlers/local_agent/local_agent_handler.ts
@@ -45,6 +45,7 @@ import {
  parsePartialJson,
  escapeXmlAttr,
  escapeXmlContent,
+  UserMessageContentPart,
 } from "./tools/types";
 import { TOOL_DEFINITIONS } from "./tool_definitions";
 import { parseAiMessagesJson } from "@/ipc/utils/ai_messages_utils";
@@ -140,6 +141,9 @@ export async function handleLocalAgentStream(
  let fullResponse = "";
  let streamingPreview = ""; // Temporary preview for current tool, not persisted
+  // Track pending user messages to inject after tool results
+  const pendingUserMessages: UserMessageContentPart[][] = [];
  try {
    // Get model client
    const { modelClient } = await getModelClient(
@@ -185,6 +189,9 @@ export async function handleLocalAgentStream(
          inputPreview: params.inputPreview,
        });
      },
+      appendUserMessage: (content: UserMessageContentPart[]) => {
+        pendingUserMessages.push(content);
+      },
    };
    // Build tool set (agent tools + MCP tools)
@@ -219,6 +226,29 @@ export async function handleLocalAgentStream(
      tools: allTools,
      stopWhen: stepCountIs(25), // Allow multiple tool call rounds
      abortSignal: abortController.signal,
+      // Between steps, flush any user messages queued by tools (e.g. the
+      // web_crawl screenshot) onto the end of the conversation.
+      // NOTE(review): confirm the SDK carries these injected messages into
+      // later steps as well, not only the step this callback prepares.
+      prepareStep: ({ messages, ...rest }) => {
+        if (pendingUserMessages.length === 0) {
+          return undefined;
+        }
+        // Translate each queued content-part list into a user message.
+        const injected = pendingUserMessages.map((parts) => ({
+          role: "user" as const,
+          content: parts.map((part) =>
+            part.type === "text"
+              ? { type: "text" as const, text: part.text }
+              : // Remaining variant is "image-url"
+                { type: "image" as const, image: new URL(part.url) },
+          ),
+        }));
+        // Drain the queue only once every part has been converted.
+        pendingUserMessages.length = 0;
+        return { messages: [...messages, ...injected], ...rest };
+      },
      onFinish: async (response) => {
        const totalTokens = response.usage?.totalTokens;
        const inputTokens = response.usage?.inputTokens;

--- a/src/pro/main/ipc/handlers/local_agent/tool_definitions.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tool_definitions.ts
@@ -20,11 +20,14 @@ import { addIntegrationTool } from "./tools/add_integration";
 import { readLogsTool } from "./tools/read_logs";
 import { editFileTool } from "./tools/edit_file";
 import { webSearchTool } from "./tools/web_search";
+import { webCrawlTool } from "./tools/web_crawl";
+import type { LanguageModelV3ToolResultOutput } from "@ai-sdk/provider";
 import {
  escapeXmlAttr,
  escapeXmlContent,
  type ToolDefinition,
  type AgentContext,
+  type ToolResult,
 } from "./tools/types";
 import type { AgentToolConsent } from "@/ipc/ipc_types";
 import { getSupabaseClientCode } from "@/supabase_admin/supabase_context";
@@ -45,6 +48,7 @@ export const TOOL_DEFINITIONS: readonly ToolDefinition[] = [
  addIntegrationTool,
  readLogsTool,
  webSearchTool,
+  webCrawlTool,
 ];
 // ============================================================================
 // Agent Tool Name Type (derived from TOOL_DEFINITIONS)
@@ -227,6 +231,18 @@ async function processArgPlaceholders<T extends Record<string, any>>(
  return processValue(args) as T;
 }
+/**
+ * Adapt a tool's result to the AI SDK tool-result output shape.
+ *
+ * Only string results are supported; they are wrapped as a text output.
+ *
+ * @param result Value returned by a tool's `execute`.
+ * @returns A `{ type: "text" }` output carrying the string.
+ * @throws Error if the result is not a string.
+ */
+function convertToolResultForAiSdk(
+  result: ToolResult,
+): LanguageModelV3ToolResultOutput {
+  // Reject anything unexpected up front so the happy path is the fall-through.
+  if (typeof result !== "string") {
+    throw new Error(`Unsupported tool result type: ${typeof result}`);
+  }
+  return { type: "text", value: result };
+}
 /**
 * Build ToolSet for AI SDK from tool definitions
 */
@@ -255,7 +271,8 @@ export function buildAgentToolSet(ctx: AgentContext) {
            throw new Error(`User denied permission for ${tool.name}`);
          }
-          return await tool.execute(processedArgs, ctx);
+          const result = await tool.execute(processedArgs, ctx);
+          return convertToolResultForAiSdk(result);
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);

--- a/src/pro/main/ipc/handlers/local_agent/tools/types.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tools/types.ts
@@ -47,6 +47,12 @@ export interface AgentContext {
    toolDescription?: string | null;
    inputPreview?: string | null;
  }) => Promise<boolean>;
+  /**
+   * Append a user message to be sent after the tool result.
+   * Use this when the tool needs to provide non-text content (like images)
+   * that models don't support in tool result messages.
+   */
+  appendUserMessage: (content: UserMessageContentPart[]) => void;
 }
 // ============================================================================
@@ -73,6 +79,23 @@ export function parsePartialJson<T extends Record<string, unknown>>(
  }
 }
+// ============================================================================
+// Tool Result Types
+// ============================================================================
+/**
+ * Content part types for user messages (supports images)
+ * These can be appended as follow-up user messages after tool results
+ */
+export type UserMessageContentPart =
+  | { type: "text"; text: string }
+  | { type: "image-url"; url: string };
+/**
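+ * Tool result is currently always a plain string. Non-text content (such as
+ * images) is not part of the result; tools queue it as a follow-up user
+ * message through AgentContext.appendUserMessage instead.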
+ */
+export type ToolResult = string;
 // ============================================================================
 // Tool Definition Interface
 // ============================================================================
@@ -82,7 +105,7 @@ export interface ToolDefinition<T = any> {
  readonly description: string;
  readonly inputSchema: z.ZodType<T>;
  readonly defaultConsent: AgentToolConsent;
-  execute: (args: T, ctx: AgentContext) => Promise<string>;
+  execute: (args: T, ctx: AgentContext) => Promise<ToolResult>;
  /**
   * If defined, returns whether the tool should be available in the current context.

--- a/src/pro/main/ipc/handlers/local_agent/tools/web_crawl.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tools/web_crawl.ts
+import { z } from "zod";
+import log from "electron-log";
+import { ToolDefinition, escapeXmlContent } from "./types";
+import { readSettings } from "@/main/settings";
+const logger = log.scope("web_crawl");
+// Base URL of the Dyad Engine API; the DYAD_ENGINE_URL environment variable
+// overrides the built-in default.
+const DYAD_ENGINE_URL =
+  process.env.DYAD_ENGINE_URL ?? "https://engine.dyad.sh/v1";
+// Input schema for the web_crawl tool: a single `url` string.
+const webCrawlSchema = z.object({
+  url: z.string().describe("URL to crawl"),
+});
+// Shape the /tools/web-crawl response body is validated against. Only
+// `rootUrl` is required here; the tool's `execute` separately rejects
+// responses that lack markdown, html, or screenshot.
+const webCrawlResponseSchema = z.object({
+  rootUrl: z.string(),
+  html: z.string().optional(),
+  markdown: z.string().optional(),
+  screenshot: z.string().optional(),
+});
+// Tool description exposed to the model (used as webCrawlTool.description);
+// it spells out the two conditions under which a crawl must be triggered.
+const DESCRIPTION = `
+You can crawl a website so you can clone it.
+### When You MUST Trigger a Crawl
+Trigger a crawl ONLY if BOTH conditions are true:
+1. The user's message shows intent to CLONE / COPY / REPLICATE / RECREATE / DUPLICATE / MIMIC a website.
+   - Keywords include: clone, copy, replicate, recreate, duplicate, mimic, build the same, make the same.
+2. The user's message contains a URL or something that appears to be a domain name.
+   - e.g. "example.com", "https://example.com"
+   - Do not require 'http://' or 'https://'.
+`;
+// Cloning instructions sent as the first text part of the follow-up user
+// message that `execute` queues after a successful crawl.
+const CLONE_INSTRUCTIONS = `
+Replicate the website from the provided HTML, markdown, and screenshot.
+**Use the screenshot as your primary visual reference** to understand the layout, colors, typography, and overall design of the website. The screenshot shows exactly how the page should look.
+**IMPORTANT: Image Handling**
+- Do NOT use or reference real external image URLs.
+- Instead, create a file named "placeholder.svg" at "/public/assets/placeholder.svg".
+- The file must be included in the output as its own code block.
+- The SVG should be a simple neutral gray rectangle, like:
+  \`\`\`svg
+  <svg width="400" height="300" xmlns="http://www.w3.org/2000/svg">
+    <rect width="100%" height="100%" fill="#e2e2e2"/>
+  </svg>
+  \`\`\`
+**When generating code:**
+- Replace all \`<img src="...">\` with: \`<img src="/assets/placeholder.svg" alt="placeholder" />\`
+- If using Next.js Image component: \`<Image src="/assets/placeholder.svg" alt="placeholder" width={400} height={300} />\`
+Always include the placeholder.svg file in your output file tree.
+`;
+/**
+ * POST a URL to the Dyad Engine web-crawl endpoint and return the validated
+ * response payload.
+ *
+ * @param url Address to crawl; forwarded unchanged in the JSON request body.
+ * @returns The response body parsed with `webCrawlResponseSchema`.
+ * @throws Error when no Dyad Pro API key is configured, or when the engine
+ *   answers with a non-OK status (status, status text, and body are included
+ *   in the message). Schema validation failures propagate from zod.
+ */
+async function callWebCrawl(
+  url: string,
+): Promise<z.infer<typeof webCrawlResponseSchema>> {
+  const apiKey = readSettings().providerSettings?.auto?.apiKey?.value;
+  if (!apiKey) {
+    throw new Error("Dyad Pro API key is required for web_crawl tool");
+  }
+  const endpoint = `${DYAD_ENGINE_URL}/tools/web-crawl`;
+  const headers = {
+    "Content-Type": "application/json",
+    Authorization: `Bearer ${apiKey}`,
+  };
+  const response = await fetch(endpoint, {
+    method: "POST",
+    headers,
+    body: JSON.stringify({ url }),
+  });
+  if (response.ok) {
+    return webCrawlResponseSchema.parse(await response.json());
+  }
+  // Surface the engine's own error body to make failures diagnosable.
+  const errorText = await response.text();
+  throw new Error(
+    `Web crawl failed: ${response.status} ${response.statusText} - ${errorText}`,
+  );
+}
+/**
+ * Agent tool that crawls a URL through the Dyad Engine so the site can be
+ * cloned.
+ *
+ * The tool result itself is a short text acknowledgement; the cloning
+ * instructions, the screenshot, and truncated markdown/HTML snapshots are
+ * queued as a follow-up user message via `ctx.appendUserMessage`.
+ */
+export const webCrawlTool: ToolDefinition<z.infer<typeof webCrawlSchema>> = {
+  name: "web_crawl",
+  description: DESCRIPTION,
+  inputSchema: webCrawlSchema,
+  defaultConsent: "ask",
+  getConsentPreview: (args) => `Crawl URL: "${args.url}"`,
+  buildXml: (args, isComplete) => {
+    // No URL yet: nothing to render.
+    if (!args.url) return undefined;
+    const opening = `<dyad-web-crawl>${escapeXmlContent(args.url)}`;
+    // The closing tag is appended only once the call is complete.
+    return isComplete ? `${opening}</dyad-web-crawl>` : opening;
+  },
+  execute: async (args, ctx) => {
+    const { url } = args;
+    logger.log(`Executing web crawl: ${url}`);
+    const crawl = await callWebCrawl(url);
+    if (!crawl) {
+      throw new Error("Web crawl returned no results");
+    }
+    // All three artifacts are required; they are checked in this order.
+    const { markdown, html, screenshot } = crawl;
+    if (!markdown) {
+      throw new Error("No content available from web crawl");
+    }
+    if (!html) {
+      throw new Error("No HTML available from web crawl");
+    }
+    if (!screenshot) {
+      throw new Error("No screenshot available from web crawl");
+    }
+    logger.log(`Web crawl completed for URL: ${url}`);
+    const markdownPart = formatSnippet("Markdown snapshot:", markdown, "markdown");
+    const htmlPart = formatSnippet("HTML snapshot:", html, "html");
+    // Images cannot travel in the tool result, so everything substantive is
+    // queued as a user message instead.
+    ctx.appendUserMessage([
+      { type: "text", text: CLONE_INSTRUCTIONS },
+      { type: "image-url", url: screenshot },
+      { type: "text", text: markdownPart },
+      { type: "text", text: htmlPart },
+    ]);
+    return "Web crawl completed.";
+  },
+};
+// Upper bound, in UTF-16 code units, on snippet text embedded in a message.
+const MAX_TEXT_SNIPPET_LENGTH = 16_000;
+/**
+ * Format a snippet as a labelled fenced code block, truncating long values.
+ *
+ * A trailing colon on `label` is optional: one is stripped before the
+ * separator is added, so the heading always ends in exactly one colon.
+ * (Callers pass e.g. "HTML snapshot:", which used to render as
+ * "HTML snapshot::".)
+ *
+ * @param label Heading placed above the code fence.
+ * @param value Snippet body; cut to MAX_TEXT_SNIPPET_LENGTH when longer.
+ * @param lang Info string placed after the opening fence.
+ * @returns The heading line followed by the fenced, possibly truncated, value.
+ */
+export function formatSnippet(
+  label: string,
+  value: string,
+  lang: string,
+): string {
+  const heading = label.endsWith(":") ? label.slice(0, -1) : label;
+  return `${heading}:\n\`\`\`${lang}\n${truncateText(value)}\n\`\`\``;
+}
+/**
+ * Cap `value` at MAX_TEXT_SNIPPET_LENGTH code units, appending a truncation
+ * marker when anything was cut. Values within the limit are returned as-is.
+ */
+function truncateText(value: string): string {
+  if (value.length <= MAX_TEXT_SNIPPET_LENGTH) return value;
+  let end = MAX_TEXT_SNIPPET_LENGTH;
+  const lastUnit = value.charCodeAt(end - 1);
+  // Back off one unit rather than cut between the halves of a surrogate pair,
+  // which would leave a lone (invalid) high surrogate in the output.
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
+  return `${value.slice(0, end)}\n<!-- truncated -->`;
+}