Web fetching tool (#2920)

Closes #2809

---

<a href="https://app.devin.ai/review/dyad-sh/dyad/pull/2920" target="_blank"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open with Devin"> </picture> </a>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>

Web fetching tool (#2920)
dd1e4881 · Mohamed Aziz Mejri · GitHub · 7db53e5a · dd1e4881 · dd1e4881
--- a/e2e-tests/fixtures/engine/local-agent/web-fetch.ts
+++ b/e2e-tests/fixtures/engine/local-agent/web-fetch.ts
+import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";
+/**
+ * Scripted two-turn conversation for the `web_fetch` tool:
+ * turn 1 emits assistant text plus a single `web_fetch` tool call,
+ * turn 2 emits the closing assistant text with no tool calls.
+ */
+export const fixture: LocalAgentFixture = {
+  description: "Fetch and read web page content using web_fetch tool",
+  turns: [
+    {
+      text: "I'll fetch the content of that page for you.",
+      toolCalls: [
+        {
+          name: "web_fetch",
+          args: {
+            url: "https://example.com/docs/getting-started",
+          },
+        },
+      ],
+    },
+    {
+      text: "Here's a summary of the page content. The getting started guide covers three main items. Let me know if you need more details!",
+    },
+  ],
+};
--- a/e2e-tests/local_agent_web_fetch.spec.ts
+++ b/e2e-tests/local_agent_web_fetch.spec.ts
+import { testSkipIfWindows } from "./helpers/test_helper";
+/**
+ * E2E test for web_fetch tool in local-agent mode
+ * Tests fetching and reading web page content as markdown
+ * Note: web_fetch has defaultConsent: "always", so no consent flow is tested
+ */
+testSkipIfWindows("local-agent - web fetch", async ({ po }) => {
+  // Set up Dyad Pro with the local agent enabled.
+  await po.setUpDyadPro({ localAgent: true });
+  await po.importApp("minimal");
+  await po.chatActions.selectLocalAgentMode();
+  // NOTE(review): "tc=local-agent/web-fetch" presumably selects the fixture at
+  // e2e-tests/fixtures/engine/local-agent/web-fetch.ts — confirm against the fake LLM server.
+  await po.sendPrompt("tc=local-agent/web-fetch");
+  // Snapshot the chat messages (the call takes no arguments here).
+  await po.snapshotMessages();
+});
--- a/e2e-tests/snapshots/local_agent_ask.spec.ts_local-agent-ask-mode-1.txt
+++ b/e2e-tests/snapshots/local_agent_ask.spec.ts_local-agent-ask-mode-1.txt
@@ -276,6 +276,27 @@
          }
        }
      },
+      {
+        "type": "function",
+        "function": {
+          "name": "web_fetch",
+          "description": "Fetch and read the content of a web page as markdown given its URL.\n\n### When to Use This Tool\nUse this tool when the user's message contains a URL (or domain name) and they want to:\n- **Read** the page's content (e.g. documentation, blog post, article)\n- **Reference** information from the page (e.g. API docs, tutorials, guides)\n- **Extract** data or context from a live web page to inform their code\n- **Follow a link** someone shared to understand its contents\n\nExamples:\n- \"Use the docs at docs.example.com/api to set up the client\"\n- \"What does this page say? https://example.com/blog/post\"\n- \"Follow the guide at example.com/tutorial\"\n\n### When NOT to Use This Tool\n- The user wants to **visually clone or replicate** a website → use `web_crawl` instead\n- The user needs to **search the web** for information without a specific URL → use `web_search` instead\n",
+          "parameters": {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+              "url": {
+                "type": "string",
+                "description": "URL to fetch content from"
+              }
+            },
+            "required": [
+              "url"
+            ],
+            "additionalProperties": false
+          }
+        }
+      },
      {
        "type": "function",
        "function": {

--- a/e2e-tests/snapshots/local_agent_auto.spec.ts_local-agent---auto-model-1.txt
+++ b/e2e-tests/snapshots/local_agent_auto.spec.ts_local-agent---auto-model-1.txt
@@ -451,6 +451,25 @@
          "additionalProperties": false
        }
      },
+      {
+        "type": "function",
+        "name": "web_fetch",
+        "description": "Fetch and read the content of a web page as markdown given its URL.\n\n### When to Use This Tool\nUse this tool when the user's message contains a URL (or domain name) and they want to:\n- **Read** the page's content (e.g. documentation, blog post, article)\n- **Reference** information from the page (e.g. API docs, tutorials, guides)\n- **Extract** data or context from a live web page to inform their code\n- **Follow a link** someone shared to understand its contents\n\nExamples:\n- \"Use the docs at docs.example.com/api to set up the client\"\n- \"What does this page say? https://example.com/blog/post\"\n- \"Follow the guide at example.com/tutorial\"\n\n### When NOT to Use This Tool\n- The user wants to **visually clone or replicate** a website → use `web_crawl` instead\n- The user needs to **search the web** for information without a specific URL → use `web_search` instead\n",
+        "parameters": {
+          "$schema": "http://json-schema.org/draft-07/schema#",
+          "type": "object",
+          "properties": {
+            "url": {
+              "type": "string",
+              "description": "URL to fetch content from"
+            }
+          },
+          "required": [
+            "url"
+          ],
+          "additionalProperties": false
+        }
+      },
      {
        "type": "function",
        "name": "generate_image",

--- a/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
+++ b/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
@@ -465,6 +465,27 @@
          }
        }
      },
+      {
+        "type": "function",
+        "function": {
+          "name": "web_fetch",
+          "description": "Fetch and read the content of a web page as markdown given its URL.\n\n### When to Use This Tool\nUse this tool when the user's message contains a URL (or domain name) and they want to:\n- **Read** the page's content (e.g. documentation, blog post, article)\n- **Reference** information from the page (e.g. API docs, tutorials, guides)\n- **Extract** data or context from a live web page to inform their code\n- **Follow a link** someone shared to understand its contents\n\nExamples:\n- \"Use the docs at docs.example.com/api to set up the client\"\n- \"What does this page say? https://example.com/blog/post\"\n- \"Follow the guide at example.com/tutorial\"\n\n### When NOT to Use This Tool\n- The user wants to **visually clone or replicate** a website → use `web_crawl` instead\n- The user needs to **search the web** for information without a specific URL → use `web_search` instead\n",
+          "parameters": {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+              "url": {
+                "type": "string",
+                "description": "URL to fetch content from"
+              }
+            },
+            "required": [
+              "url"
+            ],
+            "additionalProperties": false
+          }
+        }
+      },
      {
        "type": "function",
        "function": {

--- a/e2e-tests/snapshots/local_agent_web_fetch.spec.ts_local-agent---web-fetch-1.aria.yml
+++ b/e2e-tests/snapshots/local_agent_web_fetch.spec.ts_local-agent---web-fetch-1.aria.yml
+- paragraph: /Generate an AI_RULES\.md file for this app\. Describe the tech stack in 5-\d+ bullet points and describe clear rules about what libraries to use for what\./
+- button "file1.txt file1.txt Edit":
+  - img
+  - text: ""
+  - button "Edit":
+    - img
+    - text: ""
+  - img
+- paragraph: More EOM
+- button "Copy":
+  - img
+- img
+- text: Approved
+- img
+- text: claude-opus-4-5
+- img
+- text: less than a minute ago
+- img
+- text: (1 files changed)
+- button "Copy Request ID":
+  - img
+  - text: ""
+- paragraph: tc=local-agent/web-fetch
+- paragraph: I'll fetch the content of that page for you.
+- img
+- text: Web Fetch
+- img
+- text: Done https://example.com/docs/getting-started
+- paragraph: Here's a summary of the page content. The getting started guide covers three main items. Let me know if you need more details!
+- button "Copy":
+  - img
+- img
+- text: claude-opus-4-5
+- img
+- text: less than a minute ago
+- button "Copy Request ID":
+  - img
+  - text: ""
+- button "Undo":
+  - img
+  - text: ""
+- button "Retry":
+  - img
+  - text: ""
\ No newline at end of file
--- a/src/components/chat/DyadMarkdownParser.tsx
+++ b/src/components/chat/DyadMarkdownParser.tsx
@@ -27,6 +27,7 @@ import { DyadMcpToolResult } from "./DyadMcpToolResult";
 import { DyadWebSearchResult } from "./DyadWebSearchResult";
 import { DyadWebSearch } from "./DyadWebSearch";
 import { DyadWebCrawl } from "./DyadWebCrawl";
+import { DyadWebFetch } from "./DyadWebFetch";
 import { DyadImageGeneration } from "./DyadImageGeneration";
 import { DyadCodeSearchResult } from "./DyadCodeSearchResult";
 import { DyadCodeSearch } from "./DyadCodeSearch";
@@ -64,6 +65,7 @@ const DYAD_CUSTOM_TAGS = [
  "dyad-web-search-result",
  "dyad-web-search",
  "dyad-web-crawl",
+  "dyad-web-fetch",
  "dyad-code-search-result",
  "dyad-code-search",
  "dyad-read",
@@ -392,6 +394,18 @@ function renderCustomTag(
          {content}
        </DyadWebCrawl>
      );
+    case "dyad-web-fetch":
+      return (
+        <DyadWebFetch
+          node={{
+            properties: {
+              state: getState({ isStreaming, inProgress }),
+            },
+          }}
+        >
+          {content}
+        </DyadWebFetch>
+      );
    case "dyad-code-search":
      return (
        <DyadCodeSearch

--- a/src/components/chat/DyadWebFetch.tsx
+++ b/src/components/chat/DyadWebFetch.tsx
+import type { FC, ReactNode } from "react";
+import { Globe } from "lucide-react";
+import {
+  DyadCard,
+  DyadCardHeader,
+  DyadBadge,
+  DyadStateIndicator,
+} from "./DyadCardPrimitives";
+import { CustomTagState } from "./stateTypes";
+/** Props for {@link DyadWebFetch}. */
+interface DyadWebFetchProps {
+  /** Content rendered inside the card body. */
+  children?: ReactNode;
+  /** Tag node wrapper; only `properties.state` is read by the component. */
+  node?: {
+    properties: {
+      state?: CustomTagState;
+    };
+  };
+}
+/**
+ * Chat card for a `dyad-web-fetch` tag.
+ *
+ * Shows a blue "Web Fetch" badge, a state indicator when a state is present,
+ * and the tag's children in an italic, muted body row.
+ */
+export const DyadWebFetch: FC<DyadWebFetchProps> = (props) => {
+  const { children, node } = props;
+  const state = node?.properties?.state as CustomTagState;
+  // The indicator is only rendered once a state is known.
+  const stateIndicator = state && (
+    <DyadStateIndicator
+      state={state}
+      pendingLabel="Fetching..."
+      finishedLabel="Done"
+      abortedLabel="Aborted"
+    />
+  );
+  // The body row is omitted entirely when the tag has no children.
+  const body = children && (
+    <div className="px-3 pb-2 text-sm italic text-muted-foreground">
+      {children}
+    </div>
+  );
+  return (
+    <DyadCard state={state} accentColor="blue">
+      <DyadCardHeader icon={<Globe size={15} />} accentColor="blue">
+        <DyadBadge color="blue">Web Fetch</DyadBadge>
+        {stateIndicator}
+      </DyadCardHeader>
+      {body}
+    </DyadCard>
+  );
+};
--- a/src/pro/main/ipc/handlers/local_agent/tool_definitions.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tool_definitions.ts
@@ -24,6 +24,7 @@ import { editFileTool } from "./tools/edit_file";
 import { searchReplaceTool } from "./tools/search_replace";
 import { webSearchTool } from "./tools/web_search";
 import { webCrawlTool } from "./tools/web_crawl";
+import { webFetchTool } from "./tools/web_fetch";
 import { generateImageTool } from "./tools/generate_image";
 import { updateTodosTool } from "./tools/update_todos";
 import { runTypeChecksTool } from "./tools/run_type_checks";
@@ -65,6 +66,7 @@ export const TOOL_DEFINITIONS: readonly ToolDefinition[] = [
  readLogsTool,
  webSearchTool,
  webCrawlTool,
+  webFetchTool,
  generateImageTool,
  updateTodosTool,
  runTypeChecksTool,

--- a/src/pro/main/ipc/handlers/local_agent/tools/web_fetch.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tools/web_fetch.ts
+import { z } from "zod";
+import log from "electron-log";
+import { ToolDefinition, escapeXmlContent, AgentContext } from "./types";
+import { engineFetch } from "./engine_fetch";
+const logger = log.scope("web_fetch");
+/**
+ * Ensures `url` is an absolute URL using the http or https scheme.
+ *
+ * @param url - Candidate URL string.
+ * @throws Error when `url` cannot be parsed by the `URL` constructor, or when
+ *   its scheme is anything other than `http:` / `https:`.
+ */
+function validateHttpUrl(url: string): void {
+  let scheme: string;
+  try {
+    scheme = new URL(url).protocol;
+  } catch {
+    throw new Error(`Invalid URL: ${url}`);
+  }
+  const isHttpScheme = scheme === "http:" || scheme === "https:";
+  if (!isHttpScheme) {
+    throw new Error(
+      `Unsupported URL scheme "${scheme}" — only http and https are allowed`,
+    );
+  }
+}
+/** Maximum number of UTF-16 code units of page content kept by `truncateContent`. */
+const MAX_CONTENT_LENGTH = 80_000;
+/**
+ * Caps `value` at `MAX_CONTENT_LENGTH` UTF-16 code units.
+ *
+ * @param value - Text to cap.
+ * @returns `value` unchanged when it fits; otherwise the leading portion
+ *   followed by a blank line and a `<!-- truncated -->` marker.
+ */
+function truncateContent(value: string): string {
+  if (value.length <= MAX_CONTENT_LENGTH) return value;
+  let end = MAX_CONTENT_LENGTH;
+  // Do not cut through a surrogate pair: if the last kept code unit is a high
+  // surrogate, drop it so the output never ends the content with half a character.
+  const lastKept = value.charCodeAt(end - 1);
+  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
+  return `${value.slice(0, end)}\n\n<!-- truncated -->`;
+}
+/** Input accepted by the `web_fetch` tool: a single `url` string. */
+const webFetchSchema = z.object({
+  url: z.string().describe("URL to fetch content from"),
+});
+/**
+ * Shape the engine response is parsed against in `callWebFetch`.
+ * `pages` is required; the root-level `markdown` is optional and is not read
+ * by the tool's `execute`, which builds its result from `pages` only.
+ */
+const webFetchResponseSchema = z.object({
+  rootUrl: z.string(),
+  markdown: z.string().optional(),
+  pages: z.array(
+    z.object({
+      url: z.string(),
+      markdown: z.string(),
+    }),
+  ),
+});
+/**
+ * Tool description used as `webFetchTool.description`.
+ * The exact text is captured in the e2e request snapshots, so any wording
+ * change requires regenerating them.
+ */
+const DESCRIPTION = `Fetch and read the content of a web page as markdown given its URL.
+### When to Use This Tool
+Use this tool when the user's message contains a URL (or domain name) and they want to:
+- **Read** the page's content (e.g. documentation, blog post, article)
+- **Reference** information from the page (e.g. API docs, tutorials, guides)
+- **Extract** data or context from a live web page to inform their code
+- **Follow a link** someone shared to understand its contents
+Examples:
+- "Use the docs at docs.example.com/api to set up the client"
+- "What does this page say? https://example.com/blog/post"
+- "Follow the guide at example.com/tutorial"
+### When NOT to Use This Tool
+- The user wants to **visually clone or replicate** a website → use \`web_crawl\` instead
+- The user needs to **search the web** for information without a specific URL → use \`web_search\` instead
+`;
+/**
+ * Requests the markdown rendering of `url` from the engine's
+ * `/tools/web-crawl` endpoint (sent with `markdownOnly: true`).
+ *
+ * @param url - Page to fetch.
+ * @param ctx - Supplies the `dyadRequestId` passed through to `engineFetch`.
+ * @returns The response body parsed with `webFetchResponseSchema`.
+ * @throws Error carrying status, status text and response body when the
+ *   response is not OK; schema parsing errors propagate unchanged.
+ */
+async function callWebFetch(
+  url: string,
+  ctx: Pick<AgentContext, "dyadRequestId">,
+): Promise<z.infer<typeof webFetchResponseSchema>> {
+  const requestInit = {
+    method: "POST",
+    body: JSON.stringify({ url, markdownOnly: true }),
+  };
+  const response = await engineFetch(ctx, "/tools/web-crawl", requestInit);
+  if (response.ok) {
+    return webFetchResponseSchema.parse(await response.json());
+  }
+  const errorText = await response.text();
+  throw new Error(
+    `Web fetch failed: ${response.status} ${response.statusText} - ${errorText}`,
+  );
+}
+/**
+ * `web_fetch` tool definition: validates the URL, fetches the page through
+ * `callWebFetch`, and returns the combined page markdown (capped by
+ * `truncateContent`) as the tool result.
+ *
+ * The chat output uses a `<dyad-web-fetch>` tag: `buildXml` / `onXmlStream`
+ * emit the open tag while the call is in flight, and `execute` always emits
+ * the closed tag through `onXmlComplete`, on success and on failure.
+ */
+export const webFetchTool: ToolDefinition<z.infer<typeof webFetchSchema>> = {
+  name: "web_fetch",
+  description: DESCRIPTION,
+  inputSchema: webFetchSchema,
+  defaultConsent: "always",
+  // Requires Dyad Pro engine API
+  isEnabled: (ctx) => ctx.isDyadPro,
+  getConsentPreview: (args) => `Fetch URL: "${args.url}"`,
+  buildXml: (args, isComplete) => {
+    if (!args.url) return undefined;
+    // When complete, return undefined so execute's onXmlComplete provides the final XML
+    if (isComplete) return undefined;
+    return `<dyad-web-fetch>${escapeXmlContent(args.url)}`;
+  },
+  execute: async (args, ctx) => {
+    logger.log(`Executing web fetch: ${args.url}`);
+    const openTag = `<dyad-web-fetch>${escapeXmlContent(args.url)}`;
+    const closedTag = `${openTag}</dyad-web-fetch>`;
+    try {
+      // Validate inside the try: buildXml has already streamed the open tag and
+      // defers the final XML to this function, so a rejected URL must still
+      // reach the catch below and close the tag.
+      validateHttpUrl(args.url);
+      ctx.onXmlStream(openTag);
+      // callWebFetch either returns a schema-validated object or throws, so no
+      // null check is needed on the result.
+      const result = await callWebFetch(args.url, ctx);
+      // Combine markdown from all pages
+      const allContent = result.pages
+        .map((page) => `## ${page.url}\n\n${page.markdown}`)
+        .join("\n\n---\n\n");
+      // An empty `pages` array joins to "", which is reported as a failure.
+      if (!allContent) {
+        throw new Error("No content available from web fetch");
+      }
+      logger.log(
+        `Web fetch completed for URL: ${args.url} (${result.pages.length} pages)`,
+      );
+      ctx.onXmlComplete(closedTag);
+      return truncateContent(allContent);
+    } catch (error) {
+      // Close the tag before propagating the failure.
+      ctx.onXmlComplete(closedTag);
+      throw error;
+    }
+  },
+};
--- a/testing/fake-llm-server/index.ts
+++ b/testing/fake-llm-server/index.ts
@@ -274,6 +274,28 @@ app.post("/engine/v1/images/generations", (req, res) => {
  }
 });
+// Dyad Engine web-crawl endpoint for web_fetch tool.
+// Echoes the requested URL back inside a canned single-page markdown payload.
+// Responds 400 when the request body has no non-empty string `url`.
+app.post("/engine/v1/tools/web-crawl", (req, res) => {
+  try {
+    // Read the body inside the try and tolerate a missing body, so malformed
+    // requests take the 400 path instead of throwing out of the handler.
+    const { url, markdownOnly } = req.body ?? {};
+    console.log(`* web-crawl: url="${url}", markdownOnly=${markdownOnly}`);
+    if (typeof url !== "string" || url.length === 0) {
+      res.status(400).json({ error: "Missing required string field: url" });
+      return;
+    }
+    res.json({
+      rootUrl: url,
+      markdown: `# Page content from ${url}`,
+      pages: [
+        {
+          url,
+          markdown: `# Page content from ${url}\n\nThis is the fetched content of the web page.\n\n- Item 1\n- Item 2\n- Item 3`,
+        },
+      ],
+    });
+  } catch (error) {
+    console.error(`* web-crawl error:`, error);
+    res.status(400).json({ error: String(error) });
+  }
+});
 // Start the server
 const server = createServer(app);
 server.listen(PORT, () => {