Image generation tool (#2789)

 --- <a href="https://app.devin.ai/review/dyad-sh/dyad/pull/2789" target="_blank"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open with Devin"> </picture> </a>  --------- Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Will Chen <willchen90@gmail.com>

Image generation tool (#2789)
ce03beac · Mohamed Aziz Mejri · GitHub · 744d00ca · ce03beac · ce03beac
--- a/e2e-tests/fixtures/engine/local-agent/generate-image.ts
+++ b/e2e-tests/fixtures/engine/local-agent/generate-image.ts
+import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";
+
+/** Prompt the scripted agent passes to the `generate_image` tool call. */
+const HERO_IMAGE_PROMPT =
+  "A modern, minimal hero illustration of a rocket launching from a laptop screen, flat design style, blue and purple gradient background, clean lines";
+
+/**
+ * Scripted two-turn conversation: the first turn announces the work and issues
+ * a single `generate_image` tool call; the second turn is a text-only wrap-up.
+ */
+export const fixture: LocalAgentFixture = {
+  description: "Generate an image using the generate_image tool",
+  turns: [
+    {
+      text: "I'll generate a hero image for your landing page.",
+      toolCalls: [
+        { name: "generate_image", args: { prompt: HERO_IMAGE_PROMPT } },
+      ],
+    },
+    {
+      text: "I've generated the hero image and saved it to your project. You can find it in the .dyad/media directory.",
+    },
+  ],
+};
--- a/e2e-tests/local_agent_generate_image.spec.ts
+++ b/e2e-tests/local_agent_generate_image.spec.ts
+import { testSkipIfWindows } from "./helpers/test_helper";
+
+/**
+ * E2E coverage for the generate_image agent tool in local-agent mode.
+ * Sends the test-case prompt (presumably resolved by the fake LLM server to the
+ * `local-agent/generate-image` fixture) and snapshots the resulting chat.
+ */
+
+const GENERATE_IMAGE_TEST_CASE_PROMPT = "tc=local-agent/generate-image";
+
+testSkipIfWindows("local-agent - generate image", async ({ po }) => {
+  // Arrange: Pro with the local agent enabled, a minimal app, local-agent chat mode.
+  await po.setUpDyadPro({ localAgent: true });
+  await po.importApp("minimal");
+  await po.chatActions.selectLocalAgentMode();
+
+  // Act: trigger the scripted conversation.
+  await po.sendPrompt(GENERATE_IMAGE_TEST_CASE_PROMPT);
+
+  // Assert: the rendered messages match the stored snapshot.
+  await po.snapshotMessages();
+});
--- a/e2e-tests/snapshots/local_agent_auto.spec.ts_local-agent---auto-model-1.txt
+++ b/e2e-tests/snapshots/local_agent_auto.spec.ts_local-agent---auto-model-1.txt
--- a/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
+++ b/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
@@ -465,6 +465,27 @@
          }
        }
      },
+      {
+        "type": "function",
+        "function": {
+          "name": "generate_image",
+          "description": "Generate an image using AI based on a text prompt. The generated image is saved to the project's .dyad/media directory.\n\n### When to Use\n- User requests a custom image, illustration, icon, or graphic for their app\n- User wants a hero image, background, banner, or visual asset\n- Creating images that are more visually relevant than placeholder rectangles\n\n### Prompt Guidelines\nWrite detailed, descriptive prompts. Be specific about:\n- **Subject**: What is in the image (objects, people, scenes)\n- **Style**: Photography, illustration, flat design, 3D render, watercolor, etc.\n- **Composition**: Layout, perspective, framing\n- **Colors**: Specific color palette or mood\n- **Mood**: Cheerful, professional, dramatic, minimal, etc.\n\n### Examples\n- \"A modern flat illustration of a team collaborating around a laptop, using a blue and purple color palette, clean minimal style with subtle gradients, white background\"\n- \"Professional product photography of a sleek smartphone on a marble surface, soft studio lighting, shallow depth of field, warm neutral tones\"\n\n### After Generation\nThe tool returns the file path in .dyad/media. Use the copy_file tool to copy it to the appropriate location in the project (e.g., public/assets/) and reference that path in your code.\n",
+          "parameters": {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+              "prompt": {
+                "type": "string",
+                "description": "A detailed, descriptive prompt for the image to generate. Be specific about colors, composition, style, mood, and subject matter. Avoid generic or vague descriptions."
+              }
+            },
+            "required": [
+              "prompt"
+            ],
+            "additionalProperties": false
+          }
+        }
+      },
      {
        "type": "function",
        "function": {

--- a/e2e-tests/snapshots/local_agent_generate_image.spec.ts_local-agent---generate-image-1.aria.yml
+++ b/e2e-tests/snapshots/local_agent_generate_image.spec.ts_local-agent---generate-image-1.aria.yml
+- paragraph: /Generate an AI_RULES\.md file for this app\. Describe the tech stack in 5-\d+ bullet points and describe clear rules about what libraries to use for what\./
+- button "file1.txt file1.txt Edit":
+  - img
+  - text: ""
+  - button "Edit":
+    - img
+    - text: ""
+  - img
+- paragraph: More EOM
+- button "Copy":
+  - img
+- img
+- text: Approved
+- img
+- text: claude-opus-4-5
+- img
+- text: less than a minute ago
+- button "Copy Request ID":
+  - img
+  - text: ""
+- paragraph: tc=local-agent/generate-image
+- paragraph: I'll generate a hero image for your landing page.
+- button "Image Generation A modern, minimal hero illustration of a rocket launching from a laptop screen, flat design style, blue and purple gradient background, clean lines":
+  - img
+  - text: ""
+  - img
+- paragraph: I've generated the hero image and saved it to your project. You can find it in the .dyad/media directory.
+- button "Copy":
+  - img
+- img
+- text: claude-opus-4-5
+- img
+- text: less than a minute ago
+- button "Copy Request ID":
+  - img
+  - text: ""
+- button "Undo":
+  - img
+  - text: ""
+- button "Retry":
+  - img
+  - text: ""
\ No newline at end of file
--- a/src/__tests__/__snapshots__/local_agent_prompt.test.ts.snap
+++ b/src/__tests__/__snapshots__/local_agent_prompt.test.ts.snap
@@ -90,6 +90,15 @@ After every edit, read the file to verify changes applied correctly. If somethin
 6. **Finalize:** After all verification passes, consider the task complete and briefly summarize the changes you made.
 </development_workflow>

+<image_generation_guidelines>
+When a user explicitly requests custom images, illustrations, or visual media for their app:
+- Use the \`generate_image\` tool instead of using placeholder images or broken external URLs
+- Do NOT generate images when an existing asset, SVG, or icon library (e.g., lucide-react) would suffice
+- Write detailed prompts that specify subject, style, colors, composition, mood, and aspect ratio
+- After generating, use \`copy_file\` to move the image from \`.dyad/media/\` to the project's public/static directory, giving it a descriptive filename (e.g., \`public/assets/hero-banner.png\`)
+- Reference the copied path in code (e.g., \`<img src="/assets/hero-banner.png" />\`)
+</image_generation_guidelines>
+
 # Tech Stack
 - You are building a React application.
 - Use TypeScript.

--- a/src/components/chat/DyadImageGeneration.tsx
+++ b/src/components/chat/DyadImageGeneration.tsx
+import type React from "react";
+import { useState, type ReactNode } from "react";
+import { ImageIcon } from "lucide-react";
+import { CustomTagState } from "./stateTypes";
+import {
+  DyadCard,
+  DyadCardHeader,
+  DyadBadge,
+  DyadExpandIcon,
+  DyadStateIndicator,
+  DyadCardContent,
+} from "./DyadCardPrimitives";
+
+/** Shape of the parsed `dyad-image-generation` tag data handed to the card. */
+interface DyadImageGenerationNode {
+  properties: {
+    /** Text prompt of the generation request; shown in the header and expanded body. */
+    prompt: string;
+    /** Location of the saved image, rendered under "Saved to:"; an empty string hides that row. */
+    path: string;
+    /** Render state; "pending" and "aborted" each show a status indicator. */
+    state: CustomTagState;
+  };
+}
+
+/** Props accepted by the `DyadImageGeneration` card component. */
+interface DyadImageGenerationProps {
+  /** Extra content rendered at the bottom of the expanded card. */
+  children?: ReactNode;
+  /** Tag data; when omitted the prompt and path fall back to empty strings. */
+  node?: DyadImageGenerationNode;
+}
+
+/**
+ * Collapsible chat card for a `dyad-image-generation` tag.
+ *
+ * Collapsed, the header shows the badge plus the (truncated) prompt and a
+ * pending/aborted indicator when applicable. Expanded, the body lists the
+ * prompt, the saved path, and any remaining tag content.
+ *
+ * @param children - Tag body. The generate_image tool writes the saved path as
+ *   both the `path` attribute and the tag body, so a body that merely repeats
+ *   the path is not rendered a second time.
+ * @param node - Parsed tag data (`prompt`, `path`, `state`); missing values
+ *   fall back to empty strings.
+ */
+export const DyadImageGeneration: React.FC<DyadImageGenerationProps> = ({
+  children,
+  node,
+}) => {
+  const [isExpanded, setIsExpanded] = useState(false);
+  const prompt = node?.properties?.prompt ?? "";
+  const imagePath = node?.properties?.path ?? "";
+  const state = node?.properties?.state;
+  const inProgress = state === "pending";
+  const aborted = state === "aborted";
+
+  // Avoid printing the path twice when the tag body is just the saved path.
+  const childrenRepeatPath =
+    typeof children === "string" &&
+    imagePath !== "" &&
+    children.trim() === imagePath;
+  const showChildren = Boolean(children) && !childrenRepeatPath;
+
+  return (
+    <DyadCard
+      state={state}
+      accentColor="violet"
+      isExpanded={isExpanded}
+      // Functional update so rapid clicks always toggle from the latest value.
+      onClick={() => setIsExpanded((expanded) => !expanded)}
+    >
+      <DyadCardHeader icon={<ImageIcon size={15} />} accentColor="violet">
+        <DyadBadge color="violet">Image Generation</DyadBadge>
+        {!isExpanded && prompt && (
+          <span className="text-sm text-muted-foreground italic truncate">
+            {prompt}
+          </span>
+        )}
+        {inProgress && (
+          <DyadStateIndicator state="pending" pendingLabel="Generating..." />
+        )}
+        {aborted && (
+          <DyadStateIndicator state="aborted" abortedLabel="Did not finish" />
+        )}
+        <div className="ml-auto">
+          <DyadExpandIcon isExpanded={isExpanded} />
+        </div>
+      </DyadCardHeader>
+      <DyadCardContent isExpanded={isExpanded}>
+        <div className="text-sm text-muted-foreground space-y-2">
+          {prompt && (
+            <div>
+              <span className="text-xs font-medium text-muted-foreground">
+                Prompt:
+              </span>
+              <div className="italic mt-0.5 text-foreground">{prompt}</div>
+            </div>
+          )}
+          {imagePath && (
+            <div>
+              <span className="text-xs font-medium text-muted-foreground">
+                Saved to:
+              </span>
+              <div className="mt-0.5 font-mono text-xs text-foreground">
+                {imagePath}
+              </div>
+            </div>
+          )}
+          {showChildren && (
+            <div className="mt-0.5 text-foreground">{children}</div>
+          )}
+        </div>
+      </DyadCardContent>
+    </DyadCard>
+  );
+};
--- a/src/components/chat/DyadMarkdownParser.tsx
+++ b/src/components/chat/DyadMarkdownParser.tsx
@@ -27,6 +27,7 @@ import { DyadMcpToolResult } from "./DyadMcpToolResult";
 import { DyadWebSearchResult } from "./DyadWebSearchResult";
 import { DyadWebSearch } from "./DyadWebSearch";
 import { DyadWebCrawl } from "./DyadWebCrawl";
+import { DyadImageGeneration } from "./DyadImageGeneration";
 import { DyadCodeSearchResult } from "./DyadCodeSearchResult";
 import { DyadCodeSearch } from "./DyadCodeSearch";
 import { DyadRead } from "./DyadRead";
@@ -76,6 +77,7 @@ const DYAD_CUSTOM_TAGS = [
  "dyad-status",
  "dyad-compaction",
  "dyad-copy",
+  "dyad-image-generation",
  // Plan mode tags
  "dyad-write-plan",
  "dyad-exit-plan",
@@ -727,6 +729,21 @@ function renderCustomTag(
        </DyadSupabaseProjectInfo>
      );

+    case "dyad-image-generation":
+      return (
+        <DyadImageGeneration
+          node={{
+            properties: {
+              prompt: attributes.prompt || "",
+              path: attributes.path || "",
+              state: getState({ isStreaming, inProgress }),
+            },
+          }}
+        >
+          {content}
+        </DyadImageGeneration>
+      );
+
    case "dyad-status":
      return (
        <DyadStatus

--- a/src/pro/main/ipc/handlers/local_agent/tool_definitions.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tool_definitions.ts
@@ -24,6 +24,7 @@ import { editFileTool } from "./tools/edit_file";
 import { searchReplaceTool } from "./tools/search_replace";
 import { webSearchTool } from "./tools/web_search";
 import { webCrawlTool } from "./tools/web_crawl";
+import { generateImageTool } from "./tools/generate_image";
 import { updateTodosTool } from "./tools/update_todos";
 import { runTypeChecksTool } from "./tools/run_type_checks";
 import { grepTool } from "./tools/grep";
@@ -64,6 +65,7 @@ export const TOOL_DEFINITIONS: readonly ToolDefinition[] = [
  readLogsTool,
  webSearchTool,
  webCrawlTool,
+  generateImageTool,
  updateTodosTool,
  runTypeChecksTool,
  // Plan mode tools

--- a/src/pro/main/ipc/handlers/local_agent/tools/generate_image.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tools/generate_image.ts
+import { z } from "zod";
+import log from "electron-log";
+import fs from "node:fs/promises";
+import path from "node:path";
+import crypto from "node:crypto";
+import {
+  ToolDefinition,
+  AgentContext,
+  escapeXmlAttr,
+  escapeXmlContent,
+} from "./types";
+import { engineFetch } from "./engine_fetch";
+import { DYAD_MEDIA_DIR_NAME } from "@/ipc/utils/media_path_utils";
+
+const logger = log.scope("generate_image");
+
+/** Description attached to the `prompt` argument of the tool's input schema. */
+const PROMPT_ARG_DESCRIPTION =
+  "A detailed, descriptive prompt for the image to generate. Be specific about colors, composition, style, mood, and subject matter. Avoid generic or vague descriptions.";
+
+/** Input accepted by the generate_image tool: a single free-form text prompt. */
+const generateImageSchema = z.object({
+  prompt: z.string().describe(PROMPT_ARG_DESCRIPTION),
+});
+
+/**
+ * One entry of the image-generation response. Every field may be a string,
+ * `null`, or absent; the image itself arrives as `b64_json` or as a `url`.
+ */
+const imageDataSchema = z.object({
+  url: z.string().nullish(),
+  b64_json: z.string().nullish(),
+  revised_prompt: z.string().nullish(),
+});
+
+/** Envelope of the image-generation response: a numeric `created` plus the image entries. */
+const generateImageResponseSchema = z.object({
+  created: z.number(),
+  data: z.array(imageDataSchema),
+});
+
+/**
+ * Tool description used as `generateImageTool.description`. Markdown text that
+ * covers when to use the tool, prompt-writing guidelines, example prompts, and
+ * the follow-up copy step. Changing the wording changes what the tool reports
+ * as its description.
+ */
+const DESCRIPTION = `Generate an image using AI based on a text prompt. The generated image is saved to the project's .dyad/media directory.
+
+### When to Use
+- User requests a custom image, illustration, icon, or graphic for their app
+- User wants a hero image, background, banner, or visual asset
+- Creating images that are more visually relevant than placeholder rectangles
+
+### Prompt Guidelines
+Write detailed, descriptive prompts. Be specific about:
+- **Subject**: What is in the image (objects, people, scenes)
+- **Style**: Photography, illustration, flat design, 3D render, watercolor, etc.
+- **Composition**: Layout, perspective, framing
+- **Colors**: Specific color palette or mood
+- **Mood**: Cheerful, professional, dramatic, minimal, etc.
+
+### Examples
+- "A modern flat illustration of a team collaborating around a laptop, using a blue and purple color palette, clean minimal style with subtle gradients, white background"
+- "Professional product photography of a sleek smartphone on a marble surface, soft studio lighting, shallow depth of field, warm neutral tones"
+
+### After Generation
+The tool returns the file path in .dyad/media. Use the copy_file tool to copy it to the appropriate location in the project (e.g., public/assets/) and reference that path in your code.
+`;
+
+/**
+ * Requests a single image from the engine's `/images/generations` endpoint.
+ *
+ * @param prompt - Text prompt sent verbatim in the request body.
+ * @param ctx - Context forwarded to `engineFetch`.
+ * @returns The first entry of the response's `data` array.
+ * @throws Error when the response is not OK or `data` is empty; the schema
+ *   `parse` call throws when the body does not match the expected shape.
+ */
+async function callGenerateImage(
+  prompt: string,
+  ctx: Pick<AgentContext, "dyadRequestId">,
+): Promise<z.infer<typeof imageDataSchema>> {
+  const requestBody = JSON.stringify({ prompt, model: "gpt-image-1.5" });
+  const response = await engineFetch(ctx, "/images/generations", {
+    method: "POST",
+    body: requestBody,
+  });
+
+  if (!response.ok) {
+    // Include the response body so the failure reason reaches the caller.
+    const errorText = await response.text();
+    const detail = `${response.status} ${response.statusText} - ${errorText}`;
+    throw new Error(`Image generation failed: ${detail}`);
+  }
+
+  const { data: images } = generateImageResponseSchema.parse(
+    await response.json(),
+  );
+
+  const [firstImage] = images;
+  if (firstImage === undefined) {
+    throw new Error("Image generation returned no results");
+  }
+
+  return firstImage;
+}
+
+/**
+ * Writes a generated image into `<appPath>/<DYAD_MEDIA_DIR_NAME>/` under a
+ * unique `generated-<timestamp>-<random hex>.png` name.
+ *
+ * The image bytes are resolved and validated before the filesystem is touched,
+ * so a response without usable image data leaves no directory or empty file.
+ *
+ * @param imageData - Response entry; `b64_json` is preferred, `url` is the fallback.
+ * @param appPath - Root directory of the app the image belongs to.
+ * @returns The saved file's path relative to `appPath`.
+ * @throws Error when neither `b64_json` nor `url` is set, when the download
+ *   response is not OK, or when the resolved image is zero bytes long.
+ */
+async function saveGeneratedImage(
+  imageData: z.infer<typeof imageDataSchema>,
+  appPath: string,
+): Promise<string> {
+  let imageBytes: Buffer;
+  if (imageData.b64_json) {
+    imageBytes = Buffer.from(imageData.b64_json, "base64");
+  } else if (imageData.url) {
+    const response = await fetch(imageData.url);
+    if (!response.ok) {
+      throw new Error(`Failed to download generated image: ${response.status}`);
+    }
+    imageBytes = Buffer.from(await response.arrayBuffer());
+  } else {
+    throw new Error("Image generation returned no image data");
+  }
+
+  // Base64 decoding skips invalid characters instead of throwing, so garbage
+  // input can decode to nothing; refuse to save a zero-byte ".png".
+  if (imageBytes.length === 0) {
+    throw new Error("Image generation returned empty image data");
+  }
+
+  const mediaDir = path.join(appPath, DYAD_MEDIA_DIR_NAME);
+  await fs.mkdir(mediaDir, { recursive: true });
+
+  // Timestamp plus random suffix keeps names unique across rapid calls.
+  const hash = crypto.randomBytes(8).toString("hex");
+  const timestamp = Date.now();
+  const fileName = `generated-${timestamp}-${hash}.png`;
+  const filePath = path.join(mediaDir, fileName);
+  const relativePath = path.join(DYAD_MEDIA_DIR_NAME, fileName);
+
+  await fs.writeFile(filePath, imageBytes);
+
+  return relativePath;
+}
+
+/** Builds the opening `<dyad-image-generation>` tag carrying the escaped prompt. */
+function imageGenerationOpenTag(prompt: string): string {
+  return `<dyad-image-generation prompt="${escapeXmlAttr(prompt)}">`;
+}
+
+/**
+ * Agent tool that generates an image from a text prompt and saves it into the
+ * app's media directory. Enabled only when `ctx.isDyadPro` is set.
+ *
+ * While arguments stream in, `buildXml` emits the opening tag only. `execute`
+ * streams the opening tag, then completes with a closed tag: including the
+ * saved path on success, or an empty body before rethrowing on failure.
+ */
+export const generateImageTool: ToolDefinition<
+  z.infer<typeof generateImageSchema>
+> = {
+  name: "generate_image",
+  description: DESCRIPTION,
+  inputSchema: generateImageSchema,
+  defaultConsent: "always",
+  modifiesState: true,
+
+  isEnabled: (ctx) => ctx.isDyadPro,
+
+  getConsentPreview: (args) => `Generate image: "${args.prompt}"`,
+
+  buildXml: (args, isComplete) => {
+    // Nothing to preview without a prompt; the completed tag comes from execute().
+    if (!args.prompt || isComplete) return undefined;
+    return imageGenerationOpenTag(args.prompt);
+  },
+
+  execute: async (args, ctx: AgentContext) => {
+    const { prompt } = args;
+    logger.log(`Executing image generation with prompt: ${prompt}`);
+
+    const openTag = imageGenerationOpenTag(prompt);
+    ctx.onXmlStream(openTag);
+
+    try {
+      const imageData = await callGenerateImage(prompt, ctx);
+      const savedPath = await saveGeneratedImage(imageData, ctx.appPath);
+
+      ctx.onXmlComplete(
+        `<dyad-image-generation prompt="${escapeXmlAttr(prompt)}" path="${escapeXmlAttr(savedPath)}">${escapeXmlContent(savedPath)}</dyad-image-generation>`,
+      );
+
+      logger.log(`Image generation completed, saved to: ${savedPath}`);
+
+      return `Image generated and saved to: ${savedPath}\nUse the copy_file tool to copy it from "${savedPath}" to the appropriate location in the project (e.g., public/assets/), then reference the copied path in your code.`;
+    } catch (error) {
+      // Close the streamed tag before surfacing the failure.
+      ctx.onXmlComplete(`${openTag}</dyad-image-generation>`);
+      throw error;
+    }
+  },
+};
--- a/src/prompts/local_agent_prompt.ts
+++ b/src/prompts/local_agent_prompt.ts
@@ -190,6 +190,19 @@ You have READ-ONLY tools at your disposal to understand the codebase. Follow the
 [[AI_RULES]]
 `;

+// ============================================================================
+// Image Generation Block (Pro mode only): generate_image usage + copy_file follow-up
+// ============================================================================
+
+const IMAGE_GENERATION_BLOCK = `<image_generation_guidelines>
+When a user explicitly requests custom images, illustrations, or visual media for their app:
+- Use the \`generate_image\` tool instead of using placeholder images or broken external URLs
+- Do NOT generate images when an existing asset, SVG, or icon library (e.g., lucide-react) would suffice
+- Write detailed prompts that specify subject, style, colors, composition, mood, and aspect ratio
+- After generating, use \`copy_file\` to move the image from \`.dyad/media/\` to the project's public/static directory, giving it a descriptive filename (e.g., \`public/assets/hero-banner.png\`)
+- Reference the copied path in code (e.g., \`<img src="/assets/hero-banner.png" />\`)
+</image_generation_guidelines>`;
+
 // ============================================================================
 // Full System Prompts (assembled from blocks)
 // ============================================================================
@@ -213,6 +226,8 @@ ${PRO_FILE_EDITING_TOOL_SELECTION_BLOCK}

 ${PRO_DEVELOPMENT_WORKFLOW_BLOCK}

+${IMAGE_GENERATION_BLOCK}
+
 [[AI_RULES]]
 `;


--- a/testing/fake-llm-server/index.ts
+++ b/testing/fake-llm-server/index.ts
@@ -248,6 +248,32 @@ app.post("/engine/v1/tools/code-search", (req, res) => {
  }
 });

+// Dyad Engine image generation endpoint for generate_image tool.
+// Always answers with the same tiny PNG; only the request shape is checked.
+app.post("/engine/v1/images/generations", (req, res) => {
+  try {
+    // Validate inside the try: a missing body or a non-string prompt becomes a
+    // JSON 400 from the catch below instead of an unhandled exception.
+    const { prompt, model } = req.body ?? {};
+    if (typeof prompt !== "string") {
+      throw new Error("prompt must be a string");
+    }
+    console.log(
+      `* images/generations: model=${model}, prompt="${prompt.slice(0, 50)}..."`,
+    );
+
+    // Return a small 1x1 white PNG as base64 for testing
+    const TINY_PNG_B64 =
+      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==";
+
+    const created = Math.floor(Date.now() / 1000);
+    res.json({ created, data: [{ b64_json: TINY_PNG_B64 }] });
+  } catch (error) {
+    console.error(`* images/generations error:`, error);
+    res.status(400).json({ error: String(error) });
+  }
+});
+
 // Start the server
 const server = createServer(app);
 server.listen(PORT, () => {