e2e tests for engine (#322)

fc1ebe9e · Will Chen · GitHub · 7235eab2 · fc1ebe9e · fc1ebe9e
--- a/e2e-tests/attach_image.spec.ts
+++ b/e2e-tests/attach_image.spec.ts
@@ -13,7 +13,7 @@ test("attach image - home chat", async ({ po }) => {
    .locator("input[type='file']")
    .setInputFiles("e2e-tests/fixtures/images/logo.png");
  await po.sendPrompt("[dump]");
-  await po.snapshotServerDump({ onlyLastMessage: true });
+  await po.snapshotServerDump("last-message");
  await po.snapshotMessages({ replaceDumpPath: true });
 });

@@ -27,7 +27,7 @@ test("attach image - chat", async ({ po }) => {
    .locator("input[type='file']")
    .setInputFiles("e2e-tests/fixtures/images/logo.png");
  await po.sendPrompt("[dump]");
-  await po.snapshotServerDump({ onlyLastMessage: true });
+  await po.snapshotServerDump("last-message");
  await po.snapshotMessages({ replaceDumpPath: true });
 });

@@ -65,6 +65,6 @@ test("attach image via drag - chat", async ({ po }) => {
  // submit and verify
  await po.sendPrompt("[dump]");
  // Note: this should match EXACTLY the server dump from the previous test.
-  await po.snapshotServerDump({ onlyLastMessage: true });
+  await po.snapshotServerDump("last-message");
  await po.snapshotMessages({ replaceDumpPath: true });
 });
--- a/e2e-tests/engine.spec.ts
+++ b/e2e-tests/engine.spec.ts
+import { test } from "./helpers/test_helper";
+
+test("send message to engine", async ({ po }) => {
+  await po.setUpDyadPro();
+  // The default model selection is "Auto", which points to Flash 2.5 and does
+  // not use the engine, so explicitly select a different model first.
+  await po.selectModel({ provider: "Google", model: "Gemini 2.5 Pro" });
+  await po.sendPrompt("[dump] tc=turbo-edits");
+
+  await po.snapshotServerDump("request");
+  await po.snapshotMessages({ replaceDumpPath: true });
+});
--- a/e2e-tests/fixtures/engine/turbo-edits.md
+++ b/e2e-tests/fixtures/engine/turbo-edits.md
+Example with turbo edit
+<dyad-edit path="foo/bar/file.js" description="turbo edit description">
+
+<!-- hello -->
+
+"making some edits"
+</dyad-edit>
+End of turbo edit
--- a/e2e-tests/helpers/test_helper.ts
+++ b/e2e-tests/helpers/test_helper.ts
@@ -36,6 +36,35 @@ class PageObject {
    await this.selectTestModel();
  }

+  async setUpDyadPro({ autoApprove = false }: { autoApprove?: boolean } = {}) {
+    await this.goToSettingsTab();
+    if (autoApprove) {
+      await this.toggleAutoApprove();
+    }
+    await this.setUpDyadProvider();
+    await this.goToAppsTab();
+  }
+
+  async setUpDyadProvider() {
+    // await page.getByRole('link', { name: 'Settings' }).click();
+    await this.page
+      .locator("div")
+      .filter({ hasText: /^DyadNeeds Setup$/ })
+      .nth(1)
+      .click();
+    await this.page.getByRole("textbox", { name: "Set Dyad API Key" }).click();
+    await this.page
+      .getByRole("textbox", { name: "Set Dyad API Key" })
+      .fill("testdyadkey");
+    await this.page.getByRole("button", { name: "Save Key" }).click();
+    // await page.getByRole('link', { name: 'Apps' }).click();
+    // await page.getByTestId('home-chat-input-container').getByRole('button', { name: 'Pro' }).click();
+    // await page.getByRole('switch', { name: 'Turbo Edits' }).click();
+    // await page.getByRole('switch', { name: 'Turbo Edits' }).click();
+    // await page.locator('div').filter({ hasText: /^Import App$/ }).click();
+    // await page.getByRole('button', { name: 'Select Folder' }).press('Escape');
+  }
+
  async snapshotMessages({
    replaceDumpPath = false,
  }: { replaceDumpPath?: boolean } = {}) {
@@ -110,9 +139,9 @@ class PageObject {
    });
  }

-  async snapshotServerDump({
-    onlyLastMessage = false,
-  }: { onlyLastMessage?: boolean } = {}) {
+  async snapshotServerDump(
+    type: "all-messages" | "last-message" | "request" = "all-messages",
+  ) {
    // Get the text content of the messages list
    const messagesListText = await this.page
      .getByTestId("messages-list")
@@ -133,9 +162,16 @@ class PageObject {
    const dumpContent = fs.readFileSync(dumpFilePath, "utf-8");

    // Perform snapshot comparison
-    expect(prettifyDump(dumpContent, { onlyLastMessage })).toMatchSnapshot(
-      "server-dump.txt",
-    );
+    const parsedDump = JSON.parse(dumpContent);
+    if (type === "request") {
+      expect(dumpContent).toMatchSnapshot("server-dump-request.json");
+      return;
+    }
+    expect(
+      prettifyDump(parsedDump["body"]["messages"], {
+        onlyLastMessage: type === "last-message",
+      }),
+    ).toMatchSnapshot("server-dump.txt");
  }

  async waitForChatCompletion() {
@@ -185,6 +221,12 @@ class PageObject {
    await this.waitForChatCompletion();
  }

+  async selectModel({ provider, model }: { provider: string; model: string }) {
+    await this.page.getByRole("button", { name: "Model: Auto" }).click();
+    await this.page.getByText(provider).click();
+    await this.page.getByText(model).click();
+  }
+
  async selectTestModel() {
    await this.page.getByRole("button", { name: "Model: Auto" }).click();
    await this.page.getByText("test-provider").click();
@@ -430,6 +472,7 @@ export const test = base.extend<{
      process.env.OLLAMA_HOST = "http://localhost:3500/ollama";
      process.env.LM_STUDIO_BASE_URL_FOR_TESTING =
        "http://localhost:3500/lmstudio";
+      process.env.DYAD_LOCAL_ENGINE = "http://localhost:3500/engine/v1";
      process.env.E2E_TEST_BUILD = "true";
      // This is just a hack to avoid the AI setup screen.
      process.env.OPENAI_API_KEY = "sk-test";
@@ -495,15 +538,13 @@ export const test = base.extend<{
 export const testSkipIfWindows = os.platform() === "win32" ? test.skip : test;

 function prettifyDump(
-  dumpContent: string,
-  { onlyLastMessage = false }: { onlyLastMessage?: boolean } = {},
-) {
-  const parsedDump = JSON.parse(dumpContent) as Array<{
+  allMessages: {
    role: string;
    content: string | Array<{}>;
-  }>;
-
-  const messages = onlyLastMessage ? parsedDump.slice(-1) : parsedDump;
+  }[],
+  { onlyLastMessage = false }: { onlyLastMessage?: boolean } = {},
+) {
+  const messages = onlyLastMessage ? allMessages.slice(-1) : allMessages;

  return messages
    .map((message) => {

--- a/e2e-tests/snapshots/engine.spec.ts_send-message-to-engine-1.aria.yml
+++ b/e2e-tests/snapshots/engine.spec.ts_send-message-to-engine-1.aria.yml
+- paragraph: "[dump] tc=turbo-edits"
+- paragraph: "[[dyad-dump-path=*]]"
+- button "Retry":
+  - img
\ No newline at end of file
--- a/e2e-tests/snapshots/engine.spec.ts_server-dump-request.json
+++ b/e2e-tests/snapshots/engine.spec.ts_server-dump-request.json
--- a/e2e-tests/snapshots/engine.spec.ts_server-dump.txt
+++ b/e2e-tests/snapshots/engine.spec.ts_server-dump.txt
--- a/src/utils/codebase.ts
+++ b/src/utils/codebase.ts
@@ -339,6 +339,13 @@ export async function extractCodebase(appPath: string): Promise<{

  const endTime = Date.now();
  logger.log("extractCodebase: time taken", endTime - startTime);
+  if (process.env.E2E_TEST_BUILD) {
+    // Why? For some reason, file ordering is not stable on Windows.
+    // This is a workaround to ensure stable ordering, although
+    // ideally we'd like to sort it by modification time which is
+    // important for cache-ability.
+    filesArray.sort((a, b) => a.path.localeCompare(b.path));
+  }
  return {
    formattedOutput,
    files: filesArray,

--- a/testing/fake-llm-server/chatCompletionHandler.ts
+++ b/testing/fake-llm-server/chatCompletionHandler.ts
+import { Request, Response } from "express";
+import fs from "fs";
+import path from "path";
+import { CANNED_MESSAGE, createStreamChunk } from ".";
+
+let globalCounter = 0;
+
+/**
+ * Reports whether a message's `content` carries the "[dump]" marker.
+ *
+ * `content` may be a plain string or an array of content parts; for arrays
+ * only parts with `type === "text"` are inspected. Any other shape (null,
+ * undefined, numbers, ...) yields `false` instead of throwing.
+ */
+function hasDumpMarker(content: unknown): boolean {
+  if (typeof content === "string") {
+    return content.includes("[dump]");
+  }
+  if (Array.isArray(content)) {
+    return content.some(
+      (part: { type?: unknown; text?: unknown } | null | undefined) =>
+        part != null &&
+        part.type === "text" &&
+        typeof part.text === "string" &&
+        part.text.includes("[dump]"),
+    );
+  }
+  return false;
+}
+
+/**
+ * Builds an Express handler that fakes an OpenAI-style chat completions
+ * endpoint.
+ *
+ * The reply is chosen from the last entry of `req.body.messages`:
+ * - content exactly "[429]": respond with HTTP 429 and a rate-limit error.
+ * - content containing "[dump]": write the request body and its
+ *   Authorization header to a JSON file in the `generated` directory next to
+ *   this module and reply with `[[dyad-dump-path=<file>]]`.
+ * - content exactly "[increment]": bump the module-wide counter and reply
+ *   with `counter=<n>`.
+ * - string content starting with "tc=": reply with the contents of
+ *   `e2e-tests/fixtures/<prefix>/<name>.md`.
+ * - otherwise: reply with CANNED_MESSAGE.
+ * When several rules match, the later one in this list wins.
+ *
+ * @param prefix Sub-directory of `e2e-tests/fixtures` searched for `tc=`
+ *   test case files.
+ * @returns A handler that answers with one JSON completion, or with
+ *   server-sent-event chunks when `req.body.stream` is truthy.
+ */
+export const createChatCompletionHandler =
+  (prefix: string) => (req: Request, res: Response) => {
+    // Tolerate a missing body so a malformed request gets the canned reply
+    // rather than crashing the handler.
+    const { stream = false, messages = [] } = req.body ?? {};
+    console.log("* Received messages", messages);
+
+    const lastMessage = messages[messages.length - 1];
+
+    // Simulate rate limiting when the last message is exactly "[429]".
+    if (lastMessage && lastMessage.content === "[429]") {
+      return res.status(429).json({
+        error: {
+          message: "Too many requests. Please try again later.",
+          type: "rate_limit_error",
+          param: null,
+          code: "rate_limit_exceeded",
+        },
+      });
+    }
+
+    let messageContent = CANNED_MESSAGE;
+
+    // "[dump]" anywhere in the last message: persist the request and return
+    // the path of the dump file as the reply.
+    if (lastMessage && hasDumpMarker(lastMessage.content)) {
+      const generatedDir = path.join(__dirname, "generated");
+      const dumpFilePath = path.join(generatedDir, `${Date.now()}.json`);
+
+      try {
+        // `recursive: true` makes this a no-op when the directory exists.
+        // Keeping it inside the try reports a directory failure the same way
+        // as a write failure.
+        fs.mkdirSync(generatedDir, { recursive: true });
+        fs.writeFileSync(
+          dumpFilePath,
+          JSON.stringify(
+            {
+              body: req.body,
+              headers: { authorization: req.headers["authorization"] },
+            },
+            null,
+            2,
+          ),
+          "utf-8",
+        );
+        console.log(`* Dumped messages to: ${dumpFilePath}`);
+        messageContent = `[[dyad-dump-path=${dumpFilePath}]]`;
+      } catch (error) {
+        console.error(`* Error writing dump file: ${error}`);
+        messageContent = `Error: Could not write dump file: ${error}`;
+      }
+    }
+
+    // Exactly "[increment]": the counter is shared by every handler created
+    // from this module.
+    if (lastMessage && lastMessage.content === "[increment]") {
+      globalCounter++;
+      messageContent = `counter=${globalCounter}`;
+    }
+
+    // String content starting with "tc=": reply with the named fixture file.
+    // The marker must be at the very start, so e.g. "[dump] tc=x" does not
+    // load a fixture.
+    if (
+      lastMessage &&
+      typeof lastMessage.content === "string" &&
+      lastMessage.content.startsWith("tc=")
+    ) {
+      const testCaseName = lastMessage.content.slice(3); // Remove "tc=" prefix
+      const testFilePath = path.join(
+        __dirname,
+        "..",
+        "..",
+        "..",
+        "e2e-tests",
+        "fixtures",
+        prefix,
+        `${testCaseName}.md`,
+      );
+
+      try {
+        if (fs.existsSync(testFilePath)) {
+          messageContent = fs.readFileSync(testFilePath, "utf-8");
+          console.log(`* Loaded test case: ${testCaseName}`);
+        } else {
+          console.log(`* Test case file not found: ${testFilePath}`);
+          messageContent = `Error: Test case file not found: ${testCaseName}.md`;
+        }
+      } catch (error) {
+        console.error(`* Error reading test case file: ${error}`);
+        messageContent = `Error: Could not read test case file: ${testCaseName}.md`;
+      }
+    }
+
+    // Non-streaming response
+    if (!stream) {
+      return res.json({
+        id: `chatcmpl-${Date.now()}`,
+        object: "chat.completion",
+        created: Math.floor(Date.now() / 1000),
+        model: "fake-model",
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: "assistant",
+              content: messageContent,
+            },
+            finish_reason: "stop",
+          },
+        ],
+      });
+    }
+
+    // Streaming response
+    res.setHeader("Content-Type", "text/event-stream");
+    res.setHeader("Cache-Control", "no-cache");
+    res.setHeader("Connection", "keep-alive");
+
+    // Split the message into characters; they are sent in batches of
+    // `batchSize`, one batch per timer tick, to simulate streaming.
+    const messageChars = messageContent.split("");
+    const batchSize = 8;
+    let index = 0;
+
+    // Send role first
+    res.write(createStreamChunk("", "assistant"));
+
+    const interval = setInterval(() => {
+      if (index < messageChars.length) {
+        // Get the next batch of characters (up to batchSize)
+        const batch = messageChars.slice(index, index + batchSize).join("");
+        res.write(createStreamChunk(batch));
+        index += batchSize;
+      } else {
+        // Send the final chunk
+        res.write(createStreamChunk("", "assistant", true));
+        clearInterval(interval);
+        res.end();
+      }
+    }, 10);
+
+    // Stop the timer if the connection closes before the message has been
+    // fully sent, so it does not keep writing to a dead response.
+    res.on("close", () => clearInterval(interval));
+  };
--- a/testing/fake-llm-server/index.ts
+++ b/testing/fake-llm-server/index.ts
-import express, { Request, Response } from "express";
+import express from "express";
 import { createServer } from "http";
 import cors from "cors";
-import fs from "fs";
-import path from "path";
+import { createChatCompletionHandler } from "./chatCompletionHandler";

 // Create Express app
 const app = express();
@@ -13,7 +12,7 @@ app.use(express.urlencoded({ extended: true, limit: "50mb" }));
 const PORT = 3500;

 // Helper function to create OpenAI-like streaming response chunks
-function createStreamChunk(
+export function createStreamChunk(
  content: string,
  role: string = "assistant",
  isLast: boolean = false,
@@ -35,7 +34,7 @@ function createStreamChunk(
  return `data: ${JSON.stringify(chunk)}\n\n${isLast ? "data: [DONE]\n\n" : ""}`;
 }

-const CANNED_MESSAGE = `
+export const CANNED_MESSAGE = `
  <think>
  \`<dyad-write>\`:
  I'll think about the problem and write a bug report.
@@ -93,8 +92,6 @@ app.get("/ollama/api/tags", (req, res) => {
  res.json(ollamaModels);
 });

-let globalCounter = 0;
-
 app.post("/ollama/chat", (req, res) => {
  // Tell the client we're going to stream NDJSON
  res.setHeader("Content-Type", "application/x-ndjson");
@@ -183,151 +180,16 @@ app.get("/lmstudio/api/v0/models", (req, res) => {
  res.json(lmStudioModels);
 });

-app.post("/lmstudio/v1/chat/completions", chatCompletionHandler);
-
-// Handle POST requests to /v1/chat/completions
-app.post("/v1/chat/completions", chatCompletionHandler);
-
-function chatCompletionHandler(req: Request, res: Response) {
-  const { stream = false, messages = [] } = req.body;
-  console.log("* Received messages", messages);
-
-  // Check if the last message contains "[429]" to simulate rate limiting
-  const lastMessage = messages[messages.length - 1];
-  if (lastMessage && lastMessage.content === "[429]") {
-    return res.status(429).json({
-      error: {
-        message: "Too many requests. Please try again later.",
-        type: "rate_limit_error",
-        param: null,
-        code: "rate_limit_exceeded",
-      },
-    });
-  }
-
-  let messageContent = CANNED_MESSAGE;
-  console.error("LASTMESSAGE", lastMessage);
-  // Check if the last message is "[dump]" to write messages to file and return path
-  if (
-    lastMessage &&
-    (Array.isArray(lastMessage.content)
-      ? lastMessage.content.some(
-          (part: { type: string; text: string }) =>
-            part.type === "text" && part.text.includes("[dump]"),
-        )
-      : lastMessage.content.includes("[dump]"))
-  ) {
-    const timestamp = Date.now();
-    const generatedDir = path.join(__dirname, "generated");
-
-    // Create generated directory if it doesn't exist
-    if (!fs.existsSync(generatedDir)) {
-      fs.mkdirSync(generatedDir, { recursive: true });
-    }
-
-    const dumpFilePath = path.join(generatedDir, `${timestamp}.json`);
-
-    try {
-      fs.writeFileSync(
-        dumpFilePath,
-        JSON.stringify(messages, null, 2),
-        "utf-8",
-      );
-      console.log(`* Dumped messages to: ${dumpFilePath}`);
-      messageContent = `[[dyad-dump-path=${dumpFilePath}]]`;
-    } catch (error) {
-      console.error(`* Error writing dump file: ${error}`);
-      messageContent = `Error: Could not write dump file: ${error}`;
-    }
-  }
+app.post(
+  "/lmstudio/v1/chat/completions",
+  createChatCompletionHandler("lmstudio"),
+);

-  if (lastMessage && lastMessage.content === "[increment]") {
-    globalCounter++;
-    messageContent = `counter=${globalCounter}`;
-  }
+app.post("/engine/v1/chat/completions", createChatCompletionHandler("engine"));

-  // Check if the last message starts with "tc=" to load test case file
-  if (
-    lastMessage &&
-    lastMessage.content &&
-    typeof lastMessage.content === "string" &&
-    lastMessage.content.startsWith("tc=")
-  ) {
-    const testCaseName = lastMessage.content.slice(3); // Remove "tc=" prefix
-    const testFilePath = path.join(
-      __dirname,
-      "..",
-      "..",
-      "..",
-      "e2e-tests",
-      "fixtures",
-      `${testCaseName}.md`,
-    );
-
-    try {
-      if (fs.existsSync(testFilePath)) {
-        messageContent = fs.readFileSync(testFilePath, "utf-8");
-        console.log(`* Loaded test case: ${testCaseName}`);
-      } else {
-        console.log(`* Test case file not found: ${testFilePath}`);
-        messageContent = `Error: Test case file not found: ${testCaseName}.md`;
-      }
-    } catch (error) {
-      console.error(`* Error reading test case file: ${error}`);
-      messageContent = `Error: Could not read test case file: ${testCaseName}.md`;
-    }
-  }
-
-  // Non-streaming response
-  if (!stream) {
-    return res.json({
-      id: `chatcmpl-${Date.now()}`,
-      object: "chat.completion",
-      created: Math.floor(Date.now() / 1000),
-      model: "fake-model",
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant",
-            content: messageContent,
-          },
-          finish_reason: "stop",
-        },
-      ],
-    });
-  }
-
-  // Streaming response
-  res.setHeader("Content-Type", "text/event-stream");
-  res.setHeader("Cache-Control", "no-cache");
-  res.setHeader("Connection", "keep-alive");
-
-  // Split the message into characters to simulate streaming
-  const message = messageContent;
-  const messageChars = message.split("");
-
-  // Stream each character with a delay
-  let index = 0;
-  const batchSize = 8;
-
-  // Send role first
-  res.write(createStreamChunk("", "assistant"));
+// Handle POST requests to /v1/chat/completions
+app.post("/v1/chat/completions", createChatCompletionHandler("."));

-  const interval = setInterval(() => {
-    if (index < messageChars.length) {
-      // Get the next batch of characters (up to batchSize)
-      const batch = messageChars.slice(index, index + batchSize).join("");
-      res.write(createStreamChunk(batch));
-      index += batchSize;
-    } else {
-      // Send the final chunk
-      res.write(createStreamChunk("", "assistant", true));
-      clearInterval(interval);
-      res.end();
-    }
-  }, 10);
-}
 // Start the server
 const server = createServer(app);
 server.listen(PORT, () => {