Handle local agent connection recovery (#2853)

Handle connection-drop and retry behavior in local agent IPC handling. Add end-to-end scenarios that verify recovery after a temporary local agent disconnect, both when the drop hits a follow-up request and when it happens right after tool-call chunks were streamed. Align test fixtures and snapshots for local-agent reconnection behavior.

Handle local agent connection recovery (#2853)
6f53e896 · Will Chen · GitHub · 709222c5 · 6f53e896 · 6f53e896
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,8 @@ dist/
 # playwright
 playwright-report/
 test-results/
+blob-report/
+flakiness-report/
 # Diagnostic reports (https://nodejs.org/api/report.html)
 report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

--- a/e2e-tests/fixtures/engine/local-agent/connection-drop-after-tool-call.ts
+++ b/e2e-tests/fixtures/engine/local-agent/connection-drop-after-tool-call.ts
+import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";
+/**
+ * Tests retry behavior when connection drops after tool-call chunks were emitted
+ * but before the stream is finalized. This simulates an orphaned tool-call retry
+ * window and ensures we don't duplicate tool execution.
+ */
+export const fixture: LocalAgentFixture = {
+  description: "Connection drop after streaming tool-call chunks",
+  dropConnectionAfterToolCallByTurn: [{ turnIndex: 0, attempts: [1] }],
+  turns: [
+    {
+      text: "I'll create a file for you.",
+      toolCalls: [
+        {
+          name: "write_file",
+          args: {
+            path: "src/recovered-after-tool-call.ts",
+            content: `export const recoveredAfterToolCall = true;\n`,
+            description: "File created after tool-call termination recovery",
+          },
+        },
+      ],
+    },
+    {
+      text: "Successfully created the file after retrying from a tool-call termination.",
+    },
+  ],
+};
--- a/e2e-tests/fixtures/engine/local-agent/connection-drop.ts
+++ b/e2e-tests/fixtures/engine/local-agent/connection-drop.ts
+import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";
+/**
+ * Tests automatic retry after connection drop (e.g., TCP terminated mid-stream).
+ * This fixture drops the connection on the first attempt of turn 1 (the
+ * post-tool text turn), which is more realistic than dropping before any
+ * tool activity. The local agent handler should automatically retry and
+ * continue without re-running completed work.
+ */
+export const fixture: LocalAgentFixture = {
+  description: "Automatic retry after connection drop",
+  dropConnectionByTurn: [{ turnIndex: 1, attempts: [1] }],
+  turns: [
+    {
+      text: "I'll create a file for you.",
+      toolCalls: [
+        {
+          name: "write_file",
+          args: {
+            path: "src/recovered.ts",
+            content: `export const recovered = true;\n`,
+            description: "File created after connection recovery",
+          },
+        },
+      ],
+    },
+    {
+      text: "Successfully created the file after automatic retry.",
+    },
+  ],
+};
--- a/e2e-tests/local_agent_connection_retry.spec.ts
+++ b/e2e-tests/local_agent_connection_retry.spec.ts
+import { expect } from "@playwright/test";
+import { testSkipIfWindows } from "./helpers/test_helper";
+/**
+ * E2E test for local-agent connection retry resilience.
+ * Verifies that the agent automatically recovers from transient connection
+ * drops (e.g., TCP terminated mid-stream) by retrying the stream.
+ */
+testSkipIfWindows(
+  "local-agent - recovers from connection drop",
+  async ({ po }) => {
+    await po.setUpDyadPro({ localAgent: true });
+    await po.importApp("minimal");
+    await po.chatActions.selectLocalAgentMode();
+    // The connection-drop fixture drops on turn 1 (after a tool turn already
+    // completed) to simulate a realistic interrupted follow-up request.
+    await po.sendPrompt("tc=local-agent/connection-drop");
+    // Verify the turn still completed and no error box leaked to the UI.
+    await expect(po.page.getByTestId("chat-error-box")).toHaveCount(0);
+    const introText = po.page.getByText("I'll create a file for you.");
+    const completionText = po.page.getByText(
+      "Successfully created the file after automatic retry.",
+    );
+    await expect(introText).toHaveCount(1);
+    await expect(completionText).toHaveCount(1);
+    await expect(introText).toBeVisible();
+    await expect(completionText).toBeVisible();
+    // Partial chunks from the dropped attempt must not leak into final UI.
+    await expect(
+      po.page.getByText("Partial response before connection dr"),
+    ).toHaveCount(0);
+    // Verify exactly one recovered.ts edit card is shown in chat.
+    const recoveredEditCard = po.page.getByRole("button", {
+      name: /recovered\.ts .*src\/recovered\.ts.*Edit/,
+    });
+    await expect(recoveredEditCard).toHaveCount(1);
+    // The replayed conversation order must stay:
+    // intro assistant text -> tool edit card -> completion assistant text.
+    const introY = (await introText.boundingBox())?.y;
+    const editCardY = (await recoveredEditCard.boundingBox())?.y;
+    const completionY = (await completionText.boundingBox())?.y;
+    expect(introY).toBeDefined();
+    expect(editCardY).toBeDefined();
+    expect(completionY).toBeDefined();
+    expect(introY!).toBeLessThan(editCardY!);
+    expect(editCardY!).toBeLessThan(completionY!);
+    // Snapshot end state for chat + filesystem.
+    await po.snapshotMessages();
+    await po.snapshotAppFiles({
+      name: "after-connection-retry",
+      files: ["src/recovered.ts"],
+    });
+  },
+);
+testSkipIfWindows(
+  "local-agent - recovers when drop happens after tool-call stream",
+  async ({ po }) => {
+    await po.setUpDyadPro({ localAgent: true });
+    await po.importApp("minimal");
+    await po.chatActions.selectLocalAgentMode();
+    await po.sendPrompt("tc=local-agent/connection-drop-after-tool-call");
+    await expect(po.page.getByTestId("chat-error-box")).toHaveCount(0);
+    await expect(
+      po.page.getByText(
+        "Successfully created the file after retrying from a tool-call termination.",
+      ),
+    ).toBeVisible();
+    await expect(
+      po.page
+        .getByRole("button", {
+          name: /recovered-after-tool-call\.ts .*src\/recovered-after-tool-call\.ts.*Edit/,
+        })
+        .first(),
+    ).toBeVisible();
+    await po.snapshotAppFiles({
+      name: "after-tool-call-connection-retry",
+      files: ["src/recovered-after-tool-call.ts"],
+    });
+  },
+);
--- a/e2e-tests/snapshots/local_agent_connection_retry.spec.ts_after-connection-retry.txt
+++ b/e2e-tests/snapshots/local_agent_connection_retry.spec.ts_after-connection-retry.txt
+=== src/recovered.ts ===
+export const recovered = true;
--- a/e2e-tests/snapshots/local_agent_connection_retry.spec.ts_after-tool-call-connection-retry.txt
+++ b/e2e-tests/snapshots/local_agent_connection_retry.spec.ts_after-tool-call-connection-retry.txt
+=== src/recovered-after-tool-call.ts ===
+export const recoveredAfterToolCall = true;
--- a/e2e-tests/snapshots/local_agent_connection_retry.spec.ts_local-agent---recovers-from-connection-drop-1.aria.yml
+++ b/e2e-tests/snapshots/local_agent_connection_retry.spec.ts_local-agent---recovers-from-connection-drop-1.aria.yml
+- paragraph: /Generate an AI_RULES\.md file for this app\. Describe the tech stack in 5-\d+ bullet points and describe clear rules about what libraries to use for what\./
+- button "file1.txt file1.txt Edit":
+  - img
+  - text: ""
+  - button "Edit":
+    - img
+    - text: ""
+  - img
+- paragraph: More EOM
+- button "Copy":
+  - img
+- img
+- text: Approved
+- img
+- text: claude-opus-4-5
+- img
+- text: less than a minute ago
+- img
+- text: (1 files changed)
+- button "Copy Request ID":
+  - img
+  - text: ""
+- paragraph: tc=local-agent/connection-drop
+- paragraph: I'll create a file for you.
+- 'button "recovered.ts src/recovered.ts Edit Summary: File created after connection recovery"':
+  - img
+  - text: ""
+  - button "Edit":
+    - img
+    - text: ""
+  - img
+  - text: ""
+- paragraph: Successfully created the file after automatic retry.
+- button "Copy":
+  - img
+- img
+- text: claude-opus-4-5
+- img
+- text: less than a minute ago
+- button "Copy Request ID":
+  - img
+  - text: ""
+- button "Undo":
+  - img
+  - text: ""
+- button "Retry":
+  - img
+  - text: ""
\ No newline at end of file
--- a/src/__tests__/local_agent_handler.test.ts
+++ b/src/__tests__/local_agent_handler.test.ts
@@ -847,6 +847,184 @@ describe("handleLocalAgentStream", () => {
      expect(lastContentUpdate.data.content).toContain("Hello, ");
      expect(lastContentUpdate.data.content).toContain("world!");
    });
+    it("should retry and resume when a stream terminates transiently", async () => {
+      // Arrange
+      const { event, getMessagesByChannel } = createFakeEvent();
+      mockSettings = buildTestSettings({ enableDyadPro: true });
+      mockChatData = buildTestChat();
+      const streamMessagesByAttempt: any[][] = [];
+      let attemptCount = 0;
+      mockStreamTextImpl = (options) => {
+        attemptCount += 1;
+        streamMessagesByAttempt.push(options.messages ?? []);
+        if (attemptCount === 1) {
+          return {
+            fullStream: (async function* () {
+              yield { type: "text-delta", text: "Partial response. " };
+              throw new TypeError("terminated");
+            })(),
+            response: Promise.resolve({ messages: [] }),
+            steps: Promise.resolve([]),
+          };
+        }
+        return {
+          fullStream: (async function* () {
+            yield { type: "text-delta", text: "Recovered output." };
+          })(),
+          response: Promise.resolve({
+            messages: [
+              {
+                role: "assistant",
+                content: [{ type: "text", text: "Recovered output." }],
+              },
+            ],
+          }),
+          steps: Promise.resolve([{ toolCalls: [] }]),
+        };
+      };
+      // Act
+      await handleLocalAgentStream(
+        event,
+        { chatId: 1, prompt: "test" },
+        new AbortController(),
+        {
+          placeholderMessageId: 10,
+          systemPrompt: "You are helpful",
+          dyadRequestId,
+        },
+      );
+      // Assert
+      expect(attemptCount).toBe(2);
+      expect(getMessagesByChannel("chat:response:error")).toHaveLength(0);
+      const contentUpdates = dbOperations.updates.filter(
+        (u) => u.data.content !== undefined,
+      );
+      const finalContent = contentUpdates[contentUpdates.length - 1].data
+        .content as string;
+      expect(finalContent).toContain("Partial response.");
+      expect(finalContent).toContain("Recovered output.");
+      const continuationInstructionFound = (
+        streamMessagesByAttempt[1] ?? []
+      ).some(
+        (message: any) =>
+          message.role === "user" &&
+          Array.isArray(message.content) &&
+          message.content.some(
+            (part: any) =>
+              part.type === "text" &&
+              typeof part.text === "string" &&
+              part.text.includes(
+                "previous response stream was interrupted by a transient network error",
+              ),
+          ),
+      );
+      expect(continuationInstructionFound).toBe(true);
+    });
+    it("should replay emitted tool events before retrying a terminated stream", async () => {
+      // Arrange
+      const { event, getMessagesByChannel } = createFakeEvent();
+      mockSettings = buildTestSettings({ enableDyadPro: true });
+      mockChatData = buildTestChat();
+      const streamMessagesByAttempt: any[][] = [];
+      let attemptCount = 0;
+      mockStreamTextImpl = (options) => {
+        attemptCount += 1;
+        streamMessagesByAttempt.push(options.messages ?? []);
+        if (attemptCount === 1) {
+          return {
+            fullStream: (async function* () {
+              yield { type: "text-delta", text: "Working with tools. " };
+              yield {
+                type: "tool-call",
+                toolCallId: "call_replay_1",
+                toolName: "read_file",
+                input: { path: "README.md" },
+              };
+              yield {
+                type: "tool-result",
+                toolCallId: "call_replay_1",
+                toolName: "read_file",
+                output: "README content",
+              };
+              throw new TypeError("terminated");
+            })(),
+            response: Promise.resolve({ messages: [] }),
+            steps: Promise.resolve([]),
+          };
+        }
+        return {
+          fullStream: (async function* () {
+            yield { type: "text-delta", text: "Resumed after replay." };
+          })(),
+          response: Promise.resolve({
+            messages: [
+              {
+                role: "assistant",
+                content: [{ type: "text", text: "Resumed after replay." }],
+              },
+            ],
+          }),
+          steps: Promise.resolve([{ toolCalls: [] }]),
+        };
+      };
+      // Act
+      await handleLocalAgentStream(
+        event,
+        { chatId: 1, prompt: "test" },
+        new AbortController(),
+        {
+          placeholderMessageId: 10,
+          systemPrompt: "You are helpful",
+          dyadRequestId,
+        },
+      );
+      // Assert
+      expect(attemptCount).toBe(2);
+      expect(getMessagesByChannel("chat:response:error")).toHaveLength(0);
+      const secondAttemptMessages = streamMessagesByAttempt[1] ?? [];
+      const hasReplayedToolCall = secondAttemptMessages.some(
+        (message: any) =>
+          message.role === "assistant" &&
+          Array.isArray(message.content) &&
+          message.content.some(
+            (part: any) =>
+              part.type === "tool-call" &&
+              part.toolCallId === "call_replay_1" &&
+              part.toolName === "read_file",
+          ),
+      );
+      const hasReplayedToolResult = secondAttemptMessages.some(
+        (message: any) =>
+          message.role === "tool" &&
+          Array.isArray(message.content) &&
+          message.content.some(
+            (part: any) =>
+              part.type === "tool-result" &&
+              part.toolCallId === "call_replay_1" &&
+              part.toolName === "read_file" &&
+              part.output?.type === "text" &&
+              part.output?.value === "README content",
+          ),
+      );
+      expect(hasReplayedToolCall).toBe(true);
+      expect(hasReplayedToolResult).toBe(true);
+    });
  });
  describe("Stream processing - reasoning blocks", () => {

--- a/src/pro/main/ipc/handlers/local_agent/local_agent_handler.ts
+++ b/src/pro/main/ipc/handlers/local_agent/local_agent_handler.ts
--- a/testing/fake-llm-server/chatCompletionHandler.ts
+++ b/testing/fake-llm-server/chatCompletionHandler.ts
@@ -53,11 +53,14 @@ export const createChatCompletionHandler =
    // First, check if the LAST user message is a fixture trigger
    let localAgentFixture = extractLocalAgentFixture(userTextContent);
-    // If last message isn't a fixture but contains a todo reminder, search earlier messages
+    // If the last user message is synthetic (e.g., todo reminder or retry
-    // This handles the outer loop case where a reminder is injected after the original fixture trigger
+    // continuation instruction), search earlier user messages for the original
-    // Note: This magic string must match the reminder text in prepare_step_utils.ts
+    // fixture trigger. NOTE: both substrings must match the app's injected text.
-    // buildTodoReminderMessage(). Update both if the text changes.
+    if (
-    if (!localAgentFixture && userTextContent.includes("incomplete todo(s)")) {
+      !localAgentFixture &&
+      (userTextContent.includes("incomplete todo(s)") ||
+        userTextContent.includes("previous response stream was interrupted"))
+    ) {
      for (const msg of userMessages) {
        const textContent = getTextContent(msg);
        const fixture = extractLocalAgentFixture(textContent);

--- a/testing/fake-llm-server/localAgentHandler.ts
+++ b/testing/fake-llm-server/localAgentHandler.ts
@@ -21,6 +21,10 @@ try {
 // Cache loaded fixtures to avoid re-importing
 const fixtureCache = new Map<string, LocalAgentFixture>();
+// Track connection attempts per session+pass+turn for connection drop simulation.
+// Key: `${sessionId}-${passIndex}-${turnIndex}[-after-tool-call]`, Value: attempt count
+const connectionAttempts = new Map<string, number>();
 /**
 * Generate a session ID from the first user message
 * This allows us to track conversation state across requests
@@ -246,7 +250,11 @@ async function streamTextResponse(
 /**
 * Stream a turn with tool calls
 */
-async function streamToolCallResponse(res: Response, turn: Turn) {
+async function streamToolCallResponse(
+  res: Response,
+  turn: Turn,
+  options?: { dropAfterToolCalls?: boolean },
+) {
  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
@@ -320,6 +328,16 @@ async function streamToolCallResponse(res: Response, turn: Turn) {
    }
  }
+  if (options?.dropAfterToolCalls) {
+    console.log(
+      `[local-agent] Simulating connection drop after streaming tool calls`,
+    );
+    // Drop before finish_reason/[DONE] so tool calls were emitted but the
+    // provider response did not complete.
+    res.socket?.destroy();
+    return;
+  }
  // 4) Send finish (with optional usage data)
  const finishReason =
    turn.toolCalls && turn.toolCalls.length > 0 ? "tool_calls" : "stop";
@@ -340,6 +358,7 @@ async function streamToolCallResponse(res: Response, turn: Turn) {
    finishChunk.usage = turn.usage;
  }
  res.write(`data: ${JSON.stringify(finishChunk)}\n\n`);
  res.write("data: [DONE]\n\n");
  res.end();
 }
@@ -413,9 +432,64 @@ export async function handleLocalAgentFixture(
      }
    }
+    // Check if we should simulate a connection drop for this attempt
+    const turnScopedDropAttempts =
+      fixture.dropConnectionByTurn?.find((rule) => rule.turnIndex === turnIndex)
+        ?.attempts ?? fixture.dropConnectionOnAttempts;
+    const turnScopedDropAfterToolCallAttempts =
+      fixture.dropConnectionAfterToolCallByTurn?.find(
+        (rule) => rule.turnIndex === turnIndex,
+      )?.attempts;
+    if (turnScopedDropAttempts && turnScopedDropAttempts.length > 0) {
+      const attemptKey = `${sessionId}-${passIndex}-${turnIndex}`;
+      const currentAttempt = (connectionAttempts.get(attemptKey) || 0) + 1;
+      connectionAttempts.set(attemptKey, currentAttempt);
+      console.log(
+        `[local-agent] Connection attempt ${currentAttempt} for ${attemptKey}, ` +
+          `drop on: [${turnScopedDropAttempts.join(", ")}]`,
+      );
+      if (turnScopedDropAttempts.includes(currentAttempt)) {
+        console.log(
+          `[local-agent] Simulating connection drop on attempt ${currentAttempt}`,
+        );
+        // Stream partial data then destroy the socket to simulate a network interruption
+        res.setHeader("Content-Type", "text/event-stream");
+        res.setHeader("Cache-Control", "no-cache");
+        res.setHeader("Connection", "keep-alive");
+        res.write(
+          createStreamChunk(
+            "Partial response before connection dr",
+            "assistant",
+          ),
+        );
+        // Destroy the underlying socket to trigger a "terminated" error on the client
+        res.socket?.destroy();
+        return;
+      }
+    }
    // If this turn has tool calls, stream them
    if (turn.toolCalls && turn.toolCalls.length > 0) {
-      await streamToolCallResponse(res, turn);
+      const dropAfterToolCalls =
+        turnScopedDropAfterToolCallAttempts &&
+        turnScopedDropAfterToolCallAttempts.length > 0
+          ? (() => {
+              const attemptKey = `${sessionId}-${passIndex}-${turnIndex}-after-tool-call`;
+              const currentAttempt =
+                (connectionAttempts.get(attemptKey) || 0) + 1;
+              connectionAttempts.set(attemptKey, currentAttempt);
+              return turnScopedDropAfterToolCallAttempts.includes(
+                currentAttempt,
+              );
+            })()
+          : false;
+      await streamToolCallResponse(res, turn, {
+        dropAfterToolCalls,
+      });
    } else {
      // Text-only turn
      await streamTextResponse(res, turn.text || "Done.", turn.usage);

--- a/testing/fake-llm-server/localAgentTypes.ts
+++ b/testing/fake-llm-server/localAgentTypes.ts
@@ -47,4 +47,33 @@ export type LocalAgentFixture = {
   * Use this when testing todo follow-up loop behavior.
   */
  passes?: Pass[];
+  /**
+   * For testing connection resilience: drop the connection on these attempt
+   * numbers (1-indexed). Used for any turn that has no `dropConnectionByTurn`
+   * rule; attempts are counted per turn. The fake server streams partial data,
+   * then destroys the socket. E.g., [1] drops a turn's 1st attempt, not its 2nd.
+   */
+  dropConnectionOnAttempts?: number[];
+  /**
+   * Optional per-turn connection drop configuration.
+   * Useful for dropping a later request after an earlier turn's tools have run.
+   * Example: [{ turnIndex: 1, attempts: [1] }] drops the first attempt of turn 1.
+   */
+  dropConnectionByTurn?: Array<{
+    /** 0-based turn index within the active pass */
+    turnIndex: number;
+    /** Attempt numbers (1-indexed) to drop for this turn */
+    attempts: number[];
+  }>;
+  /**
+   * Optional per-turn configuration to drop the connection AFTER streaming
+   * tool-call chunks for a turn (before [DONE]). This simulates termination in
+   * the window where a tool call was emitted but no tool result was captured.
+   */
+  dropConnectionAfterToolCallByTurn?: Array<{
+    /** 0-based turn index within the active pass */
+    turnIndex: number;
+    /** Attempt numbers (1-indexed) to drop for this turn */
+    attempts: number[];
+  }>;
 };