Unverified 提交 6f53e896 authored 作者: Will Chen's avatar Will Chen 提交者: GitHub

Handle local agent connection recovery (#2853)

Handle connection-drop and retry behavior in local agent IPC handling. Add an end-to-end scenario that verifies recovery after a temporary local agent disconnect. Align test fixtures and snapshots for local-agent reconnection behavior.
上级 709222c5
...@@ -12,6 +12,8 @@ dist/ ...@@ -12,6 +12,8 @@ dist/
# playwright # playwright
playwright-report/ playwright-report/
test-results/ test-results/
blob-report/
flakiness-report/
# Diagnostic reports (https://nodejs.org/api/report.html) # Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
......
import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";
/**
 * Fixture: connection drop once tool-call chunks have been streamed.
 *
 * The first attempt of turn 0 is configured to drop after the tool-call
 * chunks are emitted, i.e. before the stream is finalized. The scenario
 * targets the retry window in which a tool call was streamed but the response
 * never completed; the intent is that the retry does not lead to the tool
 * being executed twice.
 */
// Scripted conversation: a `write_file` tool turn followed by a closing text turn.
const turns: LocalAgentFixture["turns"] = [
  {
    text: "I'll create a file for you.",
    toolCalls: [
      {
        name: "write_file",
        // Key order is kept as path, content, description.
        args: {
          path: "src/recovered-after-tool-call.ts",
          content: "export const recoveredAfterToolCall = true;\n",
          description: "File created after tool-call termination recovery",
        },
      },
    ],
  },
  {
    text: "Successfully created the file after retrying from a tool-call termination.",
  },
];

export const fixture: LocalAgentFixture = {
  description: "Connection drop after streaming tool-call chunks",
  // Attempt numbers are 1-indexed: only the first attempt of turn 0 is dropped.
  dropConnectionAfterToolCallByTurn: [{ turnIndex: 0, attempts: [1] }],
  turns,
};
import type { LocalAgentFixture } from "../../../../testing/fake-llm-server/localAgentTypes";
/**
 * Fixture: automatic retry after a dropped connection (for example, a TCP
 * connection terminated mid-stream).
 *
 * The drop is configured for the first attempt of turn 1, the text-only turn
 * that follows the tool turn. Dropping there, rather than before any tool
 * activity, is the more realistic case: the local agent handler is expected to
 * retry on its own and carry on without redoing work that already completed.
 */
// Scripted conversation: a `write_file` tool turn followed by a closing text turn.
const turns: LocalAgentFixture["turns"] = [
  {
    text: "I'll create a file for you.",
    toolCalls: [
      {
        name: "write_file",
        // Key order is kept as path, content, description.
        args: {
          path: "src/recovered.ts",
          content: "export const recovered = true;\n",
          description: "File created after connection recovery",
        },
      },
    ],
  },
  {
    text: "Successfully created the file after automatic retry.",
  },
];

export const fixture: LocalAgentFixture = {
  description: "Automatic retry after connection drop",
  // Attempt numbers are 1-indexed: only the first attempt of turn 1 is dropped.
  dropConnectionByTurn: [{ turnIndex: 1, attempts: [1] }],
  turns,
};
import { expect } from "@playwright/test";
import { testSkipIfWindows } from "./helpers/test_helper";
/**
* E2E test for local-agent connection retry resilience.
* Verifies that the agent automatically recovers from transient connection
* drops (e.g., TCP terminated mid-stream) by retrying the stream.
*/
testSkipIfWindows(
  "local-agent - recovers from connection drop",
  async ({ po }) => {
    // Arrange: Pro setup with the local agent enabled, on the "minimal" app.
    await po.setUpDyadPro({ localAgent: true });
    await po.importApp("minimal");
    await po.chatActions.selectLocalAgentMode();

    // Act: this fixture drops the first attempt of turn 1, i.e. the follow-up
    // request issued after the tool turn has already completed.
    await po.sendPrompt("tc=local-agent/connection-drop");

    const { page } = po;

    // The turn must finish without surfacing an error box.
    await expect(page.getByTestId("chat-error-box")).toHaveCount(0);

    const introMessage = page.getByText("I'll create a file for you.");
    const doneMessage = page.getByText(
      "Successfully created the file after automatic retry.",
    );
    const assistantMessages = [introMessage, doneMessage];

    // Each assistant message appears exactly once...
    for (const message of assistantMessages) {
      await expect(message).toHaveCount(1);
    }
    // ...and is actually rendered.
    for (const message of assistantMessages) {
      await expect(message).toBeVisible();
    }

    // The truncated text streamed by the dropped attempt must not survive
    // into the final UI.
    await expect(
      page.getByText("Partial response before connection dr"),
    ).toHaveCount(0);

    // Exactly one edit card for recovered.ts is shown in the chat.
    const editCard = page.getByRole("button", {
      name: /recovered\.ts .*src\/recovered\.ts.*Edit/,
    });
    await expect(editCard).toHaveCount(1);

    // Vertical order after the retry must be:
    // intro text -> edit card -> completion text.
    const topOf = async (target: typeof introMessage) =>
      (await target.boundingBox())?.y;
    const introTop = await topOf(introMessage);
    const cardTop = await topOf(editCard);
    const doneTop = await topOf(doneMessage);

    for (const top of [introTop, cardTop, doneTop]) {
      expect(top).toBeDefined();
    }
    expect(introTop!).toBeLessThan(cardTop!);
    expect(cardTop!).toBeLessThan(doneTop!);

    // Snapshot the end state of both the chat and the filesystem.
    await po.snapshotMessages();
    await po.snapshotAppFiles({
      name: "after-connection-retry",
      files: ["src/recovered.ts"],
    });
  },
);
testSkipIfWindows(
  "local-agent - recovers when drop happens after tool-call stream",
  async ({ po }) => {
    // Arrange: Pro setup with the local agent enabled, on the "minimal" app.
    await po.setUpDyadPro({ localAgent: true });
    await po.importApp("minimal");
    await po.chatActions.selectLocalAgentMode();

    // Act: this fixture drops the first attempt of turn 0 after its tool-call
    // chunks have been streamed.
    await po.sendPrompt("tc=local-agent/connection-drop-after-tool-call");

    const { page } = po;

    // No error box may surface in the chat.
    await expect(page.getByTestId("chat-error-box")).toHaveCount(0);

    // The closing assistant text of the fixture is rendered.
    const doneMessage = page.getByText(
      "Successfully created the file after retrying from a tool-call termination.",
    );
    await expect(doneMessage).toBeVisible();

    // At least one edit card for the recovered file is rendered; only the
    // first match is checked.
    const firstEditCard = page
      .getByRole("button", {
        name: /recovered-after-tool-call\.ts .*src\/recovered-after-tool-call\.ts.*Edit/,
      })
      .first();
    await expect(firstEditCard).toBeVisible();

    // Snapshot the file written by the tool call.
    await po.snapshotAppFiles({
      name: "after-tool-call-connection-retry",
      files: ["src/recovered-after-tool-call.ts"],
    });
  },
);
=== src/recovered-after-tool-call.ts ===
export const recoveredAfterToolCall = true;
- paragraph: /Generate an AI_RULES\.md file for this app\. Describe the tech stack in 5-\d+ bullet points and describe clear rules about what libraries to use for what\./
- button "file1.txt file1.txt Edit":
- img
- text: ""
- button "Edit":
- img
- text: ""
- img
- paragraph: More EOM
- button "Copy":
- img
- img
- text: Approved
- img
- text: claude-opus-4-5
- img
- text: less than a minute ago
- img
- text: (1 files changed)
- button "Copy Request ID":
- img
- text: ""
- paragraph: tc=local-agent/connection-drop
- paragraph: I'll create a file for you.
- 'button "recovered.ts src/recovered.ts Edit Summary: File created after connection recovery"':
- img
- text: ""
- button "Edit":
- img
- text: ""
- img
- text: ""
- paragraph: Successfully created the file after automatic retry.
- button "Copy":
- img
- img
- text: claude-opus-4-5
- img
- text: less than a minute ago
- button "Copy Request ID":
- img
- text: ""
- button "Undo":
- img
- text: ""
- button "Retry":
- img
- text: ""
\ No newline at end of file
...@@ -847,6 +847,184 @@ describe("handleLocalAgentStream", () => { ...@@ -847,6 +847,184 @@ describe("handleLocalAgentStream", () => {
expect(lastContentUpdate.data.content).toContain("Hello, "); expect(lastContentUpdate.data.content).toContain("Hello, ");
expect(lastContentUpdate.data.content).toContain("world!"); expect(lastContentUpdate.data.content).toContain("world!");
}); });
it("should retry and resume when a stream terminates transiently", async () => {
  // Arrange
  const { event, getMessagesByChannel } = createFakeEvent();
  mockSettings = buildTestSettings({ enableDyadPro: true });
  mockChatData = buildTestChat();

  // Messages passed to each streamText call, indexed by attempt (0-based).
  const streamMessagesByAttempt: any[][] = [];
  let attemptCount = 0;

  // First attempt: emits some text, then fails with TypeError("terminated").
  async function* interruptedStream() {
    yield { type: "text-delta", text: "Partial response. " };
    throw new TypeError("terminated");
  }

  // Second attempt: completes normally.
  async function* recoveredStream() {
    yield { type: "text-delta", text: "Recovered output." };
  }

  mockStreamTextImpl = (options) => {
    attemptCount += 1;
    streamMessagesByAttempt.push(options.messages ?? []);

    if (attemptCount !== 1) {
      return {
        fullStream: recoveredStream(),
        response: Promise.resolve({
          messages: [
            {
              role: "assistant",
              content: [{ type: "text", text: "Recovered output." }],
            },
          ],
        }),
        steps: Promise.resolve([{ toolCalls: [] }]),
      };
    }

    return {
      fullStream: interruptedStream(),
      response: Promise.resolve({ messages: [] }),
      steps: Promise.resolve([]),
    };
  };

  // Act
  const streamContext = {
    placeholderMessageId: 10,
    systemPrompt: "You are helpful",
    dyadRequestId,
  };
  await handleLocalAgentStream(
    event,
    { chatId: 1, prompt: "test" },
    new AbortController(),
    streamContext,
  );

  // Assert: exactly one retry happened and no error reached the renderer.
  expect(attemptCount).toBe(2);
  expect(getMessagesByChannel("chat:response:error")).toHaveLength(0);

  // The last persisted content keeps the partial text and adds the recovered text.
  const contentUpdates = dbOperations.updates.filter(
    (update) => update.data.content !== undefined,
  );
  const finalContent = contentUpdates[contentUpdates.length - 1].data
    .content as string;
  for (const fragment of ["Partial response.", "Recovered output."]) {
    expect(finalContent).toContain(fragment);
  }

  // The second attempt must carry a user message with the continuation instruction.
  const isInterruptionNotice = (part: any) =>
    part.type === "text" &&
    typeof part.text === "string" &&
    part.text.includes(
      "previous response stream was interrupted by a transient network error",
    );
  const carriesInterruptionNotice = (message: any) =>
    message.role === "user" &&
    Array.isArray(message.content) &&
    message.content.some((part: any) => isInterruptionNotice(part));

  const retryMessages = streamMessagesByAttempt[1] ?? [];
  const continuationInstructionFound = retryMessages.some((message: any) =>
    carriesInterruptionNotice(message),
  );
  expect(continuationInstructionFound).toBe(true);
});
it("should replay emitted tool events before retrying a terminated stream", async () => {
  // Arrange
  const { event, getMessagesByChannel } = createFakeEvent();
  mockSettings = buildTestSettings({ enableDyadPro: true });
  mockChatData = buildTestChat();

  // Messages passed to each streamText call, indexed by attempt (0-based).
  const streamMessagesByAttempt: any[][] = [];
  let attemptCount = 0;

  // First attempt: text, a complete tool call/result pair, then a
  // TypeError("terminated") failure.
  async function* interruptedAfterToolResult() {
    yield { type: "text-delta", text: "Working with tools. " };
    yield {
      type: "tool-call",
      toolCallId: "call_replay_1",
      toolName: "read_file",
      input: { path: "README.md" },
    };
    yield {
      type: "tool-result",
      toolCallId: "call_replay_1",
      toolName: "read_file",
      output: "README content",
    };
    throw new TypeError("terminated");
  }

  // Second attempt: completes normally.
  async function* resumedStream() {
    yield { type: "text-delta", text: "Resumed after replay." };
  }

  mockStreamTextImpl = (options) => {
    attemptCount += 1;
    streamMessagesByAttempt.push(options.messages ?? []);

    if (attemptCount !== 1) {
      return {
        fullStream: resumedStream(),
        response: Promise.resolve({
          messages: [
            {
              role: "assistant",
              content: [{ type: "text", text: "Resumed after replay." }],
            },
          ],
        }),
        steps: Promise.resolve([{ toolCalls: [] }]),
      };
    }

    return {
      fullStream: interruptedAfterToolResult(),
      response: Promise.resolve({ messages: [] }),
      steps: Promise.resolve([]),
    };
  };

  // Act
  const streamContext = {
    placeholderMessageId: 10,
    systemPrompt: "You are helpful",
    dyadRequestId,
  };
  await handleLocalAgentStream(
    event,
    { chatId: 1, prompt: "test" },
    new AbortController(),
    streamContext,
  );

  // Assert: exactly one retry happened and no error reached the renderer.
  expect(attemptCount).toBe(2);
  expect(getMessagesByChannel("chat:response:error")).toHaveLength(0);

  const secondAttemptMessages = streamMessagesByAttempt[1] ?? [];

  // True when some message with the given role has a content part matching `matchesPart`.
  const anyPartMatches = (role: string, matchesPart: (part: any) => boolean) =>
    secondAttemptMessages.some(
      (message: any) =>
        message.role === role &&
        Array.isArray(message.content) &&
        message.content.some((part: any) => matchesPart(part)),
    );

  // The tool call from the interrupted attempt is replayed as an assistant part.
  const hasReplayedToolCall = anyPartMatches(
    "assistant",
    (part) =>
      part.type === "tool-call" &&
      part.toolCallId === "call_replay_1" &&
      part.toolName === "read_file",
  );

  // Its result is replayed as a tool part carrying a text output.
  const hasReplayedToolResult = anyPartMatches(
    "tool",
    (part) =>
      part.type === "tool-result" &&
      part.toolCallId === "call_replay_1" &&
      part.toolName === "read_file" &&
      part.output?.type === "text" &&
      part.output?.value === "README content",
  );

  expect(hasReplayedToolCall).toBe(true);
  expect(hasReplayedToolResult).toBe(true);
});
}); });
describe("Stream processing - reasoning blocks", () => { describe("Stream processing - reasoning blocks", () => {
......
...@@ -53,11 +53,14 @@ export const createChatCompletionHandler = ...@@ -53,11 +53,14 @@ export const createChatCompletionHandler =
// First, check if the LAST user message is a fixture trigger // First, check if the LAST user message is a fixture trigger
let localAgentFixture = extractLocalAgentFixture(userTextContent); let localAgentFixture = extractLocalAgentFixture(userTextContent);
// If last message isn't a fixture but contains a todo reminder, search earlier messages // If the last user message is synthetic (e.g., todo reminder or retry
// This handles the outer loop case where a reminder is injected after the original fixture trigger // continuation instruction), search earlier user messages for the original
// Note: This magic string must match the reminder text in prepare_step_utils.ts // fixture trigger.
// buildTodoReminderMessage(). Update both if the text changes. if (
if (!localAgentFixture && userTextContent.includes("incomplete todo(s)")) { !localAgentFixture &&
(userTextContent.includes("incomplete todo(s)") ||
userTextContent.includes("previous response stream was interrupted"))
) {
for (const msg of userMessages) { for (const msg of userMessages) {
const textContent = getTextContent(msg); const textContent = getTextContent(msg);
const fixture = extractLocalAgentFixture(textContent); const fixture = extractLocalAgentFixture(textContent);
......
...@@ -21,6 +21,10 @@ try { ...@@ -21,6 +21,10 @@ try {
// Cache loaded fixtures to avoid re-importing // Cache loaded fixtures to avoid re-importing
const fixtureCache = new Map<string, LocalAgentFixture>(); const fixtureCache = new Map<string, LocalAgentFixture>();
// Track connection attempts per session+turn for connection drop simulation.
// Key: `${sessionId}-${passIndex}-${turnIndex}`, Value: attempt count
const connectionAttempts = new Map<string, number>();
/** /**
* Generate a session ID from the first user message * Generate a session ID from the first user message
* This allows us to track conversation state across requests * This allows us to track conversation state across requests
...@@ -246,7 +250,11 @@ async function streamTextResponse( ...@@ -246,7 +250,11 @@ async function streamTextResponse(
/** /**
* Stream a turn with tool calls * Stream a turn with tool calls
*/ */
async function streamToolCallResponse(res: Response, turn: Turn) { async function streamToolCallResponse(
res: Response,
turn: Turn,
options?: { dropAfterToolCalls?: boolean },
) {
res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Content-Type", "text/event-stream");
res.setHeader("Cache-Control", "no-cache"); res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive"); res.setHeader("Connection", "keep-alive");
...@@ -320,6 +328,16 @@ async function streamToolCallResponse(res: Response, turn: Turn) { ...@@ -320,6 +328,16 @@ async function streamToolCallResponse(res: Response, turn: Turn) {
} }
} }
if (options?.dropAfterToolCalls) {
console.log(
`[local-agent] Simulating connection drop after streaming tool calls`,
);
// Drop before finish_reason/[DONE] so tool calls were emitted but the
// provider response did not complete.
res.socket?.destroy();
return;
}
// 4) Send finish (with optional usage data) // 4) Send finish (with optional usage data)
const finishReason = const finishReason =
turn.toolCalls && turn.toolCalls.length > 0 ? "tool_calls" : "stop"; turn.toolCalls && turn.toolCalls.length > 0 ? "tool_calls" : "stop";
...@@ -340,6 +358,7 @@ async function streamToolCallResponse(res: Response, turn: Turn) { ...@@ -340,6 +358,7 @@ async function streamToolCallResponse(res: Response, turn: Turn) {
finishChunk.usage = turn.usage; finishChunk.usage = turn.usage;
} }
res.write(`data: ${JSON.stringify(finishChunk)}\n\n`); res.write(`data: ${JSON.stringify(finishChunk)}\n\n`);
res.write("data: [DONE]\n\n"); res.write("data: [DONE]\n\n");
res.end(); res.end();
} }
...@@ -413,9 +432,64 @@ export async function handleLocalAgentFixture( ...@@ -413,9 +432,64 @@ export async function handleLocalAgentFixture(
} }
} }
// Check if we should simulate a connection drop for this attempt
const turnScopedDropAttempts =
fixture.dropConnectionByTurn?.find((rule) => rule.turnIndex === turnIndex)
?.attempts ?? fixture.dropConnectionOnAttempts;
const turnScopedDropAfterToolCallAttempts =
fixture.dropConnectionAfterToolCallByTurn?.find(
(rule) => rule.turnIndex === turnIndex,
)?.attempts;
if (turnScopedDropAttempts && turnScopedDropAttempts.length > 0) {
const attemptKey = `${sessionId}-${passIndex}-${turnIndex}`;
const currentAttempt = (connectionAttempts.get(attemptKey) || 0) + 1;
connectionAttempts.set(attemptKey, currentAttempt);
console.log(
`[local-agent] Connection attempt ${currentAttempt} for ${attemptKey}, ` +
`drop on: [${turnScopedDropAttempts.join(", ")}]`,
);
if (turnScopedDropAttempts.includes(currentAttempt)) {
console.log(
`[local-agent] Simulating connection drop on attempt ${currentAttempt}`,
);
// Stream partial data then destroy the socket to simulate a network interruption
res.setHeader("Content-Type", "text/event-stream");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
res.write(
createStreamChunk(
"Partial response before connection dr",
"assistant",
),
);
// Destroy the underlying socket to trigger a "terminated" error on the client
res.socket?.destroy();
return;
}
}
// If this turn has tool calls, stream them // If this turn has tool calls, stream them
if (turn.toolCalls && turn.toolCalls.length > 0) { if (turn.toolCalls && turn.toolCalls.length > 0) {
await streamToolCallResponse(res, turn); const dropAfterToolCalls =
turnScopedDropAfterToolCallAttempts &&
turnScopedDropAfterToolCallAttempts.length > 0
? (() => {
const attemptKey = `${sessionId}-${passIndex}-${turnIndex}-after-tool-call`;
const currentAttempt =
(connectionAttempts.get(attemptKey) || 0) + 1;
connectionAttempts.set(attemptKey, currentAttempt);
return turnScopedDropAfterToolCallAttempts.includes(
currentAttempt,
);
})()
: false;
await streamToolCallResponse(res, turn, {
dropAfterToolCalls,
});
} else { } else {
// Text-only turn // Text-only turn
await streamTextResponse(res, turn.text || "Done.", turn.usage); await streamTextResponse(res, turn.text || "Done.", turn.usage);
......
...@@ -47,4 +47,33 @@ export type LocalAgentFixture = { ...@@ -47,4 +47,33 @@ export type LocalAgentFixture = {
* Use this when testing todo follow-up loop behavior. * Use this when testing todo follow-up loop behavior.
*/ */
passes?: Pass[]; passes?: Pass[];
/**
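* For testing connection resilience: drop the connection on these attempt
* numbers (1-indexed). Attempts are counted separately per turn, and this
* list is the fallback for any turn that has no matching
* `dropConnectionByTurn` entry. The fake server will stream partial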
* data then destroy the socket, simulating a network interruption.
* E.g., [1] means drop on the 1st attempt, succeed on the 2nd.
*/
dropConnectionOnAttempts?: number[];
/**
* Optional per-turn connection drop configuration.
* Useful for simulating drops after prior tool activity in an earlier turn of the same pass.
* Example: [{ turnIndex: 1, attempts: [1] }] drops the first attempt of turn 1.
*/
dropConnectionByTurn?: Array<{
/** 0-based turn index within the active pass */
turnIndex: number;
/** Attempt numbers (1-indexed) to drop for this turn */
attempts: number[];
}>;
/**
* Optional per-turn configuration to drop the connection AFTER streaming
* tool-call chunks for a turn (before [DONE]). This simulates termination in
* the window where a tool call was emitted but no tool result was captured.
*/
dropConnectionAfterToolCallByTurn?: Array<{
/** 0-based turn index within the active pass */
turnIndex: number;
/** Attempt numbers (1-indexed) to drop for this turn */
attempts: number[];
}>;
}; };
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论