fix: limit grep results to prevent context_length_exceeded errors (#2510)

## Summary - Add a configurable `limit` parameter to the grep tool (default: 100, max: 250) to prevent context window overflow - Truncate individual line text to 500 characters to handle very long lines - Add clear truncation notice telling the AI to narrow its search when results are truncated - Add `total` and `truncated` attributes to XML output for visibility ## Test plan - [x] Build passes (`npm run build`) - [x] Lint passes (`npm run lint`) - [x] All 669 unit tests pass (`npm test`) - Manual testing: grep with a broad query like "import" should now return limited results with a truncation notice instead of overflowing the context window Fixes #2509 🤖 Generated with [Claude Code](https://claude.com/claude-code)  --- <a href="https://app.devin.ai/review/dyad-sh/dyad/pull/2510" target="_blank"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open with Devin"> </picture> </a>   --- ## Summary by cubic Limits grep results and line lengths to prevent context window overflows and context_length_exceeded errors. Addresses Linear #2509 with a configurable cap, consistent sorting, clearer UI output, and ignoring include_pattern "*" when it matches all files. - **Bug Fixes** - Added limit parameter (default 100, max 250) to cap matches. - Truncated each matched line to 500 chars. - Sorted results by path and line number, and ignored include_pattern "*" with a note to avoid broad searches. - Added truncation notice and "X of Y matches" in UI. - Exposed total and truncated flags in XML output for visibility. <sup>Written for commit ad5979b9352ffc754019e13ead9c6be2e7b24ce9. Summary will update on new commits.</sup>  --------- Co-authored-by: Will Chen <willchen90@gmail.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>

fix: limit grep results to prevent context_length_exceeded errors (#2510)
3deb70b1 · wwwillchen-bot · GitHub · aa71f805 · 3deb70b1 · 3deb70b1
--- a/e2e-tests/snapshots/local_agent_ask.spec.ts_local-agent-ask-mode-1.txt
+++ b/e2e-tests/snapshots/local_agent_ask.spec.ts_local-agent-ask-mode-1.txt
@@ -96,7 +96,7 @@
        "type": "function",
        "function": {
          "name": "grep",
-          "description": "Search for a regex pattern in the codebase using ripgrep.\n\n- Returns matching lines with file paths and line numbers\n- By default, the search is case-insensitive\n- Use include_pattern to filter by file type (e.g. '*.tsx')\n- Use exclude_pattern to skip certain files (e.g. '*.test.ts')",
+          "description": "Search for a regex pattern in the codebase using ripgrep.\n\n- Returns matching lines with file paths and line numbers\n- By default, the search is case-insensitive\n- Use include_pattern to filter by file type (e.g. '*.tsx')\n- Use exclude_pattern to skip certain files (e.g. '*.test.ts')\n- Results are limited to 100 matches by default (max 250). If results are truncated, narrow your search with include_pattern or a more specific query.",
          "parameters": {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
@@ -116,6 +116,12 @@
              "case_sensitive": {
                "description": "Whether the search should be case sensitive (default: false)",
                "type": "boolean"
+              },
+              "limit": {
+                "description": "Maximum number of matches to return (default: 100, max: 250). Use include_pattern to narrow results if limit is reached.",
+                "type": "number",
+                "minimum": 1,
+                "maximum": 250
              }
            },
            "required": [

--- a/e2e-tests/snapshots/local_agent_auto.spec.ts_local-agent---auto-model-1.txt
+++ b/e2e-tests/snapshots/local_agent_auto.spec.ts_local-agent---auto-model-1.txt
@@ -232,7 +232,7 @@
      {
        "type": "function",
        "name": "grep",
-        "description": "Search for a regex pattern in the codebase using ripgrep.\n\n- Returns matching lines with file paths and line numbers\n- By default, the search is case-insensitive\n- Use include_pattern to filter by file type (e.g. '*.tsx')\n- Use exclude_pattern to skip certain files (e.g. '*.test.ts')",
+        "description": "Search for a regex pattern in the codebase using ripgrep.\n\n- Returns matching lines with file paths and line numbers\n- By default, the search is case-insensitive\n- Use include_pattern to filter by file type (e.g. '*.tsx')\n- Use exclude_pattern to skip certain files (e.g. '*.test.ts')\n- Results are limited to 100 matches by default (max 250). If results are truncated, narrow your search with include_pattern or a more specific query.",
        "parameters": {
          "$schema": "http://json-schema.org/draft-07/schema#",
          "type": "object",
@@ -252,6 +252,12 @@
            "case_sensitive": {
              "description": "Whether the search should be case sensitive (default: false)",
              "type": "boolean"
+            },
+            "limit": {
+              "description": "Maximum number of matches to return (default: 100, max: 250). Use include_pattern to narrow results if limit is reached.",
+              "type": "number",
+              "minimum": 1,
+              "maximum": 250
            }
          },
          "required": [

--- a/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
+++ b/e2e-tests/snapshots/local_agent_basic.spec.ts_local-agent---dump-request-1.txt
@@ -231,7 +231,7 @@
        "type": "function",
        "function": {
          "name": "grep",
-          "description": "Search for a regex pattern in the codebase using ripgrep.\n\n- Returns matching lines with file paths and line numbers\n- By default, the search is case-insensitive\n- Use include_pattern to filter by file type (e.g. '*.tsx')\n- Use exclude_pattern to skip certain files (e.g. '*.test.ts')",
+          "description": "Search for a regex pattern in the codebase using ripgrep.\n\n- Returns matching lines with file paths and line numbers\n- By default, the search is case-insensitive\n- Use include_pattern to filter by file type (e.g. '*.tsx')\n- Use exclude_pattern to skip certain files (e.g. '*.test.ts')\n- Results are limited to 100 matches by default (max 250). If results are truncated, narrow your search with include_pattern or a more specific query.",
          "parameters": {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
@@ -251,6 +251,12 @@
              "case_sensitive": {
                "description": "Whether the search should be case sensitive (default: false)",
                "type": "boolean"
+              },
+              "limit": {
+                "description": "Maximum number of matches to return (default: 100, max: 250). Use include_pattern to narrow results if limit is reached.",
+                "type": "number",
+                "minimum": 1,
+                "maximum": 250
              }
            },
            "required": [

--- a/e2e-tests/snapshots/local_agent_grep.spec.ts_local-agent---grep-search-1.aria.yml
+++ b/e2e-tests/snapshots/local_agent_grep.spec.ts_local-agent---grep-search-1.aria.yml
@@ -34,7 +34,7 @@
 - button "Copy":
  - img
 - text: log
- code: "src/main.tsx:2: import App from \"./App.tsx\"; src/main.tsx:4: createRoot(document.getElementById(\"root\")!).render(<App />); src/App.tsx:1: const App = () => <div>Minimal imported app</div>; src/App.tsx:3: export default App;"
+- code: "src/App.tsx:1: const App = () => <div>Minimal imported app</div>; src/App.tsx:3: export default App; src/main.tsx:2: import App from \"./App.tsx\"; src/main.tsx:4: createRoot(document.getElementById(\"root\")!).render(<App />);"
 - paragraph: I found the matches! The React app is initialized in src/main.tsx using createRoot, and the App component is defined in src/App.tsx and imported in src/main.tsx.
 - button "Copy":
  - img

--- a/e2e-tests/snapshots/local_agent_grep.spec.ts_local-agent---grep-search-3.aria.yml
+++ b/e2e-tests/snapshots/local_agent_grep.spec.ts_local-agent---grep-search-3.aria.yml
@@ -5,4 +5,4 @@
 - button "Copy":
  - img
 - text: log
- code: "src/main.tsx:2: import App from \"./App.tsx\"; src/main.tsx:4: createRoot(document.getElementById(\"root\")!).render(<App />); src/App.tsx:1: const App = () => <div>Minimal imported app</div>; src/App.tsx:3: export default App;"
\ No newline at end of file
+- code: "src/App.tsx:1: const App = () => <div>Minimal imported app</div>; src/App.tsx:3: export default App; src/main.tsx:2: import App from \"./App.tsx\"; src/main.tsx:4: createRoot(document.getElementById(\"root\")!).render(<App />);"
\ No newline at end of file
--- a/src/components/chat/DyadGrep.tsx
+++ b/src/components/chat/DyadGrep.tsx
@@ -21,6 +21,8 @@ interface DyadGrepProps {
      exclude?: string;
      "case-sensitive"?: string;
      count?: string;
+      total?: string;
+      truncated?: string;
    };
  };
 }
@@ -39,6 +41,8 @@ export const DyadGrep: React.FC<DyadGrepProps> = ({ children, node }) => {
  const excludePattern = node?.properties?.exclude || "";
  const caseSensitive = node?.properties?.["case-sensitive"] === "true";
  const count = node?.properties?.count || "";
+  const total = node?.properties?.total || "";
+  const truncated = node?.properties?.truncated === "true";
  const hasResults = count !== "" && count !== "0";

  // Build description
@@ -55,7 +59,9 @@ export const DyadGrep: React.FC<DyadGrepProps> = ({ children, node }) => {

  // Build result summary
  const resultSummary = count
-    ? `${count} match${count === "1" ? "" : "es"}`
+    ? truncated && total
+      ? `${count} of ${total} matches`
+      : `${count} match${count === "1" ? "" : "es"}`
    : "";

  // Dynamic border styling

--- a/src/components/chat/DyadMarkdownParser.tsx
+++ b/src/components/chat/DyadMarkdownParser.tsx
@@ -523,6 +523,8 @@ function renderCustomTag(
              exclude: attributes.exclude || "",
              "case-sensitive": attributes["case-sensitive"] || "",
              count: attributes.count || "",
+              total: attributes.total || "",
+              truncated: attributes.truncated || "",
            },
          }}
        >

--- a/src/pro/main/ipc/handlers/local_agent/tools/grep.spec.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tools/grep.spec.ts
--- a/src/pro/main/ipc/handlers/local_agent/tools/grep.ts
+++ b/src/pro/main/ipc/handlers/local_agent/tools/grep.ts
@@ -15,6 +15,10 @@ import log from "electron-log";

 const logger = log.scope("grep");

+const DEFAULT_LIMIT = 100;
+const MAX_LIMIT = 250;
+const MAX_LINE_LENGTH = 500;
+
 const grepSchema = z.object({
  query: z.string().describe("The regex pattern to search for"),
  include_pattern: z
@@ -31,6 +35,14 @@ const grepSchema = z.object({
    .boolean()
    .optional()
    .describe("Whether the search should be case sensitive (default: false)"),
+  limit: z
+    .number()
+    .min(1)
+    .max(MAX_LIMIT)
+    .optional()
+    .describe(
+      `Maximum number of matches to return (default: ${DEFAULT_LIMIT}, max: ${MAX_LIMIT}). Use include_pattern to narrow results if limit is reached.`,
+    ),
 });

 interface RipgrepMatch {
@@ -42,6 +54,7 @@ interface RipgrepMatch {
 function buildGrepAttributes(
  args: Partial<z.infer<typeof grepSchema>>,
  count?: number,
+  totalCount?: number,
 ): string {
  const attrs: string[] = [];
  if (args.query) {
@@ -59,9 +72,20 @@ function buildGrepAttributes(
  if (count !== undefined) {
    attrs.push(`count="${count}"`);
  }
+  if (totalCount !== undefined && totalCount > (count ?? 0)) {
+    attrs.push(`total="${totalCount}"`);
+    attrs.push(`truncated="true"`);
+  }
  return attrs.join(" ");
 }

+function truncateLineText(text: string): string {
+  if (text.length <= MAX_LINE_LENGTH) {
+    return text;
+  }
+  return text.slice(0, MAX_LINE_LENGTH) + "...";
+}
+
 async function runRipgrep({
  appPath,
  query,
@@ -82,7 +106,6 @@ async function runRipgrep({
      "--no-config",
      "--max-filesize",
      `${MAX_FILE_SEARCH_SIZE}`,
-      ...RIPGREP_EXCLUDED_GLOBS.flatMap((glob) => ["--glob", glob]),
    ];

    // Case sensitivity: default is case-insensitive
@@ -90,8 +113,9 @@ async function runRipgrep({
      args.push("--ignore-case");
    }

-    // Include pattern
-    if (includePat) {
+    // Include pattern (skip no-op "*" which would override exclusion globs
+    // and .gitignore rules since --glob always takes precedence over ignore logic)
+    if (includePat && includePat !== "*") {
      args.push("--glob", includePat);
    }

@@ -100,6 +124,10 @@ async function runRipgrep({
      args.push("--glob", `!${excludePat}`);
    }

+    // Exclusion globs come LAST so they always take precedence over any
+    // include pattern (later --glob flags override earlier ones in ripgrep)
+    args.push(...RIPGREP_EXCLUDED_GLOBS.flatMap((glob) => ["--glob", glob]));
+
    args.push("--", query, ".");

    const rg = spawn(getRgExecutablePath(), args, { cwd: appPath });
@@ -168,7 +196,8 @@ export const grepTool: ToolDefinition<z.infer<typeof grepSchema>> = {
 - Returns matching lines with file paths and line numbers
 - By default, the search is case-insensitive
 - Use include_pattern to filter by file type (e.g. '*.tsx')
- Use exclude_pattern to skip certain files (e.g. '*.test.ts')`,
+- Use exclude_pattern to skip certain files (e.g. '*.test.ts')
+- Results are limited to ${DEFAULT_LIMIT} matches by default (max ${MAX_LIMIT}). If results are truncated, narrow your search with include_pattern or a more specific query.`,
  inputSchema: grepSchema,
  defaultConsent: "always",

@@ -192,7 +221,9 @@ export const grepTool: ToolDefinition<z.infer<typeof grepSchema>> = {
  },

  execute: async (args, ctx: AgentContext) => {
-    const matches = await runRipgrep({
+    const includePatWasWildcard = args.include_pattern === "*";
+
+    const allMatches = await runRipgrep({
      appPath: ctx.appPath,
      query: args.query,
      includePat: args.include_pattern,
@@ -200,18 +231,37 @@ export const grepTool: ToolDefinition<z.infer<typeof grepSchema>> = {
      caseSensitive: args.case_sensitive,
    });

-    const attrs = buildGrepAttributes(args, matches.length);
+    const totalCount = allMatches.length;
+    const limit = Math.min(args.limit ?? DEFAULT_LIMIT, MAX_LIMIT);
+    // Sort for deterministic output (ripgrep's parallel execution can produce varying order)
+    const sortedMatches = [...allMatches].sort(
+      (a, b) => a.path.localeCompare(b.path) || a.lineNumber - b.lineNumber,
+    );
+    const matches = sortedMatches.slice(0, limit);
+    const wasTruncated = totalCount > limit;
+
+    const attrs = buildGrepAttributes(args, matches.length, totalCount);

    if (matches.length === 0) {
      ctx.onXmlComplete(`<dyad-grep ${attrs}>No matches found.</dyad-grep>`);
      return "No matches found.";
    }

-    // Format output: path:line: content
+    // Format output: path:line: content (with truncated line text)
    const lines = matches.map(
-      (m) => `${m.path}:${m.lineNumber}: ${m.lineText}`,
+      (m) => `${m.path}:${m.lineNumber}: ${truncateLineText(m.lineText)}`,
    );
-    const resultText = lines.join("\n");
+    let resultText = lines.join("\n");
+
+    // Add truncation notice for the AI
+    if (wasTruncated) {
+      resultText += `\n\n[TRUNCATED: Showing ${matches.length} of ${totalCount} matches. Use include_pattern to narrow your search (e.g., include_pattern="*.tsx") or use a more specific query.]`;
+    }
+
+    // Warn the LLM that "*" was ignored so it doesn't retry with the same pattern
+    if (includePatWasWildcard) {
+      resultText += `\n\n[NOTE: include_pattern="*" was ignored because it matches all files including git-ignored files! Omit include_pattern to search all files, or use a specific glob like "*.ts".]`;
+    }

    ctx.onXmlComplete(
      `<dyad-grep ${attrs}>\n${escapeXmlContent(resultText)}\n</dyad-grep>`,