Unverified 提交 ae1ec684 authored 作者: Will Chen's avatar Will Chen 提交者: GitHub

Fuzzy turbo edits (#1700)

<!-- CURSOR_SUMMARY --> > [!NOTE] > Makes search-replace tolerant to typos/whitespace via two-pass fuzzy matching with Levenshtein and smart-quote normalization, with clearer errors and tests. > > - **Search/Replace Processor (`src/pro/main/ipc/processors/search_replace_processor.ts`)** > - Add two-pass fuzzy matching: quick exact-line prefilter + Levenshtein scoring (`fastest-levenshtein`). > - Match flow: exact > whitespace-insensitive > fuzzy; detect ambiguity and improve error messages with similarity %. > - Introduce thresholds and limits: `FUZZY_MATCH_THRESHOLD=0.9`, `EARLY_STOP_THRESHOLD=0.95`, `MAX_FUZZY_SEARCH_TIME_MS=10000`. > - Normalize text before scoring using `normalizeString` (handles quotes, dashes, ellipsis, NBSP, soft hyphen, BOM). > - Preserve indentation for replacements; keep existing unescape/validation logic. > - **Utils (`src/utils/text_normalization.ts`)** > - New `normalizeString` helper for Unicode normalization used by fuzzy matching. > - **Tests (`src/pro/main/ipc/processors/search_replace_processor.test.ts`)** > - Add cases for typos, smart quotes, below-threshold failure, exact-over-fuzzy preference, whitespace differences, and ambiguity. > - **Dependencies** > - Add `fastest-levenshtein` to `package.json`. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 289dab915c37bc4f9ab4bf0209ff3f95a57341fc. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Makes applySearchReplace more tolerant by preferring exact and whitespace-insensitive matches, then falling back to fast two-pass Levenshtein-based fuzzy matching with Unicode normalization. Improves failure messages and adds tests to cover common edit scenarios. - **New Features** - Exact > whitespace-insensitive > fuzzy matching flow (threshold 0.9). 
- Two-pass fuzzy search with exact-line prefilter; early stop at 0.95 and 10s timeout. - normalizeString handles smart quotes, dashes, ellipsis, NBSPs, soft hyphen, and BOM. - Errors now report best fuzzy similarity when below threshold. - Tests for typos, smart quotes, below-threshold cases, exact-over-fuzzy, whitespace differences, and ambiguity. - **Dependencies** - Add fastest-levenshtein. <sup>Written for commit 289dab915c37bc4f9ab4bf0209ff3f95a57341fc. Summary will update automatically on new commits.</sup> <!-- End of auto-generated description by cubic. -->
上级 369149b2
{ {
"name": "dyad", "name": "dyad",
"version": "0.25.0-beta.1", "version": "0.27.0-beta.1",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "dyad", "name": "dyad",
"version": "0.25.0-beta.1", "version": "0.27.0-beta.1",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@ai-sdk/amazon-bedrock": "^3.0.15", "@ai-sdk/amazon-bedrock": "^3.0.15",
...@@ -62,6 +62,7 @@ ...@@ -62,6 +62,7 @@
"electron-playwright-helpers": "^1.7.1", "electron-playwright-helpers": "^1.7.1",
"electron-squirrel-startup": "^1.0.1", "electron-squirrel-startup": "^1.0.1",
"esbuild-register": "^3.6.0", "esbuild-register": "^3.6.0",
"fastest-levenshtein": "^1.0.16",
"fix-path": "^4.0.0", "fix-path": "^4.0.0",
"framer-motion": "^12.6.3", "framer-motion": "^12.6.3",
"geist": "^1.3.1", "geist": "^1.3.1",
...@@ -11830,6 +11831,15 @@ ...@@ -11830,6 +11831,15 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/fastest-levenshtein": {
"version": "1.0.16",
"resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz",
"integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==",
"license": "MIT",
"engines": {
"node": ">= 4.9.1"
}
},
"node_modules/fastq": { "node_modules/fastq": {
"version": "1.19.1", "version": "1.19.1",
"resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz",
......
...@@ -138,6 +138,7 @@ ...@@ -138,6 +138,7 @@
"electron-playwright-helpers": "^1.7.1", "electron-playwright-helpers": "^1.7.1",
"electron-squirrel-startup": "^1.0.1", "electron-squirrel-startup": "^1.0.1",
"esbuild-register": "^3.6.0", "esbuild-register": "^3.6.0",
"fastest-levenshtein": "^1.0.16",
"fix-path": "^4.0.0", "fix-path": "^4.0.0",
"framer-motion": "^12.6.3", "framer-motion": "^12.6.3",
"geist": "^1.3.1", "geist": "^1.3.1",
......
import { describe, it, expect } from "vitest";
import { applySearchReplace } from "./search_replace_processor";
describe("applySearchReplace", () => {
// Exercises the fuzzy-matching fallback of applySearchReplace: each case builds
// an original file and a SEARCH/REPLACE diff, applies it, and checks the result.
describe("fuzzy matching with Levenshtein distance", () => {
// A single-character typo in the SEARCH block must still locate the block.
it("should match content with minor typos", () => {
const originalContent = `function hello() {
console.log("Hello, World!");
return true;
}`;
// Search block has a typo: "consle" instead of "console"
const diffContent = `<<<<<<< SEARCH
function hello() {
consle.log("Hello, World!");
return true;
}
=======
function hello() {
console.log("Hello, Universe!");
return true;
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(true);
expect(result.content).toContain("Hello, Universe!");
});
// Typographic quotes in the SEARCH block should be treated like straight quotes.
it("should match content with smart quotes normalized", () => {
const originalContent = `function greet() {
console.log("Hello");
}`;
// Search block is meant to use smart quotes.
// NOTE(review): the quotes in the SEARCH block below appear as plain ASCII
// quotes here — confirm the fixture really contains U+201C/U+201D, otherwise
// this case only exercises the exact-match path.
const diffContent = `<<<<<<< SEARCH
function greet() {
console.log("Hello");
}
=======
function greet() {
console.log("Goodbye");
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(true);
expect(result.content).toContain("Goodbye");
});
// A SEARCH block that differs too much must be rejected, and the error must
// report the best similarity that was found.
it("should fail when similarity is below threshold", () => {
const originalContent = `function hello() {
console.log("Hello, World!");
return true;
}`;
// Search block is too different (multiple typos and changes)
const diffContent = `<<<<<<< SEARCH
function goodbye() {
consle.error("Bye, Earth!");
return false;
}
=======
function hello() {
console.log("Hello, Universe!");
return true;
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(false);
expect(result.error).toContain("Best fuzzy match had similarity");
});
// The file holds one exact copy of the SEARCH block and one copy with a typo;
// the exact copy must be the one that gets replaced.
it("should prefer exact match over fuzzy match", () => {
const originalContent = `function hello() {
console.log("Hello");
}
function hello() {
consle.log("Hello");
}`;
// Should match the exact occurrence, not the near-identical one with the typo
const diffContent = `<<<<<<< SEARCH
function hello() {
console.log("Hello");
}
=======
function hello() {
console.log("Goodbye");
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(true);
// Only the exact occurrence is rewritten; the copy with the typo stays as it was
expect(result.content).toContain('console.log("Goodbye")');
expect(result.content).toContain('consle.log("Hello")');
});
// A SEARCH block that differs from the file only in whitespace should match.
it("should handle whitespace differences with lenient matching before fuzzy", () => {
const originalContent = `function test() {
console.log("test");
}`;
// Different indentation
// NOTE(review): no indentation difference is visible between the original
// and the SEARCH block as shown here — confirm the fixture's leading
// whitespace actually differs.
const diffContent = `<<<<<<< SEARCH
function test() {
console.log("test");
}
=======
function test() {
console.log("updated");
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(true);
expect(result.content).toContain("updated");
});
});
// Regression checks for behaviour that predates fuzzy matching: exact matches
// still apply, and a SEARCH block that matches more than once is rejected.
describe("existing functionality", () => {
// The SEARCH block is character-for-character identical to the file content.
it("should handle exact matches", () => {
const originalContent = `function hello() {
console.log("Hello");
}`;
const diffContent = `<<<<<<< SEARCH
function hello() {
console.log("Hello");
}
=======
function hello() {
console.log("Goodbye");
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(true);
expect(result.content).toContain("Goodbye");
});
// The file contains the SEARCH block twice, so the edit must fail with an
// error that mentions "ambiguous" rather than picking one occurrence.
it("should detect ambiguous matches", () => {
const originalContent = `function hello() {
console.log("Hello");
}
function hello() {
console.log("Hello");
}`;
const diffContent = `<<<<<<< SEARCH
function hello() {
console.log("Hello");
}
=======
function hello() {
console.log("Goodbye");
}
>>>>>>> REPLACE`;
const result = applySearchReplace(originalContent, diffContent);
expect(result.success).toBe(false);
expect(result.error).toContain("ambiguous");
});
});
});
/* eslint-disable no-irregular-whitespace */ /* eslint-disable no-irregular-whitespace */
import { parseSearchReplaceBlocks } from "@/pro/shared/search_replace_parser"; import { parseSearchReplaceBlocks } from "@/pro/shared/search_replace_parser";
import { distance } from "fastest-levenshtein";
import { normalizeString } from "@/utils/text_normalization";
// Minimum similarity (0 to 1, where 1 is an exact match) that the best fuzzy
// candidate must reach for applySearchReplace to accept it as the match.
const FUZZY_MATCH_THRESHOLD = 0.9;
// Early termination threshold: fastFuzzySearch returns as soon as a candidate
// scores at least this high instead of scoring the remaining candidates.
const EARLY_STOP_THRESHOLD = 0.95;
// Maximum time (in milliseconds) a single fastFuzzySearch call may spend; once
// exceeded, the search logs a warning and stops with the best match so far.
const MAX_FUZZY_SEARCH_TIME_MS = 10_000; // 10 seconds
function unescapeMarkers(content: string): string { function unescapeMarkers(content: string): string {
return content return content
...@@ -9,6 +20,148 @@ function unescapeMarkers(content: string): string { ...@@ -9,6 +20,148 @@ function unescapeMarkers(content: string): string {
.replace(/^\\>>>>>>>/gm, ">>>>>>>"); .replace(/^\\>>>>>>>/gm, ">>>>>>>");
} }
/**
 * Scores how closely `search` resembles `original`.
 *
 * Both inputs are passed through `normalizeString` first, so differences that
 * the normalizer removes (smart quotes, dashes, ...) do not count against the
 * score. The score is `1 - editDistance / longerLength`.
 *
 * @param original - Text taken from the target file.
 * @param search - Text of the search block.
 * @returns A ratio where 1 means identical after normalization; an empty
 *   `search` always scores 0.
 */
function getSimilarity(original: string, search: string): number {
  // An empty search block is never considered a match.
  if (search.length === 0) {
    return 0;
  }

  const cleanOriginal = normalizeString(original);
  const cleanSearch = normalizeString(search);

  if (cleanOriginal !== cleanSearch) {
    // The strings differ, so at least one is non-empty and the divisor is > 0.
    const longerLength =
      cleanOriginal.length >= cleanSearch.length
        ? cleanOriginal.length
        : cleanSearch.length;
    const edits = distance(cleanOriginal, cleanSearch);
    return 1 - edits / longerLength;
  }

  // Identical after normalization: skip the Levenshtein computation entirely.
  return 1;
}
/**
 * Cheap pre-filter score: the fraction of search lines that equal the target
 * line at the same offset from `startIdx`, compared after `normalizeString`.
 *
 * @param targetLines - Lines of the file being searched.
 * @param searchLines - Lines of the search block.
 * @param startIdx - Index in `targetLines` where the comparison window starts.
 * @returns Matching line count divided by `searchLines.length`; search lines
 *   that would fall past the end of `targetLines` count as non-matching.
 */
function quickScoreByExactLines(
  targetLines: string[],
  searchLines: string[],
  startIdx: number,
): number {
  let hits = 0;

  for (const [offset, wanted] of searchLines.entries()) {
    const position = startIdx + offset;
    // Ran off the end of the target: nothing further can match.
    if (position >= targetLines.length) {
      break;
    }
    const sameLine =
      normalizeString(targetLines[position]) === normalizeString(wanted);
    if (sameLine) {
      hits += 1;
    }
  }

  return hits / searchLines.length;
}
/**
 * Two-pass fuzzy search for `searchChunk` inside `lines`.
 *
 * Pass 1 (cheap): slide a window of the search block's height over
 * `[startIndex, endIndex)` and keep every start position whose exact-line
 * score (`quickScoreByExactLines`) is at least 60%.
 * Pass 2 (expensive): run `getSimilarity` on at most the 10 best-ranked
 * positions and keep the highest score.
 *
 * The pre-filter is a heuristic: it assumes that a block similar enough to pass
 * fuzzy matching will also share most of its lines verbatim.
 *
 * Both passes share one time budget (`MAX_FUZZY_SEARCH_TIME_MS`); when it runs
 * out a warning is logged and the pass in progress stops early.
 *
 * @param lines - Lines of the file being searched.
 * @param searchChunk - The search block as a single string.
 * @param startIndex - First window start position to consider.
 * @param endIndex - Exclusive upper bound for the window's last line.
 * @returns `bestScore` (0 when nothing was scored) and `bestMatchIndex`
 *   (-1 when nothing was scored).
 */
function fastFuzzySearch(
  lines: string[],
  searchChunk: string,
  startIndex: number,
  endIndex: number,
) {
  // A window needs at least this share of exactly matching lines to be scored.
  const QUICK_THRESHOLD = 0.6;
  // Upper bound on how many windows get the expensive Levenshtein treatment.
  const MAX_CANDIDATES_TO_CHECK = 10;

  const searchLines = searchChunk.split(/\r?\n/);
  const windowSize = searchLines.length;
  const startedAt = performance.now();

  // Pass 1: collect promising window positions.
  const shortlist: Array<{ index: number; quickScore: number }> = [];
  const lastWindowStart = endIndex - windowSize;
  for (let position = startIndex; position <= lastWindowStart; position++) {
    const spentMs = performance.now() - startedAt;
    if (spentMs > MAX_FUZZY_SEARCH_TIME_MS) {
      console.warn(
        `Fast fuzzy search timed out during pre-filter after ${(spentMs / 1000).toFixed(1)}s`,
      );
      break;
    }

    const quickScore = quickScoreByExactLines(lines, searchLines, position);
    if (quickScore >= QUICK_THRESHOLD) {
      shortlist.push({ index: position, quickScore });
    }
  }

  // Highest pre-filter score first.
  shortlist.sort((left, right) => right.quickScore - left.quickScore);

  // Pass 2: full similarity scoring on the front of the shortlist only.
  let bestScore = 0;
  let bestMatchIndex = -1;
  for (const { index } of shortlist.slice(0, MAX_CANDIDATES_TO_CHECK)) {
    const spentMs = performance.now() - startedAt;
    if (spentMs > MAX_FUZZY_SEARCH_TIME_MS) {
      console.warn(
        `Fast fuzzy search timed out during detailed pass after ${(spentMs / 1000).toFixed(1)}s. Best match: ${(bestScore * 100).toFixed(1)}%`,
      );
      break;
    }

    const windowText = lines.slice(index, index + windowSize).join("\n");
    const similarity = getSimilarity(windowText, searchChunk);
    if (similarity > bestScore) {
      bestScore = similarity;
      bestMatchIndex = index;
      // Good enough: no need to score the remaining candidates.
      if (bestScore >= EARLY_STOP_THRESHOLD) {
        break;
      }
    }
  }

  return { bestScore, bestMatchIndex };
}
export function applySearchReplace( export function applySearchReplace(
originalContent: string, originalContent: string,
diffContent: string, diffContent: string,
...@@ -113,14 +266,29 @@ export function applySearchReplace( ...@@ -113,14 +266,29 @@ export function applySearchReplace(
}; };
} }
if (candidates.length === 0) { if (candidates.length === 1) {
matchIndex = candidates[0];
}
}
// If still no match, try fuzzy matching with Levenshtein distance
if (matchIndex === -1) {
const searchChunk = searchLines.join("\n");
const { bestScore, bestMatchIndex } = fastFuzzySearch(
resultLines,
searchChunk,
0,
resultLines.length,
);
if (bestScore >= FUZZY_MATCH_THRESHOLD) {
matchIndex = bestMatchIndex;
} else {
return { return {
success: false, success: false,
error: "Search block did not match any content in the target file", error: `Search block did not match any content in the target file. Best fuzzy match had similarity of ${(bestScore * 100).toFixed(1)}% (threshold: ${(FUZZY_MATCH_THRESHOLD * 100).toFixed(1)}%)`,
}; };
} }
matchIndex = candidates[0];
} }
const matchedLines = resultLines.slice( const matchedLines = resultLines.slice(
......
/**
 * Normalizes text for comparison by mapping common typographic characters to
 * their plain equivalents and dropping invisible ones.
 *
 * @param text - The text to normalize.
 * @returns `text` with smart quotes, en/em dashes, the ellipsis character and
 *   non-breaking spaces replaced by ASCII equivalents, and soft hyphens and
 *   U+FEFF (zero-width no-break space / BOM) removed.
 */
export function normalizeString(text: string): string {
  const plainEquivalent: Record<string, string> = {
    "\u2018": "'", // left single quote
    "\u2019": "'", // right single quote
    "\u201C": '"', // left double quote
    "\u201D": '"', // right double quote
    "\u2013": "-", // en dash
    "\u2014": "-", // em dash
    "\u2026": "...", // ellipsis
    "\u00A0": " ", // non-breaking space
    "\u00AD": "", // soft hyphen (remove)
    "\uFEFF": "", // zero-width no-break space (remove)
  };

  // One pass is enough: no replacement text contains a character that is
  // itself subject to replacement.
  return text.replace(
    /[\u2018\u2019\u201C\u201D\u2013\u2014\u2026\u00A0\u00AD\uFEFF]/g,
    (special) => plainEquivalent[special] ?? special,
  );
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论