Unverified 提交 0beeb501 authored 作者: Ryan Groch's avatar Ryan Groch 提交者: GitHub

perf: reduce number of native git calls when extracting a codebase (#3105)

Currently, `collectFiles` is calling `isGitIgnored` on each (non-excluded) recursion. Although there is caching, we're frequently executing Git just to check whether an individual file or directory is gitignored, meaning that the number of Git invocations scales with the number of files in the user's app. This amounts to a substantial number of Git invocations. For smaller projects it could be dozens; for larger projects it could be thousands. It's particularly a problem for native Git, because each `exec` call comes with a lot of overhead even though Git itself is quite fast. Although I'm not 100% sure, I suspect that this was the underlying cause of both #2795 and #1642, because: 1. Both mention Dyad freezing when dealing with larger projects, and this issue is far more noticeable for large projects. 2. Both specifically mention that the freeze happens upon opening their project, which is when `collectFiles` runs. 3. I was able to replicate the crash consistently on Windows 10 and inconsistently on Linux Mint by importing a large project into Dyad. I don't yet have a good automated test for this, though. The solution that I wrote for this PR puts the responsibility of traversing the app's files onto native Git instead of doing it manually. This means that we'll only have one Git invocation per call to the function (formerly named) `collectFiles`. I've also done my best to keep the output of `collectFilesNativeGit` as close as possible to the original `collectFiles`. The ordering of the files will be different, but I don't think that should make a difference given that we later sort them anyway. Some alternatives I've thought of if we decide we want to keep the current traversal logic: - Run `git check-ignore` on batches of files (e.g. each result of `fsAsync.readdir`) rather than one at a time. This would still result in multiple Git calls, though. - Run `git check-ignore` on all of the files at once at the end of `collectFiles`. We wouldn't be able to prune gitignored directories in our traversal, but at least we'd still avoid the directories in `EXCLUDED_DIRS`, such as `node_modules` and `.next`. I've left the logic of `collectFiles` untouched for isomorphic-git for now. There might be a good way to optimize that as well, but it will likely be a bit different because isomorphic-git has different capabilities than native Git. <!-- devin-review-badge-begin --> --- <a href="https://app.devin.ai/review/dyad-sh/dyad/pull/3105" target="_blank"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open with Devin"> </picture> </a> <!-- devin-review-badge-end --> --------- Co-authored-by: 's avatarClaude <noreply@anthropic.com>
上级 7b8fcd81
import { afterEach, describe, expect, it, vi } from "vitest";
import { execFile } from "node:child_process";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { promisify } from "node:util";
vi.mock("electron-log", () => ({
default: {
scope: () => ({
debug: vi.fn(),
error: vi.fn(),
info: vi.fn(),
log: vi.fn(),
warn: vi.fn(),
}),
},
}));
vi.mock("../main/settings", () => ({
readSettings: vi.fn(),
}));
import { gitListFilesNative } from "../ipc/utils/git_utils";
const execFileAsync = promisify(execFile);
async function runGit(repoDir: string, args: string[]): Promise<void> {
await execFileAsync("git", args, { cwd: repoDir });
}
describe("gitListFilesNative", () => {
let repoDir: string | undefined;
afterEach(async () => {
if (repoDir) {
await fs.promises.rm(repoDir, { recursive: true, force: true });
repoDir = undefined;
}
});
it("excludes files inside skipped directories recursively", async () => {
repoDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "git-utils-"));
await runGit(repoDir, ["init"]);
await fs.promises.mkdir(path.join(repoDir, "src"), { recursive: true });
await fs.promises.mkdir(path.join(repoDir, "dist"), { recursive: true });
await fs.promises.mkdir(path.join(repoDir, "build"), { recursive: true });
await fs.promises.mkdir(path.join(repoDir, "packages", "app", "dist"), {
recursive: true,
});
await fs.promises.mkdir(path.join(repoDir, "node_modules", "pkg"), {
recursive: true,
});
await fs.promises.writeFile(path.join(repoDir, "src", "index.ts"), "src");
await fs.promises.writeFile(
path.join(repoDir, "dist", "tracked.js"),
"tracked dist output",
);
await fs.promises.writeFile(
path.join(repoDir, "build", "tracked.js"),
"tracked build output",
);
await fs.promises.writeFile(
path.join(repoDir, "packages", "app", "dist", "nested.js"),
"nested dist output",
);
await fs.promises.writeFile(
path.join(repoDir, "node_modules", "pkg", "index.js"),
"dependency output",
);
await fs.promises.writeFile(
path.join(repoDir, "package-lock.json"),
'{"lockfileVersion":3}',
);
await runGit(repoDir, [
"add",
"src/index.ts",
"dist/tracked.js",
"build/tracked.js",
]);
const files = await gitListFilesNative({
path: repoDir,
excludedDirs: ["node_modules", "dist", "build"],
excludedFiles: ["package-lock.json"],
});
expect(files).toEqual(["src/index.ts"]);
});
});
......@@ -18,6 +18,10 @@ export interface GitCommitParams extends GitBaseParams {
export interface GitFileParams extends GitBaseParams {
filepath: string;
}
export interface GitListFilesParams extends GitBaseParams {
excludedFiles: string[];
excludedDirs: string[];
}
export interface GitCheckoutParams extends GitBaseParams {
ref: string;
}
......
......@@ -110,6 +110,7 @@ async function execGit(
import type {
GitBaseParams,
GitFileParams,
GitListFilesParams,
GitCheckoutParams,
GitBranchRenameParams,
GitCloneParams,
......@@ -1155,12 +1156,56 @@ export async function gitIsIgnored({
throw new DyadError(result.stderr.toString(), DyadErrorKind.Conflict);
} else {
// isomorphic-git version
return await gitIsIgnoredIso({ path, filepath });
}
}
/**
* Check whether a specific file/directory is gitignored using isomorphic-git
*/
export async function gitIsIgnoredIso({
path,
filepath,
}: GitFileParams): Promise<boolean> {
return await git.isIgnored({
fs,
dir: path,
filepath,
});
}
/**
* Lists all of the files in a git repository, such that:
* - Both tracked and untracked files are included.
* - Gitignored files/directories are excluded.
* - We can exclude additional files/directories as needed.
*/
export async function gitListFilesNative({
path,
excludedFiles,
excludedDirs,
}: GitListFilesParams): Promise<string[]> {
const result = await execGit(
[
"ls-files",
"-z",
"--cached",
"--others",
"--exclude-standard",
"--",
".",
...excludedFiles.map((file) => `:(exclude,glob)**/${file}`),
...excludedDirs.map((dir) => `:(exclude,glob)**/${dir}/**`),
],
path,
);
if (result.exitCode !== 0) {
throw new DyadError(
`Failed to list files: ${result.stderr.trim() || result.stdout.trim()}`,
DyadErrorKind.Conflict,
);
}
return result.stdout.split("\0").filter(Boolean).map(normalizePath);
}
export async function gitLogNative(
......
import fsAsync from "node:fs/promises";
import path from "node:path";
import { gitIsIgnored } from "../ipc/utils/git_utils";
import { gitIsIgnoredIso, gitListFilesNative } from "../ipc/utils/git_utils";
import log from "electron-log";
import { IS_TEST_BUILD } from "../ipc/utils/test_utils";
import { glob } from "glob";
......@@ -120,9 +120,9 @@ const gitIgnoreCache = new Map<string, boolean>();
const gitIgnoreMtimes = new Map<string, number>();
/**
* Check if a path should be ignored based on git ignore rules
* Check if a path should be ignored based on git ignore rules. Uses isomorphic-git
*/
async function isGitIgnored(
async function isGitIgnoredIso(
filePath: string,
baseDir: string,
): Promise<boolean> {
......@@ -175,7 +175,7 @@ async function isGitIgnored(
}
const relativePath = path.relative(baseDir, filePath);
const result = await gitIsIgnored({
const result = await gitIsIgnoredIso({
path: baseDir,
filepath: relativePath,
});
......@@ -244,9 +244,59 @@ export async function readFileWithCache(
}
/**
* Recursively walk a directory and collect all relevant files
* Traverses a directory and collects all relevant files using native Git.
*/
async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
async function collectFilesNativeGit(dir: string): Promise<string[]> {
let files: string[] = [];
try {
// We put the vast majority of the computational burden on Git for the
// sake of performance. Nonetheless, the behavior of this function
// should still be as close as possible to collectFilesIsoGit.
files = (
await gitListFilesNative({
path: dir,
excludedFiles: EXCLUDED_FILES,
excludedDirs: EXCLUDED_DIRS,
})
).map((file) => path.join(dir, file));
} catch (error) {
logger.error(
`Git failed to read directory ${dir} and is falling back to isomorphic-git:`,
error,
);
// Since collectFilesIsoGit traverses the directory tree manually,
// we'll still be able to collect the files even if git fails
return await collectFilesIsoGit(dir, dir);
}
// Git cannot exclude files by size, so we still need to do that manually
return (
await Promise.all(
files.map(async (file) => {
try {
const stats = await fsAsync.lstat(file);
if (!stats.isFile() || stats.size > MAX_FILE_SIZE) {
return "";
}
return file;
} catch (error) {
logger.error(`Failed to read file ${file}:`, error);
return "";
}
}),
)
).filter(Boolean);
}
/**
* Recursively walk a directory and collect all relevant files. Uses
* isomorphic-git to check whether files and directories are gitignored.
*/
async function collectFilesIsoGit(
dir: string,
baseDir: string,
): Promise<string[]> {
const files: string[] = [];
// Check if directory exists
......@@ -271,13 +321,13 @@ async function collectFiles(dir: string, baseDir: string): Promise<string[]> {
}
// Skip if the entry is git ignored
if (await isGitIgnored(fullPath, baseDir)) {
if (await isGitIgnoredIso(fullPath, baseDir)) {
return;
}
if (entry.isDirectory()) {
// Recursively process subdirectories
const subDirFiles = await collectFiles(fullPath, baseDir);
const subDirFiles = await collectFilesIsoGit(fullPath, baseDir);
files.push(...subDirFiles);
} else if (entry.isFile()) {
// Skip excluded files
......@@ -456,7 +506,9 @@ export async function extractCodebase({
const startTime = Date.now();
// Collect all relevant files
let files = await collectFiles(appPath, appPath);
let files = settings.enableNativeGit
? await collectFilesNativeGit(appPath)
: await collectFilesIsoGit(appPath, appPath);
// Apply virtual filesystem modifications if provided
if (virtualFileSystem) {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论