Skip to content

Commit d3b112b

Browse files
authored
Optimize diff checks for clean worktrees (#2109)
Avoid full before-and-after git diff calls when hooks run from a cleaned worktree and leave it unchanged. Closes #1464 Related #1327
1 parent 3184e7d commit d3b112b

5 files changed

Lines changed: 333 additions & 27 deletions

File tree

crates/prek/src/cli/run/diff.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
use std::path::Path;
2+
3+
use anyhow::Result;
4+
5+
use crate::git;
6+
7+
pub(super) struct DiffTracker<'a> {
8+
path: &'a Path,
9+
baseline: DiffBaseline,
10+
}
11+
12+
enum DiffBaseline {
13+
Clean,
14+
Unknown,
15+
Snapshot(Vec<u8>),
16+
}
17+
18+
impl<'a> DiffTracker<'a> {
19+
pub(super) fn clean_baseline(path: &'a Path) -> Self {
20+
Self {
21+
path,
22+
baseline: DiffBaseline::Clean,
23+
}
24+
}
25+
26+
pub(super) fn unknown_baseline(path: &'a Path) -> Self {
27+
Self {
28+
path,
29+
baseline: DiffBaseline::Unknown,
30+
}
31+
}
32+
33+
pub(super) async fn prepare_for_group(&mut self, may_modify_files: bool) -> Result<()> {
34+
if may_modify_files && let DiffBaseline::Unknown = self.baseline {
35+
self.baseline = DiffBaseline::Snapshot(git::get_diff(self.path).await?);
36+
}
37+
Ok(())
38+
}
39+
40+
pub(super) async fn changed_after_group(
41+
&mut self,
42+
may_modify_files: bool,
43+
all_skipped: bool,
44+
) -> Result<bool> {
45+
// Read-only groups and fully skipped groups cannot change files, so avoid
46+
// asking git about the working tree.
47+
if !may_modify_files || all_skipped {
48+
return Ok(false);
49+
}
50+
51+
match &mut self.baseline {
52+
DiffBaseline::Clean => {
53+
// `WorkTreeKeeper` already removed unstaged changes. A quiet
54+
// worktree check is enough unless the hook actually wrote files.
55+
if !git::has_worktree_diff(self.path).await? {
56+
return Ok(false);
57+
}
58+
// Capture the dirty state after this group so later groups can
59+
// compare against the exact diff left by previous hooks.
60+
self.baseline = DiffBaseline::Snapshot(git::get_diff(self.path).await?);
61+
Ok(true)
62+
}
63+
DiffBaseline::Snapshot(prev_diff) => {
64+
// Unknown initial state, `--all-files`, and later dirty groups
65+
// need a full before/after diff comparison to avoid confusing
66+
// pre-existing user changes with hook changes.
67+
let curr_diff = git::get_diff(self.path).await?;
68+
let modified = curr_diff != *prev_diff;
69+
*prev_diff = curr_diff;
70+
Ok(modified)
71+
}
72+
DiffBaseline::Unknown => {
73+
unreachable!("diff baseline must be captured before hooks can modify files")
74+
}
75+
}
76+
}
77+
}

crates/prek/src/cli/run/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ pub(crate) use install::{InstallCache, install_hooks};
66
pub(crate) use run::run;
77
pub(crate) use selector::{SelectorSource, Selectors};
88

9+
mod diff;
910
mod filter;
1011
mod install;
1112
mod keeper;

crates/prek/src/cli/run/run.rs

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use tracing::{debug, trace};
1616
use unicode_width::UnicodeWidthStr;
1717

1818
use crate::cli::reporter::{HookInitReporter, HookInstallReporter, HookRunReporter};
19+
use crate::cli::run::diff::DiffTracker;
1920
use crate::cli::run::keeper::WorkTreeKeeper;
2021
use crate::cli::run::{
2122
CollectOptions, FileTagCache, HookFileFilter, ProjectFiles, RunInput, Selectors,
@@ -220,6 +221,7 @@ pub(crate) async fn run(
220221
show_diff_on_failure,
221222
fail_fast,
222223
dry_run,
224+
should_stash,
223225
verbose,
224226
printer,
225227
)
@@ -445,6 +447,7 @@ async fn run_hooks<'paths>(
445447
show_diff_on_failure: bool,
446448
fail_fast: Option<bool>,
447449
dry_run: bool,
450+
worktree_cleaned: bool,
448451
verbose: bool,
449452
printer: Printer,
450453
) -> Result<ExitStatus> {
@@ -481,28 +484,30 @@ async fn run_hooks<'paths>(
481484
hooks.sort_by(|a, b| a.priority.cmp(&b.priority).then(a.idx.cmp(&b.idx)));
482485

483486
session.render_project_header(project, projects_len)?;
484-
let mut prev_diff = None;
487+
// The worktree is only known clean at the start of the whole run. Once
488+
// an earlier project leaves a diff behind, later projects need a fresh
489+
// per-project snapshot to avoid attributing that diff to their hooks.
490+
let mut diff_tracker = if worktree_cleaned && !session.file_modified {
491+
DiffTracker::clean_baseline(project.path())
492+
} else {
493+
DiffTracker::unknown_baseline(project.path())
494+
};
485495

486496
let project_fail_fast = fail_fast.or(project.config().fail_fast).unwrap_or(false);
487497

488498
for group_hooks in PriorityGroups::new(hooks) {
489499
let group_may_modify_files =
490500
!session.dry_run && group_hooks.iter().any(|hook| hooks::may_modify_files(hook));
491-
if group_may_modify_files && prev_diff.is_none() {
492-
prev_diff = Some(git::get_diff(project.path()).await?);
493-
}
501+
diff_tracker
502+
.prepare_for_group(group_may_modify_files)
503+
.await?;
494504

495505
let group_results = session
496506
.run_priority_group(group_hooks, &project_input, tag_cache)
497507
.await?;
498508

499509
let hook_fail_fast = session
500-
.finish_priority_group(
501-
group_results,
502-
project,
503-
group_may_modify_files,
504-
&mut prev_diff,
505-
)
510+
.finish_priority_group(group_results, group_may_modify_files, &mut diff_tracker)
506511
.await?;
507512

508513
if !session.success && (project_fail_fast || hook_fail_fast) {
@@ -614,26 +619,17 @@ impl<'a> HookRunSession<'a> {
614619
async fn finish_priority_group(
615620
&mut self,
616621
mut group_results: Vec<RunResult>,
617-
project: &Project,
618622
group_may_modify_files: bool,
619-
prev_diff: &mut Option<Vec<u8>>,
623+
diff_tracker: &mut DiffTracker<'_>,
620624
) -> Result<bool> {
621625
// Print results in a stable order (same order as config within the project).
622626
group_results.sort_unstable_by_key(|a| a.hook.idx);
623627

624628
// Check if any files were modified by this group of hooks.
625629
let all_skipped = group_results.iter().all(|r| r.status.is_skipped());
626-
let group_modified_files = if group_may_modify_files && !all_skipped {
627-
let prev_diff = prev_diff
628-
.as_mut()
629-
.expect("previous diff must be captured before file-modifying hooks run");
630-
let curr_diff = git::get_diff(project.path()).await?;
631-
let group_modified_files = curr_diff != *prev_diff;
632-
*prev_diff = curr_diff;
633-
group_modified_files
634-
} else {
635-
false
636-
};
630+
let group_modified_files = diff_tracker
631+
.changed_after_group(group_may_modify_files, all_skipped)
632+
.await?;
637633

638634
self.file_modified |= group_modified_files;
639635

crates/prek/src/git.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,32 @@ async fn parse_merge_msg_for_conflicts() -> Result<Vec<PathBuf>, Error> {
355355
Ok(conflicts)
356356
}
357357

358+
#[instrument(level = "trace")]
359+
pub(crate) async fn has_worktree_diff(path: &Path) -> Result<bool, Error> {
360+
let mut cmd = git_cmd("check worktree diff")?;
361+
let status = cmd
362+
.arg("diff-files")
363+
.arg("--quiet")
364+
.arg("--no-ext-diff")
365+
.arg("--no-textconv")
366+
.arg("--ignore-submodules")
367+
.arg("--")
368+
.arg(path)
369+
.check(false)
370+
.status()
371+
.await?;
372+
373+
if status.success() {
374+
return Ok(false);
375+
}
376+
if status.code() == Some(1) {
377+
return Ok(true);
378+
}
379+
380+
cmd.check_status(status)?;
381+
Ok(true)
382+
}
383+
358384
#[instrument(level = "trace")]
359385
pub(crate) async fn get_diff(path: &Path) -> Result<Vec<u8>, Error> {
360386
let output = git_cmd("git diff")?

0 commit comments

Comments
 (0)