diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index 246ab417c24a10..c0a3b80615e034 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -9,7 +9,7 @@ git-backfill - Download missing objects in a partial clone SYNOPSIS -------- [synopsis] -git backfill [--min-batch-size=<n>] [--[no-]sparse] +git backfill [--min-batch-size=<n>] [--[no-]sparse] [--[no-]include-edges] [<revision-range>] DESCRIPTION ----------- @@ -43,7 +43,7 @@ smaller network calls than downloading the entire repository at clone time. By default, `git backfill` downloads all blobs reachable from the `HEAD` -commit. This set can be restricted or expanded using various options. +commit. This set can be restricted or expanded using various options below. THIS COMMAND IS EXPERIMENTAL. ITS BEHAVIOR MAY CHANGE IN THE FUTURE. @@ -63,7 +63,23 @@ OPTIONS current sparse-checkout. If the sparse-checkout feature is enabled, then `--sparse` is assumed and can be disabled with `--no-sparse`. -You may also specify the commit limiting options from linkgit:git-rev-list[1]. +`--include-edges`:: +`--no-include-edges`:: + Include blobs from boundary commits in the backfill. Useful in + preparation for commands like `git log -p A..B` or `git replay + --onto TARGET A..B`, where A..B normally excludes A but you need + the blobs from A as well. `--include-edges` is the default. + +`<revision-range>`:: + Backfill only blobs reachable from commits in the specified + revision range. When no _<revision-range>_ is specified, it + defaults to `HEAD` (i.e. the whole history leading to the + current commit). For a complete list of ways to spell + _<revision-range>_, see the "Specifying Ranges" section of + linkgit:gitrevisions[7]. ++ +You may also use commit-limiting options understood by +linkgit:git-rev-list[1] such as `--first-parent`, `--since`, or pathspecs. 
SEE ALSO -------- diff --git a/builtin/backfill.c b/builtin/backfill.c index d794dd842f65ce..7ffab2ea74f5cc 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -26,7 +26,7 @@ #include "path-walk.h" static const char * const builtin_backfill_usage[] = { - N_("git backfill [--min-batch-size=<n>] [--[no-]sparse]"), + N_("git backfill [--min-batch-size=<n>] [--[no-]sparse] [--[no-]include-edges] [<revision-range>]"), NULL }; @@ -35,6 +35,7 @@ struct backfill_context { struct oid_array current_batch; size_t min_batch_size; int sparse; + int include_edges; struct rev_info revs; }; @@ -78,6 +79,34 @@ static int fill_missing_blobs(const char *path UNUSED, return 0; } +/* + * Die if the parsed revision options carry a setting that 'git backfill' + * rejects: -S/-G, --diff-filter, --follow, -L, --diff-merges or --filter. + * -G is told apart from -S by DIFF_PICKAXE_KIND_G; DIFF_PICKAXE_REGEX only + * marks --pickaxe-regex, which modifies -S. + */ +static void reject_unsupported_rev_list_options(struct rev_info *revs) +{ + if (revs->diffopt.pickaxe) + die(_("'%s' cannot be used with 'git backfill'"), + (revs->diffopt.pickaxe_opts & DIFF_PICKAXE_KIND_G) ? "-G" : "-S"); + if (revs->diffopt.filter || revs->diffopt.filter_not) + die(_("'%s' cannot be used with 'git backfill'"), + "--diff-filter"); + if (revs->diffopt.flags.follow_renames) + die(_("'%s' cannot be used with 'git backfill'"), + "--follow"); + if (revs->line_level_traverse) + die(_("'%s' cannot be used with 'git backfill'"), + "-L"); + if (revs->explicit_diff_merges) + die(_("'%s' cannot be used with 'git backfill'"), + "--diff-merges"); + if (revs->filter.choice) + die(_("'%s' cannot be used with 'git backfill'"), + "--filter"); +} + static int do_backfill(struct backfill_context *ctx) { struct path_walk_info info = PATH_WALK_INFO_INIT; @@ -94,6 +123,8 @@ static int do_backfill(struct backfill_context *ctx) /* Walk from HEAD if otherwise unspecified. 
*/ if (!ctx->revs.pending.nr) add_head_to_pending(&ctx->revs); + if (ctx->include_edges) + ctx->revs.edge_hint = 1; info.blobs = 1; info.tags = info.commits = info.trees = 0; @@ -121,12 +152,15 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .min_batch_size = 50000, .sparse = -1, .revs = REV_INFO_INIT, + .include_edges = 1, }; struct option options[] = { OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size, N_("Minimum number of objects to request at a time")), OPT_BOOL(0, "sparse", &ctx.sparse, N_("Restrict the missing objects to the current sparse-checkout")), + OPT_BOOL(0, "include-edges", &ctx.include_edges, + N_("Include blobs from boundary commits in the backfill")), OPT_END(), }; struct repo_config_values *cfg = repo_config_values(the_repository); @@ -144,6 +178,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit if (argc > 1) die(_("unrecognized argument: %s"), argv[1]); + reject_unsupported_rev_list_options(&ctx.revs); repo_config(repo, git_default_config, NULL); diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index f3b5e39493677b..94f35ce1901671 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -257,11 +257,12 @@ test_expect_success 'backfill with revision range' ' git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - git -C backfill-revs backfill HEAD~2..HEAD && + GIT_TRACE2_EVENT="$(pwd)/backfill-trace" git -C backfill-revs backfill HEAD~2..HEAD && - # 30 objects downloaded. 
+ # 36 objects downloaded, 12 still missing + test_trace2_data promisor fetch_count 36 <backfill-trace && git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 18 missing + test_line_count = 12 missing ' test_expect_success 'backfill with revisions over stdin' ' @@ -279,11 +280,12 @@ test_expect_success 'backfill with revisions over stdin' ' ^HEAD~2 EOF - git -C backfill-revs backfill --stdin missing && - test_line_count = 18 missing + test_line_count = 12 missing ' test_expect_success 'backfill with prefix pathspec' ' @@ -398,6 +400,102 @@ test_expect_success 'backfill with --since' ' test_line_count = 6 missing ' +test_expect_success 'backfill range with include-edges enables fetch-free git-log' ' + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-log && + + # Backfill the range with default include edges. + git -C backfill-log backfill HEAD~2..HEAD && + + # git log -p needs edge blobs for the "before" side of + # diffs. With edge inclusion, all needed blobs are local. + GIT_TRACE2_EVENT="$(pwd)/log-trace" git \ + -C backfill-log log -p HEAD~2..HEAD >log-output && + + # No promisor fetches should have been needed. + ! grep "fetch_count" log-trace +' + +test_expect_success 'backfill range without include edges causes on-demand fetches in git-log' ' + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-log-no-bdy && + + # Backfill WITHOUT include edges -- file.3 v1 blobs are missing. + git -C backfill-log-no-bdy backfill --no-include-edges HEAD~2..HEAD && + + # git log -p HEAD~2..HEAD computes diff of commit 7 against + # commit 6. It needs file.3 v1 (the "before" side), which was + # not backfilled. This triggers on-demand promisor fetches. 
+ GIT_TRACE2_EVENT="$(pwd)/log-no-bdy-trace" git \ + -C backfill-log-no-bdy log -p HEAD~2..HEAD >log-output && + + grep "fetch_count" log-no-bdy-trace +' + +test_expect_success 'backfill range enables fetch-free replay' ' + # Create a repo with a branch to replay. + git init replay-src && + ( + cd replay-src && + git config uploadpack.allowfilter 1 && + git config uploadpack.allowanysha1inwant 1 && + test_commit base && + git checkout -b topic && + test_commit topic-change && + git checkout main && + test_commit main-change + ) && + git clone --bare --filter=blob:none \ + "file://$(pwd)/replay-src" replay-dest.git && + + # Backfill the replay range: --onto main, replaying topic~1..topic. + # For replay, we need TARGET^! plus the range. + main_oid=$(git -C replay-dest.git rev-parse main) && + topic_oid=$(git -C replay-dest.git rev-parse topic) && + base_oid=$(git -C replay-dest.git rev-parse topic~1) && + git -C replay-dest.git backfill \ + "$main_oid^!" "$base_oid..$topic_oid" && + + # Now replay should complete without any promisor fetches. + GIT_TRACE2_EVENT="$(pwd)/replay-trace" git -C replay-dest.git \ + replay --onto main topic~1..topic >replay-out && + + ! grep "fetch_count" replay-trace +' + +test_expect_success 'backfill enables fetch-free merge' ' + # Create a repo with two branches to merge. + git init merge-src && + ( + cd merge-src && + git config uploadpack.allowfilter 1 && + git config uploadpack.allowanysha1inwant 1 && + test_commit merge-base && + git checkout -b side && + test_commit side-change && + git checkout main && + test_commit main-side-change + ) && + git clone --filter=blob:none \ + "file://$(pwd)/merge-src" merge-dest && + + # The clone checked out main, fetching its blobs. + # Backfill the three endpoint commits needed for merge. 
+ main_oid=$(git -C merge-dest rev-parse origin/main) && + side_oid=$(git -C merge-dest rev-parse origin/side) && + mbase=$(git -C merge-dest merge-base origin/main origin/side) && + git -C merge-dest backfill --no-include-edges \ + "$main_oid^!" "$side_oid^!" "$mbase^!" && + + # Merge should complete without promisor fetches. + GIT_TRACE2_EVENT="$(pwd)/merge-trace" git -C merge-dest \ + merge origin/side -m "test merge" && + + ! grep "fetch_count" merge-trace +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd