From 7954df1009a314c7e5057698c7a5855659bb8066 Mon Sep 17 00:00:00 2001 From: Shubham Kanodia Date: Sat, 28 Dec 2024 10:07:41 +0000 Subject: [PATCH] maintenance: add prune-remote-refs task Remote-tracking refs can accumulate in local repositories even as branches are deleted on remotes, impacting git performance negatively. Existing alternatives to keep refs pruned have a few issues: 1. Running `git fetch` with either `--prune` or `fetch.prune=true` set, with the default refspec to copy all their branches into our remote-tracking branches, will prune stale refs, but also pulls in new branches from the remote. That is undesirable if the user wants to only work with a selected few remote branches. 2. `git remote prune` cleans up refs without adding to the existing list but requires periodic user intervention. Add a new maintenance task 'prune-remote-refs' that runs 'git remote prune' for each configured remote daily. Leave the task disabled by default, as unsuspecting users may not expect their remote-tracking branches to disappear while they are not watching. Signed-off-by: Shubham Kanodia Signed-off-by: Junio C Hamano --- Documentation/git-maintenance.txt | 20 +++++++ builtin/gc.c | 92 ++++++++++++++++++++++++++++--- t/t7900-maintenance.sh | 82 +++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 7 deletions(-) diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 6e6651309d3253..df59d43ec8868a 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -158,6 +158,26 @@ pack-refs:: need to iterate across many references. See linkgit:git-pack-refs[1] for more information. +prune-remote-refs:: + The `prune-remote-refs` task runs `git remote prune` on each remote + repository registered in the local repository. This task helps clean + up deleted remote branches, improving the performance of operations + that iterate through the refs. 
See linkgit:git-remote[1] for more + information. This task is disabled by default. ++ +NOTE: This task is opt-in to prevent unexpected removal of remote refs +for users of linkgit:git-maintenance[1]. For most users, configuring `fetch.prune=true` +is an acceptable solution, as it will automatically clean up stale remote-tracking +branches during normal fetch operations. However, this task can be useful in +specific scenarios: ++ +-- +* When using selective fetching (e.g., `git fetch origin +foo:refs/remotes/origin/foo`) + where `fetch.prune` would only affect refs that are explicitly fetched. +* When third-party tools might perform unexpected full fetches, and you want + periodic cleanup independently of fetch operations. +-- + OPTIONS ------- --auto:: diff --git a/builtin/gc.c b/builtin/gc.c index a9b1c36de27da2..ae2a6762a922c5 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -23,6 +23,7 @@ #include "lockfile.h" #include "parse-options.h" #include "run-command.h" +#include "remote.h" #include "sigchain.h" #include "strvec.h" #include "commit.h" @@ -916,6 +917,63 @@ static int maintenance_opt_schedule(const struct option *opt, const char *arg, return 0; } +struct remote_cb_data { + struct maintenance_run_opts *maintenance_opts; + struct string_list failed_remotes; +}; + +static void report_failed_remotes(struct string_list *failed_remotes, + const char *action_name) +{ + if (failed_remotes->nr) { + int i; + struct strbuf msg = STRBUF_INIT; + strbuf_addf(&msg, _("failed to %s the following remotes: "), + action_name); + for (i = 0; i < failed_remotes->nr; i++) { + if (i) + strbuf_addstr(&msg, ", "); + strbuf_addstr(&msg, failed_remotes->items[i].string); + } + error("%s", msg.buf); + strbuf_release(&msg); + } +} + +static int prune_remote(struct remote *remote, void *cb_data) +{ + struct child_process child = CHILD_PROCESS_INIT; + struct remote_cb_data *data = cb_data; + + if (!remote->url.nr) + return 0; + + child.git_cmd = 1; + strvec_pushl(&child.args, "remote", 
"prune", remote->name, NULL); + + if (run_command(&child)) + string_list_append(&data->failed_remotes, remote->name); + + return 0; +} + +static int maintenance_task_prune_remote(struct maintenance_run_opts *opts, + struct gc_config *cfg UNUSED) +{ + struct remote_cb_data cbdata = { .maintenance_opts = opts, + .failed_remotes = STRING_LIST_INIT_DUP }; + + int result; + result = for_each_remote(prune_remote, &cbdata); + + report_failed_remotes(&cbdata.failed_remotes, "prune"); + if (cbdata.failed_remotes.nr) + result = 1; + + string_list_clear(&cbdata.failed_remotes, 0); + return result; +} + /* Remember to update object flag allocation in object.h */ #define SEEN (1u<<0) @@ -1036,8 +1094,8 @@ static int maintenance_task_commit_graph(struct maintenance_run_opts *opts, static int fetch_remote(struct remote *remote, void *cbdata) { - struct maintenance_run_opts *opts = cbdata; struct child_process child = CHILD_PROCESS_INIT; + struct remote_cb_data *data = cbdata; if (remote->skip_default_update) return 0; @@ -1048,21 +1106,34 @@ static int fetch_remote(struct remote *remote, void *cbdata) "--no-write-fetch-head", "--recurse-submodules=no", NULL); - if (opts->quiet) + if (data->maintenance_opts->quiet) strvec_push(&child.args, "--quiet"); - return !!run_command(&child); + if (run_command(&child)) + string_list_append(&data->failed_remotes, remote->name); + + return 0; } static int maintenance_task_prefetch(struct maintenance_run_opts *opts, struct gc_config *cfg UNUSED) { - if (for_each_remote(fetch_remote, opts)) { - error(_("failed to prefetch remotes")); - return 1; + struct remote_cb_data cbdata = { .maintenance_opts = opts, + .failed_remotes = STRING_LIST_INIT_DUP }; + + int result = 0; + + if (for_each_remote(fetch_remote, &cbdata)) { + error(_("failed to prefetch some remotes")); + result = 1; } - return 0; + report_failed_remotes(&cbdata.failed_remotes, "prefetch"); + if (cbdata.failed_remotes.nr) + result = 1; + + string_list_clear(&cbdata.failed_remotes, 0); 
+ return result; } static int maintenance_task_gc(struct maintenance_run_opts *opts, @@ -1378,6 +1449,7 @@ enum maintenance_task_label { TASK_GC, TASK_COMMIT_GRAPH, TASK_PACK_REFS, + TASK_PRUNE_REMOTE_REFS, /* Leave as final value */ TASK__COUNT @@ -1414,6 +1486,10 @@ static struct maintenance_task tasks[] = { maintenance_task_pack_refs, pack_refs_condition, }, + [TASK_PRUNE_REMOTE_REFS] = { + "prune-remote-refs", + maintenance_task_prune_remote, + }, }; static int compare_tasks_by_selection(const void *a_, const void *b_) @@ -1508,6 +1584,8 @@ static void initialize_maintenance_strategy(void) tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY; tasks[TASK_PACK_REFS].enabled = 1; tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY; + tasks[TASK_PRUNE_REMOTE_REFS].enabled = 0; + tasks[TASK_PRUNE_REMOTE_REFS].schedule = SCHEDULE_DAILY; } } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 1909aed95e08ad..34e8fa6b5fb482 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -447,6 +447,88 @@ test_expect_success 'pack-refs task' ' test_subcommand git pack-refs --all --prune err && + test_subcommand ! git remote prune origin err || true && + + # Verify pruning happened for good remote despite bad remote failures + test_subcommand git remote prune ccc-good err && test_grep "at most one" err