commit ef8ec60674275af0116b26e9c02ab4f4bd7bcd72 from: Stefan Sperling date: Tue Jul 27 13:08:52 2021 UTC prevent a race where 'gotadmin cleanup' deletes concurrently created objects commit - 3f338f0a096f8648ea0bb148ba5e4383d6434eaa commit + ef8ec60674275af0116b26e9c02ab4f4bd7bcd72 blob - 7447f80c0b353f5e721127e74978c14924336573 blob + 55073892c008a0924a723fa71265758d5fa587a2 --- gotadmin/gotadmin.1 +++ gotadmin/gotadmin.1 @@ -181,7 +181,7 @@ and a break-down of the number of objects per object t .It Cm ls Short alias for .Cm listpack . -.It Cm cleanup Oo Fl p Oc Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc +.It Cm cleanup Oo Fl a Oc Oo Fl p Oc Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc Purge unreferenced loose objects from the repository and display the amount of disk space which has been freed as a result. .Pp @@ -258,6 +258,13 @@ The options for .Cm gotadmin cleanup are as follows: .Bl -tag -width Ds +.It Fl a +Delete all loose objects. +By default, objects which are newer than an implementation-defined +modification timestamp are kept on disk to prevent race conditions +with other commands that add new objects to the repository while +.Cm gotadmin cleanup +is running. .It Fl p Instead of purging unreferenced loose objects, remove any pack index files which do not have a corresponding pack file. blob - 810a5cc02697494c98449bfb5e56bfdb7b79349e blob + 928fcd8b030f65a1702ac22c29886c7438444548 --- gotadmin/gotadmin.c +++ gotadmin/gotadmin.c @@ -897,7 +897,7 @@ done: __dead static void usage_cleanup(void) { - fprintf(stderr, "usage: %s cleanup [-p] [-n] [-r repository-path] " + fprintf(stderr, "usage: %s cleanup [-a] [-p] [-n] [-r repository-path] " "[-q]\n", getprogname()); exit(1); } @@ -989,7 +989,7 @@ cmd_cleanup(int argc, char *argv[]) char *cwd = NULL, *repo_path = NULL; struct got_repository *repo = NULL; int ch, dry_run = 0, npacked = 0, verbosity = 0; - int remove_lonely_packidx = 0; + int remove_lonely_packidx = 0, ignore_mtime = 0; struct got_cleanup_progress_arg cpa; struct got_lonely_packidx_progress_arg lpa; off_t size_before, size_after; @@ -999,8 +999,11 @@ cmd_cleanup(int argc, char *argv[]) char **extensions; int nextensions, i; - while ((ch = getopt(argc, argv, "pr:nq")) != -1) { + while ((ch = getopt(argc, argv, "apr:nq")) != -1) { switch (ch) { + case 'a': + ignore_mtime = 1; + break; case 'p': remove_lonely_packidx = 1; break; @@ -1071,7 +1074,7 @@ cmd_cleanup(int argc, char *argv[]) cpa.dry_run = dry_run; cpa.verbosity = verbosity; error = got_repo_purge_unreferenced_loose_objects(repo, - &size_before, &size_after, &npacked, dry_run, + &size_before, &size_after, &npacked, dry_run, ignore_mtime, cleanup_progress, &cpa, check_cancelled, NULL); if (cpa.printed_something) printf("\n"); blob - d1e9be3e619d56a5e9e4569edfdc2cf9071ae49c blob + 8bc7ea591673f80b94917ac36991936b0fc46038 --- include/got_repository_admin.h +++ include/got_repository_admin.h @@ -76,6 +76,8 @@ typedef const struct got_error *(*got_cleanup_progress * Walk objects reachable via references to determine whether any loose * objects can be removed from disk. Do remove such objects from disk * unless the dry_run parameter is set. + * Do not remove objects with a modification timestamp above an + * implementation-defined timestamp threshold, unless ignore_mtime is set. * Return the disk space size occupied by loose objects before and after * the operation. * Return the number of loose objects which are also stored in a pack file. @@ -83,7 +85,7 @@ typedef const struct got_error *(*got_cleanup_progress const struct got_error * got_repo_purge_unreferenced_loose_objects(struct got_repository *repo, off_t *size_before, off_t *size_after, int *npacked, int dry_run, - got_cleanup_progress_cb progress_cb, void *progress_arg, + int ignore_mtime, got_cleanup_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg); /* A callback function which gets invoked with cleanup information to print. */ blob - f315741522c813f2f4e3d56e2caa15969cd6200d blob + deb76e2c5553e4ba47753fc3862764b88217ec6a --- lib/repository_admin.c +++ lib/repository_admin.c @@ -1028,6 +1028,8 @@ struct purge_loose_object_arg { int npurged; off_t size_purged; int dry_run; + time_t max_mtime; + int ignore_mtime; }; static const struct got_error * @@ -1053,21 +1055,29 @@ purge_loose_object(struct got_object_id *id, void *dat goto done; } - if (!a->dry_run) { - err = got_lockfile_lock(&lf, path, -1); - if (err) - goto done; - if (unlink(path) == -1) { - err = got_error_from_errno2("unlink", path); - goto done; + /* + * Do not delete objects which are younger than our maximum + * modification time threshold. This prevents a race where + * new objects which are being added to the repository + * concurrently would be deleted. + */ + if (a->ignore_mtime || sb.st_mtime <= a->max_mtime) { + if (!a->dry_run) { + err = got_lockfile_lock(&lf, path, -1); + if (err) + goto done; + if (unlink(path) == -1) { + err = got_error_from_errno2("unlink", path); + goto done; + } } - } - a->npurged++; - a->size_purged += sb.st_size; - if (a->progress_cb) { - err = a->progress_cb(a->progress_arg, a->nloose, - a->ncommits, a->npurged); + a->npurged++; + a->size_purged += sb.st_size; + if (a->progress_cb) { + err = a->progress_cb(a->progress_arg, a->nloose, + a->ncommits, a->npurged); + } } done: if (fd != -1 && close(fd) == -1 && err == NULL) @@ -1081,7 +1091,7 @@ done: const struct got_error * got_repo_purge_unreferenced_loose_objects(struct got_repository *repo, off_t *size_before, off_t *size_after, int *npacked, int dry_run, - got_cleanup_progress_cb progress_cb, void *progress_arg, + int ignore_mtime, got_cleanup_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err; @@ -1090,7 +1100,9 @@ got_repo_purge_unreferenced_loose_objects(struct got_r struct got_object_id **referenced_ids; int i, nreferenced, nloose, ncommits = 0; struct got_reflist_head refs; + struct got_reflist_entry *re; struct purge_loose_object_arg arg; + time_t max_mtime = 0; TAILQ_INIT(&refs); @@ -1117,6 +1129,19 @@ got_repo_purge_unreferenced_loose_objects(struct got_r err = got_ref_list(&refs, repo, "", got_ref_cmp_by_name, NULL); if (err) goto done; + if (!ignore_mtime) { + TAILQ_FOREACH(re, &refs, entry) { + time_t mtime = got_ref_get_mtime(re->ref); + if (mtime > max_mtime) + max_mtime = mtime; + } + /* + * For safety, keep objects created within 10 minutes + * before the youngest reference was created. + */ + if (max_mtime >= 600) + max_mtime -= 600; + } err = get_reflist_object_ids(&referenced_ids, &nreferenced, (1 << GOT_OBJ_TYPE_COMMIT) | (1 << GOT_OBJ_TYPE_TAG), @@ -1149,6 +1174,8 @@ got_repo_purge_unreferenced_loose_objects(struct got_r arg.size_purged = 0; arg.ncommits = ncommits; arg.dry_run = dry_run; + arg.max_mtime = max_mtime; + arg.ignore_mtime = ignore_mtime; err = got_object_idset_for_each(loose_ids, purge_loose_object, &arg); if (err) goto done; blob - 72a0111c612df2b974ee3335c72a8d724d58f0df blob + b403a1396b48b2fdd75dda910d436150bee4efbb --- regress/cmdline/cleanup.sh +++ regress/cmdline/cleanup.sh @@ -72,7 +72,7 @@ test_cleanup_unreferenced_loose_objects() { # cleanup -n should not remove any objects ls -R $testroot/repo/.git/objects > $testroot/objects-before - gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -n -q -r $testroot/repo > $testroot/stdout echo -n > $testroot/stdout.expected cmp -s $testroot/stdout.expected $testroot/stdout ret="$?" @@ -91,7 +91,7 @@ test_cleanup_unreferenced_loose_objects() { fi # cleanup should remove loose objects that belonged to the branch - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -169,7 +169,7 @@ test_cleanup_redundant_loose_objects() { # cleanup -n should not remove any objects ls -R $testroot/repo/.git/objects > $testroot/objects-before - gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -n -q -r $testroot/repo > $testroot/stdout echo -n > $testroot/stdout.expected cmp -s $testroot/stdout.expected $testroot/stdout ret="$?" @@ -196,7 +196,7 @@ test_cleanup_redundant_loose_objects() { fi # cleanup should remove all loose objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -244,7 +244,7 @@ test_cleanup_precious_objects() { (cd $testroot/repo && git config extensions.preciousObjects true) # cleanup should now refuse to purge objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout \ 2> $testroot/stderr ret="$?" if [ "$ret" == "0" ]; then @@ -294,7 +294,7 @@ test_cleanup_missing_pack_file() { rm $testroot/repo/.git/objects/pack/pack-$packname # cleanup should now refuse to purge objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout \ 2> $testroot/stderr ret="$?" if [ "$ret" == "0" ]; then @@ -317,7 +317,7 @@ test_cleanup_missing_pack_file() { return 1 fi - gotadmin cleanup -r $testroot/repo -p -n > $testroot/stdout + gotadmin cleanup -a -r $testroot/repo -p -n > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -334,7 +334,7 @@ test_cleanup_missing_pack_file() { return 1 fi - gotadmin cleanup -r $testroot/repo -p > $testroot/stdout + gotadmin cleanup -a -r $testroot/repo -p > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -351,7 +351,7 @@ test_cleanup_missing_pack_file() { fi # cleanup should now attempt to purge objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout \ 2> $testroot/stderr ret="$?" if [ "$ret" != "0" ]; then