commit bf60f5a292208fc82ac27b5992aecaf567a00b10 from: Stefan Sperling date: Tue Jan 21 21:32:52 2025 UTC do not delete directly referenced trees and blobs during 'gotadmin cleanup' We only considered referenced commit and tag objects during cleanup. Directly referenced trees or blobs were seen as unreferenced, and deleted during cleanup. Add test coverage for this problem and fix it. commit - 219faa299243f26361a9990660a20a5f68a8bfab commit + bf60f5a292208fc82ac27b5992aecaf567a00b10 blob - 62a5c6397e3238e2deac79abfc06c76a3d586dcd blob + 6a69dc4a2cda758167861d0358d22d0a9dd1b468 --- lib/repository_admin.c +++ lib/repository_admin.c @@ -990,11 +990,19 @@ load_commit_or_tag(int *ncommits, struct got_object_id &qid->id); if (err) goto done; + tree_id = got_object_commit_get_tree_id(commit); break; case GOT_OBJ_TYPE_TAG: err = got_object_open_as_tag(&tag, repo, &qid->id); if (err) goto done; + /* tree_id will be set below */ + break; + case GOT_OBJ_TYPE_TREE: + tree_id = &qid->id; + break; + case GOT_OBJ_TYPE_BLOB: + tree_id = NULL; break; default: /* should not happen */ @@ -1002,10 +1010,8 @@ load_commit_or_tag(int *ncommits, struct got_object_id goto done; } - /* Find a tree object to scan. */ - if (commit) { - tree_id = got_object_commit_get_tree_id(commit); - } else if (tag) { + if (tag) { + /* Find a tree object to scan. */ obj_type = got_object_tag_get_object_type(tag); switch (obj_type) { case GOT_OBJ_TYPE_COMMIT: @@ -1033,6 +1039,9 @@ load_commit_or_tag(int *ncommits, struct got_object_id goto done; break; } + } else if (tree_id == NULL) { + /* Blob which has already been marked as traversed. */ + continue; } if (tree_id) { @@ -1040,6 +1049,7 @@ load_commit_or_tag(int *ncommits, struct got_object_id repo, cancel_cb, cancel_arg); if (err) break; + tree_id = NULL; } if (commit || tag) @@ -1531,8 +1541,7 @@ got_repo_cleanup(struct got_repository *repo, } err = get_reflist_object_ids(&referenced_ids, &nreferenced, - (1 << GOT_OBJ_TYPE_COMMIT) | (1 << GOT_OBJ_TYPE_TAG), - &refs, repo, cancel_cb, cancel_arg); + GOT_OBJ_TYPE_ANY, &refs, repo, cancel_cb, cancel_arg); if (err) goto done; blob - f4099794bb7ebc53f96a1addc9b3d92c43e8c93d blob + 8b953f1ee28370617e8dbb0865f36ea6604711e0 --- regress/cmdline/cleanup.sh +++ regress/cmdline/cleanup.sh @@ -459,10 +459,101 @@ test_cleanup_missing_pack_file() { fi test_done "$testroot" "$ret" } + +test_cleanup_non_commit_ref() { + local testroot=`test_init cleanup_non_commit_ref` + local commit_id=`git_show_head $testroot/repo` + + mkdir -p $testroot/t + echo foo > $testroot/t/foo + + foo_id=$(git -C $testroot/repo hash-object -t blob -w $testroot/t/foo) + + # verify that the blob object can be read + got cat -r $testroot/repo "$foo_id" > $testroot/stdout + cmp -s $testroot/t/foo $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/t/foo $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + # create a reference which points at the blob + got ref -r $testroot/repo -c $foo_id blobref + + # create a tree object + printf "10644 blob $foo_id\tfoo\n" > $testroot/tree-desc + tree_id=$(git -C $testroot/repo mktree < $testroot/tree-desc) + + # verify that the tree object can be read + got cat -r $testroot/repo "$tree_id" > $testroot/stdout + printf "$foo_id 0010644 foo\n" > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + # create a reference which points at the tree + got ref -r $testroot/repo -c "$tree_id" treeref + + # list references + got ref -r $testroot/repo -l > $testroot/stdout + cat > $testroot/stdout.expected < $testroot/stdout \ + 2> $testroot/stderr + ret=$? + if [ $ret -ne 0 ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "1" + return 1 + fi + + # verify that the blob object can be read + got cat -r $testroot/repo "$foo_id" > $testroot/stdout + cmp -s $testroot/t/foo $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/t/foo $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + # verify that the tree object can be read + got cat -r $testroot/repo "$tree_id" > $testroot/stdout + printf "$foo_id 0010644 foo\n" > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + test_done "$testroot" "$ret" +} + test_parseargs "$@" run_test test_cleanup_unreferenced_loose_objects run_test test_cleanup_redundant_loose_objects run_test test_cleanup_redundant_pack_files run_test test_cleanup_precious_objects run_test test_cleanup_missing_pack_file +run_test test_cleanup_non_commit_ref