commit 2df845d59f19a7c87fc48af1f9d4124e57ebf2b0 from: Omar Polo date: Fri Jul 07 15:31:39 2023 UTC add an initial implementation of gotadmin dump gotadmin dump is used to export (part of) the history of the repository; at the moment it only generates git bundles (which are pack files with a header) but support to generate a fast-import stream is planned. ok/tweaks stsp and jamsek commit - bafea5babab6072bfa7528ad7c767bde76a446a3 commit + 2df845d59f19a7c87fc48af1f9d4124e57ebf2b0 blob - 6e194139586ae1731a9aef3771d60596d3d0c756 blob + 80e118616d09947850bdc2b12829cac8eb2c8b3f --- gotadmin/Makefile +++ gotadmin/Makefile @@ -11,7 +11,8 @@ SRCS= gotadmin.c \ worktree_open.c hash.c bloom.c murmurhash2.c ratelimit.c \ sigs.c buf.c date.c object_open_privsep.c \ read_gitconfig_privsep.c read_gotconfig_privsep.c \ - pack_create_privsep.c pollfd.c reference_parse.c object_qid.c + pack_create_privsep.c pollfd.c reference_parse.c object_qid.c \ + dump.c MAN = ${PROG}.1 CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib blob - 8be42abe6d6ca03b938b1aebc0e2200cf5ffbf9f blob + f19fef0cdaa1a53fa46ff731dbd91266b8419ab5 --- gotadmin/gotadmin.1 +++ gotadmin/gotadmin.1 @@ -329,6 +329,39 @@ Instead of purging unreferenced loose objects and redu remove any pack index files which do not have a corresponding pack file. .It Fl q Suppress progress reporting and disk space summary output. +.It Fl r Ar repository-path +Use the repository at the specified path. +If not specified, assume the repository is located at or above the current +working directory. +If this directory is a +.Xr got 1 +work tree, use the repository path associated with this work tree. +.El +.It Xo +.Cm dump +.Op Fl q +.Op Fl r Ar repository-path +.Op Fl x Ar reference +.Op Ar reference ... +.Xc +Dump the contents of the repository to standard output. +.Pp +If one or more +.Ar reference +arguments is specified, only add objects which are reachable via the specified +references. +Each +.Ar reference +argument may either specify a specific reference or a reference namespace, +in which case all references within this namespace will be used. +.Pp +The options for +.Nm +.Cm dump +are as follows: +.Bl -tag -width Ds +.It Fl q +Suppress progress reporting output. .It Fl r Ar repository-path Use the repository at the specified path. If not specified, assume the repository is located at or above the current @@ -336,6 +369,21 @@ working directory. If this directory is a .Xr got 1 work tree, use the repository path associated with this work tree. +.It Fl x Ar reference +Exclude objects reachable via the specified +.Ar reference +from the dump file. +The +.Ar reference +argument may either specify a specific reference or a reference namespace, +in which case all references within this namespace will be excluded. +The +.Fl x +option may be specified multiple times to build a list of references to exclude. +.Pp +Exclusion takes precedence over inclusion. +If a reference appears in both the included and excluded lists, it will +be excluded. .El .El .Sh EXIT STATUS blob - 5551b60b73215eb11a82c034e1d2601ed9a15c31 blob + 78fd2d69dfbdea474b964eb51ee58209f1d0734d --- gotadmin/gotadmin.c +++ gotadmin/gotadmin.c @@ -39,6 +39,7 @@ #include "got_cancel.h" #include "got_repository.h" #include "got_repository_admin.h" +#include "got_repository_dump.h" #include "got_gotconfig.h" #include "got_path.h" #include "got_privsep.h" @@ -86,6 +87,7 @@ __dead static void usage_pack(void); __dead static void usage_indexpack(void); __dead static void usage_listpack(void); __dead static void usage_cleanup(void); +__dead static void usage_dump(void); static const struct got_error* cmd_init(int, char *[]); static const struct got_error* cmd_info(int, char *[]); @@ -93,6 +95,7 @@ static const struct got_error* cmd_pack(int, char *[] static const struct got_error* cmd_indexpack(int, char *[]); static const struct got_error* cmd_listpack(int, char *[]); static const struct got_error* cmd_cleanup(int, char *[]); +static const struct got_error* cmd_dump(int, char *[]); static const struct gotadmin_cmd gotadmin_commands[] = { { "init", cmd_init, usage_init, "" }, @@ -101,6 +104,7 @@ static const struct gotadmin_cmd gotadmin_commands[] = { "indexpack", cmd_indexpack, usage_indexpack,"ix" }, { "listpack", cmd_listpack, usage_listpack, "ls" }, { "cleanup", cmd_cleanup, usage_cleanup, "cl" }, + { "dump", cmd_dump, usage_dump, "" }, }; static void @@ -459,6 +463,7 @@ usage_pack(void) } struct got_pack_progress_arg { + FILE *out; char last_scaled_size[FMT_SCALED_STRSIZE]; int last_ncolored; int last_nfound; @@ -475,20 +480,20 @@ struct got_pack_progress_arg { }; static void -print_load_info(int print_colored, int print_found, int print_trees, +print_load_info(FILE *out, int print_colored, int print_found, int print_trees, int ncolored, int nfound, int ntrees) { if (print_colored) { - printf("%d commit%s colored", ncolored, + fprintf(out, "%d commit%s colored", ncolored, ncolored == 1 ? "" : "s"); } if (print_found) { - printf("%s%d object%s found", + fprintf(out, "%s%d object%s found", ncolored > 0 ? "; " : "", nfound, nfound == 1 ? "" : "s"); } if (print_trees) { - printf("; %d tree%s scanned", ntrees, + fprintf(out, "; %d tree%s scanned", ntrees, ntrees == 1 ? "" : "s"); } } @@ -528,16 +533,16 @@ pack_progress(void *arg, int ncolored, int nfound, int if ((print_colored || print_found || print_trees) && !a->loading_done) { - printf("\r"); - print_load_info(print_colored, print_found, print_trees, - ncolored, nfound, ntrees); + fprintf(a->out, "\r"); + print_load_info(a->out, print_colored, print_found, + print_trees, ncolored, nfound, ntrees); a->printed_something = 1; - fflush(stdout); + fflush(a->out); return NULL; } else if (!a->loading_done) { - printf("\r"); - print_load_info(1, 1, 1, ncolored, nfound, ntrees); - printf("\n"); + fprintf(a->out, "\r"); + print_load_info(a->out, 1, 1, 1, ncolored, nfound, ntrees); + fprintf(a->out, "\n"); a->loading_done = 1; } @@ -585,22 +590,22 @@ pack_progress(void *arg, int ncolored, int nfound, int } if (print_searching || print_total || print_deltify || print_written) - printf("\r"); + fprintf(a->out, "\r"); if (print_searching) - printf("packing %d reference%s", ncommits, + fprintf(a->out, "packing %d reference%s", ncommits, ncommits == 1 ? "" : "s"); if (print_total) - printf("; %d object%s", nobj_total, + fprintf(a->out, "; %d object%s", nobj_total, nobj_total == 1 ? "" : "s"); if (print_deltify) - printf("; deltify: %d%%", p_deltify); + fprintf(a->out, "; deltify: %d%%", p_deltify); if (print_written) - printf("; writing pack: %*s %d%%", FMT_SCALED_STRSIZE - 2, - scaled_size, p_written); + fprintf(a->out, "; writing pack: %*s %d%%", + FMT_SCALED_STRSIZE - 2, scaled_size, p_written); if (print_searching || print_total || print_deltify || print_written) { a->printed_something = 1; - fflush(stdout); + fflush(a->out); } return NULL; } @@ -799,6 +804,7 @@ cmd_pack(int argc, char *argv[]) } memset(&ppa, 0, sizeof(ppa)); + ppa.out = stdout; ppa.last_scaled_size[0] = '\0'; ppa.last_p_indexed = -1; ppa.last_p_resolved = -1; @@ -900,6 +906,7 @@ cmd_indexpack(int argc, char *argv[]) goto done; memset(&ppa, 0, sizeof(ppa)); + ppa.out = stdout; ppa.last_scaled_size[0] = '\0'; ppa.last_p_indexed = -1; ppa.last_p_resolved = -1; @@ -1393,9 +1400,146 @@ done: if (pack_fds) { const struct got_error *pack_err = got_repo_pack_fds_close(pack_fds); + if (error == NULL) + error = pack_err; + } + free(repo_path); + return error; +} + +__dead static void +usage_dump(void) +{ + fprintf(stderr, "usage: %s dump [-q] [-r repository-path] " + "[-x reference] [reference]...\n", getprogname()); + exit(1); +} + +static const struct got_error * +cmd_dump(int argc, char *argv[]) +{ + const struct got_error *error = NULL; + struct got_pack_progress_arg ppa; + struct got_repository *repo = NULL; + struct got_pathlist_head exclude_args; + struct got_pathlist_entry *pe; + struct got_reflist_head exclude_refs; + struct got_reflist_head include_refs; + struct got_reflist_entry *re, *new; + const char *refname; + char *repo_path = NULL; + int *pack_fds = NULL; + int verbosity = 0; + int i, ch; + + TAILQ_INIT(&exclude_args); + TAILQ_INIT(&exclude_refs); + TAILQ_INIT(&include_refs); + +#ifndef PROFILE + if (pledge("stdio rpath wpath cpath flock proc exec sendfd unveil", + NULL) == -1) + err(1, "pledge"); +#endif + + while ((ch = getopt(argc, argv, "qr:x:")) != -1) { + switch (ch) { + case 'q': + verbosity = -1; + break; + case 'r': + repo_path = realpath(optarg, NULL); + if (repo_path == NULL) + return got_error_from_errno2("realpath", + optarg); + got_path_strip_trailing_slashes(repo_path); + break; + case 'x': + error = got_pathlist_append(&exclude_args, + optarg, NULL); + if (error) + return error; + break; + default: + usage_dump(); + /* NOTREACHED */ + } + } + argc -= optind; + argv += optind; + + if (repo_path == NULL) { + error = get_repo_path(&repo_path); + if (error) + goto done; + } + error = got_repo_pack_fds_open(&pack_fds); + if (error != NULL) + goto done; + error = got_repo_open(&repo, repo_path, NULL, pack_fds); + if (error) + goto done; + + error = apply_unveil(got_repo_get_path_git_dir(repo), 1); + if (error) + goto done; + + TAILQ_FOREACH(pe, &exclude_args, entry) { + refname = pe->path; + error = add_ref(&new, &exclude_refs, refname, repo); + if (error) + goto done; + } + + if (argc == 0) { + error = got_ref_list(&include_refs, repo, "", + got_ref_cmp_by_name, NULL); + if (error) + goto done; + } else { + for (i = 0; i < argc; i++) { + got_path_strip_trailing_slashes(argv[i]); + refname = argv[i]; + error = add_ref(&new, &include_refs, refname, repo); + if (error) + goto done; + } + } + + /* Ignore references in the refs/got/ namespace. */ + TAILQ_FOREACH_SAFE(re, &include_refs, entry, new) { + refname = got_ref_get_name(re->ref); + if (strncmp("refs/got/", refname, 9) != 0) + continue; + TAILQ_REMOVE(&include_refs, re, entry); + got_ref_close(re->ref); + free(re); + } + + memset(&ppa, 0, sizeof(ppa)); + ppa.out = stderr; + ppa.verbosity = verbosity; + + error = got_repo_dump(stdout, &include_refs, &exclude_refs, + repo, pack_progress, &ppa, check_cancelled, NULL); + if (ppa.printed_something) + fprintf(stderr, "\n"); + done: + if (repo) + got_repo_close(repo); + + if (pack_fds) { + const struct got_error *pack_err; + + pack_err = got_repo_pack_fds_close(pack_fds); if (error == NULL) error = pack_err; } + + got_pathlist_free(&exclude_args, GOT_PATHLIST_FREE_NONE); + got_ref_list_free(&exclude_refs); + got_ref_list_free(&include_refs); free(repo_path); + return error; } blob - /dev/null blob + 73dc382e826ef264c3c23706e60a92c75f81d4d6 (mode 644) --- /dev/null +++ include/got_repository_dump.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* Output a bundle to the given file. */ +const struct got_error * +got_repo_dump(FILE *out, struct got_reflist_head *include_refs, + struct got_reflist_head *exclude_refs, struct got_repository *repo, + got_pack_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg); blob - /dev/null blob + 845ac0dd1d3f99357c7a70e09b1f47d509ac1770 (mode 644) --- /dev/null +++ lib/dump.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2023 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "got_error.h" +#include "got_cancel.h" +#include "got_reference.h" +#include "got_repository_admin.h" /* XXX for pack_progress */ +#include "got_object.h" +#include "got_opentemp.h" +#include "got_repository_dump.h" + +#include "got_lib_delta.h" +#include "got_lib_object.h" +#include "got_lib_object_idset.h" +#include "got_lib_ratelimit.h" +#include "got_lib_pack_create.h" + +#define GIT_BUNDLE_SIGNATURE_V2 "# v2 git bundle" + +struct idvec { + struct got_object_id **ids; + size_t len; + size_t size; +}; + +static const struct got_error * +idvec_push(struct idvec *v, struct got_object_id *id) +{ + size_t newsize; + void *t; + + if (v->len == v->size) { + newsize = v->size + 8; + t = reallocarray(v->ids, newsize, sizeof(*v->ids)); + if (t == NULL) + return got_error_from_errno("reallocarray"); + v->ids = t; + v->size = newsize; + } + + v->ids[v->len++] = id; + return NULL; +} + +static void +idvec_free(struct idvec *v) +{ + size_t i; + + for (i = 0; i < v->len; ++i) + free(v->ids[i]); + free(v->ids); +} + +const struct got_error * +got_repo_dump(FILE *out, struct got_reflist_head *include_refs, + struct got_reflist_head *exclude_refs, struct got_repository *repo, + got_pack_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err = NULL; + struct got_ratelimit rl; + uint8_t packsha[SHA1_DIGEST_LENGTH]; + FILE *delta_cache = NULL; + struct got_reflist_entry *e; + struct got_object_id *id = NULL; + struct got_commit_object *commit = NULL; + struct idvec ours, theirs; + char *nl, *s, *hex, *logmsg = NULL; + const char *refname; + int r; + + got_ratelimit_init(&rl, 0, 500); + + memset(&ours, 0, sizeof(ours)); + memset(&theirs, 0, sizeof(theirs)); + + r = fprintf(out, "%s\n", GIT_BUNDLE_SIGNATURE_V2); + if (r != strlen(GIT_BUNDLE_SIGNATURE_V2) + 1) + return got_ferror(out, GOT_ERR_IO); + + TAILQ_FOREACH(e, exclude_refs, entry) { + err = got_ref_resolve(&id, repo, e->ref); + if (err) + goto done; + + idvec_push(&theirs, id); + if (err) + goto done; + + err = got_object_open_as_commit(&commit, repo, id); + if (err) + goto done; + + err = got_object_commit_get_logmsg(&logmsg, commit); + if (err) + goto done; + + s = logmsg; + while (isspace((unsigned char)*s)) + s++; + nl = strchr(s, '\n'); + if (nl) + *nl = '\0'; + + err = got_object_id_str(&hex, id); + if (err) + goto done; + fprintf(out, "-%s %s\n", hex, s); + free(hex); + + got_object_commit_close(commit); + commit = NULL; + + free(logmsg); + logmsg = NULL; + } + + TAILQ_FOREACH(e, include_refs, entry) { + err = got_ref_resolve(&id, repo, e->ref); + if (err) + goto done; + + err = idvec_push(&ours, id); + if (err) + goto done; + + refname = got_ref_get_name(e->ref); + + err = got_object_id_str(&hex, id); + if (err) + goto done; + fprintf(out, "%s %s\n", hex, refname); + free(hex); + } + + if (fputc('\n', out) == EOF || fflush(out) == EOF) { + err = got_ferror(out, GOT_ERR_IO); + goto done; + } + + delta_cache = got_opentemp(); + if (delta_cache == NULL) { + err = got_error_from_errno("got_opentemp"); + goto done; + } + + err = got_pack_create(&packsha[0], fileno(out), delta_cache, + theirs.ids, theirs.len, ours.ids, ours.len, + repo, 0, 0, 0, progress_cb, progress_arg, &rl, + cancel_cb, cancel_arg); + + done: + idvec_free(&ours); + idvec_free(&theirs); + if (commit) + got_object_commit_close(commit); + if (delta_cache && fclose(delta_cache) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + return err; +} blob - 9308738c9d24a7b328192c49138b59da3a6756c2 blob + 0b66aa7ed80c3e660be33ffdb8bb00ac61dcacd8 --- regress/cmdline/Makefile +++ regress/cmdline/Makefile @@ -1,7 +1,7 @@ REGRESS_TARGETS=checkout update status log add rm diff blame branch tag \ ref commit revert cherrypick backout rebase init import histedit \ integrate merge stage unstage cat clone fetch send tree patch pack \ - cleanup + cleanup dump NOOBJ=Yes GOT_TEST_ROOT=/tmp @@ -99,5 +99,7 @@ pack: cleanup: ./cleanup.sh -q -r "$(GOT_TEST_ROOT)" +dump: + ./dump.sh -q -r "$(GOT_TEST_ROOT)" .include blob - /dev/null blob + f7c13d273418faebbf6a5954344c0c7ced9da08c (mode 755) --- /dev/null +++ regress/cmdline/dump.sh @@ -0,0 +1,97 @@ +#!/bin/sh +# +# Copyright (c) 2023 Omar Polo +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +. ./common.sh + +test_dump_bundle() { + local testroot=`test_init test_dump_bundle` + + # add a fake reference so that `got log' appears the same in + # the cloned repository + (cd "$testroot/repo" && got branch -n origin/master) + + (cd "$testroot/repo" && got log -p >$testroot/repo.log) + + (cd "$testroot/repo" && gotadmin dump -q master >$testroot/r.bundle) + if [ $? -ne 0 ]; then + echo "gotadmin dump failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! (cd "$testroot" && git clone -b master -q r.bundle); then + echo "failed to git clone from the generated bundle" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! (cd "$testroot/r" && got log -p >$testroot/r.log); then + echo "got log failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! cmp -s "$testroot/repo.log" "$testroot/r.log"; then + echo "history differs after clone" >&2 + diff -u "$testroot/repo.log" "$testroot/r.log" + test_done "$testroot" 1 + return 1 + fi + + (cd "$testroot/repo" && git checkout -q -b newbranch) + + # commit some changes in the repo + for i in `seq 5`; do + echo "alpha edit #$i" > $testroot/repo/alpha + git_commit "$testroot/repo" -m "edit alpha" + done + + (cd "$testroot/repo" && \ + gotadmin dump -q -x master newbranch >$testroot/r.bundle) + if [ $? -ne 0 ]; then + echo "gotadmin dump failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + (cd "$testroot/r" && git checkout -q -b newbranch && \ + git pull -q "$testroot/r.bundle" newbranch) + if [ $? -ne 0 ]; then + echo "git pull failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + (cd "$testroot/repo" && got log -p >$testroot/repo.log) + + if ! (cd "$testroot/r" && got log -p >$testroot/r.log); then + echo "got log failed unexpectedly" >&2 + test_done "$testroot" 1 + return 1 + fi + + if ! cmp -s "$testroot/repo.log" "$testroot/r.log"; then + echo "history differs after pull" >&2 + diff -u "$testroot/repo.log" "$testroot/r.log" + test_done "$testroot" 1 + return 1 + fi + + test_done "$testroot" 0 +} + +test_parseargs "$@" +run_test test_dump_bundle