2 * Copyright (c) 2022 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/queue.h>
31 #include "got_error.h"
32 #include "got_object.h"
33 #include "got_repository.h"
36 #include "got_lib_delta.h"
37 #include "got_lib_object.h"
38 #include "got_lib_object_cache.h"
39 #include "got_lib_object_parse.h"
40 #include "got_lib_pack.h"
41 #include "got_lib_repository.h"
42 #include "got_lib_inflate.h"
43 #include "got_lib_hash.h"
/*
 * Open the object with the given ID from a pack file.
 *
 * The ID is looked up in the repository's pack indexes, the path of the
 * matching pack file is derived from the pack index path, and the pack is
 * taken from the repository's pack cache (got_repo_cache_pack() is the
 * visible alternative for a pack that is not cached yet). The object is
 * then opened at the index position found and offered to the object cache.
 *
 * GOT_ERR_OBJ_EXISTS and GOT_ERR_OBJ_TOO_LARGE from the caching step are
 * singled out; presumably they are tolerated rather than reported, but the
 * branch body is not visible in this listing -- TODO confirm.
 */
45 const struct got_error *
46 got_object_open_packed(struct got_object **obj, struct got_object_id *id,
47 struct got_repository *repo)
49 const struct got_error *err = NULL;
50 struct got_pack *pack = NULL;
51 struct got_packidx *packidx = NULL;
/* Yields the pack index holding this ID and the object's position in it. */
55 err = got_repo_search_packidx(&packidx, &idx, repo, id);
59 err = got_packidx_get_packfile_path(&path_packfile,
60 packidx->path_packidx);
64 pack = got_repo_get_cached_pack(repo, path_packfile);
66 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
71 err = got_packfile_open_object(obj, pack, packidx, idx, id);
76 err = got_repo_cache_object(repo, id, *obj);
78 if (err->code == GOT_ERR_OBJ_EXISTS ||
79 err->code == GOT_ERR_OBJ_TOO_LARGE)
87 const struct got_error *
88 got_object_open_from_packfile(struct got_object **obj, struct got_object_id *id,
89 struct got_pack *pack, struct got_packidx *packidx, int obj_idx,
90 struct got_repository *repo)
92 const struct got_error *err;
94 *obj = got_repo_get_cached_object(repo, id);
100 err = got_packfile_open_object(obj, pack, packidx, obj_idx, id);
105 err = got_repo_cache_object(repo, id, *obj);
107 if (err->code == GOT_ERR_OBJ_EXISTS ||
108 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Reading raw deltas is not supported by this implementation: the only
 * visible statement returns GOT_ERR_NOT_IMPL, and none of the output
 * parameters are written.
 */
116 const struct got_error *
117 got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size,
118 off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset,
119 off_t *delta_out_offset, struct got_object_id **base_id, int delta_cache_fd,
120 struct got_packidx *packidx, int obj_idx, struct got_object_id *id,
121 struct got_repository *repo)
123 return got_error(GOT_ERR_NOT_IMPL);
/*
 * Open the object with the given ID.
 *
 * Sources are tried in this order: the repository's object cache, pack
 * files (got_object_open_packed()), and finally a loose object file whose
 * header is parsed with got_object_read_header(). A loose object gets the
 * requested ID copied into it and is offered to the object cache.
 */
126 const struct got_error *
127 got_object_open(struct got_object **obj, struct got_repository *repo,
128 struct got_object_id *id)
130 const struct got_error *err = NULL;
133 *obj = got_repo_get_cached_object(repo, id);
139 err = got_object_open_packed(obj, id, repo);
/*
 * Only GOT_ERR_NO_OBJ from the pack lookup is distinguished; presumably
 * it alone allows falling through to the loose object -- TODO confirm.
 */
141 if (err->code != GOT_ERR_NO_OBJ)
146 err = got_object_open_loose_fd(&fd, id, repo);
/* A missing loose object file (ENOENT) is mapped to got_error_no_obj(). */
148 if (err->code == GOT_ERR_ERRNO && errno == ENOENT)
149 err = got_error_no_obj(id);
153 err = got_object_read_header(obj, fd);
157 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
160 err = got_repo_cache_object(repo, id, *obj);
162 if (err->code == GOT_ERR_OBJ_EXISTS ||
163 err->code == GOT_ERR_OBJ_TOO_LARGE)
/* A close(2) failure is reported only if no earlier error is pending. */
167 if (close(fd) == -1 && err == NULL)
168 err = got_error_from_errno("close");
/*
 * Create a stdio stream in *f for a duplicate of wrapped_fd.
 *
 * The file behind wrapped_fd is first truncated to zero length and its
 * offset is rewound to the start. The stream is opened in "w+" mode on a
 * dup(2) of the descriptor, so fclose(3) on the stream does not close
 * wrapped_fd itself.
 */
172 static const struct got_error *
173 wrap_fd(FILE **f, int wrapped_fd)
175 const struct got_error *err = NULL;
178 if (ftruncate(wrapped_fd, 0L) == -1)
179 return got_error_from_errno("ftruncate");
181 if (lseek(wrapped_fd, 0L, SEEK_SET) == -1)
182 return got_error_from_errno("lseek");
184 fd = dup(wrapped_fd);
186 return got_error_from_errno("dup");
188 *f = fdopen(fd, "w+");
190 err = got_error_from_errno("fdopen");
/*
 * Extract a packed object in raw form.
 *
 * The object is opened from the pack. Its size is obj->size, or the value
 * from got_pack_get_max_delta_object_size() when the object is deltified.
 * Objects of at most GOT_DELTA_RESULT_SIZE_CACHED_MAX bytes are extracted
 * with got_packfile_extract_object_to_mem() into *outbuf. The other
 * visible path wraps outfd and the pack's basefd/accumfd in stdio streams
 * and calls got_packfile_extract_object(). *hdrlen is set from the
 * object's header length.
 */
196 static const struct got_error *
197 read_packed_object_raw(uint8_t **outbuf, off_t *size, size_t *hdrlen,
198 int outfd, struct got_pack *pack, struct got_packidx *packidx, int idx,
199 struct got_object_id *id)
201 const struct got_error *err = NULL;
202 uint64_t raw_size = 0;
203 struct got_object *obj;
204 FILE *outfile = NULL, *basefile = NULL, *accumfile = NULL;
210 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
214 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
215 err = got_pack_get_max_delta_object_size(&raw_size, obj, pack);
219 raw_size = obj->size;
221 if (raw_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
223 err = got_packfile_extract_object_to_mem(outbuf, &len,
230 * XXX This uses 3 file extra descriptors for no good reason.
231 * We should have got_packfile_extract_object_to_fd().
233 err = wrap_fd(&outfile, outfd);
236 err = wrap_fd(&basefile, pack->basefd);
239 err = wrap_fd(&accumfile, pack->accumfd);
242 err = got_packfile_extract_object(pack, obj, outfile, basefile,
249 *hdrlen = obj->hdrlen;
/*
 * Cleanup: release the object and any streams that were opened. An
 * fclose(3) failure is reported only if no earlier error is pending.
 */
251 got_object_close(obj);
252 if (outfile && fclose(outfile) == EOF && err == NULL)
253 err = got_error_from_errno("fclose");
254 if (basefile && fclose(basefile) == EOF && err == NULL)
255 err = got_error_from_errno("fclose");
256 if (accumfile && fclose(accumfile) == EOF && err == NULL)
257 err = got_error_from_errno("fclose");
/*
 * Close callback for raw objects (installed as close_cb by
 * got_object_raw_open()). Hands the object's temporary file slot back to
 * the repository stored in close_arg. A tempfile_idx of -1 means there is
 * no slot to return.
 */
263 put_raw_object_tempfile(struct got_raw_object *obj)
265 struct got_repository *repo = obj->close_arg;
267 if (obj->tempfile_idx != -1)
268 got_repo_temp_fds_put(obj->tempfile_idx, repo);
/*
 * Open the object with the given ID in raw form.
 *
 * The raw object cache is checked first. Otherwise a temporary file
 * descriptor is borrowed from the repository and the object is read
 * either from a pack file (read_packed_object_raw()) or, when the pack
 * index search reports GOT_ERR_NO_OBJ, from a loose object file
 * (got_object_read_raw()). When no in-memory buffer was produced, the
 * temporary descriptor is dup(2)'ed into *outfd. The result is allocated
 * with got_object_raw_alloc() and offered to the raw object cache.
 */
271 /* *outfd must be initialized to -1 by caller */
272 const struct got_error *
273 got_object_raw_open(struct got_raw_object **obj, int *outfd,
274 struct got_repository *repo, struct got_object_id *id)
276 const struct got_error *err = NULL;
277 struct got_packidx *packidx = NULL;
278 int idx, tempfd, tempfile_idx;
279 uint8_t *outbuf = NULL;
282 char *path_packfile = NULL;
284 *obj = got_repo_get_cached_raw_object(repo, id);
290 err = got_repo_temp_fds_get(&tempfd, &tempfile_idx, repo);
294 err = got_repo_search_packidx(&packidx, &idx, repo, id);
296 struct got_pack *pack = NULL;
298 err = got_packidx_get_packfile_path(&path_packfile,
299 packidx->path_packidx);
303 pack = got_repo_get_cached_pack(repo, path_packfile);
305 err = got_repo_cache_pack(&pack, repo, path_packfile,
310 err = read_packed_object_raw(&outbuf, &size, &hdrlen,
311 tempfd, pack, packidx, idx, id);
314 } else if (err->code == GOT_ERR_NO_OBJ) {
317 err = got_object_open_loose_fd(&fd, id, repo);
320 err = got_object_read_raw(&outbuf, &size, &hdrlen,
321 GOT_DELTA_RESULT_SIZE_CACHED_MAX, tempfd, id, fd);
322 if (close(fd) == -1 && err == NULL)
323 err = got_error_from_errno("close");
/* No in-memory buffer: the object data lives in the temporary file. */
328 if (outbuf == NULL) {
330 err = got_error_msg(GOT_ERR_NOT_IMPL, "bad outfd");
335 * Duplicate tempfile descriptor to allow use of
336 * fdopen(3) inside got_object_raw_alloc().
338 *outfd = dup(tempfd);
340 err = got_error_from_errno("dup");
345 err = got_object_raw_alloc(obj, outbuf, outfd,
346 GOT_DELTA_RESULT_SIZE_CACHED_MAX, hdrlen, size);
350 err = got_repo_cache_raw_object(repo, id, *obj);
352 if (err->code == GOT_ERR_OBJ_EXISTS ||
353 err->code == GOT_ERR_OBJ_TOO_LARGE)
360 got_object_raw_close(*obj);
364 got_repo_temp_fds_put(tempfile_idx, repo);
370 if (((*obj)->f == NULL && (*obj)->fd == -1)) {
371 /* This raw object is not backed by a file. */
372 got_repo_temp_fds_put(tempfile_idx, repo);
/*
 * File-backed object: remember the slot so that the close callback
 * (put_raw_object_tempfile) can return it to the repository.
 */
378 (*obj)->tempfile_idx = tempfile_idx;
379 (*obj)->close_cb = put_raw_object_tempfile;
380 (*obj)->close_arg = repo;
/*
 * Open a commit object by ID.
 *
 * The commit cache is probed first; presumably only when check_cache is
 * set, but the guarding condition is not visible in this listing -- TODO
 * confirm. A packed commit is opened from its pack, extracted into memory
 * and parsed with got_object_parse_commit(). When the pack index search
 * reports GOT_ERR_NO_OBJ, the commit is read from a loose object file with
 * got_object_read_commit(). The result is offered to the commit cache.
 */
386 static const struct got_error *
387 open_commit(struct got_commit_object **commit,
388 struct got_repository *repo, struct got_object_id *id, int check_cache)
390 const struct got_error *err = NULL;
391 struct got_packidx *packidx = NULL;
393 char *path_packfile = NULL;
396 *commit = got_repo_get_cached_commit(repo, id);
397 if (*commit != NULL) {
404 err = got_repo_search_packidx(&packidx, &idx, repo, id);
406 struct got_pack *pack = NULL;
407 struct got_object *obj;
411 err = got_packidx_get_packfile_path(&path_packfile,
412 packidx->path_packidx);
416 pack = got_repo_get_cached_pack(repo, path_packfile);
418 err = got_repo_cache_pack(&pack, repo, path_packfile,
423 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
426 err = got_packfile_extract_object_to_mem(&buf, &len,
/* The object handle is closed once its data has been extracted. */
428 got_object_close(obj);
431 err = got_object_parse_commit(commit, buf, len);
433 } else if (err->code == GOT_ERR_NO_OBJ) {
436 err = got_object_open_loose_fd(&fd, id, repo);
439 err = got_object_read_commit(commit, fd, id, 0);
440 if (close(fd) == -1 && err == NULL)
441 err = got_error_from_errno("close");
448 err = got_repo_cache_commit(repo, id, *commit);
450 if (err->code == GOT_ERR_OBJ_EXISTS ||
451 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Open the commit with the given ID.
 *
 * The commit cache is probed here; a hit is presumably returned directly
 * (that branch body is not visible in this listing). Otherwise
 * open_commit() is called with check_cache == 0.
 */
460 const struct got_error *
461 got_object_open_as_commit(struct got_commit_object **commit,
462 struct got_repository *repo, struct got_object_id *id)
464 *commit = got_repo_get_cached_commit(repo, id);
465 if (*commit != NULL) {
470 return open_commit(commit, repo, id, 0);
/*
 * Open the commit that corresponds to an already-opened object. The ID is
 * taken from obj and open_commit() is called with check_cache == 1.
 */
473 const struct got_error *
474 got_object_commit_open(struct got_commit_object **commit,
475 struct got_repository *repo, struct got_object *obj)
477 return open_commit(commit, repo, got_object_get_id(obj), 1);
/*
 * Open a tree object by ID.
 *
 * The tree cache is probed first; presumably only when check_cache is
 * set, but the guarding condition is not visible in this listing -- TODO
 * confirm. A packed tree is opened from its pack, extracted into memory
 * and parsed with got_object_parse_tree(). When the pack index search
 * reports GOT_ERR_NO_OBJ, the tree is read from a loose object file with
 * got_object_read_tree(). The parsed entries are then copied into a newly
 * allocated tree object, which is offered to the tree cache.
 */
480 static const struct got_error *
481 open_tree(struct got_tree_object **tree,
482 struct got_repository *repo, struct got_object_id *id, int check_cache)
484 const struct got_error *err = NULL;
485 struct got_packidx *packidx = NULL;
487 char *path_packfile = NULL;
488 struct got_parsed_tree_entry *entries = NULL;
489 size_t nentries = 0, nentries_alloc = 0, i;
493 *tree = got_repo_get_cached_tree(repo, id);
501 err = got_repo_search_packidx(&packidx, &idx, repo, id);
503 struct got_pack *pack = NULL;
504 struct got_object *obj;
507 err = got_packidx_get_packfile_path(&path_packfile,
508 packidx->path_packidx);
512 pack = got_repo_get_cached_pack(repo, path_packfile);
514 err = got_repo_cache_pack(&pack, repo, path_packfile,
519 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
522 err = got_packfile_extract_object_to_mem(&buf, &len,
/* The object handle is closed once its data has been extracted. */
524 got_object_close(obj);
527 err = got_object_parse_tree(&entries, &nentries,
528 &nentries_alloc, buf, len);
531 } else if (err->code == GOT_ERR_NO_OBJ) {
534 err = got_object_open_loose_fd(&fd, id, repo);
537 err = got_object_read_tree(&entries, &nentries,
538 &nentries_alloc, &buf, fd, id);
539 if (close(fd) == -1 && err == NULL)
540 err = got_error_from_errno("close");
546 *tree = malloc(sizeof(**tree));
548 err = got_error_from_errno("malloc");
551 (*tree)->entries = calloc(nentries, sizeof(struct got_tree_entry));
552 if ((*tree)->entries == NULL) {
/* Name the call that actually failed: the array comes from calloc(3). */
553 err = got_error_from_errno("calloc");
556 (*tree)->nentries = nentries;
/* Convert each parsed entry into the tree object's own entry array. */
559 for (i = 0; i < nentries; i++) {
560 struct got_parsed_tree_entry *pe = &entries[i];
561 struct got_tree_entry *te = &(*tree)->entries[i];
/* A name that does not fit into te->name is an error, not truncated. */
563 if (strlcpy(te->name, pe->name,
564 sizeof(te->name)) >= sizeof(te->name)) {
565 err = got_error(GOT_ERR_NO_SPACE);
568 memcpy(te->id.sha1, pe->id, SHA1_DIGEST_LENGTH);
578 err = got_repo_cache_tree(repo, id, *tree);
580 if (err->code == GOT_ERR_OBJ_EXISTS ||
581 err->code == GOT_ERR_OBJ_TOO_LARGE)
587 free((*tree)->entries);
/*
 * Open the tree with the given ID.
 *
 * The tree cache is probed here; how a hit is handled is not visible in
 * this listing (presumably it is returned directly). Otherwise
 * open_tree() is called with check_cache == 0.
 */
594 const struct got_error *
595 got_object_open_as_tree(struct got_tree_object **tree,
596 struct got_repository *repo, struct got_object_id *id)
598 *tree = got_repo_get_cached_tree(repo, id);
604 return open_tree(tree, repo, id, 0);
/*
 * Open the tree that corresponds to an already-opened object. The ID is
 * taken from obj and open_tree() is called with check_cache == 1.
 */
607 const struct got_error *
608 got_object_tree_open(struct got_tree_object **tree,
609 struct got_repository *repo, struct got_object *obj)
611 return open_tree(tree, repo, got_object_get_id(obj), 1);
/*
 * Extract a packed blob.
 *
 * The object is opened with got_object_open_from_packfile(). Its size is
 * obj->size, or the value from got_pack_get_max_delta_object_size() when
 * the object is deltified. Blobs of at most
 * GOT_DELTA_RESULT_SIZE_CACHED_MAX bytes are extracted with
 * got_packfile_extract_object_to_mem() into *outbuf and *size. The other
 * visible path wraps outfd and the pack's basefd/accumfd in stdio streams
 * and calls got_packfile_extract_object().
 */
614 static const struct got_error *
615 read_packed_blob(uint8_t **outbuf, size_t *size, size_t *hdrlen,
616 int outfd, struct got_pack *pack, struct got_packidx *packidx, int idx,
617 struct got_object_id *id, struct got_repository *repo)
619 const struct got_error *err = NULL;
620 struct got_object *obj;
621 FILE *outfile = NULL, *basefile = NULL, *accumfile = NULL;
626 err = got_object_open_from_packfile(&obj, id, pack, packidx, idx,
631 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
632 err = got_pack_get_max_delta_object_size(&blob_size, obj,
637 blob_size = obj->size;
639 if (blob_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
640 err = got_packfile_extract_object_to_mem(outbuf, size,
644 * XXX This uses 3 file extra descriptors for no good reason.
645 * We should have got_packfile_extract_object_to_fd().
647 err = wrap_fd(&outfile, outfd);
650 err = wrap_fd(&basefile, pack->basefd);
653 err = wrap_fd(&accumfile, pack->accumfd);
656 err = got_packfile_extract_object(pack, obj, outfile, basefile,
663 /* XXX verify checksum? */
/*
 * Cleanup: release the object and any streams that were opened. An
 * fclose(3) failure is reported only if no earlier error is pending.
 */
665 got_object_close(obj);
666 if (outfile && fclose(outfile) == EOF && err == NULL)
667 err = got_error_from_errno("fclose");
668 if (basefile && fclose(basefile) == EOF && err == NULL)
669 err = got_error_from_errno("fclose");
670 if (accumfile && fclose(accumfile) == EOF && err == NULL)
671 err = got_error_from_errno("fclose");
/*
 * Read a blob from a loose object file open on infd.
 *
 * The object header is parsed first and infd is rewound. The file is then
 * inflated through a stdio stream: into memory (*outbuf) when
 * obj->size + obj->hdrlen is at most GOT_DELTA_RESULT_SIZE_CACHED_MAX,
 * otherwise into outfd. A SHA1 hash of the inflated output is computed
 * via the inflate checksum hook, and a mismatch with the requested ID is
 * reported with got_error_checksum().
 *
 * Note that id is overwritten with the computed hash; the caller's
 * original value is kept in expected_id for the comparison.
 */
675 static const struct got_error *
676 read_blob(uint8_t **outbuf, size_t *size, size_t *hdrlen, int outfd, int infd,
677 struct got_object_id *id, struct got_repository *repo)
679 const struct got_error *err = NULL;
680 struct got_object *obj = NULL;
682 struct got_object_id expected_id;
683 struct got_inflate_checksum csum;
686 got_hash_init(&ctx, GOT_HASH_SHA1);
687 memset(&csum, 0, sizeof(csum));
688 csum.output_ctx = &ctx;
690 memcpy(&expected_id, id, sizeof(expected_id));
692 err = got_object_read_header(&obj, infd);
/*
 * Rewind so the whole object is inflated. lseek(2) takes
 * (fd, offset, whence); the previous (fd, SEEK_SET, 0) order only worked
 * because SEEK_SET happens to be 0.
 */
696 if (lseek(infd, 0L, SEEK_SET) == -1) {
697 err = got_error_from_errno("lseek");
/*
 * NOTE(review): once fdopen(3) succeeds, fclose(f) also closes infd.
 * Confirm infd is reset to -1 afterwards (not visible in this listing),
 * otherwise the cleanup below closes the descriptor twice.
 */
701 f = fdopen(infd, "rb");
703 err = got_error_from_errno("fdopen");
708 if (obj->size + obj->hdrlen <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
709 err = got_inflate_to_mem(outbuf, size, NULL, &csum, f);
713 err = got_inflate_to_fd(size, f, &csum, outfd);
/* Inflated data shorter than the header cannot be a valid object. */
718 if (*size < obj->hdrlen) {
719 err = got_error(GOT_ERR_BAD_OBJ_HDR);
723 *hdrlen = obj->hdrlen;
725 got_hash_final_object_id(&ctx, id);
726 if (got_object_id_cmp(&expected_id, id) != 0) {
727 err = got_error_checksum(&expected_id);
/* Cleanup failures are reported only if no earlier error is pending. */
731 if (f && fclose(f) == EOF && err == NULL)
732 err = got_error_from_errno("fclose");
733 if (infd != -1 && close(infd) == -1 && err == NULL)
734 err = got_error_from_errno("close");
/*
 * Open a blob object by ID.
 *
 * A blob object and a read buffer of blocksize bytes are allocated, and
 * outfd is truncated and rewound so it can receive blob data. The blob is
 * read from a pack file (read_packed_blob()) or, when the pack index
 * search reports GOT_ERR_NO_OBJ, from a loose object file (read_blob()).
 *
 * The blob's stream is either an fmemopen(3) stream over the in-memory
 * buffer, or an fdopen(3) stream on a duplicate descriptor (presumably a
 * dup of outfd; the dup call itself is not visible in this listing). In
 * the file-backed case the size of outfd must equal the size reported by
 * the reader, otherwise GOT_ERR_PRIVSEP_LEN is returned.
 */
739 static const struct got_error *
740 open_blob(struct got_blob_object **blob, struct got_repository *repo,
741 struct got_object_id *id, size_t blocksize, int outfd)
743 const struct got_error *err = NULL;
744 struct got_packidx *packidx = NULL;
746 char *path_packfile = NULL;
751 *blob = calloc(1, sizeof(**blob));
753 return got_error_from_errno("calloc");
755 (*blob)->read_buf = malloc(blocksize);
756 if ((*blob)->read_buf == NULL) {
757 err = got_error_from_errno("malloc");
761 if (ftruncate(outfd, 0L) == -1) {
762 err = got_error_from_errno("ftruncate");
/*
 * lseek(2) takes (fd, offset, whence); the previous (fd, SEEK_SET, 0)
 * order only worked because SEEK_SET happens to be 0.
 */
765 if (lseek(outfd, 0L, SEEK_SET) == -1) {
766 err = got_error_from_errno("lseek");
770 err = got_repo_search_packidx(&packidx, &idx, repo, id);
772 struct got_pack *pack = NULL;
774 err = got_packidx_get_packfile_path(&path_packfile,
775 packidx->path_packidx);
779 pack = got_repo_get_cached_pack(repo, path_packfile);
781 err = got_repo_cache_pack(&pack, repo, path_packfile,
786 err = read_packed_blob(&outbuf, &size, &hdrlen, outfd,
787 pack, packidx, idx, id, repo);
788 } else if (err->code == GOT_ERR_NO_OBJ) {
791 err = got_object_open_loose_fd(&infd, id, repo);
794 err = read_blob(&outbuf, &size, &hdrlen, outfd, infd,
801 err = got_error(GOT_ERR_BAD_OBJ_HDR);
/* In-memory blob: read it back through a memory stream. */
806 (*blob)->f = fmemopen(outbuf, size, "rb");
807 if ((*blob)->f == NULL) {
808 err = got_error_from_errno("fmemopen");
812 (*blob)->data = outbuf;
/* File-backed blob: the file size must match what the reader reported. */
814 if (fstat(outfd, &sb) == -1) {
815 err = got_error_from_errno("fstat");
819 if (sb.st_size != size) {
820 err = got_error(GOT_ERR_PRIVSEP_LEN);
826 err = got_error_from_errno("dup");
830 (*blob)->f = fdopen(dfd, "rb");
831 if ((*blob)->f == NULL) {
832 err = got_error_from_errno("fdopen");
839 (*blob)->hdrlen = hdrlen;
840 (*blob)->blocksize = blocksize;
841 memcpy(&(*blob)->id, id, sizeof(*id));
847 got_object_blob_close(*blob);
/*
 * Open the blob with the given ID. Thin wrapper that forwards all
 * arguments unchanged to open_blob().
 */
854 const struct got_error *
855 got_object_open_as_blob(struct got_blob_object **blob,
856 struct got_repository *repo, struct got_object_id *id, size_t blocksize,
859 return open_blob(blob, repo, id, blocksize, outfd);
/*
 * Open the blob that corresponds to an already-opened object. The ID is
 * taken from obj; everything else is forwarded to open_blob().
 */
862 const struct got_error *
863 got_object_blob_open(struct got_blob_object **blob,
864 struct got_repository *repo, struct got_object *obj, size_t blocksize,
867 return open_blob(blob, repo, got_object_get_id(obj), blocksize, outfd);
/*
 * Open a tag object by ID.
 *
 * The tag cache is probed first; presumably only when check_cache is set,
 * but the guarding condition is not visible in this listing -- TODO
 * confirm. In both the packed and the loose path the object's type is
 * checked before any tag data is parsed, and anything other than
 * GOT_OBJ_TYPE_TAG yields GOT_ERR_OBJ_TYPE. A packed tag is extracted into
 * memory and parsed with got_object_parse_tag(); a loose tag is read with
 * got_object_read_tag(). The result is offered to the tag cache.
 */
870 static const struct got_error *
871 open_tag(struct got_tag_object **tag, struct got_repository *repo,
872 struct got_object_id *id, int check_cache)
874 const struct got_error *err = NULL;
875 struct got_packidx *packidx = NULL;
877 char *path_packfile = NULL;
878 struct got_object *obj = NULL;
879 int obj_type = GOT_OBJ_TYPE_ANY;
882 *tag = got_repo_get_cached_tag(repo, id);
890 err = got_repo_search_packidx(&packidx, &idx, repo, id);
892 struct got_pack *pack = NULL;
896 err = got_packidx_get_packfile_path(&path_packfile,
897 packidx->path_packidx);
901 pack = got_repo_get_cached_pack(repo, path_packfile);
903 err = got_repo_cache_pack(&pack, repo, path_packfile,
909 /* Beware of "lightweight" tags: Check object type first. */
910 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
913 obj_type = obj->type;
914 if (obj_type != GOT_OBJ_TYPE_TAG) {
915 err = got_error(GOT_ERR_OBJ_TYPE);
916 got_object_close(obj);
919 err = got_packfile_extract_object_to_mem(&buf, &len,
921 got_object_close(obj);
924 err = got_object_parse_tag(tag, buf, len);
926 } else if (err->code == GOT_ERR_NO_OBJ) {
/*
 * Loose object: the file is opened twice, first to read only the header
 * for the type check, then again to read the tag itself.
 */
929 err = got_object_open_loose_fd(&fd, id, repo);
932 err = got_object_read_header(&obj, fd);
933 if (close(fd) == -1 && err == NULL)
934 err = got_error_from_errno("close");
937 obj_type = obj->type;
938 got_object_close(obj);
939 if (obj_type != GOT_OBJ_TYPE_TAG)
940 return got_error(GOT_ERR_OBJ_TYPE);
942 err = got_object_open_loose_fd(&fd, id, repo);
945 err = got_object_read_tag(tag, fd, id, 0);
946 if (close(fd) == -1 && err == NULL)
947 err = got_error_from_errno("close");
954 err = got_repo_cache_tag(repo, id, *tag);
956 if (err->code == GOT_ERR_OBJ_EXISTS ||
957 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Open the tag with the given ID.
 *
 * The tag cache is probed here; how a hit is handled is not visible in
 * this listing (presumably it is returned directly). Otherwise open_tag()
 * is called with check_cache == 0.
 */
966 const struct got_error *
967 got_object_open_as_tag(struct got_tag_object **tag,
968 struct got_repository *repo, struct got_object_id *id)
970 *tag = got_repo_get_cached_tag(repo, id);
976 return open_tag(tag, repo, id, 0);
/*
 * Open the tag that corresponds to an already-opened object. The ID is
 * taken from obj and open_tag() is called with check_cache == 1.
 */
979 const struct got_error *
980 got_object_tag_open(struct got_tag_object **tag,
981 struct got_repository *repo, struct got_object *obj)
983 return open_tag(tag, repo, got_object_get_id(obj), 1);