2 * Copyright (c) 2022 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include "got_compat.h"
19 #include <sys/queue.h>
30 #include "got_error.h"
31 #include "got_object.h"
32 #include "got_repository.h"
35 #include "got_lib_delta.h"
36 #include "got_lib_object.h"
37 #include "got_lib_object_cache.h"
38 #include "got_lib_object_parse.h"
39 #include "got_lib_pack.h"
40 #include "got_lib_repository.h"
41 #include "got_lib_inflate.h"
42 #include "got_lib_hash.h"
/*
 * Open the object identified by id from one of the repository's pack files.
 *
 * The ID is looked up in the pack indexes, the matching pack file path is
 * derived from the index path, the pack is obtained through the repository's
 * pack cache, and the object is opened from that pack. The opened object is
 * then offered to the repository's object cache.
 */
44 const struct got_error *
45 got_object_open_packed(struct got_object **obj, struct got_object_id *id,
46 struct got_repository *repo)
48 const struct got_error *err = NULL;
49 struct got_pack *pack = NULL;
50 struct got_packidx *packidx = NULL;
/* Find the pack index containing id, and id's position within that index. */
54 err = got_repo_search_packidx(&packidx, &idx, repo, id);
/* The pack file path is computed from the pack index path. */
58 err = got_packidx_get_packfile_path(&path_packfile,
59 packidx->path_packidx);
/*
 * Look for an already cached pack.
 * NOTE(review): got_repo_cache_pack() presumably runs only on a cache
 * miss -- confirm.
 */
63 pack = got_repo_get_cached_pack(repo, path_packfile);
65 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
70 err = got_packfile_open_object(obj, pack, packidx, idx, id);
75 err = got_repo_cache_object(repo, id, *obj);
/*
 * NOTE(review): GOT_ERR_OBJ_EXISTS and GOT_ERR_OBJ_TOO_LARGE from the
 * object cache are singled out here, presumably to be treated as
 * non-fatal -- confirm.
 */
77 if (err->code == GOT_ERR_OBJ_EXISTS ||
78 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Open the object identified by id from an already known pack file, given
 * the pack, its index, and the object's position (obj_idx) in that index.
 *
 * The repository's object cache is consulted first; the object opened from
 * the pack is afterwards offered to that cache.
 */
86 const struct got_error *
87 got_object_open_from_packfile(struct got_object **obj, struct got_object_id *id,
88 struct got_pack *pack, struct got_packidx *packidx, int obj_idx,
89 struct got_repository *repo)
91 const struct got_error *err;
/* Cache lookup by object ID. */
93 *obj = got_repo_get_cached_object(repo, id);
99 err = got_packfile_open_object(obj, pack, packidx, obj_idx, id);
104 err = got_repo_cache_object(repo, id, *obj);
/*
 * NOTE(review): these two cache error codes are special-cased, presumably
 * so that a failure to cache does not fail the open -- confirm.
 */
106 if (err->code == GOT_ERR_OBJ_EXISTS ||
107 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Reading raw delta data is not implemented here: the function ignores its
 * arguments and returns a GOT_ERR_NOT_IMPL error.
 */
115 const struct got_error *
116 got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size,
117 off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset,
118 off_t *delta_out_offset, struct got_object_id **base_id, int delta_cache_fd,
119 struct got_packidx *packidx, int obj_idx, struct got_object_id *id,
120 struct got_repository *repo)
122 return got_error(GOT_ERR_NOT_IMPL);
/*
 * Open the object identified by id, wherever it is stored.
 *
 * Lookup order as visible here: the repository's object cache, then pack
 * files via got_object_open_packed(), then a loose object file. For a loose
 * object the header is read from the file, the ID is copied into the new
 * object, and the object is offered to the object cache.
 */
125 const struct got_error *
126 got_object_open(struct got_object **obj, struct got_repository *repo,
127 struct got_object_id *id)
129 const struct got_error *err = NULL;
132 *obj = got_repo_get_cached_object(repo, id);
138 err = got_object_open_packed(obj, id, repo);
/*
 * Only GOT_ERR_NO_OBJ (object not found in any pack) leads on to the
 * loose-object path; NOTE(review): other errors are presumably returned
 * to the caller -- confirm.
 */
140 if (err->code != GOT_ERR_NO_OBJ)
145 err = got_object_open_loose_fd(&fd, id, repo);
/* A missing loose object file is reported as "no such object" for id. */
147 if (err->code == GOT_ERR_ERRNO && errno == ENOENT)
148 err = got_error_no_obj(id);
152 err = got_object_read_header(obj, fd);
/* Record the requested ID in the newly opened object. */
156 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
159 err = got_repo_cache_object(repo, id, *obj);
161 if (err->code == GOT_ERR_OBJ_EXISTS ||
162 err->code == GOT_ERR_OBJ_TOO_LARGE)
/* A close(2) failure is reported only if no earlier error is pending. */
166 if (close(fd) == -1 && err == NULL)
167 err = got_error_from_errno("close");
/*
 * Provide a stdio stream on top of an existing file descriptor.
 *
 * The file behind wrapped_fd is truncated to zero length and its offset is
 * reset to the start. The stream is opened in "w+" mode on a dup(2) of the
 * descriptor, so closing the stream closes the duplicate and leaves
 * wrapped_fd itself open.
 */
171 static const struct got_error *
172 wrap_fd(FILE **f, int wrapped_fd)
174 const struct got_error *err = NULL;
/* Discard any previous contents of the file. */
177 if (ftruncate(wrapped_fd, 0L) == -1)
178 return got_error_from_errno("ftruncate");
180 if (lseek(wrapped_fd, 0L, SEEK_SET) == -1)
181 return got_error_from_errno("lseek");
/* Duplicate so that fclose(3) on the stream does not close wrapped_fd. */
183 fd = dup(wrapped_fd);
185 return got_error_from_errno("dup");
187 *f = fdopen(fd, "w+");
189 err = got_error_from_errno("fdopen");
/*
 * Extract the raw data of a packed object, either into memory (*outbuf) or
 * into the file behind outfd.
 *
 * The object is opened from the pack. Its size is taken from obj->size, or
 * for deltified objects from got_pack_get_max_delta_object_size(). Objects
 * of at most GOT_DELTA_RESULT_SIZE_CACHED_MAX bytes are extracted with
 * got_packfile_extract_object_to_mem(); the other visible path wraps outfd,
 * pack->basefd and pack->accumfd in stdio streams and extracts through
 * got_packfile_extract_object(). *hdrlen is set from obj->hdrlen.
 */
195 static const struct got_error *
196 read_packed_object_raw(uint8_t **outbuf, off_t *size, size_t *hdrlen,
197 int outfd, struct got_pack *pack, struct got_packidx *packidx, int idx,
198 struct got_object_id *id)
200 const struct got_error *err = NULL;
201 uint64_t raw_size = 0;
202 struct got_object *obj;
203 FILE *outfile = NULL, *basefile = NULL, *accumfile = NULL;
209 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
/* Determine the size used to choose between memory and file extraction. */
213 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
214 err = got_pack_get_max_delta_object_size(&raw_size, obj, pack);
218 raw_size = obj->size;
220 if (raw_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
222 err = got_packfile_extract_object_to_mem(outbuf, &len,
229 * XXX This uses 3 file extra descriptors for no good reason.
230 * We should have got_packfile_extract_object_to_fd().
232 err = wrap_fd(&outfile, outfd);
235 err = wrap_fd(&basefile, pack->basefd);
238 err = wrap_fd(&accumfile, pack->accumfd);
241 err = got_packfile_extract_object(pack, obj, outfile, basefile,
248 *hdrlen = obj->hdrlen;
/*
 * Cleanup: release the object and close whichever streams were opened.
 * An fclose(3) failure is reported only if no earlier error is pending.
 */
250 got_object_close(obj);
251 if (outfile && fclose(outfile) == EOF && err == NULL)
252 err = got_error_from_errno("fclose");
253 if (basefile && fclose(basefile) == EOF && err == NULL)
254 err = got_error_from_errno("fclose");
255 if (accumfile && fclose(accumfile) == EOF && err == NULL)
256 err = got_error_from_errno("fclose");
/*
 * Close callback for raw objects (installed as close_cb by
 * got_object_raw_open()). Hands the object's temporary file slot back to
 * the repository stored in obj->close_arg, unless tempfile_idx is -1.
 */
262 put_raw_object_tempfile(struct got_raw_object *obj)
264 struct got_repository *repo = obj->close_arg;
266 if (obj->tempfile_idx != -1)
267 got_repo_temp_fds_put(obj->tempfile_idx, repo);
/*
 * Open the raw (complete, uninterpreted) form of the object identified by
 * id.
 *
 * The raw-object cache is consulted first. Otherwise a temporary file
 * descriptor is borrowed from the repository and the object data is read
 * either from a pack file (read_packed_object_raw()) or, if the ID is not
 * found in any pack index, from a loose object file (got_object_read_raw()).
 * The result is turned into a got_raw_object and offered to the raw-object
 * cache.
 */
270 /* *outfd must be initialized to -1 by caller */
271 const struct got_error *
272 got_object_raw_open(struct got_raw_object **obj, int *outfd,
273 struct got_repository *repo, struct got_object_id *id)
275 const struct got_error *err = NULL;
276 struct got_packidx *packidx = NULL;
277 int idx, tempfd, tempfile_idx;
278 uint8_t *outbuf = NULL;
281 char *path_packfile = NULL;
283 *obj = got_repo_get_cached_raw_object(repo, id);
/* Borrow a temporary file; tempfile_idx identifies it for returning later. */
289 err = got_repo_temp_fds_get(&tempfd, &tempfile_idx, repo);
293 err = got_repo_search_packidx(&packidx, &idx, repo, id);
295 struct got_pack *pack = NULL;
297 err = got_packidx_get_packfile_path(&path_packfile,
298 packidx->path_packidx);
302 pack = got_repo_get_cached_pack(repo, path_packfile);
304 err = got_repo_cache_pack(&pack, repo, path_packfile,
309 err = read_packed_object_raw(&outbuf, &size, &hdrlen,
310 tempfd, pack, packidx, idx, id);
/* Not present in any pack index: fall back to a loose object file. */
313 } else if (err->code == GOT_ERR_NO_OBJ) {
316 err = got_object_open_loose_fd(&fd, id, repo);
319 err = got_object_read_raw(&outbuf, &size, &hdrlen,
320 GOT_DELTA_RESULT_SIZE_CACHED_MAX, tempfd, id, fd);
321 if (close(fd) == -1 && err == NULL)
322 err = got_error_from_errno("close");
/*
 * No in-memory buffer was produced.
 * NOTE(review): the "bad outfd" error presumably fires when the caller
 * did not initialize *outfd to -1 as required above -- confirm.
 */
327 if (outbuf == NULL) {
329 err = got_error_msg(GOT_ERR_NOT_IMPL, "bad outfd");
334 * Duplicate tempfile descriptor to allow use of
335 * fdopen(3) inside got_object_raw_alloc().
337 *outfd = dup(tempfd);
339 err = got_error_from_errno("dup");
344 err = got_object_raw_alloc(obj, outbuf, outfd,
345 GOT_DELTA_RESULT_SIZE_CACHED_MAX, hdrlen, size);
349 err = got_repo_cache_raw_object(repo, id, *obj);
351 if (err->code == GOT_ERR_OBJ_EXISTS ||
352 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * NOTE(review): presumably the failure path -- the raw object is closed
 * and the temporary file is handed back to the repository. Confirm.
 */
359 got_object_raw_close(*obj);
363 got_repo_temp_fds_put(tempfile_idx, repo);
369 if (((*obj)->f == NULL && (*obj)->fd == -1)) {
370 /* This raw object is not backed by a file. */
371 got_repo_temp_fds_put(tempfile_idx, repo);
/*
 * Remember the temporary file slot in the object and register
 * put_raw_object_tempfile() as its close callback, with the repository
 * as the callback argument.
 */
377 (*obj)->tempfile_idx = tempfile_idx;
378 (*obj)->close_cb = put_raw_object_tempfile;
379 (*obj)->close_arg = repo;
/*
 * Open and parse the commit object identified by id.
 *
 * The commit cache may be consulted first (see check_cache; the callers in
 * this file pass 0 when they have already done the lookup themselves). The
 * commit is then read either from a pack file, where the object is extracted
 * into memory and parsed with got_object_parse_commit(), or, if the ID is
 * not found in any pack index, from a loose object file with
 * got_object_read_commit(). The result is offered to the commit cache.
 */
385 static const struct got_error *
386 open_commit(struct got_commit_object **commit,
387 struct got_repository *repo, struct got_object_id *id, int check_cache)
389 const struct got_error *err = NULL;
390 struct got_packidx *packidx = NULL;
392 char *path_packfile = NULL;
395 *commit = got_repo_get_cached_commit(repo, id);
396 if (*commit != NULL) {
403 err = got_repo_search_packidx(&packidx, &idx, repo, id);
405 struct got_pack *pack = NULL;
406 struct got_object *obj;
410 err = got_packidx_get_packfile_path(&path_packfile,
411 packidx->path_packidx);
415 pack = got_repo_get_cached_pack(repo, path_packfile);
417 err = got_repo_cache_pack(&pack, repo, path_packfile,
/* Packed commit: open, extract into a memory buffer, then parse. */
422 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
425 err = got_packfile_extract_object_to_mem(&buf, &len,
427 got_object_close(obj);
430 err = got_object_parse_commit(commit, buf, len);
/* Not present in any pack index: read the commit from a loose object. */
432 } else if (err->code == GOT_ERR_NO_OBJ) {
435 err = got_object_open_loose_fd(&fd, id, repo);
438 err = got_object_read_commit(commit, fd, id, 0);
439 if (close(fd) == -1 && err == NULL)
440 err = got_error_from_errno("close");
447 err = got_repo_cache_commit(repo, id, *commit);
/*
 * NOTE(review): these cache error codes are special-cased, presumably
 * as non-fatal -- confirm.
 */
449 if (err->code == GOT_ERR_OBJ_EXISTS ||
450 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Open the commit object identified by id. The commit cache is checked
 * here, which is why open_commit() is called with check_cache set to 0.
 */
459 const struct got_error *
460 got_object_open_as_commit(struct got_commit_object **commit,
461 struct got_repository *repo, struct got_object_id *id)
463 *commit = got_repo_get_cached_commit(repo, id);
464 if (*commit != NULL) {
469 return open_commit(commit, repo, id, 0);
/*
 * Open the commit that corresponds to an already opened object. The ID is
 * taken from obj and open_commit() is asked to check the cache.
 */
472 const struct got_error *
473 got_object_commit_open(struct got_commit_object **commit,
474 struct got_repository *repo, struct got_object *obj)
476 return open_commit(commit, repo, got_object_get_id(obj), 1);
/*
 * Open and parse the tree object identified by id.
 *
 * Tree entries are parsed either from a packed object extracted into memory
 * (got_object_parse_tree()) or, if the ID is not found in any pack index,
 * from a loose object file (got_object_read_tree()). The parsed entries are
 * then copied into a newly allocated got_tree_object, which is offered to
 * the tree cache.
 */
479 static const struct got_error *
480 open_tree(struct got_tree_object **tree,
481 struct got_repository *repo, struct got_object_id *id, int check_cache)
483 const struct got_error *err = NULL;
484 struct got_packidx *packidx = NULL;
486 char *path_packfile = NULL;
487 struct got_parsed_tree_entry *entries = NULL;
488 size_t nentries = 0, nentries_alloc = 0, i;
492 *tree = got_repo_get_cached_tree(repo, id);
500 err = got_repo_search_packidx(&packidx, &idx, repo, id);
502 struct got_pack *pack = NULL;
503 struct got_object *obj;
506 err = got_packidx_get_packfile_path(&path_packfile,
507 packidx->path_packidx);
511 pack = got_repo_get_cached_pack(repo, path_packfile);
513 err = got_repo_cache_pack(&pack, repo, path_packfile,
/* Packed tree: open, extract into a memory buffer, then parse entries. */
518 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
521 err = got_packfile_extract_object_to_mem(&buf, &len,
523 got_object_close(obj);
526 err = got_object_parse_tree(&entries, &nentries,
527 &nentries_alloc, buf, len);
/* Not present in any pack index: read the tree from a loose object. */
530 } else if (err->code == GOT_ERR_NO_OBJ) {
533 err = got_object_open_loose_fd(&fd, id, repo);
536 err = got_object_read_tree(&entries, &nentries,
537 &nentries_alloc, &buf, fd, id);
538 if (close(fd) == -1 && err == NULL)
539 err = got_error_from_errno("close");
/* Build the tree object that is handed to the caller. */
545 *tree = malloc(sizeof(**tree));
547 err = got_error_from_errno("malloc");
/*
 * NOTE(review): the entries array is allocated with calloc(3) but a
 * failure is labelled "malloc" below; "calloc" would make the error
 * message accurate.
 */
550 (*tree)->entries = calloc(nentries, sizeof(struct got_tree_entry));
551 if ((*tree)->entries == NULL) {
552 err = got_error_from_errno("malloc");
555 (*tree)->nentries = nentries;
/* Convert each parsed entry into a tree entry. */
558 for (i = 0; i < nentries; i++) {
559 struct got_parsed_tree_entry *pe = &entries[i];
560 struct got_tree_entry *te = &(*tree)->entries[i];
/* A name that does not fit into te->name is an error, not truncated. */
562 if (strlcpy(te->name, pe->name,
563 sizeof(te->name)) >= sizeof(te->name)) {
564 err = got_error(GOT_ERR_NO_SPACE);
/* Only SHA1_DIGEST_LENGTH bytes of the entry's ID are copied. */
567 memcpy(te->id.sha1, pe->id, SHA1_DIGEST_LENGTH);
577 err = got_repo_cache_tree(repo, id, *tree);
579 if (err->code == GOT_ERR_OBJ_EXISTS ||
580 err->code == GOT_ERR_OBJ_TOO_LARGE)
586 free((*tree)->entries);
/*
 * Open the tree object identified by id. The tree cache is checked here,
 * which is why open_tree() is called with check_cache set to 0.
 */
593 const struct got_error *
594 got_object_open_as_tree(struct got_tree_object **tree,
595 struct got_repository *repo, struct got_object_id *id)
597 *tree = got_repo_get_cached_tree(repo, id);
603 return open_tree(tree, repo, id, 0);
/*
 * Open the tree that corresponds to an already opened object. The ID is
 * taken from obj and open_tree() is asked to check the cache.
 */
606 const struct got_error *
607 got_object_tree_open(struct got_tree_object **tree,
608 struct got_repository *repo, struct got_object *obj)
610 return open_tree(tree, repo, got_object_get_id(obj), 1);
/*
 * Extract a blob stored in a pack file, either into memory (*outbuf) or
 * into the file behind outfd.
 *
 * The object is opened via got_object_open_from_packfile(). Its size is
 * taken from obj->size, or for deltified objects from
 * got_pack_get_max_delta_object_size(). Blobs of at most
 * GOT_DELTA_RESULT_SIZE_CACHED_MAX bytes are extracted with
 * got_packfile_extract_object_to_mem(); the other visible path wraps outfd,
 * pack->basefd and pack->accumfd in stdio streams and extracts through
 * got_packfile_extract_object().
 */
613 static const struct got_error *
614 read_packed_blob(uint8_t **outbuf, size_t *size, size_t *hdrlen,
615 int outfd, struct got_pack *pack, struct got_packidx *packidx, int idx,
616 struct got_object_id *id, struct got_repository *repo)
618 const struct got_error *err = NULL;
619 struct got_object *obj;
620 FILE *outfile = NULL, *basefile = NULL, *accumfile = NULL;
625 err = got_object_open_from_packfile(&obj, id, pack, packidx, idx,
/* Determine the size used to choose between memory and file extraction. */
630 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
631 err = got_pack_get_max_delta_object_size(&blob_size, obj,
636 blob_size = obj->size;
638 if (blob_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
639 err = got_packfile_extract_object_to_mem(outbuf, size,
643 * XXX This uses 3 file extra descriptors for no good reason.
644 * We should have got_packfile_extract_object_to_fd().
646 err = wrap_fd(&outfile, outfd);
649 err = wrap_fd(&basefile, pack->basefd);
652 err = wrap_fd(&accumfile, pack->accumfd);
655 err = got_packfile_extract_object(pack, obj, outfile, basefile,
662 /* XXX verify checksum? */
/*
 * Cleanup: release the object and close whichever streams were opened.
 * An fclose(3) failure is reported only if no earlier error is pending.
 */
664 got_object_close(obj);
665 if (outfile && fclose(outfile) == EOF && err == NULL)
666 err = got_error_from_errno("fclose");
667 if (basefile && fclose(basefile) == EOF && err == NULL)
668 err = got_error_from_errno("fclose");
669 if (accumfile && fclose(accumfile) == EOF && err == NULL)
670 err = got_error_from_errno("fclose");
/*
 * Read a blob from the loose object file open on infd and verify its ID.
 *
 * The object header is read to learn the size, the descriptor is rewound,
 * and the data is inflated into memory (*outbuf) when size plus header
 * length is at most GOT_DELTA_RESULT_SIZE_CACHED_MAX, or else into outfd.
 * A SHA1 hash is computed over the inflated output and compared against the
 * expected object ID; a mismatch yields a checksum error.
 */
674 static const struct got_error *
675 read_blob(uint8_t **outbuf, size_t *size, size_t *hdrlen, int outfd, int infd,
676 struct got_object_id *id, struct got_repository *repo)
678 const struct got_error *err = NULL;
679 struct got_object *obj = NULL;
681 struct got_object_id expected_id;
682 struct got_inflate_checksum csum;
/* Have the inflate routines feed their output into the hash context. */
685 got_hash_init(&ctx, GOT_HASH_SHA1);
686 memset(&csum, 0, sizeof(csum));
687 csum.output_ctx = &ctx;
/* Keep a copy of the requested ID for the comparison further below. */
689 memcpy(&expected_id, id, sizeof(expected_id));
691 err = got_object_read_header(&obj, infd);
/*
 * NOTE(review): lseek(2) takes (fd, offset, whence); the last two
 * arguments are transposed here. This rewinds correctly only where
 * SEEK_SET is defined as 0. wrap_fd() uses the conventional order.
 */
695 if (lseek(infd, SEEK_SET, 0) == -1) {
696 err = got_error_from_errno("lseek");
700 f = fdopen(infd, "rb");
702 err = got_error_from_errno("fdopen");
/* Small objects are inflated into memory, larger ones into outfd. */
707 if (obj->size + obj->hdrlen <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
708 err = got_inflate_to_mem(outbuf, size, NULL, &csum, f);
712 err = got_inflate_to_fd(size, f, &csum, outfd);
/* The inflated data must at least contain the object header. */
717 if (*size < obj->hdrlen) {
718 err = got_error(GOT_ERR_BAD_OBJ_HDR);
722 *hdrlen = obj->hdrlen;
/*
 * NOTE(review): the computed ID is written through the caller's id
 * pointer before being compared with the saved copy -- confirm that
 * callers expect id to be overwritten.
 */
724 got_hash_final_object_id(&ctx, id);
725 if (got_object_id_cmp(&expected_id, id) != 0) {
726 err = got_error_checksum(&expected_id);
/*
 * NOTE(review): fclose(f) also closes the descriptor f was opened on;
 * confirm infd is reset to -1 after a successful fdopen() so that it is
 * not closed a second time below.
 */
730 if (f && fclose(f) == EOF && err == NULL)
731 err = got_error_from_errno("fclose");
732 if (infd != -1 && close(infd) == -1 && err == NULL)
733 err = got_error_from_errno("close");
/*
 * Open the blob identified by id and prepare it for reading.
 *
 * A got_blob_object with a read buffer of blocksize bytes is allocated and
 * outfd is truncated and rewound. The blob data is obtained from a pack
 * file (read_packed_blob()) or, if the ID is not found in any pack index,
 * from a loose object file (read_blob()). Data returned in memory is
 * exposed through an fmemopen(3) stream; otherwise a stream is opened on a
 * duplicate descriptor after checking that the size of the file behind
 * outfd matches the reported size.
 */
738 static const struct got_error *
739 open_blob(struct got_blob_object **blob, struct got_repository *repo,
740 struct got_object_id *id, size_t blocksize, int outfd)
742 const struct got_error *err = NULL;
743 struct got_packidx *packidx = NULL;
745 char *path_packfile = NULL;
750 *blob = calloc(1, sizeof(**blob));
752 return got_error_from_errno("calloc");
754 (*blob)->read_buf = malloc(blocksize);
755 if ((*blob)->read_buf == NULL) {
756 err = got_error_from_errno("malloc");
/* Start from an empty output file. */
760 if (ftruncate(outfd, 0L) == -1) {
761 err = got_error_from_errno("ftruncate");
/*
 * NOTE(review): lseek(2) takes (fd, offset, whence); the last two
 * arguments are transposed here. This rewinds correctly only where
 * SEEK_SET is defined as 0. wrap_fd() uses the conventional order.
 */
764 if (lseek(outfd, SEEK_SET, 0) == -1) {
765 err = got_error_from_errno("lseek");
769 err = got_repo_search_packidx(&packidx, &idx, repo, id);
771 struct got_pack *pack = NULL;
773 err = got_packidx_get_packfile_path(&path_packfile,
774 packidx->path_packidx);
778 pack = got_repo_get_cached_pack(repo, path_packfile);
780 err = got_repo_cache_pack(&pack, repo, path_packfile,
785 err = read_packed_blob(&outbuf, &size, &hdrlen, outfd,
786 pack, packidx, idx, id, repo);
/* Not present in any pack index: read the blob from a loose object. */
787 } else if (err->code == GOT_ERR_NO_OBJ) {
790 err = got_object_open_loose_fd(&infd, id, repo);
793 err = read_blob(&outbuf, &size, &hdrlen, outfd, infd,
800 err = got_error(GOT_ERR_BAD_OBJ_HDR);
/* In-memory data: read it through a memory-backed stream. */
805 (*blob)->f = fmemopen(outbuf, size, "rb");
806 if ((*blob)->f == NULL) {
807 err = got_error_from_errno("fmemopen");
811 (*blob)->data = outbuf;
/* File-backed data: the file size must match the size reported above. */
813 if (fstat(outfd, &sb) == -1) {
814 err = got_error_from_errno("fstat");
818 if (sb.st_size != size) {
819 err = got_error(GOT_ERR_PRIVSEP_LEN);
825 err = got_error_from_errno("dup");
829 (*blob)->f = fdopen(dfd, "rb");
830 if ((*blob)->f == NULL) {
831 err = got_error_from_errno("fdopen");
838 (*blob)->hdrlen = hdrlen;
839 (*blob)->blocksize = blocksize;
840 memcpy(&(*blob)->id, id, sizeof(*id));
/* NOTE(review): presumably the failure path releasing the blob -- confirm. */
846 got_object_blob_close(*blob);
/*
 * Open the blob identified by id; forwards its arguments to open_blob().
 */
853 const struct got_error *
854 got_object_open_as_blob(struct got_blob_object **blob,
855 struct got_repository *repo, struct got_object_id *id, size_t blocksize,
858 return open_blob(blob, repo, id, blocksize, outfd);
/*
 * Open the blob that corresponds to an already opened object. The ID is
 * taken from obj and the work is delegated to open_blob().
 */
861 const struct got_error *
862 got_object_blob_open(struct got_blob_object **blob,
863 struct got_repository *repo, struct got_object *obj, size_t blocksize,
866 return open_blob(blob, repo, got_object_get_id(obj), blocksize, outfd);
/*
 * Open and parse the tag object identified by id.
 *
 * In both the packed and the loose case the object's type is checked
 * before any tag data is parsed; an object that is not of type
 * GOT_OBJ_TYPE_TAG results in GOT_ERR_OBJ_TYPE. Packed tags are extracted
 * into memory and parsed with got_object_parse_tag(); loose tags are read
 * with got_object_read_tag(). The result is offered to the tag cache.
 */
869 static const struct got_error *
870 open_tag(struct got_tag_object **tag, struct got_repository *repo,
871 struct got_object_id *id, int check_cache)
873 const struct got_error *err = NULL;
874 struct got_packidx *packidx = NULL;
876 char *path_packfile = NULL;
877 struct got_object *obj = NULL;
878 int obj_type = GOT_OBJ_TYPE_ANY;
881 *tag = got_repo_get_cached_tag(repo, id);
889 err = got_repo_search_packidx(&packidx, &idx, repo, id);
891 struct got_pack *pack = NULL;
895 err = got_packidx_get_packfile_path(&path_packfile,
896 packidx->path_packidx);
900 pack = got_repo_get_cached_pack(repo, path_packfile);
902 err = got_repo_cache_pack(&pack, repo, path_packfile,
908 /* Beware of "lightweight" tags: Check object type first. */
909 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
912 obj_type = obj->type;
913 if (obj_type != GOT_OBJ_TYPE_TAG) {
914 err = got_error(GOT_ERR_OBJ_TYPE);
915 got_object_close(obj);
918 err = got_packfile_extract_object_to_mem(&buf, &len,
920 got_object_close(obj);
923 err = got_object_parse_tag(tag, buf, len);
/* Not present in any pack index: read the tag from a loose object. */
925 } else if (err->code == GOT_ERR_NO_OBJ) {
/* First pass over the loose file: read only the header to learn the type. */
928 err = got_object_open_loose_fd(&fd, id, repo);
931 err = got_object_read_header(&obj, fd);
932 if (close(fd) == -1 && err == NULL)
933 err = got_error_from_errno("close");
936 obj_type = obj->type;
937 got_object_close(obj);
938 if (obj_type != GOT_OBJ_TYPE_TAG)
939 return got_error(GOT_ERR_OBJ_TYPE);
/* Second pass: reopen the loose file and read the tag itself. */
941 err = got_object_open_loose_fd(&fd, id, repo);
944 err = got_object_read_tag(tag, fd, id, 0);
945 if (close(fd) == -1 && err == NULL)
946 err = got_error_from_errno("close");
953 err = got_repo_cache_tag(repo, id, *tag);
/*
 * NOTE(review): these cache error codes are special-cased, presumably
 * as non-fatal -- confirm.
 */
955 if (err->code == GOT_ERR_OBJ_EXISTS ||
956 err->code == GOT_ERR_OBJ_TOO_LARGE)
/*
 * Open the tag object identified by id. The tag cache is checked here,
 * which is why open_tag() is called with check_cache set to 0.
 */
965 const struct got_error *
966 got_object_open_as_tag(struct got_tag_object **tag,
967 struct got_repository *repo, struct got_object_id *id)
969 *tag = got_repo_get_cached_tag(repo, id);
975 return open_tag(tag, repo, id, 0);
/*
 * Open the tag that corresponds to an already opened object. The ID is
 * taken from obj and open_tag() is asked to check the cache.
 */
978 const struct got_error *
979 got_object_tag_open(struct got_tag_object **tag,
980 struct got_repository *repo, struct got_object *obj)
982 return open_tag(tag, repo, got_object_get_id(obj), 1);