commit 05fd31691c7f3c33867fb0acd5008fa2f555b461 from: Stefan Sperling date: Tue Mar 19 13:47:25 2024 UTC add support for reading blobs to object_open_io.c commit - 4c9b88110ffd0deb6eed9861f320246d8354bae2 commit + 05fd31691c7f3c33867fb0acd5008fa2f555b461 blob - babca5450a093226d85ce667e921aa5d399584b8 blob + f786101c27c52e49d41dab5ca56ae883ecf272a5 --- lib/object_open_io.c +++ lib/object_open_io.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "got_error.h" #include "got_object.h" @@ -38,6 +39,8 @@ #include "got_lib_object_parse.h" #include "got_lib_pack.h" #include "got_lib_repository.h" +#include "got_lib_inflate.h" +#include "got_lib_hash.h" const struct got_error * got_object_open_packed(struct got_object **obj, struct got_object_id *id, @@ -607,13 +610,253 @@ got_object_tree_open(struct got_tree_object **tree, { return open_tree(tree, repo, got_object_get_id(obj), 1); } + +static const struct got_error * +read_packed_blob(uint8_t **outbuf, size_t *size, size_t *hdrlen, + int outfd, struct got_pack *pack, struct got_packidx *packidx, int idx, + struct got_object_id *id, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object *obj; + FILE *outfile = NULL, *basefile = NULL, *accumfile = NULL; + uint64_t blob_size; + + *hdrlen = 0; + + err = got_object_open_from_packfile(&obj, id, pack, packidx, idx, + repo); + if (err) + return err; + if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) { + err = got_pack_get_max_delta_object_size(&blob_size, obj, + pack); + if (err) + goto done; + } else + blob_size = obj->size; + + if (blob_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + err = got_packfile_extract_object_to_mem(outbuf, size, + obj, pack); + } else { + /* + * XXX This uses 3 file extra descriptors for no good reason. + * We should have got_packfile_extract_object_to_fd(). + */ + err = wrap_fd(&outfile, outfd); + if (err) + goto done; + err = wrap_fd(&basefile, pack->basefd); + if (err) + goto done; + err = wrap_fd(&accumfile, pack->accumfd); + if (err) + goto done; + err = got_packfile_extract_object(pack, obj, outfile, basefile, + accumfile); + if (err) + goto done; + *size = obj->size; + } + + /* XXX verify checksum? */ +done: + got_object_close(obj); + if (outfile && fclose(outfile) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + if (basefile && fclose(basefile) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + if (accumfile && fclose(accumfile) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + return err; +} + +static const struct got_error * +read_blob(uint8_t **outbuf, size_t *size, size_t *hdrlen, int outfd, int infd, + struct got_object_id *id, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object *obj = NULL; + FILE *f = NULL; + struct got_object_id expected_id; + struct got_inflate_checksum csum; + struct got_hash ctx; + + got_hash_init(&ctx, GOT_HASH_SHA1); + memset(&csum, 0, sizeof(csum)); + csum.output_ctx = &ctx; + + memcpy(&expected_id, id, sizeof(expected_id)); + + err = got_object_read_header(&obj, infd); + if (err) + goto done; + + if (lseek(infd, SEEK_SET, 0) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + + f = fdopen(infd, "rb"); + if (f == NULL) { + err = got_error_from_errno("fdopen"); + goto done; + } + infd = -1; + + if (obj->size + obj->hdrlen <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + err = got_inflate_to_mem(outbuf, size, NULL, &csum, f); + if (err) + goto done; + } else { + err = got_inflate_to_fd(size, f, &csum, outfd); + if (err) + goto done; + } + + if (*size < obj->hdrlen) { + err = got_error(GOT_ERR_BAD_OBJ_HDR); + goto done; + } + + *hdrlen = obj->hdrlen; + + got_hash_final_object_id(&ctx, id); + if (got_object_id_cmp(&expected_id, id) != 0) { + err = got_error_checksum(&expected_id); + goto done; + } +done: + if (f && fclose(f) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + if (infd != -1 && close(infd) == -1 && err == NULL) + err = got_error_from_errno("close"); + + return err; +} + +static const struct got_error * +open_blob(struct got_blob_object **blob, struct got_repository *repo, + struct got_object_id *id, size_t blocksize, int outfd) +{ + const struct got_error *err = NULL; + struct got_packidx *packidx = NULL; + int idx, dfd = -1; + char *path_packfile = NULL; + uint8_t *outbuf; + size_t size, hdrlen; + struct stat sb; + + *blob = calloc(1, sizeof(**blob)); + if (*blob == NULL) + return got_error_from_errno("calloc"); + + (*blob)->read_buf = malloc(blocksize); + if ((*blob)->read_buf == NULL) { + err = got_error_from_errno("malloc"); + goto done; + } + + if (ftruncate(outfd, 0L) == -1) { + err = got_error_from_errno("ftruncate"); + goto done; + } + if (lseek(outfd, SEEK_SET, 0) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + + err = got_repo_search_packidx(&packidx, &idx, repo, id); + if (err == NULL) { + struct got_pack *pack = NULL; + + err = got_packidx_get_packfile_path(&path_packfile, + packidx->path_packidx); + if (err) + goto done; + + pack = got_repo_get_cached_pack(repo, path_packfile); + if (pack == NULL) { + err = got_repo_cache_pack(&pack, repo, path_packfile, + packidx); + if (err) + goto done; + } + err = read_packed_blob(&outbuf, &size, &hdrlen, outfd, + pack, packidx, idx, id, repo); + } else if (err->code == GOT_ERR_NO_OBJ) { + int infd; + + err = got_object_open_loose_fd(&infd, id, repo); + if (err) + goto done; + err = read_blob(&outbuf, &size, &hdrlen, outfd, infd, + id, repo); + } + if (err) + goto done; + + if (hdrlen > size) { + err = got_error(GOT_ERR_BAD_OBJ_HDR); + goto done; + } + + if (outbuf) { + (*blob)->f = fmemopen(outbuf, size, "rb"); + if ((*blob)->f == NULL) { + err = got_error_from_errno("fmemopen"); + free(outbuf); + goto done; + } + (*blob)->data = outbuf; + } else { + if (fstat(outfd, &sb) == -1) { + err = got_error_from_errno("fstat"); + goto done; + } + + if (sb.st_size != size) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + goto done; + } + + dfd = dup(outfd); + if (dfd == -1) { + err = got_error_from_errno("dup"); + goto done; + } + + (*blob)->f = fdopen(dfd, "rb"); + if ((*blob)->f == NULL) { + err = got_error_from_errno("fdopen"); + close(dfd); + dfd = -1; + goto done; + } + } + + (*blob)->hdrlen = hdrlen; + (*blob)->blocksize = blocksize; + memcpy(&(*blob)->id, id, sizeof(*id)); + +done: + free(path_packfile); + if (err) { + if (*blob) { + got_object_blob_close(*blob); + *blob = NULL; + } + } + return err; +} + const struct got_error * got_object_open_as_blob(struct got_blob_object **blob, struct got_repository *repo, struct got_object_id *id, size_t blocksize, int outfd) { - return got_error(GOT_ERR_NOT_IMPL); + return open_blob(blob, repo, id, blocksize, outfd); } const struct got_error * @@ -621,7 +864,7 @@ got_object_blob_open(struct got_blob_object **blob, struct got_repository *repo, struct got_object *obj, size_t blocksize, int outfd) { - return got_error(GOT_ERR_NOT_IMPL); + return open_blob(blob, repo, got_object_get_id(obj), blocksize, outfd); } static const struct got_error *