2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <sys/queue.h>
32 #include "got_error.h"
33 #include "got_object.h"
34 #include "got_repository.h"
/* Name components shared by pack files and their index files. */
42 #define GOT_PACK_PREFIX "pack-"
43 #define GOT_PACKFILE_SUFFIX ".pack"
44 #define GOT_PACKIDX_SUFFIX ".idx"
/*
 * Length of a pack file name: the prefix, SHA1_DIGEST_STRING_LENGTH - 1
 * further characters, and the ".pack" suffix.
 */
45 #define GOT_PACKFILE_NAMELEN (strlen(GOT_PACK_PREFIX) + \
46 SHA1_DIGEST_STRING_LENGTH - 1 + \
47 strlen(GOT_PACKFILE_SUFFIX))
/* Same as above, but with the ".idx" suffix of a pack index file. */
48 #define GOT_PACKIDX_NAMELEN (strlen(GOT_PACK_PREFIX) + \
49 SHA1_DIGEST_STRING_LENGTH - 1 + \
50 strlen(GOT_PACKIDX_SUFFIX))
/*
 * Smaller of two values. The selected argument is expanded twice, so
 * arguments with side effects must be avoided.
 */
53 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
56 static const struct got_error *
57 verify_fanout_table(uint32_t *fanout_table)
61 for (i = 0; i < 0xff - 1; i++) {
62 if (be32toh(fanout_table[i]) > be32toh(fanout_table[i + 1]))
63 return got_error(GOT_ERR_BAD_PACKIDX);
69 static const struct got_error *
70 get_packfile_size(size_t *size, const char *path_idx)
74 char base_path[PATH_MAX];
77 if (strlcpy(base_path, path_idx, PATH_MAX) > PATH_MAX)
78 return got_error(GOT_ERR_NO_SPACE);
80 dot = strrchr(base_path, '.');
82 return got_error(GOT_ERR_BAD_PATH);
84 if (asprintf(&path_pack, "%s.pack", base_path) == -1)
85 return got_error(GOT_ERR_NO_MEM);
87 if (stat(path_pack, &sb) != 0) {
89 return got_error_from_errno();
/*
 * Open the pack index file at 'path' and load it into a newly allocated
 * struct got_packidx_v2_hdr.
 *
 * The file is read section by section (magic, version, fanout table,
 * sorted object IDs, CRC32 values, offsets, optionally large offsets,
 * trailer). Every section except the index's own checksum is fed into a
 * SHA1 context, and the resulting digest is compared with
 * trailer.packidx_sha1; a mismatch yields GOT_ERR_PACKIDX_CSUM.
 *
 * Other errors visible here: GOT_ERR_BAD_PATH (after fopen), GOT_ERR_NO_MEM
 * (allocation failures), GOT_ERR_BAD_PACKIDX (wrong magic/version or, via
 * got_ferror(), a failed read), plus whatever get_packfile_size() and
 * verify_fanout_table() report.
 */
97 const struct got_error *
98 got_packidx_open(struct got_packidx_v2_hdr **packidx, const char *path)
100 struct got_packidx_v2_hdr *p;
102 const struct got_error *err = NULL;
103 size_t n, nobj, packfile_size;
105 uint8_t sha1[SHA1_DIGEST_LENGTH];
109 f = fopen(path, "rb");
111 return got_error(GOT_ERR_BAD_PATH);
/* The pack file size decides below whether large offsets are read. */
113 err = get_packfile_size(&packfile_size, path);
117 p = calloc(1, sizeof(*p));
119 err = got_error(GOT_ERR_NO_MEM);
/* Magic number and version must match the v2 index constants. */
123 n = fread(&p->magic, sizeof(p->magic), 1, f);
125 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
129 if (betoh32(p->magic) != GOT_PACKIDX_V2_MAGIC) {
130 err = got_error(GOT_ERR_BAD_PACKIDX);
134 SHA1Update(&ctx, (uint8_t *)&p->magic, sizeof(p->magic));
136 n = fread(&p->version, sizeof(p->version), 1, f);
138 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
142 if (betoh32(p->version) != GOT_PACKIDX_VERSION) {
143 err = got_error(GOT_ERR_BAD_PACKIDX);
147 SHA1Update(&ctx, (uint8_t *)&p->version, sizeof(p->version));
/* Fanout table; its last entry (index 0xff) supplies the object count. */
149 n = fread(&p->fanout_table, sizeof(p->fanout_table), 1, f);
151 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
155 err = verify_fanout_table(p->fanout_table);
159 SHA1Update(&ctx, (uint8_t *)p->fanout_table, sizeof(p->fanout_table));
161 nobj = betoh32(p->fanout_table[0xff]);
/* Per-object tables, each with nobj entries: IDs, CRC32s, offsets. */
163 p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
164 if (p->sorted_ids == NULL) {
165 err = got_error(GOT_ERR_NO_MEM);
169 n = fread(p->sorted_ids, sizeof(*p->sorted_ids), nobj, f);
171 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
175 SHA1Update(&ctx, (uint8_t *)p->sorted_ids,
176 nobj * sizeof(*p->sorted_ids));
178 p->crc32 = calloc(nobj, sizeof(*p->crc32));
179 if (p->crc32 == NULL) {
180 err = got_error(GOT_ERR_NO_MEM);
184 n = fread(p->crc32, sizeof(*p->crc32), nobj, f);
186 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
190 SHA1Update(&ctx, (uint8_t *)p->crc32, nobj * sizeof(*p->crc32));
192 p->offsets = calloc(nobj, sizeof(*p->offsets));
193 if (p->offsets == NULL) {
194 err = got_error(GOT_ERR_NO_MEM);
198 n = fread(p->offsets, sizeof(*p->offsets), nobj, f);
200 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
204 SHA1Update(&ctx, (uint8_t *)p->offsets, nobj * sizeof(*p->offsets));
206 /* Large file offsets are contained only in files > 2GB. */
207 if (packfile_size <= 0x80000000)
/*
 * NOTE(review): nobj large-offset entries are allocated and read here.
 * The Git pack index format appears to store one 64-bit entry only per
 * object whose offset does not fit in 31 bits -- confirm that reading
 * nobj entries cannot run into the trailer.
 */
210 p->large_offsets = calloc(nobj, sizeof(*p->large_offsets));
211 if (p->large_offsets == NULL) {
212 err = got_error(GOT_ERR_NO_MEM);
216 n = fread(p->large_offsets, sizeof(*p->large_offsets), nobj, f);
218 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
222 SHA1Update(&ctx, (uint8_t*)p->large_offsets,
223 nobj * sizeof(*p->large_offsets));
/*
 * Trailer: only the pack file's SHA1 is hashed; the index's own SHA1 is
 * the expected value the computed digest is compared against.
 */
226 n = fread(&p->trailer, sizeof(p->trailer), 1, f);
228 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
232 SHA1Update(&ctx, p->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
233 SHA1Final(sha1, &ctx);
234 if (memcmp(p->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
235 err = got_error(GOT_ERR_PACKIDX_CSUM);
/* Presumably the error path: the partially loaded index is released. */
239 got_packidx_close(p);
246 got_packidx_close(struct got_packidx_v2_hdr *packidx)
248 free(packidx->sorted_ids);
249 free(packidx->offsets);
250 free(packidx->crc32);
251 free(packidx->large_offsets);
/*
 * Check whether 'name', which is 'len' characters long, has the shape of a
 * pack index file name: its length must equal GOT_PACKIDX_NAMELEN, it must
 * begin with GOT_PACK_PREFIX, and the text following the prefix and the
 * SHA1_DIGEST_STRING_LENGTH - 1 digest characters must be exactly
 * GOT_PACKIDX_SUFFIX. The digest characters themselves are not inspected
 * by the checks visible here.
 */
256 is_packidx_filename(const char *name, size_t len)
258 if (len != GOT_PACKIDX_NAMELEN)
261 if (strncmp(name, GOT_PACK_PREFIX, strlen(GOT_PACK_PREFIX)) != 0)
264 if (strcmp(name + strlen(GOT_PACK_PREFIX) +
265 SHA1_DIGEST_STRING_LENGTH - 1, GOT_PACKIDX_SUFFIX) != 0)
272 get_object_offset(struct got_packidx_v2_hdr *packidx, int idx)
274 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
275 uint32_t offset = betoh32(packidx->offsets[idx]);
276 if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
278 idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
279 if (idx < 0 || idx > totobj || packidx->large_offsets == NULL)
281 loffset = betoh64(packidx->large_offsets[idx]);
282 return (loffset > INT64_MAX ? -1 : (off_t)loffset);
284 return (off_t)(offset & GOT_PACKIDX_OFFSET_VAL_MASK);
/*
 * Look up the object ID 'id' in the sorted ID table of 'packidx'.
 * The first byte of the SHA1 selects the fanout table entry which supplies
 * the position in sorted_ids where the comparison starts; IDs are compared
 * with got_object_id_cmp(). The caller stores the result as an int index.
 *
 * NOTE(review): fanout_table[id0 - 1] would read index -1 when id0 is 0;
 * confirm that a guard precedes this assignment.
 */
288 get_object_idx(struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
290 u_int8_t id0 = id->sha1[0];
291 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
295 i = betoh32(packidx->fanout_table[id0 - 1]);
298 struct got_object_id *oid = &packidx->sorted_ids[i];
300 int cmp = got_object_id_cmp(id, oid);
/*
 * Search the pack indexes of 'repo' for the object 'id'.
 *
 * The repository's pack directory is listed with readdir(3); every entry
 * accepted by is_packidx_filename() is opened with got_packidx_open() and
 * queried with get_object_idx(). When the object is found, *packidx is the
 * opened index and *idx the object's position in it. Indexes which are
 * not returned are closed again with got_packidx_close().
 *
 * Errors visible here: GOT_ERR_NO_MEM (path allocation), errno-based
 * errors from opendir/closedir, errors from got_packidx_open(), and
 * GOT_ERR_NO_OBJ.
 */
312 static const struct got_error *
313 search_packidx(struct got_packidx_v2_hdr **packidx, int *idx,
314 struct got_repository *repo, struct got_object_id *id)
316 const struct got_error *err;
322 path_packdir = got_repo_get_path_objects_pack(repo);
323 if (path_packdir == NULL)
324 return got_error(GOT_ERR_NO_MEM);
326 packdir = opendir(path_packdir);
327 if (packdir == NULL) {
328 err = got_error_from_errno();
332 while ((dent = readdir(packdir)) != NULL) {
/* Entries which are not named like a pack index are not opened. */
333 if (!is_packidx_filename(dent->d_name, dent->d_namlen))
336 if (asprintf(&path_packidx, "%s/%s", path_packdir,
337 dent->d_name) == -1) {
338 err = got_error(GOT_ERR_NO_MEM);
342 err = got_packidx_open(packidx, path_packidx);
347 *idx = get_object_idx(*packidx, id);
349 err = NULL; /* found the object */
353 got_packidx_close(*packidx);
357 err = got_error(GOT_ERR_NO_OBJ);
/* A closedir() failure is reported only if no earlier error is pending. */
360 if (packdir && closedir(packdir) != 0 && err == 0)
361 err = got_error_from_errno();
/*
 * Build the path of the pack file which belongs to 'packidx'.
 *
 * The path has the form "<pack dir>/pack-<sha1>.pack", where the SHA1
 * string is derived from packidx->trailer.packfile_sha1. On success
 * *path_packfile points to a string allocated by asprintf(3); it is NULL
 * when GOT_ERR_NO_MEM is returned.
 * GOT_ERR_PACKIDX_CSUM is returned after the digest-to-string conversion
 * (presumably when that conversion fails).
 */
365 static const struct got_error *
366 get_packfile_path(char **path_packfile, struct got_repository *repo,
367 struct got_packidx_v2_hdr *packidx)
370 char hex[SHA1_DIGEST_STRING_LENGTH];
374 *path_packfile = NULL;
376 path_packdir = got_repo_get_path_objects_pack(repo);
377 if (path_packdir == NULL)
378 return got_error(GOT_ERR_NO_MEM);
380 sha1str = got_sha1_digest_to_str(packidx->trailer.packfile_sha1,
383 return got_error(GOT_ERR_PACKIDX_CSUM);
385 if (asprintf(path_packfile, "%s/%s%s%s", path_packdir,
386 GOT_PACK_PREFIX, sha1str, GOT_PACKFILE_SUFFIX) == -1) {
387 *path_packfile = NULL;
388 return got_error(GOT_ERR_NO_MEM);
394 const struct got_error *
395 read_packfile_hdr(FILE *f, struct got_packidx_v2_hdr *packidx)
397 const struct got_error *err = NULL;
398 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
399 struct got_packfile_hdr hdr;
402 n = fread(&hdr, sizeof(hdr), 1, f);
404 return got_ferror(f, GOT_ERR_BAD_PACKIDX);
406 if (betoh32(hdr.signature) != GOT_PACKFILE_SIGNATURE ||
407 betoh32(hdr.version) != GOT_PACKFILE_VERSION ||
408 betoh32(hdr.nobjects) != totobj)
409 err = got_error(GOT_ERR_BAD_PACKFILE);
/*
 * Open the pack file which belongs to 'packidx' for reading.
 *
 * The path is computed by get_packfile_path() and stored in
 * *path_packfile; the stream is stored in *packfile and its header is
 * validated with read_packfile_hdr(). If fopen(3) fails, an errno-based
 * error is produced and the path string is freed.
 */
414 static const struct got_error *
415 open_packfile(FILE **packfile, char **path_packfile,
416 struct got_repository *repo, struct got_packidx_v2_hdr *packidx)
418 const struct got_error *err;
422 err = get_packfile_path(path_packfile, repo, packidx);
426 *packfile = fopen(*path_packfile, "rb");
427 if (*packfile == NULL) {
428 err = got_error_from_errno();
429 free(*path_packfile);
433 err = read_packfile_hdr(*packfile, packidx);
/*
 * Parse the variable-length type-and-size header of a packed object from
 * the current position of 'packfile'.
 *
 * Units of sizeof(sizeN) bytes are read for as long as
 * GOT_PACK_OBJ_SIZE_MORE is set. The first unit carries the object type
 * and the lowest size bits; the size bits of each following unit are
 * shifted left by 4 + 7 * (i - 1) and OR-ed into the size. *len receives
 * the number of bytes consumed (i * sizeof(sizeN)).
 *
 * Sizes which do not fit into 64 bits yield GOT_ERR_NO_SPACE.
 * NOTE(review): a failed read is reported via GOT_ERR_BAD_PACKIDX although
 * the stream is named 'packfile' -- confirm the intended error code.
 */
441 static const struct got_error *
442 parse_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len,
452 /* We do not support size values which don't fit in 64 bit. */
454 return got_error(GOT_ERR_NO_SPACE);
456 n = fread(&sizeN, sizeof(sizeN), 1, packfile);
458 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
/* First unit: type bits plus the low-order size bits. */
461 t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >>
462 GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT;
463 s = (sizeN & GOT_PACK_OBJ_SIZE0_VAL_MASK);
/* Later units: size bits placed above those collected so far. */
465 size_t shift = 4 + 7 * (i - 1);
466 s |= ((sizeN & GOT_PACK_OBJ_SIZE_VAL_MASK) << shift);
469 } while (sizeN & GOT_PACK_OBJ_SIZE_MORE);
473 *len = i * sizeof(sizeN);
/*
 * Allocate a got_object describing a non-deltified object stored in a
 * pack file.
 *
 * The new object is zero-initialized by calloc(3), receives its own copy
 * of 'path_packfile' (strdup), the GOT_OBJ_FLAG_PACKED flag, a copy of
 * 'id', and 'offset' as its pack_offset. Allocation failures are reported
 * as GOT_ERR_NO_MEM.
 */
477 static const struct got_error *
478 open_plain_object(struct got_object **obj, const char *path_packfile,
479 struct got_object_id *id, uint8_t type, off_t offset, size_t size)
481 *obj = calloc(1, sizeof(**obj));
483 return got_error(GOT_ERR_NO_MEM);
485 (*obj)->path_packfile = strdup(path_packfile);
486 if ((*obj)->path_packfile == NULL) {
489 return got_error(GOT_ERR_NO_MEM);
493 (*obj)->flags = GOT_OBJ_FLAG_PACKED;
496 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
497 (*obj)->pack_offset = offset;
/*
 * Parse the variable-length negative offset of an offset delta from the
 * current position of 'packfile'.
 *
 * Units of sizeof(offN) bytes are read for as long as
 * GOT_PACK_OBJ_DELTA_OFF_MORE is set, each contributing the bits selected
 * by GOT_PACK_OBJ_DELTA_OFF_VAL_MASK. *len receives the number of bytes
 * consumed (i * sizeof(offN)).
 *
 * Offsets which do not fit into 64 bits yield GOT_ERR_NO_SPACE.
 * NOTE(review): a failed read is reported via GOT_ERR_BAD_PACKIDX although
 * the stream is named 'packfile' -- confirm the intended error code.
 */
502 static const struct got_error *
503 parse_negative_offset(int64_t *offset, size_t *len, FILE *packfile)
511 /* We do not support offset values which don't fit in 64 bit. */
513 return got_error(GOT_ERR_NO_SPACE);
515 n = fread(&offN, sizeof(offN), 1, packfile);
517 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
520 o = (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
524 o += (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
527 } while (offN & GOT_PACK_OBJ_DELTA_OFF_MORE);
530 *len = i * sizeof(offN);
/*
 * Compute the pack file offset of the base object of an offset delta.
 *
 * The negative offset is parsed from the current position of 'packfile'
 * and subtracted from 'offset'. A result which is zero or negative is
 * rejected with GOT_ERR_BAD_PACKFILE.
 */
534 static const struct got_error *
535 parse_offset_delta(off_t *base_offset, FILE *packfile, off_t offset)
537 const struct got_error *err;
541 err = parse_negative_offset(&negoffset, &negofflen, packfile);
545 /* Compute the base object's offset (must be in the same pack file). */
546 *base_offset = (offset - negoffset);
547 if (*base_offset <= 0)
548 return got_error(GOT_ERR_BAD_PACKFILE);
/*
 * Forward declaration: resolve_delta_chain() and the resolve_*_delta()
 * helpers below call each other recursively.
 */
553 static const struct got_error *resolve_delta_chain(struct got_delta_chain *,
554 struct got_repository *repo, FILE *, const char *, int, off_t, size_t);
/*
 * Continue resolving a delta chain across an offset delta.
 *
 * The base object's offset is obtained with parse_offset_delta(), the
 * pack file is repositioned there, the base object's type and size are
 * parsed, and resolve_delta_chain() is invoked for the base object with
 * the offset just past its type-and-size header.
 */
556 static const struct got_error *
557 resolve_offset_delta(struct got_delta_chain *deltas,
558 struct got_repository *repo, FILE *packfile, const char *path_packfile,
561 const struct got_error *err;
567 err = parse_offset_delta(&base_offset, packfile, delta_offset);
571 /* An offset delta must be in the same packfile. */
572 if (fseeko(packfile, base_offset, SEEK_SET) != 0)
573 return got_error_from_errno();
575 err = parse_object_type_and_size(&base_type, &base_size, &base_tslen,
580 return resolve_delta_chain(deltas, repo, packfile, path_packfile,
581 base_type, base_offset + base_tslen, base_size);
/*
 * Continue resolving a delta chain across a reference delta.
 *
 * The base object's ID is read from 'packfile' and located with
 * search_packidx(), so the base may be found through any pack index of
 * the repository. The pack file belonging to that index is opened,
 * positioned at the base object, and resolve_delta_chain() is invoked for
 * the base. The base pack file's path and stream are released before
 * returning.
 */
584 static const struct got_error *
585 resolve_ref_delta(struct got_delta_chain *deltas, struct got_repository *repo,
586 FILE *packfile, const char *path_packfile, off_t delta_offset)
588 const struct got_error *err;
589 struct got_object_id id;
590 struct got_packidx_v2_hdr *packidx;
598 char *path_base_packfile;
/* The delta data starts with the ID of its base object. */
600 n = fread(&id, sizeof(id), 1, packfile);
602 return got_ferror(packfile, GOT_ERR_IO);
604 err = search_packidx(&packidx, &idx, repo, &id);
608 base_offset = get_object_offset(packidx, idx);
609 if (base_offset == (uint64_t)-1) {
610 got_packidx_close(packidx);
611 return got_error(GOT_ERR_BAD_PACKIDX);
/* The index is closed right after the pack file has been opened. */
614 err = open_packfile(&base_packfile, &path_base_packfile, repo, packidx);
615 got_packidx_close(packidx);
619 if (fseeko(base_packfile, base_offset, SEEK_SET) != 0) {
620 err = got_error_from_errno();
624 err = parse_object_type_and_size(&base_type, &base_size, &base_tslen,
629 err = resolve_delta_chain(deltas, repo, base_packfile,
630 path_base_packfile, base_type, base_offset + base_tslen, base_size);
632 free(path_base_packfile);
/* An fclose() failure is reported only if no earlier error is pending. */
633 if (base_packfile && fclose(base_packfile) == -1 && err == 0)
634 err = got_error_from_errno();
/*
 * Record one element of a delta chain and resolve the rest of the chain.
 *
 * A got_delta describing the object at 'delta_offset' is created with
 * got_delta_open() and inserted at the head of deltas->entries. Plain
 * object types terminate the recursion; offset and reference deltas are
 * followed via resolve_offset_delta() and resolve_ref_delta(). Any other
 * type yields GOT_ERR_NOT_IMPL.
 */
638 static const struct got_error *
639 resolve_delta_chain(struct got_delta_chain *deltas, struct got_repository *repo,
640 FILE *packfile, const char *path_packfile, int delta_type,
641 off_t delta_offset, size_t delta_size)
643 const struct got_error *err = NULL;
644 struct got_delta *delta;
646 delta = got_delta_open(path_packfile, delta_type, delta_offset,
649 return got_error(GOT_ERR_NO_MEM);
651 SIMPLEQ_INSERT_HEAD(&deltas->entries, delta, entry);
652 /* In case of error below, delta is freed in got_object_close(). */
654 switch (delta_type) {
655 case GOT_OBJ_TYPE_COMMIT:
656 case GOT_OBJ_TYPE_TREE:
657 case GOT_OBJ_TYPE_BLOB:
658 case GOT_OBJ_TYPE_TAG:
659 /* Plain types are the final delta base. Recursion ends. */
661 case GOT_OBJ_TYPE_OFFSET_DELTA:
662 err = resolve_offset_delta(deltas, repo, packfile,
663 path_packfile, delta_offset);
665 case GOT_OBJ_TYPE_REF_DELTA:
666 err = resolve_ref_delta(deltas, repo, packfile, path_packfile,
670 return got_error(GOT_ERR_NOT_IMPL);
/*
 * Allocate a got_object for a deltified object stored in a pack file.
 *
 * The object gets a copy of 'id', its own copy of 'path_packfile', a
 * pack_offset of 'offset + tslen', and the PACKED and DELTIFIED flags.
 * Its delta chain is then resolved with resolve_delta_chain(), and the
 * object's type is set to the type reported by
 * got_delta_chain_get_base_type(). The object is released with
 * got_object_close() (presumably on the error path).
 */
676 static const struct got_error *
677 open_delta_object(struct got_object **obj, struct got_repository *repo,
678 struct got_packidx_v2_hdr *packidx, const char *path_packfile,
679 FILE *packfile, struct got_object_id *id, off_t offset, size_t tslen,
680 int delta_type, size_t delta_size)
682 const struct got_error *err = NULL;
683 struct got_object_id base_id;
689 *obj = calloc(1, sizeof(**obj));
691 return got_error(GOT_ERR_NO_MEM);
695 (*obj)->size = 0; /* Not yet known because deltas aren't combined. */
696 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
697 (*obj)->pack_offset = offset + tslen;
699 (*obj)->path_packfile = strdup(path_packfile);
700 if ((*obj)->path_packfile == NULL) {
701 err = got_error(GOT_ERR_NO_MEM);
704 (*obj)->flags |= GOT_OBJ_FLAG_PACKED;
706 SIMPLEQ_INIT(&(*obj)->deltas.entries);
707 (*obj)->flags |= GOT_OBJ_FLAG_DELTIFIED;
/* Resolution starts at 'offset', i.e. before the type-and-size header. */
709 err = resolve_delta_chain(&(*obj)->deltas, repo, packfile,
710 path_packfile, delta_type, offset, delta_size);
714 err = got_delta_chain_get_base_type(&resolved_type, &(*obj)->deltas);
717 (*obj)->type = resolved_type;
721 got_object_close(*obj);
/*
 * Open the object at position 'idx' of 'packidx'.
 *
 * The object's offset is looked up in the index, the matching pack file
 * is opened and positioned there, and the object's type and size are
 * parsed. Plain object types are handed to open_plain_object(), offset
 * and reference deltas to open_delta_object(); any other type yields
 * GOT_ERR_NOT_IMPL. An offset of -1 from get_object_offset() is reported
 * as GOT_ERR_BAD_PACKIDX.
 */
727 static const struct got_error *
728 open_packed_object(struct got_object **obj, struct got_repository *repo,
729 struct got_packidx_v2_hdr *packidx, int idx, struct got_object_id *id)
731 const struct got_error *err = NULL;
741 offset = get_object_offset(packidx, idx);
742 if (offset == (uint64_t)-1)
743 return got_error(GOT_ERR_BAD_PACKIDX);
745 err = open_packfile(&packfile, &path_packfile, repo, packidx);
749 if (fseeko(packfile, offset, SEEK_SET) != 0) {
750 err = got_error_from_errno();
754 err = parse_object_type_and_size(&type, &size, &tslen, packfile);
759 case GOT_OBJ_TYPE_COMMIT:
760 case GOT_OBJ_TYPE_TREE:
761 case GOT_OBJ_TYPE_BLOB:
762 case GOT_OBJ_TYPE_TAG:
/* Plain objects record the offset just past the type-and-size header. */
763 err = open_plain_object(obj, path_packfile, id, type,
764 offset + tslen, size);
767 case GOT_OBJ_TYPE_OFFSET_DELTA:
768 case GOT_OBJ_TYPE_REF_DELTA:
769 err = open_delta_object(obj, repo, packidx, path_packfile,
770 packfile, id, offset, tslen, type, size);
774 err = got_error(GOT_ERR_NOT_IMPL);
/* An fclose() failure is reported only if no earlier error is pending. */
779 if (packfile && fclose(packfile) == -1 && err == 0)
780 err = got_error_from_errno();
/*
 * Open the object 'id' from the pack files of 'repo'.
 *
 * The object is located with search_packidx() and opened with
 * open_packed_object(); the pack index used for the lookup is closed
 * afterwards.
 */
784 const struct got_error *
785 got_packfile_open_object(struct got_object **obj, struct got_object_id *id,
786 struct got_repository *repo)
788 const struct got_error *err = NULL;
789 struct got_packidx_v2_hdr *packidx = NULL;
792 err = search_packidx(&packidx, &idx, repo, id);
796 err = open_packed_object(obj, repo, packidx, idx, id);
797 got_packidx_close(packidx);
/*
 * Apply all deltas of a delta chain in order and write the final result
 * to 'outfile'.
 *
 * Two temporary files from got_opentemp() serve as base and accumulator.
 * For each chain entry the delta's pack file is opened and positioned at
 * the delta's offset, the delta stream is inflated into memory, and
 * got_delta_apply() writes either to the accumulator or, for the last
 * entry, to 'outfile'. Between entries the accumulator and base files
 * trade places.
 *
 * An empty chain yields GOT_ERR_BAD_DELTA_CHAIN.
 */
801 static const struct got_error *
802 dump_delta_chain(struct got_delta_chain *deltas, FILE *outfile)
804 const struct got_error *err = NULL;
805 struct got_delta *delta;
806 FILE *base_file, *accum_file;
809 if (SIMPLEQ_EMPTY(&deltas->entries))
810 return got_error(GOT_ERR_BAD_DELTA_CHAIN);
812 base_file = got_opentemp();
813 if (base_file == NULL)
814 return got_error_from_errno();
816 accum_file = got_opentemp();
817 if (accum_file == NULL) {
818 err = got_error_from_errno();
823 /* Deltas are ordered in ascending order. */
824 SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
825 uint8_t *delta_buf = NULL;
826 size_t delta_len = 0;
829 delta_file = fopen(delta->path_packfile, "rb");
830 if (delta_file == NULL) {
831 err = got_error_from_errno();
835 if (fseeko(delta_file, delta->offset, SEEK_SET) != 0) {
837 err = got_error_from_errno();
841 /* Delta streams should always fit in memory. */
842 err = got_inflate_to_mem(&delta_buf, &delta_len, delta_file);
/* n counts the entries applied so far; it is incremented in the call. */
848 err = got_delta_apply(base_file, delta_buf, delta_len,
849 /* Final delta application writes to the output file. */
850 ++n < deltas->nentries ? accum_file : outfile);
855 if (n < deltas->nentries) {
856 /* Accumulated delta becomes the new base. */
857 FILE *tmp = accum_file;
858 accum_file = base_file;
872 const struct got_error *
873 got_packfile_extract_object(FILE **f, struct got_object *obj,
874 struct got_repository *repo)
876 const struct got_error *err = NULL;
877 FILE *packfile = NULL;
879 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
880 return got_error(GOT_ERR_OBJ_NOT_PACKED);
884 err = got_error(GOT_ERR_FILE_OPEN);
888 if ((obj->flags & GOT_OBJ_FLAG_DELTIFIED) == 0) {
889 packfile = fopen(obj->path_packfile, "rb");
890 if (packfile == NULL) {
891 err = got_error_from_errno();
895 if (fseeko(packfile, obj->pack_offset, SEEK_SET) != 0) {
896 err = got_error_from_errno();
900 err = got_inflate_to_file(&obj->size, packfile, *f);
902 err = dump_delta_chain(&obj->deltas, *f);