Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
21 #include <dirent.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <limits.h>
28 #include <sha1.h>
29 #include <endian.h>
30 #include <zlib.h>
32 #include "got_error.h"
33 #include "got_object.h"
34 #include "got_repository.h"
35 #include "got_sha1.h"
36 #include "pack.h"
37 #include "path.h"
/* File name pieces shared by pack files and their index files. */
#define GOT_PACK_PREFIX		"pack-"
#define GOT_PACKFILE_SUFFIX	".pack"
#define GOT_PACKIDX_SUFFIX	".idx"

/*
 * Expected length of a pack file name and of a pack index file name:
 * prefix, then SHA1_DIGEST_STRING_LENGTH - 1 characters, then the suffix.
 * sizeof(literal) - 1 equals strlen(literal) for these string literals.
 */
#define GOT_PACKFILE_NAMELEN \
	((sizeof(GOT_PACK_PREFIX) - 1) + \
	SHA1_DIGEST_STRING_LENGTH - 1 + \
	(sizeof(GOT_PACKFILE_SUFFIX) - 1))
#define GOT_PACKIDX_NAMELEN \
	((sizeof(GOT_PACK_PREFIX) - 1) + \
	SHA1_DIGEST_STRING_LENGTH - 1 + \
	(sizeof(GOT_PACKIDX_SUFFIX) - 1))

/* Smaller of two values; note that each argument may be evaluated twice. */
#ifndef MIN
#define MIN(x, y)	((x) < (y) ? (x) : (y))
#endif
53 static const struct got_error *
54 verify_fanout_table(uint32_t *fanout_table)
55 {
56 int i;
58 for (i = 0; i < 0xff - 1; i++) {
59 if (be32toh(fanout_table[i]) > be32toh(fanout_table[i + 1]))
60 return got_error(GOT_ERR_BAD_PACKIDX);
61 }
63 return NULL;
64 }
66 static const struct got_error *
67 get_packfile_size(size_t *size, const char *path_idx)
68 {
69 struct stat sb;
70 char *path_pack;
71 char base_path[PATH_MAX];
72 char *dot;
74 if (strlcpy(base_path, path_idx, PATH_MAX) > PATH_MAX)
75 return got_error(GOT_ERR_NO_SPACE);
77 dot = strrchr(base_path, '.');
78 if (dot == NULL)
79 return got_error(GOT_ERR_BAD_PATH);
80 *dot = '\0';
81 if (asprintf(&path_pack, "%s.pack", base_path) == -1)
82 return got_error(GOT_ERR_NO_MEM);
84 if (stat(path_pack, &sb) != 0) {
85 free(path_pack);
86 return got_error_from_errno();
87 }
89 free(path_pack);
90 *size = sb.st_size;
91 return 0;
92 }
94 const struct got_error *
95 got_packidx_open(struct got_packidx_v2_hdr **packidx, const char *path)
96 {
97 struct got_packidx_v2_hdr *p;
98 FILE *f;
99 const struct got_error *err = NULL;
100 size_t n, nobj, packfile_size;
101 SHA1_CTX ctx;
102 uint8_t sha1[SHA1_DIGEST_LENGTH];
104 SHA1Init(&ctx);
106 f = fopen(path, "rb");
107 if (f == NULL)
108 return got_error(GOT_ERR_BAD_PATH);
110 err = get_packfile_size(&packfile_size, path);
111 if (err)
112 return err;
114 p = calloc(1, sizeof(*p));
115 if (p == NULL) {
116 err = got_error(GOT_ERR_NO_MEM);
117 goto done;
120 n = fread(&p->magic, sizeof(p->magic), 1, f);
121 if (n != 1) {
122 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
123 goto done;
126 if (betoh32(p->magic) != GOT_PACKIDX_V2_MAGIC) {
127 err = got_error(GOT_ERR_BAD_PACKIDX);
128 goto done;
131 SHA1Update(&ctx, (uint8_t *)&p->magic, sizeof(p->magic));
133 n = fread(&p->version, sizeof(p->version), 1, f);
134 if (n != 1) {
135 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
136 goto done;
139 if (betoh32(p->version) != GOT_PACKIDX_VERSION) {
140 err = got_error(GOT_ERR_BAD_PACKIDX);
141 goto done;
144 SHA1Update(&ctx, (uint8_t *)&p->version, sizeof(p->version));
146 n = fread(&p->fanout_table, sizeof(p->fanout_table), 1, f);
147 if (n != 1) {
148 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
149 goto done;
152 err = verify_fanout_table(p->fanout_table);
153 if (err)
154 goto done;
156 SHA1Update(&ctx, (uint8_t *)p->fanout_table, sizeof(p->fanout_table));
158 nobj = betoh32(p->fanout_table[0xff]);
160 p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
161 if (p->sorted_ids == NULL) {
162 err = got_error(GOT_ERR_NO_MEM);
163 goto done;
166 n = fread(p->sorted_ids, sizeof(*p->sorted_ids), nobj, f);
167 if (n != nobj) {
168 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
169 goto done;
172 SHA1Update(&ctx, (uint8_t *)p->sorted_ids,
173 nobj * sizeof(*p->sorted_ids));
175 p->crc32 = calloc(nobj, sizeof(*p->crc32));
176 if (p->crc32 == NULL) {
177 err = got_error(GOT_ERR_NO_MEM);
178 goto done;
181 n = fread(p->crc32, sizeof(*p->crc32), nobj, f);
182 if (n != nobj) {
183 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
184 goto done;
187 SHA1Update(&ctx, (uint8_t *)p->crc32, nobj * sizeof(*p->crc32));
189 p->offsets = calloc(nobj, sizeof(*p->offsets));
190 if (p->offsets == NULL) {
191 err = got_error(GOT_ERR_NO_MEM);
192 goto done;
195 n = fread(p->offsets, sizeof(*p->offsets), nobj, f);
196 if (n != nobj) {
197 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
198 goto done;
201 SHA1Update(&ctx, (uint8_t *)p->offsets, nobj * sizeof(*p->offsets));
203 /* Large file offsets are contained only in files > 2GB. */
204 if (packfile_size <= 0x80000000)
205 goto checksum;
207 p->large_offsets = calloc(nobj, sizeof(*p->large_offsets));
208 if (p->large_offsets == NULL) {
209 err = got_error(GOT_ERR_NO_MEM);
210 goto done;
213 n = fread(p->large_offsets, sizeof(*p->large_offsets), nobj, f);
214 if (n != nobj) {
215 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
216 goto done;
219 SHA1Update(&ctx, (uint8_t*)p->large_offsets,
220 nobj * sizeof(*p->large_offsets));
222 checksum:
223 n = fread(&p->trailer, sizeof(p->trailer), 1, f);
224 if (n != 1) {
225 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
226 goto done;
229 SHA1Update(&ctx, p->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
230 SHA1Final(sha1, &ctx);
231 if (memcmp(p->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
232 err = got_error(GOT_ERR_PACKIDX_CSUM);
233 done:
234 fclose(f);
235 if (err)
236 got_packidx_close(p);
237 else
238 *packidx = p;
239 return err;
242 void
243 got_packidx_close(struct got_packidx_v2_hdr *packidx)
245 free(packidx->sorted_ids);
246 free(packidx->offsets);
247 free(packidx->crc32);
248 free(packidx->large_offsets);
249 free(packidx);
252 static int
253 is_packidx_filename(const char *name, size_t len)
255 if (len != GOT_PACKIDX_NAMELEN)
256 return 0;
258 if (strncmp(name, GOT_PACK_PREFIX, strlen(GOT_PACK_PREFIX)) != 0)
259 return 0;
261 if (strcmp(name + strlen(GOT_PACK_PREFIX) +
262 SHA1_DIGEST_STRING_LENGTH - 1, GOT_PACKIDX_SUFFIX) != 0)
263 return 0;
265 return 1;
268 static off_t
269 get_object_offset(struct got_packidx_v2_hdr *packidx, int idx)
271 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
272 uint32_t offset = betoh32(packidx->offsets[idx]);
273 if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
274 uint64_t loffset;
275 idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
276 if (idx < 0 || idx > totobj || packidx->large_offsets == NULL)
277 return -1;
278 loffset = betoh64(packidx->large_offsets[idx]);
279 return (loffset > INT64_MAX ? -1 : (off_t)loffset);
281 return (off_t)(offset & GOT_PACKIDX_OFFSET_VAL_MASK);
284 static int
285 get_object_idx(struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
287 u_int8_t id0 = id->sha1[0];
288 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
289 int i = 0;
291 if (id0 > 0)
292 i = betoh32(packidx->fanout_table[id0 - 1]);
294 while (i < totobj) {
295 struct got_object_id *oid = &packidx->sorted_ids[i++];
296 uint32_t offset;
297 int cmp = got_object_id_cmp(id, oid);
299 if (cmp < 0)
300 continue;
301 if (cmp > 0)
302 break;
304 return i;
307 return -1;
310 const struct got_error *
311 read_packfile_hdr(FILE *f, struct got_packidx_v2_hdr *packidx)
313 const struct got_error *err = NULL;
314 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
315 struct got_packfile_hdr hdr;
316 size_t n;
318 n = fread(&hdr, sizeof(hdr), 1, f);
319 if (n != 1)
320 return got_ferror(f, GOT_ERR_BAD_PACKIDX);
322 if (betoh32(hdr.signature) != GOT_PACKFILE_SIGNATURE ||
323 betoh32(hdr.version) != GOT_PACKFILE_VERSION ||
324 betoh32(hdr.nobjects) != totobj)
325 err = got_error(GOT_ERR_BAD_PACKFILE);
327 return err;
330 static const struct got_error *
331 dump_plain_object(FILE *infile, uint8_t type, uint64_t size, FILE *outfile)
333 const char *type_tag = got_object_get_type_tag(type);
334 size_t n;
336 if (type_tag == NULL)
337 return got_error(GOT_ERR_OBJ_TYPE);
339 fprintf(outfile, "%s %llu", type_tag, size);
340 fputc('\0', outfile);
342 while (size > 0) {
343 uint8_t data[2048];
344 size_t len = MIN(size, sizeof(data));
346 n = fread(data, len, 1, infile);
347 if (n != 1)
348 return got_ferror(infile, GOT_ERR_BAD_PACKIDX);
350 n = fwrite(data, len, 1, outfile);
351 if (n != 1)
352 return got_ferror(outfile, GOT_ERR_BAD_PACKIDX);
354 size -= len;
357 return NULL;
360 static const struct got_error *
361 decode_type_and_size(uint8_t *type, uint64_t *size, FILE *packfile)
363 uint8_t t = 0;
364 uint64_t s = 0;
365 uint8_t sizeN;
366 size_t n;
367 int i = 0;
369 do {
370 /* We do not support size values which don't fit in 64 bit. */
371 if (i > 9)
372 return got_error(GOT_ERR_NO_SPACE);
374 n = fread(&sizeN, sizeof(sizeN), 1, packfile);
375 if (n != 1)
376 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
378 if (i == 0) {
379 t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >>
380 GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT;
381 s = (sizeN & GOT_PACK_OBJ_SIZE0_VAL_MASK);
382 } else {
383 size_t shift = 4 + 7 * (i - 1);
384 s |= ((sizeN & GOT_PACK_OBJ_SIZE_VAL_MASK) << shift);
386 i++;
387 } while (sizeN & GOT_PACK_OBJ_SIZE_MORE);
389 *type = t;
390 *size = s;
391 return NULL;
394 static const struct got_error *
395 dump_packed_object(FILE **f, FILE *packfile, off_t offset)
397 const struct got_error *err = NULL;
398 const char *template = "/tmp/got.XXXXXXXXXX";
399 uint8_t type;
400 uint64_t size;
401 FILE *outfile = NULL;
403 *f = got_opentemp();
404 if (*f == NULL) {
405 err = got_error(GOT_ERR_FILE_OPEN);
406 goto done;
409 if (fseeko(packfile, offset, SEEK_SET) != 0) {
410 err = got_error_from_errno();
411 goto done;
414 err = decode_type_and_size(&type, &size, packfile);
415 if (err)
416 goto done;
418 switch (type) {
419 case GOT_OBJ_TYPE_COMMIT:
420 case GOT_OBJ_TYPE_TREE:
421 case GOT_OBJ_TYPE_BLOB:
422 err = dump_plain_object(packfile, type, size, *f);
423 break;
424 case GOT_OBJ_TYPE_REF_DELTA:
425 case GOT_OBJ_TYPE_TAG:
426 case GOT_OBJ_TYPE_OFFSET_DELTA:
427 default:
428 err = got_error(GOT_ERR_NOT_IMPL);
429 goto done;
432 rewind(*f);
433 done:
434 if (err && *f)
435 fclose(*f);
436 return err;
439 static const struct got_error *
440 extract_object(FILE **f, const char *path_packdir,
441 struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
443 const struct got_error *err = NULL;
444 int idx = get_object_idx(packidx, id);
445 off_t offset;
446 char *path_packfile;
447 FILE *packfile;
448 char hex[SHA1_DIGEST_STRING_LENGTH];
449 char *sha1str;
451 *f = NULL;
452 if (idx == -1) /* object not found in pack index */
453 return NULL;
455 offset = get_object_offset(packidx, idx);
456 if (offset == (uint64_t)-1)
457 return got_error(GOT_ERR_BAD_PACKIDX);
459 sha1str = got_sha1_digest_to_str(packidx->trailer.packfile_sha1,
460 hex, sizeof(hex));
461 if (sha1str == NULL)
462 return got_error(GOT_ERR_PACKIDX_CSUM);
464 if (asprintf(&path_packfile, "%s/%s%s%s", path_packdir,
465 GOT_PACK_PREFIX, sha1str, GOT_PACKFILE_SUFFIX) == -1)
466 return got_error(GOT_ERR_NO_MEM);
468 packfile = fopen(path_packfile, "rb");
469 if (packfile == NULL) {
470 err = got_error_from_errno();
471 goto done;
474 err = read_packfile_hdr(packfile, packidx);
475 if (err)
476 goto done;
478 printf("Dumping object at offset %llu\n", offset);
479 err = dump_packed_object(f, packfile, offset);
480 if (err)
481 goto done;
483 done:
484 free(path_packfile);
485 if (packfile && fclose(packfile) == -1 && err == 0)
486 err = got_error_from_errno();
487 return err;
490 const struct got_error *
491 got_packfile_extract_object(FILE **f, struct got_object_id *id,
492 struct got_repository *repo)
494 const struct got_error *err = NULL;
495 DIR *packdir = NULL;
496 struct dirent *dent;
497 char *path_packdir = got_repo_get_path_objects_pack(repo);
499 if (path_packdir == NULL) {
500 err = got_error(GOT_ERR_NO_MEM);
501 goto done;
504 packdir = opendir(path_packdir);
505 if (packdir == NULL) {
506 err = got_error_from_errno();
507 goto done;
510 while ((dent = readdir(packdir)) != NULL) {
511 struct got_packidx_v2_hdr *packidx;
512 char *path_packidx, *path_object;
514 if (!is_packidx_filename(dent->d_name, dent->d_namlen))
515 continue;
517 if (asprintf(&path_packidx, "%s/%s", path_packdir,
518 dent->d_name) == -1) {
519 err = got_error(GOT_ERR_NO_MEM);
520 goto done;
523 err = got_packidx_open(&packidx, path_packidx);
524 free(path_packidx);
525 if (err)
526 goto done;
528 err = extract_object(f, path_packdir, packidx, id);
529 if (err)
530 goto done;
531 if (*f != NULL)
532 break;
535 done:
536 free(path_packdir);
537 if (packdir && closedir(packdir) != 0 && err == 0)
538 err = got_error_from_errno();
539 return err;