Blob


/*
 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include <sys/stat.h>
18 #include <sys/queue.h>
20 #include <errno.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sha1.h>
25 #include <zlib.h>
26 #include <ctype.h>
27 #include <limits.h>
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_repository.h"
33 #include "got_sha1_lib.h"
34 #include "got_delta_lib.h"
35 #include "got_pack_lib.h"
36 #include "got_zbuf_lib.h"
37 #include "got_object_lib.h"
/* Smaller of two values. Either argument may be evaluated more than once. */
#ifndef MIN
#define	MIN(_a,_b)	((_a) < (_b) ? (_a) : (_b))
#endif

/* Element count of a true array (not valid for pointers). */
#ifndef nitems
#define	nitems(_a)	(sizeof(_a) / sizeof((_a)[0]))
#endif

/* Type names matched at the start of an object header. */
#define	GOT_OBJ_TAG_COMMIT		"commit"
#define	GOT_OBJ_TAG_TREE		"tree"
#define	GOT_OBJ_TAG_BLOB		"blob"

/* Line prefixes matched while parsing the body of a commit object. */
#define	GOT_COMMIT_TAG_TREE		"tree "
#define	GOT_COMMIT_TAG_PARENT		"parent "
#define	GOT_COMMIT_TAG_AUTHOR		"author "
#define	GOT_COMMIT_TAG_COMMITTER	"committer "
56 const struct got_error *
57 got_object_id_str(char **outbuf, struct got_object_id *id)
58 {
59 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
61 *outbuf = calloc(1, len);
62 if (*outbuf == NULL)
63 return got_error(GOT_ERR_NO_MEM);
65 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
66 free(*outbuf);
67 *outbuf = NULL;
68 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
69 }
71 return NULL;
72 }
74 int
75 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
76 {
77 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
78 }
80 int
81 got_object_get_type(struct got_object *obj)
82 {
83 switch (obj->type) {
84 case GOT_OBJ_TYPE_COMMIT:
85 case GOT_OBJ_TYPE_TREE:
86 case GOT_OBJ_TYPE_BLOB:
87 case GOT_OBJ_TYPE_TAG:
88 return obj->type;
89 default:
90 abort();
91 break;
92 }
94 /* not reached */
95 return 0;
96 }
98 static const struct got_error *
99 parse_object_header(struct got_object **obj, char *buf, size_t len)
101 const char *obj_tags[] = {
102 GOT_OBJ_TAG_COMMIT,
103 GOT_OBJ_TAG_TREE,
104 GOT_OBJ_TAG_BLOB
105 };
106 const int obj_types[] = {
107 GOT_OBJ_TYPE_COMMIT,
108 GOT_OBJ_TYPE_TREE,
109 GOT_OBJ_TYPE_BLOB,
110 };
111 int type = 0;
112 size_t size = 0, hdrlen = 0;
113 int i;
114 char *p = strchr(buf, '\0');
116 if (p == NULL)
117 return got_error(GOT_ERR_BAD_OBJ_HDR);
119 hdrlen = strlen(buf) + 1 /* '\0' */;
121 for (i = 0; i < nitems(obj_tags); i++) {
122 const char *tag = obj_tags[i];
123 size_t tlen = strlen(tag);
124 const char *errstr;
126 if (strncmp(buf, tag, tlen) != 0)
127 continue;
129 type = obj_types[i];
130 if (len <= tlen)
131 return got_error(GOT_ERR_BAD_OBJ_HDR);
132 size = strtonum(buf + tlen, 0, LONG_MAX, &errstr);
133 if (errstr != NULL)
134 return got_error(GOT_ERR_BAD_OBJ_HDR);
135 break;
138 if (type == 0)
139 return got_error(GOT_ERR_BAD_OBJ_HDR);
141 *obj = calloc(1, sizeof(**obj));
142 if (*obj == NULL)
143 return got_error(GOT_ERR_NO_MEM);
144 (*obj)->type = type;
145 (*obj)->hdrlen = hdrlen;
146 (*obj)->size = size;
147 return NULL;
150 static const struct got_error *
151 read_object_header(struct got_object **obj, struct got_repository *repo,
152 FILE *f)
154 const struct got_error *err;
155 struct got_zstream_buf zb;
156 char *buf;
157 const size_t zbsize = 64;
158 size_t outlen, totlen;
159 int i;
161 buf = calloc(zbsize, sizeof(char));
162 if (buf == NULL)
163 return got_error(GOT_ERR_NO_MEM);
165 err = got_inflate_init(&zb, zbsize);
166 if (err)
167 return err;
169 i = 0;
170 totlen = 0;
171 do {
172 err = got_inflate_read(&zb, f, &outlen);
173 if (err)
174 goto done;
175 if (strchr(zb.outbuf, '\0') == NULL) {
176 buf = recallocarray(buf, 1 + i, 2 + i, zbsize);
177 if (buf == NULL) {
178 err = got_error(GOT_ERR_NO_MEM);
179 goto done;
182 memcpy(buf + totlen, zb.outbuf, outlen);
183 totlen += outlen;
184 i++;
185 } while (strchr(zb.outbuf, '\0') == NULL);
187 err = parse_object_header(obj, buf, totlen);
188 done:
189 got_inflate_end(&zb);
190 return err;
193 static const struct got_error *
194 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
196 const struct got_error *err = NULL;
197 char *hex;
198 char *path_objects = got_repo_get_path_objects(repo);
200 if (path_objects == NULL)
201 return got_error(GOT_ERR_NO_MEM);
203 err = got_object_id_str(&hex, id);
204 if (err)
205 return err;
207 if (asprintf(path, "%s/%.2x/%s", path_objects,
208 id->sha1[0], hex + 2) == -1)
209 err = got_error(GOT_ERR_NO_MEM);
211 free(hex);
212 free(path_objects);
213 return err;
216 static const struct got_error *
217 open_loose_object(FILE **f, struct got_object *obj, struct got_repository *repo)
219 const struct got_error *err = NULL;
220 char *path;
222 err = object_path(&path, &obj->id, repo);
223 if (err)
224 return err;
225 *f = fopen(path, "rb");
226 if (*f == NULL) {
227 err = got_error_from_errno();
228 goto done;
230 done:
231 free(path);
232 return err;
235 const struct got_error *
236 got_object_open(struct got_object **obj, struct got_repository *repo,
237 struct got_object_id *id)
239 const struct got_error *err = NULL;
240 char *path;
241 FILE *f;
243 err = object_path(&path, id, repo);
244 if (err)
245 return err;
247 f = fopen(path, "rb");
248 if (f == NULL) {
249 if (errno != ENOENT) {
250 err = got_error_from_errno();
251 goto done;
253 err = got_packfile_open_object(obj, id, repo);
254 if (err)
255 goto done;
256 if (*obj == NULL)
257 err = got_error(GOT_ERR_NO_OBJ);
258 } else {
259 err = read_object_header(obj, repo, f);
260 if (err)
261 goto done;
262 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
264 done:
265 free(path);
266 if (err && f)
267 fclose(f);
268 return err;
272 const struct got_error *
273 got_object_open_by_id_str(struct got_object **obj, struct got_repository *repo,
274 const char *id_str)
276 struct got_object_id id;
278 if (!got_parse_sha1_digest(id.sha1, id_str))
279 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
281 return got_object_open(obj, repo, &id);
284 void
285 got_object_close(struct got_object *obj)
287 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
288 struct got_delta *delta;
289 while (!SIMPLEQ_EMPTY(&obj->deltas.entries)) {
290 delta = SIMPLEQ_FIRST(&obj->deltas.entries);
291 SIMPLEQ_REMOVE_HEAD(&obj->deltas.entries, entry);
292 got_delta_close(delta);
295 if (obj->flags & GOT_OBJ_FLAG_PACKED)
296 free(obj->path_packfile);
297 free(obj);
300 static const struct got_error *
301 parse_commit_object(struct got_commit_object **commit, char *buf, size_t len)
303 const struct got_error *err = NULL;
304 char *s = buf;
305 size_t tlen;
306 ssize_t remain = (ssize_t)len;
308 *commit = calloc(1, sizeof(**commit));
309 if (*commit == NULL)
310 return got_error(GOT_ERR_NO_MEM);
311 (*commit)->tree_id = calloc(1, sizeof(*(*commit)->tree_id));
312 if ((*commit)->tree_id == NULL) {
313 free(*commit);
314 *commit = NULL;
315 return got_error(GOT_ERR_NO_MEM);
318 SIMPLEQ_INIT(&(*commit)->parent_ids);
320 tlen = strlen(GOT_COMMIT_TAG_TREE);
321 if (strncmp(s, GOT_COMMIT_TAG_TREE, tlen) == 0) {
322 remain -= tlen;
323 if (remain < SHA1_DIGEST_STRING_LENGTH) {
324 err = got_error(GOT_ERR_BAD_OBJ_DATA);
325 goto done;
327 s += tlen;
328 if (!got_parse_sha1_digest((*commit)->tree_id->sha1, s)) {
329 err = got_error(GOT_ERR_BAD_OBJ_DATA);
330 goto done;
332 remain -= SHA1_DIGEST_STRING_LENGTH;
333 s += SHA1_DIGEST_STRING_LENGTH;
334 } else {
335 err = got_error(GOT_ERR_BAD_OBJ_DATA);
336 goto done;
339 tlen = strlen(GOT_COMMIT_TAG_PARENT);
340 while (strncmp(s, GOT_COMMIT_TAG_PARENT, tlen) == 0) {
341 struct got_parent_id *pid;
343 remain -= tlen;
344 if (remain < SHA1_DIGEST_STRING_LENGTH) {
345 err = got_error(GOT_ERR_BAD_OBJ_DATA);
346 goto done;
349 pid = calloc(1, sizeof(*pid));
350 if (pid == NULL) {
351 err = got_error(GOT_ERR_NO_MEM);
352 goto done;
354 pid->id = calloc(1, sizeof(*pid->id));
355 if (pid->id == NULL) {
356 free(pid);
357 err = got_error(GOT_ERR_NO_MEM);
358 goto done;
360 s += tlen;
361 if (!got_parse_sha1_digest(pid->id->sha1, s)) {
362 err = got_error(GOT_ERR_BAD_OBJ_DATA);
363 free(pid->id);
364 free(pid);
365 goto done;
367 SIMPLEQ_INSERT_TAIL(&(*commit)->parent_ids, pid, entry);
368 (*commit)->nparents++;
370 remain -= SHA1_DIGEST_STRING_LENGTH;
371 s += SHA1_DIGEST_STRING_LENGTH;
374 tlen = strlen(GOT_COMMIT_TAG_AUTHOR);
375 if (strncmp(s, GOT_COMMIT_TAG_AUTHOR, tlen) == 0) {
376 char *p;
378 remain -= tlen;
379 if (remain <= 0) {
380 err = got_error(GOT_ERR_BAD_OBJ_DATA);
381 goto done;
383 s += tlen;
384 p = strchr(s, '\n');
385 if (p == NULL) {
386 err = got_error(GOT_ERR_BAD_OBJ_DATA);
387 goto done;
389 *p = '\0';
390 (*commit)->author = strdup(s);
391 if ((*commit)->author == NULL) {
392 err = got_error(GOT_ERR_NO_MEM);
393 goto done;
395 s += strlen((*commit)->author) + 1;
396 remain -= strlen((*commit)->author) + 1;
399 tlen = strlen(GOT_COMMIT_TAG_COMMITTER);
400 if (strncmp(s, GOT_COMMIT_TAG_COMMITTER, tlen) == 0) {
401 char *p;
403 remain -= tlen;
404 if (remain <= 0) {
405 err = got_error(GOT_ERR_BAD_OBJ_DATA);
406 goto done;
408 s += tlen;
409 p = strchr(s, '\n');
410 if (p == NULL) {
411 err = got_error(GOT_ERR_BAD_OBJ_DATA);
412 goto done;
414 *p = '\0';
415 (*commit)->committer = strdup(s);
416 if ((*commit)->committer == NULL) {
417 err = got_error(GOT_ERR_NO_MEM);
418 goto done;
420 s += strlen((*commit)->committer) + 1;
421 remain -= strlen((*commit)->committer) + 1;
424 (*commit)->logmsg = strndup(s, remain);
425 if ((*commit)->logmsg == NULL) {
426 err = got_error(GOT_ERR_NO_MEM);
427 goto done;
429 done:
430 if (err) {
431 got_object_commit_close(*commit);
432 *commit = NULL;
434 return err;
437 static void
438 tree_entry_close(struct got_tree_entry *te)
440 free(te->id);
441 free(te->name);
442 free(te);
445 static const struct got_error *
446 parse_tree_entry(struct got_tree_entry **te, size_t *elen, char *buf,
447 size_t maxlen)
449 char *p = buf, *space;
450 const struct got_error *err = NULL;
452 *te = calloc(1, sizeof(**te));
453 if (*te == NULL)
454 return got_error(GOT_ERR_NO_MEM);
456 (*te)->id = calloc(1, sizeof(*(*te)->id));
457 if ((*te)->id == NULL) {
458 free(*te);
459 *te = NULL;
460 return got_error(GOT_ERR_NO_MEM);
463 *elen = strlen(buf) + 1;
464 if (*elen > maxlen) {
465 free(*te);
466 *te = NULL;
467 return got_error(GOT_ERR_BAD_OBJ_DATA);
470 space = strchr(buf, ' ');
471 if (space == NULL) {
472 free(*te);
473 *te = NULL;
474 return got_error(GOT_ERR_BAD_OBJ_DATA);
476 while (*p != ' ') {
477 if (*p < '0' && *p > '7') {
478 err = got_error(GOT_ERR_BAD_OBJ_DATA);
479 goto done;
481 (*te)->mode <<= 3;
482 (*te)->mode |= *p - '0';
483 p++;
486 (*te)->name = strdup(space + 1);
487 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH) {
488 err = got_error(GOT_ERR_BAD_OBJ_DATA);
489 goto done;
491 buf += strlen(buf) + 1;
492 memcpy((*te)->id->sha1, buf, SHA1_DIGEST_LENGTH);
493 *elen += SHA1_DIGEST_LENGTH;
494 done:
495 if (err) {
496 tree_entry_close(*te);
497 *te = NULL;
499 return err;
502 static const struct got_error *
503 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
504 char *buf, size_t len)
506 const struct got_error *err;
507 size_t remain = len;
509 *tree = calloc(1, sizeof(**tree));
510 if (*tree == NULL)
511 return got_error(GOT_ERR_NO_MEM);
513 SIMPLEQ_INIT(&(*tree)->entries);
515 while (remain > 0) {
516 struct got_tree_entry *te;
517 size_t elen;
519 err = parse_tree_entry(&te, &elen, buf, remain);
520 if (err)
521 return err;
522 (*tree)->nentries++;
523 SIMPLEQ_INSERT_TAIL(&(*tree)->entries, te, entry);
524 buf += elen;
525 remain -= elen;
528 if (remain != 0) {
529 got_object_tree_close(*tree);
530 return got_error(GOT_ERR_BAD_OBJ_DATA);
533 return NULL;
536 static const struct got_error *
537 read_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
539 const struct got_error *err = NULL;
540 static const size_t blocksize = 512;
541 size_t n, total, remain;
542 uint8_t *buf;
544 *outbuf = NULL;
545 *outlen = 0;
547 buf = calloc(1, blocksize);
548 if (buf == NULL)
549 return got_error(GOT_ERR_NO_MEM);
551 remain = blocksize;
552 total = 0;
553 while (1) {
554 if (remain == 0) {
555 uint8_t *newbuf;
556 newbuf = reallocarray(buf, 1, total + blocksize);
557 if (newbuf == NULL) {
558 err = got_error(GOT_ERR_NO_MEM);
559 goto done;
561 buf = newbuf;
562 remain += blocksize;
564 n = fread(buf + total, 1, remain, f);
565 if (n == 0) {
566 if (ferror(f)) {
567 err = got_ferror(f, GOT_ERR_IO);
568 goto done;
570 break; /* EOF */
572 remain -= n;
573 total += n;
574 };
576 done:
577 if (err == NULL) {
578 *outbuf = buf;
579 *outlen = total;
580 } else
581 free(buf);
582 return err;
585 static const struct got_error *
586 read_commit_object(struct got_commit_object **commit,
587 struct got_repository *repo, struct got_object *obj, FILE *f)
589 const struct got_error *err = NULL;
590 size_t len;
591 uint8_t *p;
593 if (obj->flags & GOT_OBJ_FLAG_PACKED)
594 err = read_to_mem(&p, &len, f);
595 else
596 err = got_inflate_to_mem(&p, &len, f);
597 if (err)
598 return err;
600 if (len < obj->hdrlen + obj->size) {
601 err = got_error(GOT_ERR_BAD_OBJ_DATA);
602 goto done;
605 /* Skip object header. */
606 len -= obj->hdrlen;
607 err = parse_commit_object(commit, p + obj->hdrlen, len);
608 free(p);
609 done:
610 return err;
613 const struct got_error *
614 got_object_commit_open(struct got_commit_object **commit,
615 struct got_repository *repo, struct got_object *obj)
617 const struct got_error *err = NULL;
618 FILE *f;
620 if (obj->type != GOT_OBJ_TYPE_COMMIT)
621 return got_error(GOT_ERR_OBJ_TYPE);
623 if (obj->flags & GOT_OBJ_FLAG_PACKED)
624 err = got_packfile_extract_object(&f, obj, repo);
625 else
626 err = open_loose_object(&f, obj, repo);
627 if (err)
628 return err;
630 err = read_commit_object(commit, repo, obj, f);
631 fclose(f);
632 return err;
635 void
636 got_object_commit_close(struct got_commit_object *commit)
638 struct got_parent_id *pid;
640 while (!SIMPLEQ_EMPTY(&commit->parent_ids)) {
641 pid = SIMPLEQ_FIRST(&commit->parent_ids);
642 SIMPLEQ_REMOVE_HEAD(&commit->parent_ids, entry);
643 free(pid->id);
644 free(pid);
647 free(commit->tree_id);
648 free(commit->author);
649 free(commit->committer);
650 free(commit->logmsg);
651 free(commit);
654 static const struct got_error *
655 read_tree_object(struct got_tree_object **tree,
656 struct got_repository *repo, struct got_object *obj, FILE *f)
658 const struct got_error *err = NULL;
659 size_t len;
660 uint8_t *p;
662 if (obj->flags & GOT_OBJ_FLAG_PACKED)
663 err = read_to_mem(&p, &len, f);
664 else
665 err = got_inflate_to_mem(&p, &len, f);
666 if (err)
667 return err;
669 if (len < obj->hdrlen + obj->size) {
670 err = got_error(GOT_ERR_BAD_OBJ_DATA);
671 goto done;
674 /* Skip object header. */
675 len -= obj->hdrlen;
676 err = parse_tree_object(tree, repo, p + obj->hdrlen, len);
677 free(p);
678 done:
679 return err;
682 const struct got_error *
683 got_object_tree_open(struct got_tree_object **tree,
684 struct got_repository *repo, struct got_object *obj)
686 const struct got_error *err = NULL;
687 FILE *f;
689 if (obj->type != GOT_OBJ_TYPE_TREE)
690 return got_error(GOT_ERR_OBJ_TYPE);
692 if (obj->flags & GOT_OBJ_FLAG_PACKED)
693 err = got_packfile_extract_object(&f, obj, repo);
694 else
695 err = open_loose_object(&f, obj, repo);
696 if (err)
697 return err;
699 err = read_tree_object(tree, repo, obj, f);
700 fclose(f);
701 return err;
704 void
705 got_object_tree_close(struct got_tree_object *tree)
707 struct got_tree_entry *te;
709 while (!SIMPLEQ_EMPTY(&tree->entries)) {
710 te = SIMPLEQ_FIRST(&tree->entries);
711 SIMPLEQ_REMOVE_HEAD(&tree->entries, entry);
712 tree_entry_close(te);
715 free(tree);
718 const struct got_error *
719 got_object_blob_open(struct got_blob_object **blob,
720 struct got_repository *repo, struct got_object *obj, size_t blocksize)
722 const struct got_error *err = NULL;
724 if (obj->type != GOT_OBJ_TYPE_BLOB)
725 return got_error(GOT_ERR_OBJ_TYPE);
727 if (blocksize < obj->hdrlen)
728 return got_error(GOT_ERR_NO_SPACE);
730 *blob = calloc(1, sizeof(**blob));
731 if (*blob == NULL)
732 return got_error(GOT_ERR_NO_MEM);
734 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
735 (*blob)->read_buf = calloc(1, blocksize);
736 if ((*blob)->read_buf == NULL)
737 return got_error(GOT_ERR_NO_MEM);
738 err = got_packfile_extract_object(&((*blob)->f), obj, repo);
739 if (err)
740 return err;
741 } else {
742 err = open_loose_object(&((*blob)->f), obj, repo);
743 if (err) {
744 free(*blob);
745 return err;
748 err = got_inflate_init(&(*blob)->zb, blocksize);
749 if (err != NULL) {
750 fclose((*blob)->f);
751 free(*blob);
752 return err;
755 (*blob)->read_buf = (*blob)->zb.outbuf;
756 (*blob)->flags |= GOT_BLOB_F_COMPRESSED;
759 (*blob)->hdrlen = obj->hdrlen;
760 (*blob)->blocksize = blocksize;
761 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
763 return err;
766 void
767 got_object_blob_close(struct got_blob_object *blob)
769 if (blob->flags & GOT_BLOB_F_COMPRESSED)
770 got_inflate_end(&blob->zb);
771 else
772 free(blob->read_buf);
773 fclose(blob->f);
774 free(blob);
777 char *
778 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
780 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
783 size_t
784 got_object_blob_get_hdrlen(struct got_blob_object *blob)
786 return blob->hdrlen;
789 const uint8_t *
790 got_object_blob_get_read_buf(struct got_blob_object *blob)
792 return blob->read_buf;
795 const struct got_error *
796 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
798 size_t n;
800 if (blob->flags & GOT_BLOB_F_COMPRESSED)
801 return got_inflate_read(&blob->zb, blob->f, outlenp);
803 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
804 if (n == 0 && ferror(blob->f))
805 return got_ferror(blob->f, GOT_ERR_IO);
806 *outlenp = n;
807 return NULL;