2 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <sys/queue.h>
22 #include <sys/socket.h>
39 #include "got_error.h"
40 #include "got_object.h"
41 #include "got_repository.h"
42 #include "got_opentemp.h"
45 #include "got_lib_sha1.h"
46 #include "got_lib_delta.h"
47 #include "got_lib_inflate.h"
48 #include "got_lib_object.h"
49 #include "got_lib_object_parse.h"
50 #include "got_lib_object_cache.h"
51 #include "got_lib_pack.h"
52 #include "got_lib_repository.h"
55 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
/*
 * Duplicate an object ID: storage for a new got_object_id is obtained
 * with malloc() and the contents of id1 are copied into it.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this excerpt; presumably the copy (or NULL) is returned
 * and ownership passes to the caller -- confirm.
 */
58 struct got_object_id *
59 got_object_id_dup(struct got_object_id *id1)
61 struct got_object_id *id2;
63 id2 = malloc(sizeof(*id2));
/* Whole-struct copy, not just the digest bytes. */
66 memcpy(id2, id1, sizeof(*id2));
/*
 * Compare two object IDs by their SHA1 digests.
 * The result is that of memcmp() over SHA1_DIGEST_LENGTH bytes: zero when
 * the digests are equal, otherwise the sign of the first differing byte.
 */
71 got_object_id_cmp(const struct got_object_id *id1,
72 const struct got_object_id *id2)
74 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
/*
 * Allocate a got_object_qid with malloc() and store it in *qid.
 * The memory is not zeroed by this allocation. An errno-based "malloc"
 * error is returned on the failure path.
 * NOTE(review): the failure condition and the success return are not
 * visible in this excerpt.
 */
77 const struct got_error *
78 got_object_qid_alloc_partial(struct got_object_qid **qid)
80 *qid = malloc(sizeof(**qid));
82 return got_error_from_errno("malloc");
/*
 * Produce the string form of an object ID.
 * A buffer of SHA1_DIGEST_STRING_LENGTH bytes is allocated into *outbuf and
 * filled by got_sha1_digest_to_str(). Returns an errno-based "malloc" error
 * on the allocation-failure path and GOT_ERR_BAD_OBJ_ID_STR when the
 * conversion helper returns NULL.
 * NOTE(review): cleanup of *outbuf on the conversion-failure path is not
 * visible in this excerpt.
 */
88 const struct got_error *
89 got_object_id_str(char **outbuf, struct got_object_id *id)
91 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
93 *outbuf = malloc(len);
95 return got_error_from_errno("malloc");
97 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
100 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
/*
 * Close an object handle.
 * The reference count is inspected first; for objects flagged
 * GOT_OBJ_FLAG_DELTIFIED the delta list is drained one entry at a time.
 * NOTE(review): the refcount handling inside the first branch and the
 * freeing of each delta and of obj itself are not visible in this excerpt.
 */
107 got_object_close(struct got_object *obj)
109 if (obj->refcnt > 0) {
115 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
116 struct got_delta *delta;
/* Pop deltas from the head until the list is empty. */
117 while (!STAILQ_EMPTY(&obj->deltas.entries)) {
118 delta = STAILQ_FIRST(&obj->deltas.entries);
119 STAILQ_REMOVE_HEAD(&obj->deltas.entries, entry);
/*
 * Close a raw object and release the resources backing it.
 * When obj->f is NULL the object data is unmapped with munmap() (length
 * hdrlen + size) and the descriptor obj->fd is closed; the fclose() call
 * handles the stream-backed case.
 * Only the first failure is reported: later calls overwrite err only while
 * it is still NULL.
 * NOTE(review): the refcount handling, any guards around munmap()/close()
 * and the final return are not visible in this excerpt.
 */
126 const struct got_error *
127 got_object_raw_close(struct got_raw_object *obj)
129 const struct got_error *err = NULL;
131 if (obj->refcnt > 0) {
137 if (obj->f == NULL) {
139 if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
140 err = got_error_from_errno("munmap");
/* Keep the munmap error, if any, in preference to a close error. */
141 if (close(obj->fd) == -1 && err == NULL)
142 err = got_error_from_errno("close");
146 if (fclose(obj->f) == EOF && err == NULL)
147 err = got_error_from_errno("fclose");
/*
 * Release a queue entry holding an object ID.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * frees qid -- confirm.
 */
154 got_object_qid_free(struct got_object_qid *qid)
/*
 * Empty an object ID queue: each entry is unlinked from the head of the
 * list and handed to got_object_qid_free() until the queue is empty.
 * The queue head itself is not freed by the visible code.
 */
160 got_object_id_queue_free(struct got_object_id_queue *ids)
162 struct got_object_qid *qid;
164 while (!STAILQ_EMPTY(ids)) {
/* Unlink before freeing so the list never references freed memory. */
165 qid = STAILQ_FIRST(ids);
166 STAILQ_REMOVE_HEAD(ids, entry);
167 got_object_qid_free(qid);
/*
 * Parse an object header held in buf (len bytes) and allocate *obj.
 * The header must contain a NUL terminator within len bytes. It is matched
 * against a table of type labels, and the text after the matched label is
 * parsed as the object size with strtonum() in the range 0..LONG_MAX.
 * Malformed headers yield GOT_ERR_BAD_OBJ_HDR; allocation failure yields an
 * errno-based "calloc" error.
 * NOTE(review): several statements (the remaining table entries, the
 * conditions guarding the error returns, assignment of type and size) are
 * not visible in this excerpt.
 */
171 const struct got_error *
172 got_object_parse_header(struct got_object **obj, char *buf, size_t len)
174 const char *obj_labels[] = {
175 GOT_OBJ_LABEL_COMMIT,
180 const int obj_types[] = {
/* The header ends at the first NUL byte. */
193 end = memchr(buf, '\0', len);
195 return got_error(GOT_ERR_BAD_OBJ_HDR);
197 for (i = 0; i < nitems(obj_labels); i++) {
198 const char *label = obj_labels[i];
199 size_t label_len = strlen(label);
/* The label must fit before the terminator and match exactly. */
202 if (len <= label_len || buf + label_len >= end ||
203 strncmp(buf, label, label_len) != 0)
207 size = strtonum(buf + label_len, 0, LONG_MAX, &errstr);
209 return got_error(GOT_ERR_BAD_OBJ_HDR);
214 return got_error(GOT_ERR_BAD_OBJ_HDR);
216 *obj = calloc(1, sizeof(**obj));
218 return got_error_from_errno("calloc");
/* Header length includes the terminating NUL byte. */
220 (*obj)->hdrlen = end - buf + 1;
/*
 * Read and parse the header of a compressed object from file descriptor fd.
 * Data is inflated in zbsize (64 byte) steps via got_inflate_read_fd().
 * While no NUL byte has been seen in the inflated output, the buffer is
 * grown by one more zbsize chunk with recallocarray() and inflation
 * continues into the new space. The accumulated bytes are then passed to
 * got_object_parse_header().
 * NOTE(review): the bookkeeping of nbuf and totlen, error-path jumps and
 * the freeing of buf are not visible in this excerpt.
 */
225 const struct got_error *
226 got_object_read_header(struct got_object **obj, int fd)
228 const struct got_error *err;
229 struct got_inflate_buf zb;
231 const size_t zbsize = 64;
232 size_t outlen, totlen;
237 buf = malloc(zbsize);
239 return got_error_from_errno("malloc");
242 err = got_inflate_init(&zb, buf, zbsize, NULL);
248 err = got_inflate_read_fd(&zb, fd, &outlen, NULL);
/* No terminator yet: enlarge the buffer by one chunk. */
254 if (memchr(zb.outbuf, '\0', outlen) == NULL) {
257 newbuf = recallocarray(buf, nbuf - 1, nbuf, zbsize);
258 if (newbuf == NULL) {
259 err = got_error_from_errno("recallocarray");
/* Point the inflate output at the unused tail of the new buffer. */
263 zb.outbuf = newbuf + totlen;
264 zb.outlen = (nbuf * zbsize) - totlen;
266 } while (memchr(zb.outbuf, '\0', outlen) == NULL);
268 err = got_object_parse_header(obj, buf, totlen);
271 got_inflate_end(&zb);
/*
 * Allocate a commit object with only its fixed parts set up: the structure
 * is zero-filled by calloc(), storage for tree_id is allocated separately,
 * and the parent ID queue is initialized empty.
 * NOTE(review): the NULL checks' bodies and the return statement are not
 * visible in this excerpt; callers visible elsewhere in this file treat a
 * NULL result as an errno-style failure.
 */
275 struct got_commit_object *
276 got_object_commit_alloc_partial(void)
278 struct got_commit_object *commit;
280 commit = calloc(1, sizeof(*commit));
283 commit->tree_id = malloc(sizeof(*commit->tree_id));
284 if (commit->tree_id == NULL) {
289 STAILQ_INIT(&commit->parent_ids);
/*
 * Append a parent ID, given as a SHA1 hex string, to a commit's parent list.
 * A queue entry is allocated, id_str is parsed into its digest, and the
 * entry is inserted at the tail of commit->parent_ids. If the string does
 * not parse, the entry is freed and GOT_ERR_BAD_OBJ_DATA is the error.
 * NOTE(review): the second parameter's declaration, the nparents update
 * (if any) and the return statements are not visible in this excerpt.
 */
294 const struct got_error *
295 got_object_commit_add_parent(struct got_commit_object *commit,
298 const struct got_error *err = NULL;
299 struct got_object_qid *qid;
301 err = got_object_qid_alloc_partial(&qid);
305 if (!got_parse_sha1_digest(qid->id.sha1, id_str)) {
306 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* The entry was never linked into the list, so free it here. */
307 got_object_qid_free(qid);
311 STAILQ_INSERT_TAIL(&commit->parent_ids, qid, entry);
317 static const struct got_error *
318 parse_gmtoff(time_t *gmtoff, const char *tzstr)
321 const char *p = tzstr;
329 return got_error(GOT_ERR_BAD_OBJ_DATA);
331 if (!isdigit(*p) && !isdigit(*(p + 1)))
332 return got_error(GOT_ERR_BAD_OBJ_DATA);
333 h = (((*p - '0') * 10) + (*(p + 1) - '0'));
336 if (!isdigit(*p) && !isdigit(*(p + 1)))
337 return got_error(GOT_ERR_BAD_OBJ_DATA);
338 m = ((*p - '0') * 10) + (*(p + 1) - '0');
340 *gmtoff = (h * 60 * 60 + m * 60) * sign;
/*
 * Extract the timestamp and timezone offset from the tail of an author,
 * committer or tagger line.
 * The text after the last space is duplicated and handed to
 * parse_gmtoff(). A GOT_ERR_BAD_OBJ_DATA result from that parse is treated
 * specially (see the comment about old Git versions below); any other
 * error code is not. The timestamp is then taken from the text after the
 * last remaining space and parsed with strtonum() in the range
 * 0..INT64_MAX.
 * The committer buffer is modified in place: the parsed fields are
 * stripped off its end.
 * NOTE(review): the statements that truncate the string, free tzstr and
 * return are not visible in this excerpt.
 */
344 static const struct got_error *
345 parse_commit_time(time_t *time, time_t *gmtoff, char *committer)
347 const struct got_error *err = NULL;
351 /* Parse and strip off trailing timezone indicator string. */
352 space = strrchr(committer, ' ');
354 return got_error(GOT_ERR_BAD_OBJ_DATA);
355 tzstr = strdup(space + 1);
357 return got_error_from_errno("strdup");
358 err = parse_gmtoff(gmtoff, tzstr);
361 if (err->code != GOT_ERR_BAD_OBJ_DATA)
363 /* Old versions of Git omitted the timestamp. */
370 /* Timestamp is separated from committer name + email by space. */
371 space = strrchr(committer, ' ');
373 return got_error(GOT_ERR_BAD_OBJ_DATA);
375 /* Timestamp parsed here is expressed as UNIX timestamp (UTC). */
376 *time = strtonum(space + 1, 0, INT64_MAX, &errstr);
378 return got_error(GOT_ERR_BAD_OBJ_DATA);
380 /* Strip off parsed time information, leaving just author and email. */
/*
 * Close a commit object.
 * The reference count is checked twice; once past those checks the parent
 * ID queue is emptied and the tree ID, author, committer and log message
 * strings are freed.
 * NOTE(review): the statement between the two refcount checks (presumably
 * a decrement followed by an early return while references remain) and the
 * freeing of the commit structure itself are not visible in this excerpt.
 */
387 got_object_commit_close(struct got_commit_object *commit)
389 if (commit->refcnt > 0) {
391 if (commit->refcnt > 0)
395 got_object_id_queue_free(&commit->parent_ids);
396 free(commit->tree_id);
397 free(commit->author);
398 free(commit->committer);
399 free(commit->logmsg);
/*
 * Return the commit's tree ID. The pointer stored in the commit is
 * returned as-is; no copy is made.
 */
403 struct got_object_id *
404 got_object_commit_get_tree_id(struct got_commit_object *commit)
406 return commit->tree_id;
/* Return the value of the commit's nparents field. */
410 got_object_commit_get_nparents(struct got_commit_object *commit)
412 return commit->nparents;
/*
 * Return a read-only pointer to the commit's parent ID queue. The queue
 * is embedded in the commit object, so the pointer refers into it.
 */
415 const struct got_object_id_queue *
416 got_object_commit_get_parent_ids(struct got_commit_object *commit)
418 return &commit->parent_ids;
/* Return the commit's author field as stored; no copy is made. */
422 got_object_commit_get_author(struct got_commit_object *commit)
424 return commit->author;
/* Return the commit's author_time field. */
428 got_object_commit_get_author_time(struct got_commit_object *commit)
430 return commit->author_time;
/* Return the commit's author_gmtoff field (author timezone offset). */
433 time_t got_object_commit_get_author_gmtoff(struct got_commit_object *commit)
435 return commit->author_gmtoff;
/* Return the commit's committer field as stored; no copy is made. */
439 got_object_commit_get_committer(struct got_commit_object *commit)
441 return commit->committer;
/* Return the commit's committer_time field. */
445 got_object_commit_get_committer_time(struct got_commit_object *commit)
447 return commit->committer_time;
/* Return the commit's committer_gmtoff field (committer timezone offset). */
451 got_object_commit_get_committer_gmtoff(struct got_commit_object *commit)
453 return commit->committer_gmtoff;
/*
 * Build a cleaned-up copy of the commit's log message in *logmsg.
 * The copy is allocated with room for the stored message plus a trailing
 * newline and NUL. Leading header lines are scanned up to the first empty
 * line and compared against the tree, author, parent and committer labels.
 * The remaining text is appended with strlcat(); GOT_ERR_NO_SPACE is the
 * error if it does not fit. Runs of trailing whitespace are then trimmed
 * and a final newline is appended when the result does not end with one.
 * NOTE(review): what is done with matching versus non-matching header
 * lines, the error-path cleanup and the return are not visible in this
 * excerpt.
 */
456 const struct got_error *
457 got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit)
459 const struct got_error *err = NULL;
464 len = strlen(commit->logmsg);
465 *logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */
467 return got_error_from_errno("malloc");
470 * Strip out unusual headers. Headers are separated from the commit
471 * message body by a single empty line.
473 src = commit->logmsg;
475 while (*src != '\0' && *src != '\n') {
476 int copy_header = 1, eol = 0;
477 if (strncmp(src, GOT_COMMIT_LABEL_TREE,
478 strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
479 strncmp(src, GOT_COMMIT_LABEL_AUTHOR,
480 strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
481 strncmp(src, GOT_COMMIT_LABEL_PARENT,
482 strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
483 strncmp(src, GOT_COMMIT_LABEL_COMMITTER,
484 strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
487 while (*src != '\0' && !eol) {
499 if (strlcat(*logmsg, src, len + 1) >= len + 1) {
500 err = got_error(GOT_ERR_NO_SPACE);
504 /* Trim redundant trailing whitespace. */
505 len = strlen(*logmsg);
506 while (len > 1 && isspace((unsigned char)(*logmsg)[len - 2]) &&
507 isspace((unsigned char)(*logmsg)[len - 1])) {
508 (*logmsg)[len - 1] = '\0';
512 /* Append a trailing newline if missing. */
513 if (len > 0 && (*logmsg)[len - 1] != '\n') {
514 (*logmsg)[len] = '\n';
515 (*logmsg)[len + 1] = '\0';
/*
 * Return the commit's stored log message without any processing; the
 * pointer held by the commit is returned as-is.
 */
526 got_object_commit_get_logmsg_raw(struct got_commit_object *commit)
528 return commit->logmsg;
/*
 * Parse the contents of a commit object from buf into a newly allocated
 * *commit.
 * Fields are consumed in a fixed order: the tree label and ID, zero or
 * more parent labels and IDs, the author line, the committer line, and
 * finally the remaining bytes, which are stored as the log message.
 * Author and committer lines are passed through parse_commit_time() to
 * split off their timestamp and timezone before the text is duplicated.
 * Malformed input yields GOT_ERR_BAD_OBJ_DATA, allocation failures yield
 * errno-based errors, and the partially built commit is closed on the
 * error path.
 * NOTE(review): the remaining parameters, the advancing of s/remain past
 * labels and newlines, and the goto/label structure are not visible in
 * this excerpt.
 */
531 const struct got_error *
532 got_object_parse_commit(struct got_commit_object **commit, char *buf,
535 const struct got_error *err = NULL;
538 ssize_t remain = (ssize_t)len;
541 return got_error(GOT_ERR_BAD_OBJ_DATA);
543 *commit = got_object_commit_alloc_partial();
545 return got_error_from_errno("got_object_commit_alloc_partial");
/* Tree ID. */
547 label_len = strlen(GOT_COMMIT_LABEL_TREE);
548 if (strncmp(s, GOT_COMMIT_LABEL_TREE, label_len) == 0) {
550 if (remain < SHA1_DIGEST_STRING_LENGTH) {
551 err = got_error(GOT_ERR_BAD_OBJ_DATA);
555 if (!got_parse_sha1_digest((*commit)->tree_id->sha1, s)) {
556 err = got_error(GOT_ERR_BAD_OBJ_DATA);
559 remain -= SHA1_DIGEST_STRING_LENGTH;
560 s += SHA1_DIGEST_STRING_LENGTH;
562 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Parent IDs; the loop runs once per consecutive parent label. */
566 label_len = strlen(GOT_COMMIT_LABEL_PARENT);
567 while (strncmp(s, GOT_COMMIT_LABEL_PARENT, label_len) == 0) {
569 if (remain < SHA1_DIGEST_STRING_LENGTH) {
570 err = got_error(GOT_ERR_BAD_OBJ_DATA);
574 err = got_object_commit_add_parent(*commit, s);
578 remain -= SHA1_DIGEST_STRING_LENGTH;
579 s += SHA1_DIGEST_STRING_LENGTH;
/* Author line, terminated by a newline within the remaining bytes. */
582 label_len = strlen(GOT_COMMIT_LABEL_AUTHOR);
583 if (strncmp(s, GOT_COMMIT_LABEL_AUTHOR, label_len) == 0) {
589 err = got_error(GOT_ERR_BAD_OBJ_DATA);
593 p = memchr(s, '\n', remain);
595 err = got_error(GOT_ERR_BAD_OBJ_DATA);
600 err = parse_commit_time(&(*commit)->author_time,
601 &(*commit)->author_gmtoff, s);
604 (*commit)->author = strdup(s);
605 if ((*commit)->author == NULL) {
606 err = got_error_from_errno("strdup");
/* Committer line, handled the same way as the author line. */
613 label_len = strlen(GOT_COMMIT_LABEL_COMMITTER);
614 if (strncmp(s, GOT_COMMIT_LABEL_COMMITTER, label_len) == 0) {
620 err = got_error(GOT_ERR_BAD_OBJ_DATA);
624 p = memchr(s, '\n', remain);
626 err = got_error(GOT_ERR_BAD_OBJ_DATA);
631 err = parse_commit_time(&(*commit)->committer_time,
632 &(*commit)->committer_gmtoff, s);
635 (*commit)->committer = strdup(s);
636 if ((*commit)->committer == NULL) {
637 err = got_error_from_errno("strdup");
/* Everything left over is kept as the log message. */
644 (*commit)->logmsg = strndup(s, remain);
645 if ((*commit)->logmsg == NULL) {
646 err = got_error_from_errno("strndup");
651 got_object_commit_close(*commit);
/*
 * Close a tree object. The reference count is checked twice before any
 * teardown takes place.
 * NOTE(review): the statement between the two checks (presumably a
 * decrement) and the freeing of the tree are not visible in this excerpt.
 */
658 got_object_tree_close(struct got_tree_object *tree)
660 if (tree->refcnt > 0) {
662 if (tree->refcnt > 0)
670 static const struct got_error *
671 parse_tree_entry(struct got_parsed_tree_entry *pte, size_t *elen, char *buf,
678 *elen = strnlen(buf, maxlen) + 1;
680 return got_error(GOT_ERR_BAD_OBJ_DATA);
682 space = memchr(buf, ' ', *elen);
683 if (space == NULL || space <= buf)
684 return got_error(GOT_ERR_BAD_OBJ_DATA);
689 if (*p < '0' && *p > '7')
690 return got_error(GOT_ERR_BAD_OBJ_DATA);
692 pte->mode |= *p - '0';
696 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH)
697 return got_error(GOT_ERR_BAD_OBJ_DATA);
699 pte->name = space + 1;
700 pte->namelen = strlen(pte->name);
703 *elen += SHA1_DIGEST_LENGTH;
/*
 * Sort comparator for parsed tree entries: orders two entries by name
 * using got_path_cmp() with the stored name lengths.
 */
708 pte_cmp(const void *pa, const void *pb)
710 const struct got_parsed_tree_entry *a = pa, *b = pb;
712 return got_path_cmp(a->name, b->name, a->namelen, b->namelen);
/*
 * Parse the contents of a tree object (buf, len bytes) into an array of
 * parsed entries stored in *entries, with the count in *nentries.
 * An empty tree returns NULL (no error) early. The array starts with
 * nalloc (16) zeroed slots and grows by nalloc slots at a time through
 * recallocarray(). Entries are parsed one by one with parse_tree_entry(),
 * then sorted with mergesort() using pte_cmp, and adjacent entries are
 * compared so that duplicate names produce GOT_ERR_TREE_DUP_ENTRY.
 * NOTE(review): the loop header, the advancing of buf/remain, the
 * condition behind the GOT_ERR_BAD_OBJ_DATA assignment and the error-path
 * cleanup are not visible in this excerpt.
 */
715 const struct got_error *
716 got_object_parse_tree(struct got_parsed_tree_entry **entries, int *nentries,
717 uint8_t *buf, size_t len)
719 const struct got_error *err = NULL;
720 size_t remain = len, totalloc;
721 const size_t nalloc = 16;
722 struct got_parsed_tree_entry *pte;
727 return NULL; /* tree is empty */
729 *entries = calloc(nalloc, sizeof(**entries));
730 if (*entries == NULL)
731 return got_error_from_errno("calloc");
/* Grow the array when all allocated slots are in use. */
737 if (*nentries >= totalloc) {
738 pte = recallocarray(*entries, totalloc,
739 totalloc + nalloc, sizeof(**entries));
741 err = got_error_from_errno("recallocarray");
748 pte = &(*entries)[*nentries];
749 err = parse_tree_entry(pte, &elen, buf, remain);
758 err = got_error(GOT_ERR_BAD_OBJ_DATA);
763 mergesort(*entries, *nentries, sizeof(**entries), pte_cmp);
/* After sorting, duplicate names are necessarily adjacent. */
765 for (i = 0; i < *nentries - 1; i++) {
766 struct got_parsed_tree_entry *prev = &(*entries)[i];
767 pte = &(*entries)[i + 1];
768 if (got_path_cmp(prev->name, pte->name,
769 prev->namelen, pte->namelen) == 0) {
770 err = got_error(GOT_ERR_TREE_DUP_ENTRY);
/*
 * Close a tag object. The reference count is checked before teardown.
 * NOTE(review): the refcount handling and the freeing of the tag's fields
 * are not visible in this excerpt.
 */
785 got_object_tag_close(struct got_tag_object *tag)
787 if (tag->refcnt > 0) {
/*
 * Parse the contents of a tag object from buf into a newly allocated,
 * zero-filled *tag.
 * Fields are consumed in order: the object label and SHA1 ID, the type
 * label followed by one of the commit/tree/blob/tag type names, the tag
 * name line, an optional tagger line (split with parse_commit_time()),
 * and finally the remaining bytes, which become the tag message. When no
 * tagger line is present the tagger is set to an empty string.
 * Malformed input yields GOT_ERR_BAD_OBJ_DATA, allocation failures yield
 * errno-based errors, and the partially built tag is closed on the error
 * path.
 * NOTE(review): the advancing of s/remain past labels and newlines, the
 * conditions behind several error assignments and the goto/label structure
 * are not visible in this excerpt.
 */
799 const struct got_error *
800 got_object_parse_tag(struct got_tag_object **tag, uint8_t *buf, size_t len)
802 const struct got_error *err = NULL;
808 return got_error(GOT_ERR_BAD_OBJ_DATA);
810 *tag = calloc(1, sizeof(**tag));
812 return got_error_from_errno("calloc");
/* ID of the tagged object. */
814 label_len = strlen(GOT_TAG_LABEL_OBJECT);
815 if (strncmp(s, GOT_TAG_LABEL_OBJECT, label_len) == 0) {
817 if (remain < SHA1_DIGEST_STRING_LENGTH) {
818 err = got_error(GOT_ERR_BAD_OBJ_DATA);
822 if (!got_parse_sha1_digest((*tag)->id.sha1, s)) {
823 err = got_error(GOT_ERR_BAD_OBJ_DATA);
826 remain -= SHA1_DIGEST_STRING_LENGTH;
827 s += SHA1_DIGEST_STRING_LENGTH;
829 err = got_error(GOT_ERR_BAD_OBJ_DATA);
834 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Type of the tagged object, given as one of the object type labels. */
838 label_len = strlen(GOT_TAG_LABEL_TYPE);
839 if (strncmp(s, GOT_TAG_LABEL_TYPE, label_len) == 0) {
842 err = got_error(GOT_ERR_BAD_OBJ_DATA);
846 if (strncmp(s, GOT_OBJ_LABEL_COMMIT,
847 strlen(GOT_OBJ_LABEL_COMMIT)) == 0) {
848 (*tag)->obj_type = GOT_OBJ_TYPE_COMMIT;
849 label_len = strlen(GOT_OBJ_LABEL_COMMIT);
852 } else if (strncmp(s, GOT_OBJ_LABEL_TREE,
853 strlen(GOT_OBJ_LABEL_TREE)) == 0) {
854 (*tag)->obj_type = GOT_OBJ_TYPE_TREE;
855 label_len = strlen(GOT_OBJ_LABEL_TREE);
858 } else if (strncmp(s, GOT_OBJ_LABEL_BLOB,
859 strlen(GOT_OBJ_LABEL_BLOB)) == 0) {
860 (*tag)->obj_type = GOT_OBJ_TYPE_BLOB;
861 label_len = strlen(GOT_OBJ_LABEL_BLOB);
864 } else if (strncmp(s, GOT_OBJ_LABEL_TAG,
865 strlen(GOT_OBJ_LABEL_TAG)) == 0) {
866 (*tag)->obj_type = GOT_OBJ_TYPE_TAG;
867 label_len = strlen(GOT_OBJ_LABEL_TAG);
871 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* The type name must be followed by a newline. */
875 if (remain <= 0 || *s != '\n') {
876 err = got_error(GOT_ERR_BAD_OBJ_DATA);
882 err = got_error(GOT_ERR_BAD_OBJ_DATA);
886 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Tag name, up to the next newline. */
890 label_len = strlen(GOT_TAG_LABEL_TAG);
891 if (strncmp(s, GOT_TAG_LABEL_TAG, label_len) == 0) {
896 err = got_error(GOT_ERR_BAD_OBJ_DATA);
900 p = memchr(s, '\n', remain);
902 err = got_error(GOT_ERR_BAD_OBJ_DATA);
907 (*tag)->tag = strndup(s, slen);
908 if ((*tag)->tag == NULL) {
909 err = got_error_from_errno("strndup");
915 err = got_error(GOT_ERR_BAD_OBJ_DATA);
919 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Optional tagger line with timestamp and timezone. */
923 label_len = strlen(GOT_TAG_LABEL_TAGGER);
924 if (strncmp(s, GOT_TAG_LABEL_TAGGER, label_len) == 0) {
930 err = got_error(GOT_ERR_BAD_OBJ_DATA);
934 p = memchr(s, '\n', remain);
936 err = got_error(GOT_ERR_BAD_OBJ_DATA);
941 err = parse_commit_time(&(*tag)->tagger_time,
942 &(*tag)->tagger_gmtoff, s);
945 (*tag)->tagger = strdup(s);
946 if ((*tag)->tagger == NULL) {
947 err = got_error_from_errno("strdup");
953 err = got_error(GOT_ERR_BAD_OBJ_DATA);
957 /* Some old tags in the Linux git repo have no tagger. */
958 (*tag)->tagger = strdup("");
959 if ((*tag)->tagger == NULL) {
960 err = got_error_from_errno("strdup");
/* Everything left over is kept as the tag message. */
965 (*tag)->tagmsg = strndup(s, remain);
966 if ((*tag)->tagmsg == NULL) {
967 err = got_error_from_errno("strndup");
972 got_object_tag_close(*tag);
978 const struct got_error *
979 got_read_file_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
981 const struct got_error *err = NULL;
982 static const size_t blocksize = 512;
983 size_t n, total, remain;
989 buf = malloc(blocksize);
991 return got_error_from_errno("malloc");
998 newbuf = reallocarray(buf, 1, total + blocksize);
999 if (newbuf == NULL) {
1000 err = got_error_from_errno("reallocarray");
1004 remain += blocksize;
1006 n = fread(buf + total, 1, remain, f);
1009 err = got_ferror(f, GOT_ERR_IO);