2 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
19 #include <sys/queue.h>
21 #include <sys/socket.h>
36 #include "got_compat.h"
38 #include "got_error.h"
39 #include "got_object.h"
40 #include "got_repository.h"
41 #include "got_opentemp.h"
44 #include "got_lib_sha1.h"
45 #include "got_lib_delta.h"
46 #include "got_lib_inflate.h"
47 #include "got_lib_object.h"
48 #include "got_lib_object_parse.h"
49 #include "got_lib_object_cache.h"
50 #include "got_lib_pack.h"
51 #include "got_lib_repository.h"
/* Number of elements in the array _a; only meaningful for true arrays, not pointers. */
54 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
/*
 * Duplicate an object ID: allocate a new struct got_object_id with malloc()
 * and copy the contents of id1 into it.
 * NOTE(review): the copy is presumably owned by the caller, and NULL is
 * presumably returned when malloc() fails -- confirm against the full body.
 */
57 struct got_object_id *
58 got_object_id_dup(struct got_object_id *id1)
60 struct got_object_id *id2;
62 id2 = malloc(sizeof(*id2));
65 memcpy(id2, id1, sizeof(*id2));
/*
 * Compare two object IDs by their SHA1 digests.
 * Returns the memcmp() result over SHA1_DIGEST_LENGTH bytes: zero when the
 * digests are equal, non-zero otherwise.
 */
70 got_object_id_cmp(const struct got_object_id *id1,
71 const struct got_object_id *id2)
73 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
/*
 * Allocate a got_object_qid with malloc() and store the pointer in *qid.
 * If malloc() fails, an error derived from errno is returned.
 */
76 const struct got_error *
77 got_object_qid_alloc_partial(struct got_object_qid **qid)
79 *qid = malloc(sizeof(**qid));
81 return got_error_from_errno("malloc");
/*
 * Convert an object ID to its string form in a newly allocated buffer.
 * *outbuf receives a malloc()ed buffer of SHA1_DIGEST_STRING_LENGTH bytes
 * which is filled in by got_sha1_digest_to_str().
 * Returns an errno-based error if malloc() fails, or
 * GOT_ERR_BAD_OBJ_ID_STR if the digest cannot be converted.
 */
87 const struct got_error *
88 got_object_id_str(char **outbuf, struct got_object_id *id)
90 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
92 *outbuf = malloc(len);
94 return got_error_from_errno("malloc");
96 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
99 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
/*
 * Release a reference to obj.
 * NOTE(review): the refcnt check suggests the object is only torn down once
 * no references remain -- confirm.
 * For objects flagged GOT_OBJ_FLAG_DELTIFIED, the delta queue is drained by
 * removing entries from its head until it is empty.
 */
106 got_object_close(struct got_object *obj)
108 if (obj->refcnt > 0) {
114 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
115 struct got_delta *delta;
116 while (!STAILQ_EMPTY(&obj->deltas.entries)) {
117 delta = STAILQ_FIRST(&obj->deltas.entries);
118 STAILQ_REMOVE_HEAD(&obj->deltas.entries, entry);
/*
 * Release a reference to a raw object and its backing storage.
 * When obj->f is NULL, obj->data is unmapped with munmap() over
 * hdrlen + size bytes and obj->fd is closed.
 * NOTE(review): fclose(obj->f) presumably runs in the opposite branch --
 * confirm.
 * Only the first failure is reported; later failures do not overwrite err.
 */
125 const struct got_error *
126 got_object_raw_close(struct got_raw_object *obj)
128 const struct got_error *err = NULL;
130 if (obj->refcnt > 0) {
136 if (obj->f == NULL) {
138 if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
139 err = got_error_from_errno("munmap");
140 if (close(obj->fd) == -1 && err == NULL)
141 err = got_error_from_errno("close");
145 if (fclose(obj->f) == EOF && err == NULL)
146 err = got_error_from_errno("fclose");
/* Release a single queue entry; got_object_id_queue_free() calls this per element. */
153 got_object_qid_free(struct got_object_qid *qid)
/*
 * Empty an object ID queue: repeatedly detach the head entry and pass it to
 * got_object_qid_free() until the queue is empty.
 */
159 got_object_id_queue_free(struct got_object_id_queue *ids)
161 struct got_object_qid *qid;
163 while (!STAILQ_EMPTY(ids)) {
164 qid = STAILQ_FIRST(ids);
165 STAILQ_REMOVE_HEAD(ids, entry);
166 got_object_qid_free(qid);
/*
 * Parse an object header (a type label followed by a size, terminated by a
 * NUL byte) from the first len bytes of buf.
 * - A NUL terminator must be present within len bytes.
 * - buf is matched against each entry of obj_labels in turn.
 *   NOTE(review): obj_types presumably holds the matching type codes in the
 *   same order -- confirm.
 * - The size after the label is parsed with strtonum() in [0, LONG_MAX].
 * Malformed headers yield GOT_ERR_BAD_OBJ_HDR.
 * On success *obj points to a zero-initialized (calloc) object whose hdrlen
 * spans the header including its NUL terminator.
 */
170 const struct got_error *
171 got_object_parse_header(struct got_object **obj, char *buf, size_t len)
173 const char *obj_labels[] = {
174 GOT_OBJ_LABEL_COMMIT,
179 const int obj_types[] = {
192 end = memchr(buf, '\0', len);
194 return got_error(GOT_ERR_BAD_OBJ_HDR);
196 for (i = 0; i < nitems(obj_labels); i++) {
197 const char *label = obj_labels[i];
198 size_t label_len = strlen(label);
/* Skip labels that do not fit before the terminator or do not match. */
201 if (len <= label_len || buf + label_len >= end ||
202 strncmp(buf, label, label_len) != 0)
206 size = strtonum(buf + label_len, 0, LONG_MAX, &errstr);
208 return got_error(GOT_ERR_BAD_OBJ_HDR);
213 return got_error(GOT_ERR_BAD_OBJ_HDR);
215 *obj = calloc(1, sizeof(**obj));
217 return got_error_from_errno("calloc");
/* Header length counts the terminating NUL byte as well. */
219 (*obj)->hdrlen = end - buf + 1;
/*
 * Inflate the beginning of the compressed object readable from fd until its
 * header is complete, then parse it with got_object_parse_header().
 * Output is collected in chunks of zbsize (64) bytes. While the freshly
 * inflated data contains no NUL terminator, the buffer is grown by one more
 * chunk with recallocarray() and inflation continues at offset totlen.
 */
224 const struct got_error *
225 got_object_read_header(struct got_object **obj, int fd)
227 const struct got_error *err;
228 struct got_inflate_buf zb;
230 const size_t zbsize = 64;
231 size_t outlen, totlen;
236 buf = malloc(zbsize);
238 return got_error_from_errno("malloc");
241 err = got_inflate_init(&zb, buf, zbsize, NULL);
247 err = got_inflate_read_fd(&zb, fd, &outlen, NULL);
/* No terminator yet: enlarge the buffer and keep inflating. */
253 if (memchr(zb.outbuf, '\0', outlen) == NULL) {
256 newbuf = recallocarray(buf, nbuf - 1, nbuf, zbsize);
257 if (newbuf == NULL) {
258 err = got_error_from_errno("recallocarray");
/* Point the inflate output at the unused tail of the grown buffer. */
262 zb.outbuf = newbuf + totlen;
263 zb.outlen = (nbuf * zbsize) - totlen;
265 } while (memchr(zb.outbuf, '\0', outlen) == NULL);
267 err = got_object_parse_header(obj, buf, totlen);
270 got_inflate_end(&zb);
/*
 * Allocate a zero-initialized commit object together with storage for its
 * tree ID, and initialize its parent ID queue as empty.
 * NOTE(review): failure is presumably signalled by returning NULL with errno
 * set by the failed allocation -- confirm.
 */
274 struct got_commit_object *
275 got_object_commit_alloc_partial(void)
277 struct got_commit_object *commit;
279 commit = calloc(1, sizeof(*commit));
282 commit->tree_id = malloc(sizeof(*commit->tree_id));
283 if (commit->tree_id == NULL) {
288 STAILQ_INIT(&commit->parent_ids);
/*
 * Append a parent commit ID, given in string form as id_str, to the tail of
 * commit->parent_ids.
 * If id_str does not parse as a SHA1 digest, the newly allocated queue entry
 * is freed and the error is set to GOT_ERR_BAD_OBJ_DATA.
 */
293 const struct got_error *
294 got_object_commit_add_parent(struct got_commit_object *commit,
297 const struct got_error *err = NULL;
298 struct got_object_qid *qid;
300 err = got_object_qid_alloc_partial(&qid);
304 if (!got_parse_sha1_digest(qid->id.sha1, id_str)) {
305 err = got_error(GOT_ERR_BAD_OBJ_DATA);
306 got_object_qid_free(qid);
310 STAILQ_INSERT_TAIL(&commit->parent_ids, qid, entry);
/*
 * Parse a timezone offset string (a two-digit hour followed by a two-digit
 * minute, with a sign factor applied) and store the offset in seconds
 * in *gmtoff.
 * Returns GOT_ERR_BAD_OBJ_DATA unless every hour and minute position holds a
 * decimal digit.
 */
316 static const struct got_error *
317 parse_gmtoff(time_t *gmtoff, const char *tzstr)
320 const char *p = tzstr;
328 return got_error(GOT_ERR_BAD_OBJ_DATA);
/*
 * Both hour characters must be digits. Using || also stops at the first
 * non-digit (including the terminating NUL) before looking at p + 1.
 */
330 if (!isdigit((unsigned char)*p) || !isdigit((unsigned char)*(p + 1)))
331 return got_error(GOT_ERR_BAD_OBJ_DATA);
332 h = (((*p - '0') * 10) + (*(p + 1) - '0'));
/* Same requirement for the two minute characters. */
335 if (!isdigit((unsigned char)*p) || !isdigit((unsigned char)*(p + 1)))
336 return got_error(GOT_ERR_BAD_OBJ_DATA);
337 m = ((*p - '0') * 10) + (*(p + 1) - '0');
339 *gmtoff = (h * 60 * 60 + m * 60) * sign;
/*
 * Extract the trailing timestamp and timezone from an author/committer line.
 * The text after the last space is parsed as a timezone with parse_gmtoff();
 * the text after the last space found next is parsed as a UNIX timestamp
 * with strtonum() in [0, INT64_MAX].
 * The time information is stripped from the committer buffer so that only
 * the name and email remain.
 * Returns GOT_ERR_BAD_OBJ_DATA when a separating space or a valid timestamp
 * is missing.
 */
343 static const struct got_error *
344 parse_commit_time(time_t *time, time_t *gmtoff, char *committer)
346 const struct got_error *err = NULL;
350 /* Parse and strip off trailing timezone indicator string. */
351 space = strrchr(committer, ' ');
353 return got_error(GOT_ERR_BAD_OBJ_DATA);
354 tzstr = strdup(space + 1);
356 return got_error_from_errno("strdup");
357 err = parse_gmtoff(gmtoff, tzstr);
360 if (err->code != GOT_ERR_BAD_OBJ_DATA)
362 /* Old versions of Git omitted the timestamp. */
369 /* Timestamp is separated from committer name + email by space. */
370 space = strrchr(committer, ' ');
372 return got_error(GOT_ERR_BAD_OBJ_DATA);
374 /* Timestamp parsed here is expressed as UNIX timestamp (UTC). */
375 *time = strtonum(space + 1, 0, INT64_MAX, &errstr);
377 return got_error(GOT_ERR_BAD_OBJ_DATA);
379 /* Strip off parsed time information, leaving just author and email. */
/*
 * Release a reference to commit.
 * NOTE(review): the refcnt checks suggest teardown only happens once no
 * references remain -- confirm.
 * Teardown frees the parent ID queue, tree_id, author, committer and logmsg.
 */
386 got_object_commit_close(struct got_commit_object *commit)
388 if (commit->refcnt > 0) {
390 if (commit->refcnt > 0)
394 got_object_id_queue_free(&commit->parent_ids);
395 free(commit->tree_id);
396 free(commit->author);
397 free(commit->committer);
398 free(commit->logmsg);
/* Return the commit's tree ID; the pointer refers to the commit's own storage, not a copy. */
402 struct got_object_id *
403 got_object_commit_get_tree_id(struct got_commit_object *commit)
405 return commit->tree_id;
/* Return the value of commit->nparents. */
409 got_object_commit_get_nparents(struct got_commit_object *commit)
411 return commit->nparents;
/* Return a read-only pointer to the parent ID queue embedded in the commit. */
414 const struct got_object_id_queue *
415 got_object_commit_get_parent_ids(struct got_commit_object *commit)
417 return &commit->parent_ids;
/* Return the commit's stored author string (not a copy). */
421 got_object_commit_get_author(struct got_commit_object *commit)
423 return commit->author;
/* Return the commit's stored author timestamp. */
427 got_object_commit_get_author_time(struct got_commit_object *commit)
429 return commit->author_time;
/* Return the commit's stored author GMT offset. */
432 time_t got_object_commit_get_author_gmtoff(struct got_commit_object *commit)
434 return commit->author_gmtoff;
/* Return the commit's stored committer string (not a copy). */
438 got_object_commit_get_committer(struct got_commit_object *commit)
440 return commit->committer;
/* Return the commit's stored committer timestamp. */
444 got_object_commit_get_committer_time(struct got_commit_object *commit)
446 return commit->committer_time;
/* Return the commit's stored committer GMT offset. */
450 got_object_commit_get_committer_gmtoff(struct got_commit_object *commit)
452 return commit->committer_gmtoff;
/*
 * Build a cleaned-up copy of the commit's log message in *logmsg.
 * The copy is allocated with malloc() and sized to the stored message plus
 * room for a trailing newline and the terminating NUL.
 * Leading header lines are examined one by one until an empty line (or the
 * end of the string) is reached; each is checked against the tree, author,
 * parent and committer labels. The remaining text is appended with
 * strlcat(), runs of trailing whitespace are reduced, and a newline is
 * appended if the message does not already end in one.
 * Returns an errno-based error if malloc() fails, or GOT_ERR_NO_SPACE if
 * the remaining text does not fit.
 */
455 const struct got_error *
456 got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit)
458 const struct got_error *err = NULL;
463 len = strlen(commit->logmsg);
464 *logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */
466 return got_error_from_errno("malloc");
469 * Strip out unusual headers. Headers are separated from the commit
470 * message body by a single empty line.
472 src = commit->logmsg;
474 while (*src != '\0' && *src != '\n') {
475 int copy_header = 1, eol = 0;
/* True only when the line starts with none of the four known labels. */
476 if (strncmp(src, GOT_COMMIT_LABEL_TREE,
477 strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
478 strncmp(src, GOT_COMMIT_LABEL_AUTHOR,
479 strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
480 strncmp(src, GOT_COMMIT_LABEL_PARENT,
481 strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
482 strncmp(src, GOT_COMMIT_LABEL_COMMITTER,
483 strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
486 while (*src != '\0' && !eol) {
498 if (strlcat(*logmsg, src, len + 1) >= len + 1) {
499 err = got_error(GOT_ERR_NO_SPACE);
503 /* Trim redundant trailing whitespace. */
504 len = strlen(*logmsg);
/* Drop the last character while the final two are both whitespace. */
505 while (len > 1 && isspace((unsigned char)(*logmsg)[len - 2]) &&
506 isspace((unsigned char)(*logmsg)[len - 1])) {
507 (*logmsg)[len - 1] = '\0';
511 /* Append a trailing newline if missing. */
512 if (len > 0 && (*logmsg)[len - 1] != '\n') {
513 (*logmsg)[len] = '\n';
514 (*logmsg)[len + 1] = '\0';
/* Return the commit's stored log message unmodified (not a copy). */
525 got_object_commit_get_logmsg_raw(struct got_commit_object *commit)
527 return commit->logmsg;
/*
 * Parse the contents of a commit object from buf into a newly allocated
 * commit stored in *commit.
 * Fields are consumed in this order: a GOT_COMMIT_LABEL_TREE line, any
 * number of GOT_COMMIT_LABEL_PARENT lines, a GOT_COMMIT_LABEL_AUTHOR line,
 * and a GOT_COMMIT_LABEL_COMMITTER line. Whatever remains is stored as the
 * log message. remain tracks the number of unparsed bytes.
 * Malformed input yields GOT_ERR_BAD_OBJ_DATA; allocation failures yield
 * errno-based errors.
 * NOTE(review): the got_object_commit_close() call at the end looks like the
 * error cleanup path -- confirm.
 */
530 const struct got_error *
531 got_object_parse_commit(struct got_commit_object **commit, char *buf,
534 const struct got_error *err = NULL;
537 ssize_t remain = (ssize_t)len;
540 return got_error(GOT_ERR_BAD_OBJ_DATA);
542 *commit = got_object_commit_alloc_partial();
544 return got_error_from_errno("got_object_commit_alloc_partial");
/* Tree ID. */
546 label_len = strlen(GOT_COMMIT_LABEL_TREE);
547 if (strncmp(s, GOT_COMMIT_LABEL_TREE, label_len) == 0) {
549 if (remain < SHA1_DIGEST_STRING_LENGTH) {
550 err = got_error(GOT_ERR_BAD_OBJ_DATA);
554 if (!got_parse_sha1_digest((*commit)->tree_id->sha1, s)) {
555 err = got_error(GOT_ERR_BAD_OBJ_DATA);
558 remain -= SHA1_DIGEST_STRING_LENGTH;
559 s += SHA1_DIGEST_STRING_LENGTH;
561 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Zero or more parent IDs. */
565 label_len = strlen(GOT_COMMIT_LABEL_PARENT);
566 while (strncmp(s, GOT_COMMIT_LABEL_PARENT, label_len) == 0) {
568 if (remain < SHA1_DIGEST_STRING_LENGTH) {
569 err = got_error(GOT_ERR_BAD_OBJ_DATA);
573 err = got_object_commit_add_parent(*commit, s);
577 remain -= SHA1_DIGEST_STRING_LENGTH;
578 s += SHA1_DIGEST_STRING_LENGTH;
/* Author line: time information is split off, the rest is duplicated. */
581 label_len = strlen(GOT_COMMIT_LABEL_AUTHOR);
582 if (strncmp(s, GOT_COMMIT_LABEL_AUTHOR, label_len) == 0) {
588 err = got_error(GOT_ERR_BAD_OBJ_DATA);
592 p = memchr(s, '\n', remain);
594 err = got_error(GOT_ERR_BAD_OBJ_DATA);
599 err = parse_commit_time(&(*commit)->author_time,
600 &(*commit)->author_gmtoff, s);
603 (*commit)->author = strdup(s);
604 if ((*commit)->author == NULL) {
605 err = got_error_from_errno("strdup");
/* Committer line: handled the same way as the author line. */
612 label_len = strlen(GOT_COMMIT_LABEL_COMMITTER);
613 if (strncmp(s, GOT_COMMIT_LABEL_COMMITTER, label_len) == 0) {
619 err = got_error(GOT_ERR_BAD_OBJ_DATA);
623 p = memchr(s, '\n', remain);
625 err = got_error(GOT_ERR_BAD_OBJ_DATA);
630 err = parse_commit_time(&(*commit)->committer_time,
631 &(*commit)->committer_gmtoff, s);
634 (*commit)->committer = strdup(s);
635 if ((*commit)->committer == NULL) {
636 err = got_error_from_errno("strdup");
/* The remaining bytes are kept verbatim as the log message. */
643 (*commit)->logmsg = strndup(s, remain);
644 if ((*commit)->logmsg == NULL) {
645 err = got_error_from_errno("strndup");
650 got_object_commit_close(*commit);
/*
 * Release a reference to tree.
 * NOTE(review): the refcnt checks suggest teardown only happens once no
 * references remain -- confirm.
 */
657 got_object_tree_close(struct got_tree_object *tree)
659 if (tree->refcnt > 0) {
661 if (tree->refcnt > 0)
/*
 * Parse a single tree entry from buf, looking at no more than maxlen bytes.
 * An entry consists of octal mode digits, a space, a NUL-terminated name,
 * and SHA1_DIGEST_LENGTH bytes of digest following the NUL.
 * On success pte->mode, pte->name and pte->namelen are filled in (name
 * points into buf) and *elen holds the total entry length: the string
 * including its NUL plus SHA1_DIGEST_LENGTH.
 * Returns GOT_ERR_BAD_OBJ_DATA if there is no space separator after at least
 * one mode character, if a mode character is not an octal digit, or if the
 * digest does not fit within maxlen.
 */
669 static const struct got_error *
670 parse_tree_entry(struct got_parsed_tree_entry *pte, size_t *elen, char *buf,
677 *elen = strnlen(buf, maxlen) + 1;
679 return got_error(GOT_ERR_BAD_OBJ_DATA);
681 space = memchr(buf, ' ', *elen);
682 if (space == NULL || space <= buf)
683 return got_error(GOT_ERR_BAD_OBJ_DATA);
/* Only '0'..'7' are valid mode characters; reject everything else. */
688 if (*p < '0' || *p > '7')
689 return got_error(GOT_ERR_BAD_OBJ_DATA);
691 pte->mode |= *p - '0';
/* The digest must fit in what is left of the buffer after the string. */
695 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH)
696 return got_error(GOT_ERR_BAD_OBJ_DATA);
698 pte->name = space + 1;
699 pte->namelen = strlen(pte->name);
702 *elen += SHA1_DIGEST_LENGTH;
/* Comparator for parsed tree entries: orders two entries by name via got_path_cmp(). */
707 pte_cmp(const void *pa, const void *pb)
709 const struct got_parsed_tree_entry *a = pa, *b = pb;
711 return got_path_cmp(a->name, b->name, a->namelen, b->namelen);
/*
 * Parse the contents of a tree object (buf, len bytes) into an array of
 * parsed entries returned in *entries, with the count in *nentries.
 * The array starts with room for nalloc (16) entries and is grown by nalloc
 * entries with recallocarray() whenever it is full. Each entry is parsed
 * with parse_tree_entry().
 * Entries are sorted with mergesort() using pte_cmp, then neighbouring
 * entries are compared: two entries with equal names yield
 * GOT_ERR_TREE_DUP_ENTRY.
 * An empty tree is reported as success.
 */
714 const struct got_error *
715 got_object_parse_tree(struct got_parsed_tree_entry **entries, int *nentries,
716 uint8_t *buf, size_t len)
718 const struct got_error *err = NULL;
719 size_t remain = len, totalloc;
720 const size_t nalloc = 16;
721 struct got_parsed_tree_entry *pte;
726 return NULL; /* tree is empty */
728 *entries = calloc(nalloc, sizeof(**entries));
729 if (*entries == NULL)
730 return got_error_from_errno("calloc");
/* Array is full: extend it by another nalloc zeroed slots. */
736 if (*nentries >= totalloc) {
737 pte = recallocarray(*entries, totalloc,
738 totalloc + nalloc, sizeof(**entries));
740 err = got_error_from_errno("recallocarray");
747 pte = &(*entries)[*nentries];
748 err = parse_tree_entry(pte, &elen, buf, remain);
757 err = got_error(GOT_ERR_BAD_OBJ_DATA);
762 mergesort(*entries, *nentries, sizeof(**entries), pte_cmp);
/* After sorting, duplicate names can only appear next to each other. */
764 for (i = 0; i < *nentries - 1; i++) {
765 struct got_parsed_tree_entry *prev = &(*entries)[i];
766 pte = &(*entries)[i + 1];
767 if (got_path_cmp(prev->name, pte->name,
768 prev->namelen, pte->namelen) == 0) {
769 err = got_error(GOT_ERR_TREE_DUP_ENTRY);
/*
 * Release a reference to tag.
 * NOTE(review): the refcnt check suggests teardown only happens once no
 * references remain -- confirm.
 */
784 got_object_tag_close(struct got_tag_object *tag)
786 if (tag->refcnt > 0) {
/*
 * Parse the contents of a tag object (buf, len bytes) into a newly
 * allocated, zero-initialized tag stored in *tag.
 * Fields are consumed in this order: a GOT_TAG_LABEL_OBJECT line holding the
 * ID of the tagged object, a GOT_TAG_LABEL_TYPE line naming its type
 * (commit, tree, blob or tag), a GOT_TAG_LABEL_TAG line with the tag name,
 * and a GOT_TAG_LABEL_TAGGER line. When no tagger is present an empty
 * string is stored instead. Whatever remains is stored as the tag message.
 * Malformed input yields GOT_ERR_BAD_OBJ_DATA; allocation failures yield
 * errno-based errors.
 * NOTE(review): the got_object_tag_close() call at the end looks like the
 * error cleanup path -- confirm.
 */
798 const struct got_error *
799 got_object_parse_tag(struct got_tag_object **tag, uint8_t *buf, size_t len)
801 const struct got_error *err = NULL;
807 return got_error(GOT_ERR_BAD_OBJ_DATA);
809 *tag = calloc(1, sizeof(**tag));
811 return got_error_from_errno("calloc");
/* ID of the tagged object. */
813 label_len = strlen(GOT_TAG_LABEL_OBJECT);
814 if (strncmp(s, GOT_TAG_LABEL_OBJECT, label_len) == 0) {
816 if (remain < SHA1_DIGEST_STRING_LENGTH) {
817 err = got_error(GOT_ERR_BAD_OBJ_DATA);
821 if (!got_parse_sha1_digest((*tag)->id.sha1, s)) {
822 err = got_error(GOT_ERR_BAD_OBJ_DATA);
825 remain -= SHA1_DIGEST_STRING_LENGTH;
826 s += SHA1_DIGEST_STRING_LENGTH;
828 err = got_error(GOT_ERR_BAD_OBJ_DATA);
833 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Type of the tagged object: map the textual label to a type code. */
837 label_len = strlen(GOT_TAG_LABEL_TYPE);
838 if (strncmp(s, GOT_TAG_LABEL_TYPE, label_len) == 0) {
841 err = got_error(GOT_ERR_BAD_OBJ_DATA);
845 if (strncmp(s, GOT_OBJ_LABEL_COMMIT,
846 strlen(GOT_OBJ_LABEL_COMMIT)) == 0) {
847 (*tag)->obj_type = GOT_OBJ_TYPE_COMMIT;
848 label_len = strlen(GOT_OBJ_LABEL_COMMIT);
851 } else if (strncmp(s, GOT_OBJ_LABEL_TREE,
852 strlen(GOT_OBJ_LABEL_TREE)) == 0) {
853 (*tag)->obj_type = GOT_OBJ_TYPE_TREE;
854 label_len = strlen(GOT_OBJ_LABEL_TREE);
857 } else if (strncmp(s, GOT_OBJ_LABEL_BLOB,
858 strlen(GOT_OBJ_LABEL_BLOB)) == 0) {
859 (*tag)->obj_type = GOT_OBJ_TYPE_BLOB;
860 label_len = strlen(GOT_OBJ_LABEL_BLOB);
863 } else if (strncmp(s, GOT_OBJ_LABEL_TAG,
864 strlen(GOT_OBJ_LABEL_TAG)) == 0) {
865 (*tag)->obj_type = GOT_OBJ_TYPE_TAG;
866 label_len = strlen(GOT_OBJ_LABEL_TAG);
870 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* The type label must be followed directly by a newline. */
874 if (remain <= 0 || *s != '\n') {
875 err = got_error(GOT_ERR_BAD_OBJ_DATA);
881 err = got_error(GOT_ERR_BAD_OBJ_DATA);
885 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Tag name: text up to the next newline. */
889 label_len = strlen(GOT_TAG_LABEL_TAG);
890 if (strncmp(s, GOT_TAG_LABEL_TAG, label_len) == 0) {
895 err = got_error(GOT_ERR_BAD_OBJ_DATA);
899 p = memchr(s, '\n', remain);
901 err = got_error(GOT_ERR_BAD_OBJ_DATA);
906 (*tag)->tag = strndup(s, slen);
907 if ((*tag)->tag == NULL) {
908 err = got_error_from_errno("strndup");
914 err = got_error(GOT_ERR_BAD_OBJ_DATA);
918 err = got_error(GOT_ERR_BAD_OBJ_DATA);
/* Tagger: time information is split off, the rest is duplicated. */
922 label_len = strlen(GOT_TAG_LABEL_TAGGER);
923 if (strncmp(s, GOT_TAG_LABEL_TAGGER, label_len) == 0) {
929 err = got_error(GOT_ERR_BAD_OBJ_DATA);
933 p = memchr(s, '\n', remain);
935 err = got_error(GOT_ERR_BAD_OBJ_DATA);
940 err = parse_commit_time(&(*tag)->tagger_time,
941 &(*tag)->tagger_gmtoff, s);
944 (*tag)->tagger = strdup(s);
945 if ((*tag)->tagger == NULL) {
946 err = got_error_from_errno("strdup");
952 err = got_error(GOT_ERR_BAD_OBJ_DATA);
956 /* Some old tags in the Linux git repo have no tagger. */
957 (*tag)->tagger = strdup("");
958 if ((*tag)->tagger == NULL) {
959 err = got_error_from_errno("strdup");
/* The remaining bytes are kept verbatim as the tag message. */
964 (*tag)->tagmsg = strndup(s, remain);
965 if ((*tag)->tagmsg == NULL) {
966 err = got_error_from_errno("strndup");
971 got_object_tag_close(*tag);
977 const struct got_error *
978 got_read_file_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
980 const struct got_error *err = NULL;
981 static const size_t blocksize = 512;
982 size_t n, total, remain;
988 buf = malloc(blocksize);
990 return got_error_from_errno("malloc");
997 newbuf = reallocarray(buf, 1, total + blocksize);
998 if (newbuf == NULL) {
999 err = got_error_from_errno("reallocarray");
1003 remain += blocksize;
1005 n = fread(buf + total, 1, remain, f);
1008 err = got_ferror(f, GOT_ERR_IO);