Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/uio.h>
21 #include <sys/socket.h>
22 #include <sys/wait.h>
23 #include <sys/syslimits.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdint.h>
31 #include <sha1.h>
32 #include <zlib.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <imsg.h>
36 #include <time.h>
38 #include "got_error.h"
39 #include "got_object.h"
40 #include "got_repository.h"
41 #include "got_opentemp.h"
43 #include "got_lib_sha1.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_pack.h"
46 #include "got_lib_path.h"
47 #include "got_lib_inflate.h"
48 #include "got_lib_object.h"
49 #include "got_lib_privsep.h"
50 #include "got_lib_object_idcache.h"
51 #include "got_lib_object_cache.h"
52 #include "got_lib_object_parse.h"
53 #include "got_lib_repository.h"
55 #ifndef MIN
56 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
57 #endif
59 const struct got_error *
60 got_object_id_str(char **outbuf, struct got_object_id *id)
61 {
62 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
64 *outbuf = malloc(len);
65 if (*outbuf == NULL)
66 return got_error_from_errno();
68 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
69 free(*outbuf);
70 *outbuf = NULL;
71 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
72 }
74 return NULL;
75 }
77 int
78 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
79 {
80 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
81 }
83 struct got_object_id *
84 got_object_id_dup(struct got_object_id *id1)
85 {
86 struct got_object_id *id2;
88 id2 = malloc(sizeof(*id2));
89 if (id2 == NULL)
90 return NULL;
91 memcpy(id2, id1, sizeof(*id2));
92 return id2;
93 }
95 struct got_object_id *
96 got_object_get_id(struct got_object *obj)
97 {
98 return got_object_id_dup(&obj->id);
99 }
101 const struct got_error *
102 got_object_get_id_str(char **outbuf, struct got_object *obj)
104 return got_object_id_str(outbuf, &obj->id);
107 int
108 got_object_get_type(struct got_object *obj)
110 switch (obj->type) {
111 case GOT_OBJ_TYPE_COMMIT:
112 case GOT_OBJ_TYPE_TREE:
113 case GOT_OBJ_TYPE_BLOB:
114 case GOT_OBJ_TYPE_TAG:
115 return obj->type;
116 default:
117 abort();
118 break;
121 /* not reached */
122 return 0;
125 static const struct got_error *
126 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
128 const struct got_error *err = NULL;
129 char *hex = NULL;
130 char *path_objects = got_repo_get_path_objects(repo);
132 *path = NULL;
134 if (path_objects == NULL)
135 return got_error_from_errno();
137 err = got_object_id_str(&hex, id);
138 if (err)
139 goto done;
141 if (asprintf(path, "%s/%.2x/%s", path_objects,
142 id->sha1[0], hex + 2) == -1)
143 err = got_error_from_errno();
145 done:
146 free(hex);
147 free(path_objects);
148 return err;
151 static const struct got_error *
152 open_loose_object(int *fd, struct got_object *obj, struct got_repository *repo)
154 const struct got_error *err = NULL;
155 char *path;
157 err = object_path(&path, &obj->id, repo);
158 if (err)
159 return err;
160 *fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
161 if (*fd == -1) {
162 err = got_error_from_errno();
163 goto done;
165 done:
166 free(path);
167 return err;
170 static const struct got_error *
171 get_packfile_path(char **path_packfile, struct got_packidx *packidx)
173 size_t size;
175 /* Packfile path contains ".pack" instead of ".idx", so add one byte. */
176 size = strlen(packidx->path_packidx) + 2;
177 if (size < GOT_PACKFILE_NAMELEN + 1)
178 return got_error(GOT_ERR_BAD_PATH);
180 *path_packfile = calloc(size, sizeof(**path_packfile));
181 if (*path_packfile == NULL)
182 return got_error_from_errno();
184 /* Copy up to and excluding ".idx". */
185 if (strlcpy(*path_packfile, packidx->path_packidx,
186 size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
187 return got_error(GOT_ERR_NO_SPACE);
189 if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
190 return got_error(GOT_ERR_NO_SPACE);
192 return NULL;
195 static const struct got_error *
196 open_packed_object(struct got_object **obj, struct got_object_id *id,
197 struct got_repository *repo)
199 const struct got_error *err = NULL;
200 struct got_pack *pack = NULL;
201 struct got_packidx *packidx = NULL;
202 int idx;
203 char *path_packfile;
205 err = got_repo_search_packidx(&packidx, &idx, repo, id);
206 if (err)
207 return err;
209 err = get_packfile_path(&path_packfile, packidx);
210 if (err)
211 return err;
213 pack = got_repo_get_cached_pack(repo, path_packfile);
214 if (pack == NULL) {
215 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
216 if (err)
217 goto done;
220 err = got_object_packed_read_privsep(obj, repo, pack, packidx, idx, id);
221 if (err)
222 goto done;
224 err = got_repo_cache_pack(NULL, repo, (*obj)->path_packfile, packidx);
225 done:
226 free(path_packfile);
227 return err;
230 const struct got_error *
231 got_object_open(struct got_object **obj, struct got_repository *repo,
232 struct got_object_id *id)
234 const struct got_error *err = NULL;
235 char *path;
236 int fd;
238 *obj = got_repo_get_cached_object(repo, id);
239 if (*obj != NULL) {
240 (*obj)->refcnt++;
241 return NULL;
244 err = object_path(&path, id, repo);
245 if (err)
246 return err;
248 fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
249 if (fd == -1) {
250 if (errno != ENOENT) {
251 err = got_error_from_errno();
252 goto done;
254 err = open_packed_object(obj, id, repo);
255 if (err)
256 goto done;
257 if (*obj == NULL)
258 err = got_error(GOT_ERR_NO_OBJ);
259 } else {
260 err = got_object_read_header_privsep(obj, repo, fd);
261 if (err)
262 goto done;
263 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
266 if (err == NULL) {
267 (*obj)->refcnt++;
268 err = got_repo_cache_object(repo, id, *obj);
270 done:
271 free(path);
272 if (fd != -1)
273 close(fd);
274 return err;
278 const struct got_error *
279 got_object_open_by_id_str(struct got_object **obj, struct got_repository *repo,
280 const char *id_str)
282 struct got_object_id id;
284 if (!got_parse_sha1_digest(id.sha1, id_str))
285 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
287 return got_object_open(obj, repo, &id);
290 const struct got_error *
291 got_object_open_as_commit(struct got_commit_object **commit,
292 struct got_repository *repo, struct got_object_id *id)
294 const struct got_error *err;
295 struct got_object *obj;
297 *commit = NULL;
299 err = got_object_open(&obj, repo, id);
300 if (err)
301 return err;
302 if (got_object_get_type(obj) != GOT_OBJ_TYPE_COMMIT) {
303 err = got_error(GOT_ERR_OBJ_TYPE);
304 goto done;
307 err = got_object_commit_open(commit, repo, obj);
308 done:
309 got_object_close(obj);
310 return err;
313 const struct got_error *
314 got_object_qid_alloc(struct got_object_qid **qid, struct got_object_id *id)
316 const struct got_error *err = NULL;
318 *qid = calloc(1, sizeof(**qid));
319 if (*qid == NULL)
320 return got_error_from_errno();
322 (*qid)->id = got_object_id_dup(id);
323 if ((*qid)->id == NULL) {
324 err = got_error_from_errno();
325 got_object_qid_free(*qid);
326 *qid = NULL;
327 return err;
330 return NULL;
333 const struct got_error *
334 got_object_commit_open(struct got_commit_object **commit,
335 struct got_repository *repo, struct got_object *obj)
337 const struct got_error *err = NULL;
339 *commit = got_repo_get_cached_commit(repo, &obj->id);
340 if (*commit != NULL) {
341 (*commit)->refcnt++;
342 return NULL;
345 if (obj->type != GOT_OBJ_TYPE_COMMIT)
346 return got_error(GOT_ERR_OBJ_TYPE);
348 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
349 struct got_pack *pack;
350 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
351 if (pack == NULL) {
352 err = got_repo_cache_pack(&pack, repo,
353 obj->path_packfile, NULL);
354 if (err)
355 return err;
357 err = got_object_read_packed_commit_privsep(commit, obj, pack);
358 } else {
359 int fd;
360 err = open_loose_object(&fd, obj, repo);
361 if (err)
362 return err;
363 err = got_object_read_commit_privsep(commit, obj, fd, repo);
364 close(fd);
367 if (err == NULL) {
368 (*commit)->refcnt++;
369 err = got_repo_cache_commit(repo, &obj->id, *commit);
372 return err;
375 const struct got_error *
376 got_object_tree_open(struct got_tree_object **tree,
377 struct got_repository *repo, struct got_object *obj)
379 const struct got_error *err = NULL;
381 *tree = got_repo_get_cached_tree(repo, &obj->id);
382 if (*tree != NULL) {
383 (*tree)->refcnt++;
384 return NULL;
387 if (obj->type != GOT_OBJ_TYPE_TREE)
388 return got_error(GOT_ERR_OBJ_TYPE);
390 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
391 struct got_pack *pack;
392 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
393 if (pack == NULL) {
394 err = got_repo_cache_pack(&pack, repo,
395 obj->path_packfile, NULL);
396 if (err)
397 return err;
399 err = got_object_read_packed_tree_privsep(tree, obj, pack);
400 } else {
401 int fd;
402 err = open_loose_object(&fd, obj, repo);
403 if (err)
404 return err;
405 err = got_object_read_tree_privsep(tree, obj, fd, repo);
406 close(fd);
409 if (err == NULL) {
410 (*tree)->refcnt++;
411 err = got_repo_cache_tree(repo, &obj->id, *tree);
414 return err;
417 const struct got_error *
418 got_object_open_as_tree(struct got_tree_object **tree,
419 struct got_repository *repo, struct got_object_id *id)
421 const struct got_error *err;
422 struct got_object *obj;
424 *tree = NULL;
426 err = got_object_open(&obj, repo, id);
427 if (err)
428 return err;
429 if (got_object_get_type(obj) != GOT_OBJ_TYPE_TREE) {
430 err = got_error(GOT_ERR_OBJ_TYPE);
431 goto done;
434 err = got_object_tree_open(tree, repo, obj);
435 done:
436 got_object_close(obj);
437 return err;
440 const struct got_tree_entries *
441 got_object_tree_get_entries(struct got_tree_object *tree)
443 return &tree->entries;
446 static const struct got_error *
447 read_packed_blob_privsep(size_t *size, int outfd, struct got_object *obj,
448 struct got_pack *pack)
450 const struct got_error *err = NULL;
451 int outfd_child;
452 int basefd, accumfd; /* temporary files for delta application */
454 basefd = got_opentempfd();
455 if (basefd == -1)
456 return got_error_from_errno();
457 accumfd = got_opentempfd();
458 if (accumfd == -1)
459 return got_error_from_errno();
461 outfd_child = dup(outfd);
462 if (outfd_child == -1)
463 return got_error_from_errno();
465 err = got_privsep_send_obj_req(pack->privsep_child->ibuf, -1, obj);
466 if (err)
467 return err;
469 err = got_privsep_send_blob_outfd(pack->privsep_child->ibuf,
470 outfd_child);
471 if (err) {
472 close(outfd_child);
473 return err;
475 err = got_privsep_send_tmpfd(pack->privsep_child->ibuf,
476 basefd);
477 if (err) {
478 close(basefd);
479 close(accumfd);
480 close(outfd_child);
481 return err;
484 err = got_privsep_send_tmpfd(pack->privsep_child->ibuf,
485 accumfd);
486 if (err) {
487 close(accumfd);
488 close(outfd_child);
489 return err;
492 err = got_privsep_recv_blob(size, pack->privsep_child->ibuf);
493 if (err)
494 return err;
496 if (lseek(outfd, SEEK_SET, 0) == -1)
497 err = got_error_from_errno();
499 return err;
502 const struct got_error *
503 got_object_blob_open(struct got_blob_object **blob,
504 struct got_repository *repo, struct got_object *obj, size_t blocksize)
506 const struct got_error *err = NULL;
507 int outfd;
508 size_t size;
509 struct stat sb;
511 if (obj->type != GOT_OBJ_TYPE_BLOB)
512 return got_error(GOT_ERR_OBJ_TYPE);
514 if (blocksize < obj->hdrlen)
515 return got_error(GOT_ERR_NO_SPACE);
517 *blob = calloc(1, sizeof(**blob));
518 if (*blob == NULL)
519 return got_error_from_errno();
521 outfd = got_opentempfd();
522 if (outfd == -1)
523 return got_error_from_errno();
525 (*blob)->read_buf = malloc(blocksize);
526 if ((*blob)->read_buf == NULL) {
527 err = got_error_from_errno();
528 goto done;
530 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
531 struct got_pack *pack;
532 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
533 if (pack == NULL) {
534 err = got_repo_cache_pack(&pack, repo,
535 obj->path_packfile, NULL);
536 if (err)
537 goto done;
539 err = read_packed_blob_privsep(&size, outfd, obj, pack);
540 if (err)
541 goto done;
542 obj->size = size;
543 } else {
544 int infd;
546 err = open_loose_object(&infd, obj, repo);
547 if (err)
548 goto done;
550 err = got_object_read_blob_privsep(&size, outfd, infd, repo);
551 close(infd);
552 if (err)
553 goto done;
555 if (size != obj->hdrlen + obj->size) {
556 err = got_error(GOT_ERR_PRIVSEP_LEN);
557 goto done;
561 if (fstat(outfd, &sb) == -1) {
562 err = got_error_from_errno();
563 goto done;
566 if (sb.st_size != obj->hdrlen + obj->size) {
567 err = got_error(GOT_ERR_PRIVSEP_LEN);
568 goto done;
571 (*blob)->f = fdopen(outfd, "rb");
572 if ((*blob)->f == NULL) {
573 err = got_error_from_errno();
574 close(outfd);
575 goto done;
578 (*blob)->hdrlen = obj->hdrlen;
579 (*blob)->blocksize = blocksize;
580 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
582 done:
583 if (err) {
584 if (*blob) {
585 if ((*blob)->f)
586 fclose((*blob)->f);
587 free((*blob)->read_buf);
588 free(*blob);
589 *blob = NULL;
590 } else if (outfd != -1)
591 close(outfd);
593 return err;
596 const struct got_error *
597 got_object_open_as_blob(struct got_blob_object **blob,
598 struct got_repository *repo, struct got_object_id *id,
599 size_t blocksize)
601 const struct got_error *err;
602 struct got_object *obj;
604 *blob = NULL;
606 err = got_object_open(&obj, repo, id);
607 if (err)
608 return err;
609 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
610 err = got_error(GOT_ERR_OBJ_TYPE);
611 goto done;
614 err = got_object_blob_open(blob, repo, obj, blocksize);
615 done:
616 got_object_close(obj);
617 return err;
620 void
621 got_object_blob_close(struct got_blob_object *blob)
623 free(blob->read_buf);
624 fclose(blob->f);
625 free(blob);
628 char *
629 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
631 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
634 size_t
635 got_object_blob_get_hdrlen(struct got_blob_object *blob)
637 return blob->hdrlen;
640 const uint8_t *
641 got_object_blob_get_read_buf(struct got_blob_object *blob)
643 return blob->read_buf;
646 const struct got_error *
647 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
649 size_t n;
651 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
652 if (n == 0 && ferror(blob->f))
653 return got_ferror(blob->f, GOT_ERR_IO);
654 *outlenp = n;
655 return NULL;
658 const struct got_error *
659 got_object_blob_dump_to_file(size_t *total_len, size_t *nlines,
660 FILE *outfile, struct got_blob_object *blob)
662 const struct got_error *err = NULL;
663 size_t len, hdrlen;
664 const uint8_t *buf;
665 int i;
667 if (total_len)
668 *total_len = 0;
669 if (nlines)
670 *nlines = 0;
672 hdrlen = got_object_blob_get_hdrlen(blob);
673 do {
674 err = got_object_blob_read_block(&len, blob);
675 if (err)
676 return err;
677 if (len == 0)
678 break;
679 if (total_len)
680 *total_len += len;
681 buf = got_object_blob_get_read_buf(blob);
682 if (nlines) {
683 for (i = 0; i < len; i++) {
684 if (buf[i] == '\n')
685 (*nlines)++;
688 /* Skip blob object header first time around. */
689 fwrite(buf + hdrlen, len - hdrlen, 1, outfile);
690 hdrlen = 0;
691 } while (len != 0);
693 fflush(outfile);
694 rewind(outfile);
696 return NULL;
699 static struct got_tree_entry *
700 find_entry_by_name(struct got_tree_object *tree, const char *name)
702 struct got_tree_entry *te;
704 SIMPLEQ_FOREACH(te, &tree->entries.head, entry) {
705 if (strcmp(te->name, name) == 0)
706 return te;
708 return NULL;
711 const struct got_error *
712 got_object_open_by_path(struct got_object **obj, struct got_repository *repo,
713 struct got_object_id *commit_id, const char *path)
715 const struct got_error *err = NULL;
716 struct got_commit_object *commit = NULL;
717 struct got_tree_object *tree = NULL;
718 struct got_tree_entry *te = NULL;
719 char *seg, *s, *s0 = NULL;
720 size_t len = strlen(path);
722 *obj = NULL;
724 /* We are expecting an absolute in-repository path. */
725 if (path[0] != '/')
726 return got_error(GOT_ERR_NOT_ABSPATH);
728 err = got_object_open_as_commit(&commit, repo, commit_id);
729 if (err)
730 goto done;
732 /* Handle opening of root of commit's tree. */
733 if (path[1] == '\0') {
734 err = got_object_open(obj, repo, commit->tree_id);
735 goto done;
738 err = got_object_open_as_tree(&tree, repo, commit->tree_id);
739 if (err)
740 goto done;
742 s0 = strdup(path);
743 if (s0 == NULL) {
744 err = got_error_from_errno();
745 goto done;
747 err = got_canonpath(path, s0, len + 1);
748 if (err)
749 goto done;
751 s = s0;
752 s++; /* skip leading '/' */
753 len--;
754 seg = s;
755 while (len > 0) {
756 struct got_tree_object *next_tree;
758 if (*s != '/') {
759 s++;
760 len--;
761 if (*s)
762 continue;
765 /* end of path segment */
766 *s = '\0';
768 te = find_entry_by_name(tree, seg);
769 if (te == NULL) {
770 err = got_error(GOT_ERR_NO_OBJ);
771 goto done;
774 if (len == 0)
775 break;
777 seg = s + 1;
778 s++;
779 len--;
780 if (*s) {
781 err = got_object_open_as_tree(&next_tree, repo,
782 te->id);
783 te = NULL;
784 if (err)
785 goto done;
786 got_object_tree_close(tree);
787 tree = next_tree;
791 if (te)
792 err = got_object_open(obj, repo, te->id);
793 else
794 err = got_error(GOT_ERR_NO_OBJ);
795 done:
796 free(s0);
797 if (commit)
798 got_object_commit_close(commit);
799 if (tree)
800 got_object_tree_close(tree);
801 return err;