Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
24 #include <stdint.h>
25 #include <imsg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sha1.h>
30 #include <limits.h>
31 #include <zlib.h>
33 #include "got_error.h"
34 #include "got_cancel.h"
35 #include "got_object.h"
36 #include "got_path.h"
37 #include "got_reference.h"
38 #include "got_repository_admin.h"
39 #include "got_opentemp.h"
41 #include "got_lib_deltify.h"
42 #include "got_lib_delta.h"
43 #include "got_lib_object.h"
44 #include "got_lib_object_idset.h"
45 #include "got_lib_object_cache.h"
46 #include "got_lib_deflate.h"
47 #include "got_lib_pack.h"
48 #include "got_lib_privsep.h"
49 #include "got_lib_repository.h"
/*
 * Larger of two values. This is a plain macro, so the winning argument
 * is evaluated twice; do not pass expressions with side effects.
 */
#if !defined(MAX)
#define MAX(_a,_b) ((_b) < (_a) ? (_a) : (_b))
#endif
55 struct got_pack_meta {
56 struct got_object_id id;
57 char *path;
58 int obj_type;
59 off_t size;
60 time_t mtime;
62 /* The best delta we picked */
63 struct got_pack_meta *head;
64 struct got_pack_meta *prev;
65 struct got_delta_instruction *deltas;
66 int ndeltas;
67 int nchain;
69 /* Only used for delta window */
70 struct got_delta_table *dtab;
72 /* Only used for writing offset deltas */
73 off_t off;
74 };
/*
 * Growable array of pack meta entries.
 */
struct got_pack_metavec {
	struct got_pack_meta **meta;	/* the entries */
	int nmeta;			/* number of slots in use */
	int metasz;			/* number of slots allocated */
};
82 static const struct got_error *
83 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
84 const char *path, int obj_type, time_t mtime)
85 {
86 const struct got_error *err = NULL;
87 struct got_pack_meta *m;
89 *new = NULL;
91 m = calloc(1, sizeof(*m));
92 if (m == NULL)
93 return got_error_from_errno("calloc");
95 memcpy(&m->id, id, sizeof(m->id));
97 m->path = strdup(path);
98 if (m->path == NULL) {
99 err = got_error_from_errno("strdup");
100 free(m);
101 return err;
104 m->obj_type = obj_type;
105 m->mtime = mtime;
106 *new = m;
107 return NULL;
110 static void
111 clear_meta(struct got_pack_meta *meta)
113 if (meta == NULL)
114 return;
115 free(meta->deltas);
116 meta->deltas = NULL;
117 free(meta->path);
118 meta->path = NULL;
/*
 * Free an array of nmeta pack meta entries: the data owned by each entry,
 * each entry itself, and finally the array.
 *
 * Entries are allocated individually (see alloc_meta()), so releasing
 * only their fields and the array would leak every entry struct.
 * The head/prev links point at sibling entries of the same array and
 * are not followed here.
 */
static void
free_nmeta(struct got_pack_meta **meta, int nmeta)
{
	int i;

	for (i = 0; i < nmeta; i++) {
		clear_meta(meta[i]);
		free(meta[i]);
	}
	free(meta);
}
131 static int
132 delta_order_cmp(const void *pa, const void *pb)
134 struct got_pack_meta *a, *b;
135 int cmp;
137 a = *(struct got_pack_meta **)pa;
138 b = *(struct got_pack_meta **)pb;
140 if (a->obj_type != b->obj_type)
141 return a->obj_type - b->obj_type;
142 cmp = strcmp(a->path, b->path);
143 if (cmp != 0)
144 return cmp;
145 if (a->mtime != b->mtime)
146 return a->mtime - b->mtime;
147 return got_object_id_cmp(&a->id, &b->id);
150 static int
151 delta_size(struct got_delta_instruction *deltas, int ndeltas)
153 int i, size = 32;
154 for (i = 0; i < ndeltas; i++) {
155 if (deltas[i].copy)
156 size += GOT_DELTA_SIZE_SHIFT;
157 else
158 size += deltas[i].len + 1;
160 return size;
164 static const struct got_error *
165 pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
166 struct got_repository *repo,
167 got_pack_progress_cb progress_cb, void *progress_arg,
168 got_cancel_cb cancel_cb, void *cancel_arg)
170 const struct got_error *err = NULL;
171 struct got_pack_meta *m = NULL, *base = NULL;
172 struct got_raw_object *raw = NULL, *base_raw = NULL;
173 struct got_delta_instruction *deltas;
174 int i, j, size, ndeltas, best;
175 const int max_base_candidates = 10;
176 int outfd = -1;
178 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
179 for (i = 0; i < nmeta; i++) {
180 if (cancel_cb) {
181 err = (*cancel_cb)(cancel_arg);
182 if (err)
183 break;
185 if (progress_cb) {
186 err = progress_cb(progress_arg, 0L, nours, nmeta, i, 0);
187 if (err)
188 goto done;
190 m = meta[i];
191 m->deltas = NULL;
192 m->ndeltas = 0;
194 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
195 m->obj_type == GOT_OBJ_TYPE_TAG)
196 continue;
198 err = got_object_raw_open(&raw, &outfd, repo, &m->id, 8192);
199 if (err)
200 goto done;
201 m->size = raw->size;
203 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
204 raw->size + raw->hdrlen);
205 if (err)
206 goto done;
208 if (i > max_base_candidates) {
209 struct got_pack_meta *n = NULL;
210 n = meta[i - (max_base_candidates + 1)];
211 got_deltify_free(n->dtab);
212 n->dtab = NULL;
215 best = raw->size;
216 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
217 if (cancel_cb) {
218 err = (*cancel_cb)(cancel_arg);
219 if (err)
220 goto done;
222 base = meta[j];
223 /* long chains make unpacking slow, avoid such bases */
224 if (base->nchain >= 128 ||
225 base->obj_type != m->obj_type)
226 continue;
228 err = got_object_raw_open(&base_raw, &outfd, repo,
229 &base->id, 8192);
230 if (err)
231 goto done;
232 err = got_deltify(&deltas, &ndeltas,
233 raw->f, raw->hdrlen, raw->size + raw->hdrlen,
234 base->dtab, base_raw->f, base_raw->hdrlen,
235 base_raw->size + base_raw->hdrlen);
236 got_object_raw_close(base_raw);
237 base_raw = NULL;
238 if (err)
239 goto done;
241 size = delta_size(deltas, ndeltas);
242 if (size + 32 < best){
243 /*
244 * if we already picked a best delta,
245 * replace it.
246 */
247 free(m->deltas);
248 best = size;
249 m->deltas = deltas;
250 m->ndeltas = ndeltas;
251 m->nchain = base->nchain + 1;
252 m->prev = base;
253 m->head = base->head;
254 if (m->head == NULL)
255 m->head = base;
256 } else {
257 free(deltas);
258 deltas = NULL;
259 ndeltas = 0;
263 got_object_raw_close(raw);
264 raw = NULL;
266 done:
267 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
268 got_deltify_free(meta[i]->dtab);
269 meta[i]->dtab = NULL;
271 if (raw)
272 got_object_raw_close(raw);
273 if (base_raw)
274 got_object_raw_close(base_raw);
275 if (outfd != -1 && close(outfd) == -1 && err == NULL)
276 err = got_error_from_errno("close");
277 return err;
280 static const struct got_error *
281 search_packidx(int *found, struct got_object_id *id,
282 struct got_repository *repo)
284 const struct got_error *err = NULL;
285 struct got_packidx *packidx = NULL;
286 int idx;
288 *found = 0;
290 err = got_repo_search_packidx(&packidx, &idx, repo, id);
291 if (err == NULL)
292 *found = 1; /* object is already packed */
293 else if (err->code == GOT_ERR_NO_OBJ)
294 err = NULL;
295 return err;
298 static const int obj_types[] = {
299 GOT_OBJ_TYPE_ANY,
300 GOT_OBJ_TYPE_COMMIT,
301 GOT_OBJ_TYPE_TREE,
302 GOT_OBJ_TYPE_BLOB,
303 GOT_OBJ_TYPE_TAG,
304 GOT_OBJ_TYPE_OFFSET_DELTA,
305 GOT_OBJ_TYPE_REF_DELTA
306 };
308 static const struct got_error *
309 add_meta(struct got_pack_metavec *v, struct got_object_idset *idset,
310 struct got_object_id *id, const char *path, int obj_type,
311 time_t mtime, int loose_obj_only, struct got_repository *repo)
313 const struct got_error *err;
314 struct got_pack_meta *m;
316 if (loose_obj_only) {
317 int is_packed;
318 err = search_packidx(&is_packed, id, repo);
319 if (err)
320 return err;
321 if (is_packed)
322 return NULL;
325 err = got_object_idset_add(idset, id, (void *)&obj_types[obj_type]);
326 if (err)
327 return err;
329 if (v == NULL)
330 return NULL;
332 err = alloc_meta(&m, id, path, obj_type, mtime);
333 if (err)
334 goto done;
336 if (v->nmeta == v->metasz){
337 size_t newsize = 2 * v->metasz;
338 struct got_pack_meta **new;
339 new = reallocarray(v->meta, newsize, sizeof(*new));
340 if (new == NULL) {
341 err = got_error_from_errno("reallocarray");
342 goto done;
344 v->meta = new;
345 v->metasz = newsize;
347 done:
348 if (err) {
349 clear_meta(m);
350 free(m);
351 } else
352 v->meta[v->nmeta++] = m;
354 return err;
357 static const struct got_error *
358 load_tree_entries(struct got_object_id_queue *ids, struct got_pack_metavec *v,
359 struct got_object_idset *idset, struct got_object_id *tree_id,
360 const char *dpath, time_t mtime, struct got_repository *repo,
361 int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg)
363 const struct got_error *err;
364 struct got_tree_object *tree;
365 char *p = NULL;
366 int i;
368 err = got_object_open_as_tree(&tree, repo, tree_id);
369 if (err)
370 return err;
372 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
373 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
374 struct got_object_id *id = got_tree_entry_get_id(e);
375 mode_t mode = got_tree_entry_get_mode(e);
377 if (cancel_cb) {
378 err = (*cancel_cb)(cancel_arg);
379 if (err)
380 break;
383 if (got_object_tree_entry_is_submodule(e) ||
384 got_object_idset_contains(idset, id))
385 continue;
387 if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
388 got_tree_entry_get_name(e)) == -1) {
389 err = got_error_from_errno("asprintf");
390 break;
393 if (S_ISDIR(mode)) {
394 struct got_object_qid *qid;
395 err = got_object_qid_alloc(&qid, id);
396 if (err)
397 break;
398 STAILQ_INSERT_TAIL(ids, qid, entry);
399 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
400 err = add_meta(v, idset, id, p, GOT_OBJ_TYPE_BLOB,
401 mtime, loose_obj_only, repo);
402 if (err)
403 break;
405 free(p);
406 p = NULL;
409 got_object_tree_close(tree);
410 free(p);
411 return err;
414 static const struct got_error *
415 load_tree(struct got_pack_metavec *v, struct got_object_idset *idset,
416 struct got_object_id *tree_id, const char *dpath, time_t mtime,
417 int loose_obj_only, struct got_repository *repo,
418 got_cancel_cb cancel_cb, void *cancel_arg)
420 const struct got_error *err = NULL;
421 struct got_object_id_queue tree_ids;
422 struct got_object_qid *qid;
424 if (got_object_idset_contains(idset, tree_id))
425 return NULL;
427 err = got_object_qid_alloc(&qid, tree_id);
428 if (err)
429 return err;
431 STAILQ_INIT(&tree_ids);
432 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
434 while (!STAILQ_EMPTY(&tree_ids)) {
435 if (cancel_cb) {
436 err = (*cancel_cb)(cancel_arg);
437 if (err)
438 break;
441 qid = STAILQ_FIRST(&tree_ids);
442 STAILQ_REMOVE_HEAD(&tree_ids, entry);
444 if (got_object_idset_contains(idset, qid->id)) {
445 got_object_qid_free(qid);
446 continue;
449 err = add_meta(v, idset, qid->id, dpath, GOT_OBJ_TYPE_TREE,
450 mtime, loose_obj_only, repo);
451 if (err) {
452 got_object_qid_free(qid);
453 break;
456 err = load_tree_entries(&tree_ids, v, idset, qid->id, dpath,
457 mtime, repo, loose_obj_only, cancel_cb, cancel_arg);
458 got_object_qid_free(qid);
459 if (err)
460 break;
463 got_object_id_queue_free(&tree_ids);
464 return err;
467 static const struct got_error *
468 load_commit(struct got_pack_metavec *v, struct got_object_idset *idset,
469 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
470 got_cancel_cb cancel_cb, void *cancel_arg)
472 const struct got_error *err;
473 struct got_commit_object *commit;
475 if (got_object_idset_contains(idset, id))
476 return NULL;
478 if (loose_obj_only) {
479 int is_packed;
480 err = search_packidx(&is_packed, id, repo);
481 if (err)
482 return err;
483 if (is_packed)
484 return NULL;
487 err = got_object_open_as_commit(&commit, repo, id);
488 if (err)
489 return err;
491 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_COMMIT,
492 got_object_commit_get_committer_time(commit),
493 loose_obj_only, repo);
494 if (err)
495 goto done;
497 err = load_tree(v, idset, got_object_commit_get_tree_id(commit),
498 "", got_object_commit_get_committer_time(commit),
499 loose_obj_only, repo, cancel_cb, cancel_arg);
500 done:
501 got_object_commit_close(commit);
502 return err;
505 static const struct got_error *
506 load_tag(struct got_pack_metavec *v, struct got_object_idset *idset,
507 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
508 got_cancel_cb cancel_cb, void *cancel_arg)
510 const struct got_error *err;
511 struct got_tag_object *tag = NULL;
513 if (got_object_idset_contains(idset, id))
514 return NULL;
516 if (loose_obj_only) {
517 int is_packed;
518 err = search_packidx(&is_packed, id, repo);
519 if (err)
520 return err;
521 if (is_packed)
522 return NULL;
525 err = got_object_open_as_tag(&tag, repo, id);
526 if (err)
527 return err;
529 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_TAG,
530 got_object_tag_get_tagger_time(tag),
531 loose_obj_only, repo);
532 if (err)
533 goto done;
535 switch (got_object_tag_get_object_type(tag)) {
536 case GOT_OBJ_TYPE_COMMIT:
537 err = load_commit(v, idset,
538 got_object_tag_get_object_id(tag), repo,
539 loose_obj_only, cancel_cb, cancel_arg);
540 break;
541 case GOT_OBJ_TYPE_TREE:
542 err = load_tree(v, idset, got_object_tag_get_object_id(tag),
543 "", got_object_tag_get_tagger_time(tag),
544 loose_obj_only, repo, cancel_cb, cancel_arg);
545 break;
546 default:
547 break;
550 done:
551 got_object_tag_close(tag);
552 return err;
/*
 * Colors used by findtwixt() to mark commits while walking history.
 * The values double as indices into findtwixt_colors[].
 */
enum findtwixt_color {
	COLOR_KEEP = 0,		/* commit is wanted in the result */
	COLOR_DROP = 1,		/* commit must be excluded */
	COLOR_BLANK = 2,	/* commit has not been colored yet */
};
560 static const int findtwixt_colors[] = {
561 COLOR_KEEP,
562 COLOR_DROP,
563 COLOR_BLANK
564 };
566 static const struct got_error *
567 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
568 int color, struct got_repository *repo)
570 const struct got_error *err;
571 struct got_object_qid *qid;
573 err = got_object_qid_alloc(&qid, id);
574 if (err)
575 return err;
577 STAILQ_INSERT_TAIL(ids, qid, entry);
578 qid->data = (void *)&findtwixt_colors[color];
579 return NULL;
582 static const struct got_error *
583 drop_commit(struct got_object_idset *keep, struct got_object_idset *drop,
584 struct got_object_id *id, struct got_repository *repo,
585 got_cancel_cb cancel_cb, void *cancel_arg)
587 const struct got_error *err = NULL;
588 struct got_commit_object *commit;
589 const struct got_object_id_queue *parents;
590 struct got_object_id_queue ids;
591 struct got_object_qid *qid;
593 STAILQ_INIT(&ids);
595 err = got_object_qid_alloc(&qid, id);
596 if (err)
597 return err;
598 STAILQ_INSERT_HEAD(&ids, qid, entry);
600 while (!STAILQ_EMPTY(&ids)) {
601 if (cancel_cb) {
602 err = (*cancel_cb)(cancel_arg);
603 if (err)
604 break;
607 qid = STAILQ_FIRST(&ids);
608 STAILQ_REMOVE_HEAD(&ids, entry);
610 if (got_object_idset_contains(drop, qid->id)) {
611 got_object_qid_free(qid);
612 continue;
615 err = got_object_idset_add(drop, qid->id,
616 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
617 if (err) {
618 got_object_qid_free(qid);
619 break;
622 if (!got_object_idset_contains(keep, qid->id)) {
623 got_object_qid_free(qid);
624 continue;
627 err = got_object_open_as_commit(&commit, repo, qid->id);
628 got_object_qid_free(qid);
629 if (err)
630 break;
632 parents = got_object_commit_get_parent_ids(commit);
633 if (parents) {
634 err = got_object_id_queue_copy(parents, &ids);
635 if (err) {
636 got_object_commit_close(commit);
637 break;
640 got_object_commit_close(commit);
643 got_object_id_queue_free(&ids);
644 return err;
/* State shared between successive append_id() calls. */
struct append_id_arg {
	struct got_object_id **array;	/* destination for ID copies */
	int idx;			/* next free slot in array */
};
652 static const struct got_error *
653 append_id(struct got_object_id *id, void *data, void *arg)
655 struct append_id_arg *a = arg;
657 a->array[a->idx] = got_object_id_dup(id);
658 if (a->array[a->idx] == NULL)
659 return got_error_from_errno("got_object_id_dup");
661 a->idx++;
662 return NULL;
665 static const struct got_error *
666 findtwixt(struct got_object_id ***res, int *nres,
667 struct got_object_id **head, int nhead,
668 struct got_object_id **tail, int ntail,
669 struct got_repository *repo,
670 got_cancel_cb cancel_cb, void *cancel_arg)
672 const struct got_error *err = NULL;
673 struct got_object_id_queue ids;
674 struct got_object_idset *keep, *drop;
675 struct got_object_qid *qid;
676 int i, ncolor, nkeep, obj_type;
678 STAILQ_INIT(&ids);
679 *res = NULL;
680 *nres = 0;
682 keep = got_object_idset_alloc();
683 if (keep == NULL)
684 return got_error_from_errno("got_object_idset_alloc");
686 drop = got_object_idset_alloc();
687 if (drop == NULL) {
688 err = got_error_from_errno("got_object_idset_alloc");
689 goto done;
692 for (i = 0; i < nhead; i++) {
693 struct got_object_id *id = head[i];
694 if (id == NULL)
695 continue;
696 err = got_object_get_type(&obj_type, repo, id);
697 if (err)
698 return err;
699 if (obj_type != GOT_OBJ_TYPE_COMMIT)
700 continue;
701 err = queue_commit_id(&ids, id, COLOR_KEEP, repo);
702 if (err)
703 goto done;
705 for (i = 0; i < ntail; i++) {
706 struct got_object_id *id = tail[i];
707 if (id == NULL)
708 continue;
709 err = got_object_get_type(&obj_type, repo, id);
710 if (err)
711 return err;
712 if (obj_type != GOT_OBJ_TYPE_COMMIT)
713 continue;
714 err = queue_commit_id(&ids, id, COLOR_DROP, repo);
715 if (err)
716 goto done;
719 while (!STAILQ_EMPTY(&ids)) {
720 int qcolor;
721 qid = STAILQ_FIRST(&ids);
722 qcolor = *((int *)qid->data);
724 if (got_object_idset_contains(drop, qid->id))
725 ncolor = COLOR_DROP;
726 else if (got_object_idset_contains(keep, qid->id))
727 ncolor = COLOR_KEEP;
728 else
729 ncolor = COLOR_BLANK;
731 if (ncolor == COLOR_DROP || (ncolor == COLOR_KEEP &&
732 qcolor == COLOR_KEEP)) {
733 STAILQ_REMOVE_HEAD(&ids, entry);
734 got_object_qid_free(qid);
735 continue;
738 if (ncolor == COLOR_KEEP && qcolor == COLOR_DROP) {
739 err = drop_commit(keep, drop, qid->id, repo,
740 cancel_cb, cancel_arg);
741 if (err)
742 goto done;
743 } else if (ncolor == COLOR_BLANK) {
744 struct got_commit_object *commit;
745 struct got_object_id *id;
746 const struct got_object_id_queue *parents;
747 struct got_object_qid *pid;
749 id = got_object_id_dup(qid->id);
750 if (id == NULL) {
751 err = got_error_from_errno("got_object_id_dup");
752 goto done;
754 if (qcolor == COLOR_KEEP)
755 err = got_object_idset_add(keep, id,
756 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
757 else
758 err = got_object_idset_add(drop, id,
759 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
760 if (err) {
761 free(id);
762 goto done;
765 err = got_object_open_as_commit(&commit, repo, id);
766 if (err) {
767 free(id);
768 goto done;
770 parents = got_object_commit_get_parent_ids(commit);
771 if (parents) {
772 STAILQ_FOREACH(pid, parents, entry) {
773 err = queue_commit_id(&ids, pid->id,
774 qcolor, repo);
775 if (err) {
776 free(id);
777 goto done;
781 got_object_commit_close(commit);
782 commit = NULL;
783 } else {
784 /* should not happen */
785 err = got_error_fmt(GOT_ERR_NOT_IMPL,
786 "%s ncolor=%d qcolor=%d", __func__, ncolor, qcolor);
787 goto done;
790 STAILQ_REMOVE_HEAD(&ids, entry);
791 got_object_qid_free(qid);
794 nkeep = got_object_idset_num_elements(keep);
795 if (nkeep > 0) {
796 struct append_id_arg arg;
797 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
798 if (arg.array == NULL) {
799 err = got_error_from_errno("calloc");
800 goto done;
802 arg.idx = 0;
803 err = got_object_idset_for_each(keep, append_id, &arg);
804 if (err) {
805 free(arg.array);
806 goto done;
808 *res = arg.array;
809 *nres = nkeep;
811 done:
812 got_object_idset_free(keep);
813 got_object_idset_free(drop);
814 got_object_id_queue_free(&ids);
815 return err;
818 static const struct got_error *
819 read_meta(struct got_pack_meta ***meta, int *nmeta,
820 struct got_object_id **theirs, int ntheirs,
821 struct got_object_id **ours, int nours, struct got_repository *repo,
822 int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
823 got_cancel_cb cancel_cb, void *cancel_arg)
825 const struct got_error *err = NULL;
826 struct got_object_id **ids = NULL;
827 struct got_object_idset *idset;
828 int i, nobj = 0, obj_type;
829 struct got_pack_metavec v;
831 *meta = NULL;
832 *nmeta = 0;
834 idset = got_object_idset_alloc();
835 if (idset == NULL)
836 return got_error_from_errno("got_object_idset_alloc");
838 v.nmeta = 0;
839 v.metasz = 64;
840 v.meta = calloc(v.metasz, sizeof(struct got_pack_meta *));
841 if (v.meta == NULL) {
842 err = got_error_from_errno("calloc");
843 goto done;
846 err = findtwixt(&ids, &nobj, ours, nours, theirs, ntheirs, repo,
847 cancel_cb, cancel_arg);
848 if (err || nobj == 0)
849 goto done;
851 for (i = 0; i < ntheirs; i++) {
852 struct got_object_id *id = theirs[i];
853 if (id == NULL)
854 continue;
855 err = got_object_get_type(&obj_type, repo, id);
856 if (err)
857 return err;
858 if (obj_type != GOT_OBJ_TYPE_COMMIT)
859 continue;
860 err = load_commit(NULL, idset, id, repo,
861 loose_obj_only, cancel_cb, cancel_arg);
862 if (err)
863 goto done;
864 if (progress_cb) {
865 err = progress_cb(progress_arg, 0L, nours,
866 v.nmeta, 0, 0);
867 if (err)
868 goto done;
872 for (i = 0; i < ntheirs; i++) {
873 struct got_object_id *id = theirs[i];
874 int *cached_type;
875 if (id == NULL)
876 continue;
877 cached_type = got_object_idset_get(idset, id);
878 if (cached_type == NULL) {
879 err = got_object_get_type(&obj_type, repo, id);
880 if (err)
881 goto done;
882 } else
883 obj_type = *cached_type;
884 if (obj_type != GOT_OBJ_TYPE_TAG)
885 continue;
886 err = load_tag(NULL, idset, id, repo,
887 loose_obj_only, cancel_cb, cancel_arg);
888 if (err)
889 goto done;
890 if (progress_cb) {
891 err = progress_cb(progress_arg, 0L, nours,
892 v.nmeta, 0, 0);
893 if (err)
894 goto done;
898 for (i = 0; i < nobj; i++) {
899 err = load_commit(&v, idset, ids[i], repo,
900 loose_obj_only, cancel_cb, cancel_arg);
901 if (err)
902 goto done;
903 if (progress_cb) {
904 err = progress_cb(progress_arg, 0L, nours,
905 v.nmeta, 0, 0);
906 if (err)
907 goto done;
911 for (i = 0; i < nours; i++) {
912 struct got_object_id *id = ours[i];
913 int *cached_type;
914 if (id == NULL)
915 continue;
916 cached_type = got_object_idset_get(idset, id);
917 if (cached_type == NULL) {
918 err = got_object_get_type(&obj_type, repo, id);
919 if (err)
920 goto done;
921 } else
922 obj_type = *cached_type;
923 if (obj_type != GOT_OBJ_TYPE_TAG)
924 continue;
925 err = load_tag(&v, idset, id, repo,
926 loose_obj_only, cancel_cb, cancel_arg);
927 if (err)
928 goto done;
929 if (progress_cb) {
930 err = progress_cb(progress_arg, 0L, nours,
931 v.nmeta, 0, 0);
932 if (err)
933 goto done;
937 done:
938 for (i = 0; i < nobj; i++) {
939 free(ids[i]);
941 free(ids);
942 got_object_idset_free(idset);
943 if (err == NULL) {
944 *meta = v.meta;
945 *nmeta = v.nmeta;
946 } else
947 free(v.meta);
949 return err;
952 const struct got_error *
953 hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx)
955 size_t n;
957 SHA1Update(ctx, buf, len);
958 n = fwrite(buf, 1, len, f);
959 if (n != len)
960 return got_ferror(f, GOT_ERR_IO);
961 return NULL;
/*
 * Store a 32-bit value into b[0..3] in big-endian byte order
 * (most significant byte first).
 */
static void
putbe32(char *b, uint32_t n)
{
	int shift, pos = 0;

	for (shift = 24; shift >= 0; shift -= 8)
		b[pos++] = n >> shift;
}
973 static int
974 write_order_cmp(const void *pa, const void *pb)
976 struct got_pack_meta *a, *b, *ahd, *bhd;
978 a = *(struct got_pack_meta **)pa;
979 b = *(struct got_pack_meta **)pb;
980 ahd = (a->head == NULL) ? a : a->head;
981 bhd = (b->head == NULL) ? b : b->head;
982 if (ahd->mtime != bhd->mtime)
983 return bhd->mtime - ahd->mtime;
984 if (ahd != bhd)
985 return (uintptr_t)bhd - (uintptr_t)ahd;
986 if (a->nchain != b->nchain)
987 return a->nchain - b->nchain;
988 return a->mtime - b->mtime;
991 static const struct got_error *
992 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
994 size_t i;
996 *hdrlen = 0;
998 hdr[0] = obj_type << 4;
999 hdr[0] |= len & 0xf;
1000 len >>= 4;
1001 for (i = 1; len != 0; i++){
1002 if (i >= bufsize)
1003 return got_error(GOT_ERR_NO_SPACE);
1004 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1005 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1006 len >>= GOT_DELTA_SIZE_SHIFT;
1009 *hdrlen = i;
1010 return NULL;
1013 static const struct got_error *
1014 encodedelta(struct got_pack_meta *m, struct got_raw_object *o,
1015 off_t base_size, FILE *f)
1017 unsigned char buf[16], *bp;
1018 int i, j;
1019 off_t n;
1020 size_t w;
1021 struct got_delta_instruction *d;
1023 /* base object size */
1024 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
1025 n = base_size >> GOT_DELTA_SIZE_SHIFT;
1026 for (i = 1; n > 0; i++) {
1027 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
1028 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
1029 n >>= GOT_DELTA_SIZE_SHIFT;
1031 w = fwrite(buf, 1, i, f);
1032 if (w != i)
1033 return got_ferror(f, GOT_ERR_IO);
1035 /* target object size */
1036 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
1037 n = o->size >> GOT_DELTA_SIZE_SHIFT;
1038 for (i = 1; n > 0; i++) {
1039 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
1040 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
1041 n >>= GOT_DELTA_SIZE_SHIFT;
1043 w = fwrite(buf, 1, i, f);
1044 if (w != i)
1045 return got_ferror(f, GOT_ERR_IO);
1047 for (j = 0; j < m->ndeltas; j++) {
1048 d = &m->deltas[j];
1049 if (d->copy) {
1050 n = d->offset;
1051 bp = &buf[1];
1052 buf[0] = GOT_DELTA_BASE_COPY;
1053 for (i = 0; i < 4; i++) {
1054 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
1055 buf[0] |= 1 << i;
1056 *bp++ = n & 0xff;
1057 n >>= 8;
1058 if (n == 0)
1059 break;
1062 n = d->len;
1063 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
1064 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
1065 for (i = 0; i < 3 && n > 0; i++) {
1066 buf[0] |= 1 << (i + 4);
1067 *bp++ = n & 0xff;
1068 n >>= 8;
1071 w = fwrite(buf, 1, bp - buf, f);
1072 if (w != bp - buf)
1073 return got_ferror(f, GOT_ERR_IO);
1074 } else {
1075 char content[128];
1076 size_t r;
1077 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1)
1078 return got_error_from_errno("fseeko");
1079 n = 0;
1080 while (n != d->len) {
1081 buf[0] = (d->len - n < 127) ? d->len - n : 127;
1082 w = fwrite(buf, 1, 1, f);
1083 if (w != 1)
1084 return got_ferror(f, GOT_ERR_IO);
1085 r = fread(content, 1, buf[0], o->f);
1086 if (r != buf[0])
1087 return got_ferror(o->f, GOT_ERR_IO);
1088 w = fwrite(content, 1, buf[0], f);
1089 if (w != buf[0])
1090 return got_ferror(f, GOT_ERR_IO);
1091 n += buf[0];
1096 return NULL;
1099 static int
1100 packoff(char *hdr, off_t off)
1102 int i, j;
1103 char rbuf[8];
1105 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1106 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1107 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1108 GOT_DELTA_SIZE_MORE;
1111 j = 0;
1112 while (i > 0)
1113 hdr[j++] = rbuf[--i];
1114 return j;
1117 static const struct got_error *
1118 genpack(uint8_t *pack_sha1, FILE *packfile,
1119 struct got_pack_meta **meta, int nmeta, int nours,
1120 int use_offset_deltas, struct got_repository *repo,
1121 got_pack_progress_cb progress_cb, void *progress_arg,
1122 got_cancel_cb cancel_cb, void *cancel_arg)
1124 const struct got_error *err = NULL;
1125 int i, nh;
1126 off_t nd;
1127 SHA1_CTX ctx;
1128 struct got_pack_meta *m;
1129 struct got_raw_object *raw = NULL;
1130 FILE *delta_file = NULL;
1131 char buf[32];
1132 size_t outlen, n;
1133 struct got_deflate_checksum csum;
1134 off_t packfile_size = 0;
1135 int outfd = -1;
1137 SHA1Init(&ctx);
1138 csum.output_sha1 = &ctx;
1139 csum.output_crc = NULL;
1141 err = hwrite(packfile, "PACK", 4, &ctx);
1142 if (err)
1143 return err;
1144 putbe32(buf, GOT_PACKFILE_VERSION);
1145 err = hwrite(packfile, buf, 4, &ctx);
1146 if (err)
1147 goto done;
1148 putbe32(buf, nmeta);
1149 err = hwrite(packfile, buf, 4, &ctx);
1150 if (err)
1151 goto done;
1152 qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp);
1153 for (i = 0; i < nmeta; i++) {
1154 if (progress_cb) {
1155 err = progress_cb(progress_arg, packfile_size, nours,
1156 nmeta, nmeta, i);
1157 if (err)
1158 goto done;
1160 m = meta[i];
1161 m->off = ftello(packfile);
1162 err = got_object_raw_open(&raw, &outfd, repo, &m->id, 8192);
1163 if (err)
1164 goto done;
1165 if (m->deltas == NULL) {
1166 err = packhdr(&nh, buf, sizeof(buf),
1167 m->obj_type, raw->size);
1168 if (err)
1169 goto done;
1170 err = hwrite(packfile, buf, nh, &ctx);
1171 if (err)
1172 goto done;
1173 packfile_size += nh;
1174 if (fseeko(raw->f, raw->hdrlen, SEEK_SET) == -1) {
1175 err = got_error_from_errno("fseeko");
1176 goto done;
1178 err = got_deflate_to_file(&outlen, raw->f, packfile,
1179 &csum);
1180 if (err)
1181 goto done;
1182 packfile_size += outlen;
1183 } else {
1184 if (delta_file == NULL) {
1185 delta_file = got_opentemp();
1186 if (delta_file == NULL) {
1187 err = got_error_from_errno(
1188 "got_opentemp");
1189 goto done;
1192 if (ftruncate(fileno(delta_file), 0L) == -1) {
1193 err = got_error_from_errno("ftruncate");
1194 goto done;
1196 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1197 err = got_error_from_errno("fseeko");
1198 goto done;
1200 err = encodedelta(m, raw, m->prev->size, delta_file);
1201 if (err)
1202 goto done;
1203 nd = ftello(delta_file);
1204 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1205 err = got_error_from_errno("fseeko");
1206 goto done;
1208 if (use_offset_deltas && m->prev->off != 0) {
1209 err = packhdr(&nh, buf, sizeof(buf),
1210 GOT_OBJ_TYPE_OFFSET_DELTA, nd);
1211 if (err)
1212 goto done;
1213 nh += packoff(buf + nh,
1214 m->off - m->prev->off);
1215 err = hwrite(packfile, buf, nh, &ctx);
1216 if (err)
1217 goto done;
1218 packfile_size += nh;
1219 } else {
1220 err = packhdr(&nh, buf, sizeof(buf),
1221 GOT_OBJ_TYPE_REF_DELTA, nd);
1222 err = hwrite(packfile, buf, nh, &ctx);
1223 if (err)
1224 goto done;
1225 packfile_size += nh;
1226 err = hwrite(packfile, m->prev->id.sha1,
1227 sizeof(m->prev->id.sha1), &ctx);
1228 packfile_size += sizeof(m->prev->id.sha1);
1229 if (err)
1230 goto done;
1232 err = got_deflate_to_file(&outlen, delta_file,
1233 packfile, &csum);
1234 if (err)
1235 goto done;
1236 packfile_size += outlen;
1238 got_object_raw_close(raw);
1239 raw = NULL;
1241 SHA1Final(pack_sha1, &ctx);
1242 n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
1243 if (n != SHA1_DIGEST_LENGTH)
1244 err = got_ferror(packfile, GOT_ERR_IO);
1245 packfile_size += SHA1_DIGEST_LENGTH;
1246 packfile_size += sizeof(struct got_packfile_hdr);
1247 err = progress_cb(progress_arg, packfile_size, nours,
1248 nmeta, nmeta, nmeta);
1249 if (err)
1250 goto done;
1251 done:
1252 if (delta_file && fclose(delta_file) == EOF && err == NULL)
1253 err = got_error_from_errno("fclose");
1254 if (raw)
1255 got_object_raw_close(raw);
1256 if (outfd != -1 && close(outfd) == -1 && err == NULL)
1257 err = got_error_from_errno("close");
1258 return err;
1261 const struct got_error *
1262 got_pack_create(uint8_t *packsha1, FILE *packfile,
1263 struct got_object_id **theirs, int ntheirs,
1264 struct got_object_id **ours, int nours,
1265 struct got_repository *repo, int loose_obj_only, int allow_empty,
1266 got_pack_progress_cb progress_cb, void *progress_arg,
1267 got_cancel_cb cancel_cb, void *cancel_arg)
1269 const struct got_error *err;
1270 struct got_pack_meta **meta;
1271 int nmeta;
1273 err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo,
1274 loose_obj_only, progress_cb, progress_arg, cancel_cb, cancel_arg);
1275 if (err)
1276 return err;
1278 if (nmeta == 0 && !allow_empty) {
1279 err = got_error(GOT_ERR_CANNOT_PACK);
1280 goto done;
1282 if (nmeta > 0) {
1283 err = pick_deltas(meta, nmeta, nours, repo,
1284 progress_cb, progress_arg, cancel_cb, cancel_arg);
1285 if (err)
1286 goto done;
1289 err = genpack(packsha1, packfile, meta, nmeta, nours, 1, repo,
1290 progress_cb, progress_arg, cancel_cb, cancel_arg);
1291 if (err)
1292 goto done;
1293 done:
1294 free_nmeta(meta, nmeta);
1295 return err;