Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/uio.h>
21 #include <sys/stat.h>
22 #include <sys/time.h>
23 #include <sys/mman.h>
25 #include <errno.h>
26 #include <stdint.h>
27 #include <imsg.h>
28 #include <inttypes.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <time.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <zlib.h>
37 #if defined(__FreeBSD__)
38 #include <unistd.h>
39 #endif
41 #include "got_error.h"
42 #include "got_cancel.h"
43 #include "got_object.h"
44 #include "got_path.h"
45 #include "got_reference.h"
46 #include "got_repository_admin.h"
47 #include "got_opentemp.h"
49 #include "got_lib_deltify.h"
50 #include "got_lib_delta.h"
51 #include "got_lib_object.h"
52 #include "got_lib_object_idset.h"
53 #include "got_lib_object_cache.h"
54 #include "got_lib_deflate.h"
55 #include "got_lib_pack.h"
56 #include "got_lib_pack_create.h"
57 #include "got_lib_privsep.h"
58 #include "got_lib_repository.h"
59 #include "got_lib_ratelimit.h"
60 #include "got_lib_inflate.h"
62 #include "murmurhash2.h"
64 #ifndef MIN
65 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
66 #endif
68 #ifndef MAX
69 #define MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
70 #endif
72 #ifndef nitems
73 #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
74 #endif
76 struct got_pack_meta {
77 struct got_object_id id;
78 uint32_t path_hash;
79 int obj_type;
80 off_t size;
81 time_t mtime;
83 /* The best delta we picked */
84 struct got_pack_meta *head;
85 struct got_pack_meta *prev;
86 unsigned char *delta_buf; /* if encoded in memory (compressed) */
87 off_t delta_offset; /* offset in delta cache file (compressed) */
88 off_t delta_len; /* encoded delta length */
89 off_t delta_compressed_len; /* encoded+compressed delta length */
90 int nchain;
92 off_t reused_delta_offset; /* offset of delta in reused pack file */
93 struct got_object_id *base_obj_id;
95 /* Only used for delta window */
96 struct got_delta_table *dtab;
98 /* Only used for writing offset deltas */
99 off_t off;
100 };
102 struct got_pack_metavec {
103 struct got_pack_meta **meta;
104 int nmeta;
105 int metasz;
106 };
108 static const struct got_error *
109 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
110 const char *path, int obj_type, time_t mtime, uint32_t seed)
112 struct got_pack_meta *m;
114 *new = NULL;
116 m = calloc(1, sizeof(*m));
117 if (m == NULL)
118 return got_error_from_errno("calloc");
120 memcpy(&m->id, id, sizeof(m->id));
122 m->path_hash = murmurhash2(path, strlen(path), seed);
123 m->obj_type = obj_type;
124 m->mtime = mtime;
125 *new = m;
126 return NULL;
129 static void
130 clear_meta(struct got_pack_meta *meta)
132 if (meta == NULL)
133 return;
134 meta->path_hash = 0;
135 free(meta->delta_buf);
136 meta->delta_buf = NULL;
137 free(meta->base_obj_id);
138 meta->base_obj_id = NULL;
139 meta->reused_delta_offset = 0;
142 static void
143 free_nmeta(struct got_pack_meta **meta, int nmeta)
145 int i;
147 for (i = 0; i < nmeta; i++)
148 clear_meta(meta[i]);
149 free(meta);
152 static int
153 delta_order_cmp(const void *pa, const void *pb)
155 struct got_pack_meta *a, *b;
157 a = *(struct got_pack_meta **)pa;
158 b = *(struct got_pack_meta **)pb;
160 if (a->obj_type != b->obj_type)
161 return a->obj_type - b->obj_type;
162 if (a->path_hash < b->path_hash)
163 return -1;
164 if (a->path_hash > b->path_hash)
165 return 1;
166 if (a->mtime < b->mtime)
167 return -1;
168 if (a->mtime > b->mtime)
169 return 1;
170 return got_object_id_cmp(&a->id, &b->id);
173 static off_t
174 delta_size(struct got_delta_instruction *deltas, int ndeltas)
176 int i;
177 off_t size = 32;
178 for (i = 0; i < ndeltas; i++) {
179 if (deltas[i].copy)
180 size += GOT_DELTA_SIZE_SHIFT;
181 else
182 size += deltas[i].len + 1;
184 return size;
187 static const struct got_error *
188 append(unsigned char **p, size_t *len, off_t *sz, void *seg, int nseg)
190 char *n;
192 if (*len + nseg >= *sz) {
193 while (*len + nseg >= *sz)
194 *sz += *sz / 2;
195 n = realloc(*p, *sz);
196 if (n == NULL)
197 return got_error_from_errno("realloc");
198 *p = n;
200 memcpy(*p + *len, seg, nseg);
201 *len += nseg;
202 return NULL;
205 static const struct got_error *
206 encode_delta_in_mem(struct got_pack_meta *m, struct got_raw_object *o,
207 struct got_delta_instruction *deltas, int ndeltas,
208 off_t delta_size, off_t base_size)
210 const struct got_error *err;
211 unsigned char buf[16], *bp;
212 int i, j;
213 size_t len = 0, compressed_len;
214 off_t bufsize = delta_size;
215 off_t n;
216 struct got_delta_instruction *d;
217 uint8_t *delta_buf;
219 delta_buf = malloc(bufsize);
220 if (delta_buf == NULL)
221 return got_error_from_errno("malloc");
223 /* base object size */
224 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
225 n = base_size >> GOT_DELTA_SIZE_SHIFT;
226 for (i = 1; n > 0; i++) {
227 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
228 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
229 n >>= GOT_DELTA_SIZE_SHIFT;
231 err = append(&delta_buf, &len, &bufsize, buf, i);
232 if (err)
233 goto done;
235 /* target object size */
236 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
237 n = o->size >> GOT_DELTA_SIZE_SHIFT;
238 for (i = 1; n > 0; i++) {
239 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
240 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
241 n >>= GOT_DELTA_SIZE_SHIFT;
243 err = append(&delta_buf, &len, &bufsize, buf, i);
244 if (err)
245 goto done;
247 for (j = 0; j < ndeltas; j++) {
248 d = &deltas[j];
249 if (d->copy) {
250 n = d->offset;
251 bp = &buf[1];
252 buf[0] = GOT_DELTA_BASE_COPY;
253 for (i = 0; i < 4; i++) {
254 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
255 buf[0] |= 1 << i;
256 *bp++ = n & 0xff;
257 n >>= 8;
258 if (n == 0)
259 break;
262 n = d->len;
263 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
264 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
265 for (i = 0; i < 3 && n > 0; i++) {
266 buf[0] |= 1 << (i + 4);
267 *bp++ = n & 0xff;
268 n >>= 8;
271 err = append(&delta_buf, &len, &bufsize,
272 buf, bp - buf);
273 if (err)
274 goto done;
275 } else if (o->f == NULL) {
276 n = 0;
277 while (n != d->len) {
278 buf[0] = (d->len - n < 127) ? d->len - n : 127;
279 err = append(&delta_buf, &len, &bufsize,
280 buf, 1);
281 if (err)
282 goto done;
283 err = append(&delta_buf, &len, &bufsize,
284 o->data + o->hdrlen + d->offset + n,
285 buf[0]);
286 if (err)
287 goto done;
288 n += buf[0];
290 } else {
291 char content[128];
292 size_t r;
293 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) {
294 err = got_error_from_errno("fseeko");
295 goto done;
297 n = 0;
298 while (n != d->len) {
299 buf[0] = (d->len - n < 127) ? d->len - n : 127;
300 err = append(&delta_buf, &len, &bufsize,
301 buf, 1);
302 if (err)
303 goto done;
304 r = fread(content, 1, buf[0], o->f);
305 if (r != buf[0]) {
306 err = got_ferror(o->f, GOT_ERR_IO);
307 goto done;
309 err = append(&delta_buf, &len, &bufsize,
310 content, buf[0]);
311 if (err)
312 goto done;
313 n += buf[0];
318 err = got_deflate_to_mem_mmap(&m->delta_buf, &compressed_len,
319 NULL, NULL, delta_buf, 0, len);
320 if (err)
321 goto done;
323 m->delta_len = len;
324 m->delta_compressed_len = compressed_len;
325 done:
326 free(delta_buf);
327 return err;
330 static const struct got_error *
331 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
332 struct got_delta_instruction *deltas, int ndeltas,
333 off_t base_size, FILE *f)
335 const struct got_error *err;
336 unsigned char buf[16], *bp;
337 int i, j;
338 off_t n;
339 struct got_deflate_buf zb;
340 struct got_delta_instruction *d;
341 off_t delta_len = 0, compressed_len = 0;
343 err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
344 if (err)
345 return err;
347 /* base object size */
348 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
349 n = base_size >> GOT_DELTA_SIZE_SHIFT;
350 for (i = 1; n > 0; i++) {
351 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
352 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
353 n >>= GOT_DELTA_SIZE_SHIFT;
356 err = got_deflate_append_to_file_mmap(&zb, &compressed_len,
357 buf, 0, i, f, NULL);
358 if (err)
359 goto done;
360 delta_len += i;
362 /* target object size */
363 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
364 n = o->size >> GOT_DELTA_SIZE_SHIFT;
365 for (i = 1; n > 0; i++) {
366 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
367 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
368 n >>= GOT_DELTA_SIZE_SHIFT;
371 err = got_deflate_append_to_file_mmap(&zb, &compressed_len,
372 buf, 0, i, f, NULL);
373 if (err)
374 goto done;
375 delta_len += i;
377 for (j = 0; j < ndeltas; j++) {
378 d = &deltas[j];
379 if (d->copy) {
380 n = d->offset;
381 bp = &buf[1];
382 buf[0] = GOT_DELTA_BASE_COPY;
383 for (i = 0; i < 4; i++) {
384 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
385 buf[0] |= 1 << i;
386 *bp++ = n & 0xff;
387 n >>= 8;
388 if (n == 0)
389 break;
391 n = d->len;
392 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
393 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
394 for (i = 0; i < 3 && n > 0; i++) {
395 buf[0] |= 1 << (i + 4);
396 *bp++ = n & 0xff;
397 n >>= 8;
400 err = got_deflate_append_to_file_mmap(&zb,
401 &compressed_len, buf, 0, bp - buf, f, NULL);
402 if (err)
403 goto done;
404 delta_len += (bp - buf);
405 } else if (o->f == NULL) {
406 n = 0;
407 while (n != d->len) {
408 buf[0] = (d->len - n < 127) ? d->len - n : 127;
409 err = got_deflate_append_to_file_mmap(&zb,
410 &compressed_len, buf, 0, 1, f, NULL);
411 if (err)
412 goto done;
413 delta_len++;
414 err = got_deflate_append_to_file_mmap(&zb,
415 &compressed_len,
416 o->data + o->hdrlen + d->offset + n, 0,
417 buf[0], f, NULL);
418 if (err)
419 goto done;
420 delta_len += buf[0];
421 n += buf[0];
423 } else {
424 char content[128];
425 size_t r;
426 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) {
427 err = got_error_from_errno("fseeko");
428 goto done;
430 n = 0;
431 while (n != d->len) {
432 buf[0] = (d->len - n < 127) ? d->len - n : 127;
433 err = got_deflate_append_to_file_mmap(&zb,
434 &compressed_len, buf, 0, 1, f, NULL);
435 if (err)
436 goto done;
437 delta_len++;
438 r = fread(content, 1, buf[0], o->f);
439 if (r != buf[0]) {
440 err = got_ferror(o->f, GOT_ERR_IO);
441 goto done;
443 err = got_deflate_append_to_file_mmap(&zb,
444 &compressed_len, content, 0, buf[0], f,
445 NULL);
446 if (err)
447 goto done;
448 delta_len += buf[0];
449 n += buf[0];
454 err = got_deflate_flush(&zb, f, NULL, &compressed_len);
455 if (err)
456 goto done;
458 /* sanity check */
459 if (compressed_len != ftello(f) - m->delta_offset) {
460 err = got_error(GOT_ERR_COMPRESSION);
461 goto done;
464 m->delta_len = delta_len;
465 m->delta_compressed_len = compressed_len;
466 done:
467 got_deflate_end(&zb);
468 return err;
471 static const struct got_error *
472 report_progress(got_pack_progress_cb progress_cb, void *progress_arg,
473 struct got_ratelimit *rl, int ncolored, int nfound, int ntrees,
474 off_t packfile_size, int ncommits, int nobj_total, int obj_deltify,
475 int nobj_written)
477 const struct got_error *err;
478 int elapsed;
480 if (progress_cb == NULL)
481 return NULL;
483 err = got_ratelimit_check(&elapsed, rl);
484 if (err || !elapsed)
485 return err;
487 return progress_cb(progress_arg, ncolored, nfound, ntrees,
488 packfile_size, ncommits, nobj_total, obj_deltify, nobj_written);
491 static const struct got_error *
492 add_meta(struct got_pack_meta *m, struct got_pack_metavec *v)
494 if (v->nmeta == v->metasz){
495 size_t newsize = 2 * v->metasz;
496 struct got_pack_meta **new;
497 new = reallocarray(v->meta, newsize, sizeof(*new));
498 if (new == NULL)
499 return got_error_from_errno("reallocarray");
500 v->meta = new;
501 v->metasz = newsize;
504 v->meta[v->nmeta++] = m;
505 return NULL;
508 static const struct got_error *
509 find_pack_for_reuse(struct got_packidx **best_packidx,
510 struct got_repository *repo)
512 const struct got_error *err = NULL;
513 struct got_pathlist_entry *pe;
514 const char *best_packidx_path = NULL;
515 int nobj_max = 0;
517 *best_packidx = NULL;
519 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
520 const char *path_packidx = pe->path;
521 struct got_packidx *packidx;
522 int nobj;
524 err = got_repo_get_packidx(&packidx, path_packidx, repo);
525 if (err)
526 break;
528 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
529 if (nobj > nobj_max) {
530 best_packidx_path = path_packidx;
531 nobj_max = nobj;
535 if (best_packidx_path) {
536 err = got_repo_get_packidx(best_packidx, best_packidx_path,
537 repo);
540 return err;
543 struct send_id_arg {
544 struct imsgbuf *ibuf;
545 struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS];
546 size_t nids;
547 };
549 static const struct got_error *
550 send_id(struct got_object_id *id, void *data, void *arg)
552 const struct got_error *err = NULL;
553 struct send_id_arg *a = arg;
555 a->ids[a->nids++] = id;
557 if (a->nids >= GOT_IMSG_OBJ_ID_LIST_MAX_NIDS) {
558 err = got_privsep_send_object_idlist(a->ibuf, a->ids, a->nids);
559 if (err)
560 return err;
561 a->nids = 0;
564 return NULL;
567 static const struct got_error *
568 send_idset(struct imsgbuf *ibuf, struct got_object_idset *idset)
570 const struct got_error *err;
571 struct send_id_arg sia;
573 memset(&sia, 0, sizeof(sia));
574 sia.ibuf = ibuf;
575 err = got_object_idset_for_each(idset, send_id, &sia);
576 if (err)
577 return err;
579 if (sia.nids > 0) {
580 err = got_privsep_send_object_idlist(ibuf, sia.ids, sia.nids);
581 if (err)
582 return err;
585 return got_privsep_send_object_idlist_done(ibuf);
589 static const struct got_error *
590 recv_reused_delta(struct got_imsg_reused_delta *delta,
591 struct got_object_idset *idset, struct got_pack_metavec *v)
593 struct got_pack_meta *m, *base;
595 if (delta->delta_offset + delta->delta_size < delta->delta_offset ||
596 delta->delta_offset +
597 delta->delta_compressed_size < delta->delta_offset)
598 return got_error(GOT_ERR_BAD_PACKFILE);
600 m = got_object_idset_get(idset, &delta->id);
601 if (m == NULL)
602 return got_error(GOT_ERR_NO_OBJ);
604 base = got_object_idset_get(idset, &delta->base_id);
605 if (base == NULL)
606 return got_error(GOT_ERR_NO_OBJ);
608 m->delta_len = delta->delta_size;
609 m->delta_compressed_len = delta->delta_compressed_size;
610 m->delta_offset = delta->delta_out_offset;
611 m->prev = base;
612 m->size = delta->result_size;
613 m->reused_delta_offset = delta->delta_offset;
614 m->base_obj_id = got_object_id_dup(&delta->base_id);
615 if (m->base_obj_id == NULL)
616 return got_error_from_errno("got_object_id_dup");
618 return add_meta(m, v);
621 static const struct got_error *
622 cache_pack_for_packidx(struct got_pack **pack, struct got_packidx *packidx,
623 struct got_repository *repo)
625 const struct got_error *err;
626 char *path_packfile = NULL;
628 err = got_packidx_get_packfile_path(&path_packfile,
629 packidx->path_packidx);
630 if (err)
631 return err;
633 *pack = got_repo_get_cached_pack(repo, path_packfile);
634 if (*pack == NULL) {
635 err = got_repo_cache_pack(pack, repo, path_packfile, packidx);
636 if (err)
637 goto done;
639 if ((*pack)->privsep_child == NULL) {
640 err = got_pack_start_privsep_child(*pack, packidx);
641 if (err)
642 goto done;
644 done:
645 free(path_packfile);
646 return err;
649 static const struct got_error *
650 prepare_delta_reuse(struct got_pack *pack, struct got_packidx *packidx,
651 int delta_outfd, struct got_repository *repo)
653 const struct got_error *err = NULL;
655 if (!pack->child_has_delta_outfd) {
656 int outfd_child;
657 outfd_child = dup(delta_outfd);
658 if (outfd_child == -1) {
659 err = got_error_from_errno("dup");
660 goto done;
662 err = got_privsep_send_raw_delta_outfd(
663 pack->privsep_child->ibuf, outfd_child);
664 if (err)
665 goto done;
666 pack->child_has_delta_outfd = 1;
669 err = got_privsep_send_delta_reuse_req(pack->privsep_child->ibuf);
670 done:
671 return err;
675 static const struct got_error *
676 search_deltas(struct got_pack_metavec *v, struct got_object_idset *idset,
677 int delta_cache_fd, int ncolored, int nfound, int ntrees, int ncommits,
678 struct got_repository *repo,
679 got_pack_progress_cb progress_cb, void *progress_arg,
680 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
682 const struct got_error *err = NULL;
683 struct got_packidx *packidx;
684 struct got_pack *pack;
685 struct got_imsg_reused_delta deltas[GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS];
686 size_t ndeltas, i;
688 err = find_pack_for_reuse(&packidx, repo);
689 if (err)
690 return err;
692 if (packidx == NULL)
693 return NULL;
695 err = cache_pack_for_packidx(&pack, packidx, repo);
696 if (err)
697 return err;
699 err = prepare_delta_reuse(pack, packidx, delta_cache_fd, repo);
700 if (err)
701 return err;
703 err = send_idset(pack->privsep_child->ibuf, idset);
704 if (err)
705 return err;
707 for (;;) {
708 int done = 0;
710 if (cancel_cb) {
711 err = (*cancel_cb)(cancel_arg);
712 if (err)
713 break;
716 err = got_privsep_recv_reused_deltas(&done, deltas, &ndeltas,
717 pack->privsep_child->ibuf);
718 if (err || done)
719 break;
721 for (i = 0; i < ndeltas; i++) {
722 struct got_imsg_reused_delta *delta = &deltas[i];
723 err = recv_reused_delta(delta, idset, v);
724 if (err)
725 goto done;
728 err = report_progress(progress_cb, progress_arg, rl,
729 ncolored, nfound, ntrees, 0L, ncommits,
730 got_object_idset_num_elements(idset), v->nmeta, 0);
731 if (err)
732 break;
734 done:
735 return err;
738 static const struct got_error *
739 pick_deltas(struct got_pack_meta **meta, int nmeta, int ncolored,
740 int nfound, int ntrees, int ncommits, int nreused, FILE *delta_cache,
741 struct got_repository *repo,
742 got_pack_progress_cb progress_cb, void *progress_arg,
743 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
745 const struct got_error *err = NULL;
746 struct got_pack_meta *m = NULL, *base = NULL;
747 struct got_raw_object *raw = NULL, *base_raw = NULL;
748 struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
749 int i, j, ndeltas, best_ndeltas;
750 off_t size, best_size;
751 const int max_base_candidates = 3;
752 size_t delta_memsize = 0;
753 const size_t max_delta_memsize = 4 * GOT_DELTA_RESULT_SIZE_CACHED_MAX;
754 int outfd = -1;
755 uint32_t delta_seed;
757 delta_seed = arc4random();
759 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
760 for (i = 0; i < nmeta; i++) {
761 if (cancel_cb) {
762 err = (*cancel_cb)(cancel_arg);
763 if (err)
764 break;
766 err = report_progress(progress_cb, progress_arg, rl,
767 ncolored, nfound, ntrees, 0L, ncommits, nreused + nmeta,
768 nreused + i, 0);
769 if (err)
770 goto done;
771 m = meta[i];
773 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
774 m->obj_type == GOT_OBJ_TYPE_TAG)
775 continue;
777 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
778 if (err)
779 goto done;
780 m->size = raw->size;
782 if (raw->f == NULL) {
783 err = got_deltify_init_mem(&m->dtab, raw->data,
784 raw->hdrlen, raw->size + raw->hdrlen, delta_seed);
785 } else {
786 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
787 raw->size + raw->hdrlen, delta_seed);
789 if (err)
790 goto done;
792 if (i > max_base_candidates) {
793 struct got_pack_meta *n = NULL;
794 n = meta[i - (max_base_candidates + 1)];
795 got_deltify_free(n->dtab);
796 n->dtab = NULL;
799 best_size = raw->size;
800 best_ndeltas = 0;
801 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
802 if (cancel_cb) {
803 err = (*cancel_cb)(cancel_arg);
804 if (err)
805 goto done;
807 base = meta[j];
808 /* long chains make unpacking slow, avoid such bases */
809 if (base->nchain >= 128 ||
810 base->obj_type != m->obj_type)
811 continue;
813 err = got_object_raw_open(&base_raw, &outfd, repo,
814 &base->id);
815 if (err)
816 goto done;
818 if (raw->f == NULL && base_raw->f == NULL) {
819 err = got_deltify_mem_mem(&deltas, &ndeltas,
820 raw->data, raw->hdrlen,
821 raw->size + raw->hdrlen, delta_seed,
822 base->dtab, base_raw->data,
823 base_raw->hdrlen,
824 base_raw->size + base_raw->hdrlen);
825 } else if (raw->f == NULL) {
826 err = got_deltify_mem_file(&deltas, &ndeltas,
827 raw->data, raw->hdrlen,
828 raw->size + raw->hdrlen, delta_seed,
829 base->dtab, base_raw->f,
830 base_raw->hdrlen,
831 base_raw->size + base_raw->hdrlen);
832 } else if (base_raw->f == NULL) {
833 err = got_deltify_file_mem(&deltas, &ndeltas,
834 raw->f, raw->hdrlen,
835 raw->size + raw->hdrlen, delta_seed,
836 base->dtab, base_raw->data,
837 base_raw->hdrlen,
838 base_raw->size + base_raw->hdrlen);
839 } else {
840 err = got_deltify(&deltas, &ndeltas,
841 raw->f, raw->hdrlen,
842 raw->size + raw->hdrlen, delta_seed,
843 base->dtab, base_raw->f, base_raw->hdrlen,
844 base_raw->size + base_raw->hdrlen);
846 got_object_raw_close(base_raw);
847 base_raw = NULL;
848 if (err)
849 goto done;
851 size = delta_size(deltas, ndeltas);
852 if (size + 32 < best_size){
853 /*
854 * if we already picked a best delta,
855 * replace it.
856 */
857 best_size = size;
858 free(best_deltas);
859 best_deltas = deltas;
860 best_ndeltas = ndeltas;
861 deltas = NULL;
862 m->nchain = base->nchain + 1;
863 m->prev = base;
864 m->head = base->head;
865 if (m->head == NULL)
866 m->head = base;
867 } else {
868 free(deltas);
869 deltas = NULL;
870 ndeltas = 0;
874 if (best_ndeltas > 0) {
875 if (best_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX &&
876 delta_memsize + best_size <= max_delta_memsize) {
877 delta_memsize += best_size;
878 err = encode_delta_in_mem(m, raw, best_deltas,
879 best_ndeltas, best_size, m->prev->size);
880 } else {
881 m->delta_offset = ftello(delta_cache);
882 err = encode_delta(m, raw, best_deltas,
883 best_ndeltas, m->prev->size, delta_cache);
885 free(best_deltas);
886 best_deltas = NULL;
887 best_ndeltas = 0;
888 if (err)
889 goto done;
892 got_object_raw_close(raw);
893 raw = NULL;
895 done:
896 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
897 got_deltify_free(meta[i]->dtab);
898 meta[i]->dtab = NULL;
900 if (raw)
901 got_object_raw_close(raw);
902 if (base_raw)
903 got_object_raw_close(base_raw);
904 if (outfd != -1 && close(outfd) == -1 && err == NULL)
905 err = got_error_from_errno("close");
906 free(deltas);
907 free(best_deltas);
908 return err;
911 static const struct got_error *
912 search_packidx(int *found, struct got_object_id *id,
913 struct got_repository *repo)
915 const struct got_error *err = NULL;
916 struct got_packidx *packidx = NULL;
917 int idx;
919 *found = 0;
921 err = got_repo_search_packidx(&packidx, &idx, repo, id);
922 if (err == NULL)
923 *found = 1; /* object is already packed */
924 else if (err->code == GOT_ERR_NO_OBJ)
925 err = NULL;
926 return err;
929 static const struct got_error *
930 add_object(int want_meta, struct got_object_idset *idset,
931 struct got_object_id *id, const char *path, int obj_type,
932 time_t mtime, uint32_t seed, int loose_obj_only,
933 struct got_repository *repo, int *ncolored, int *nfound, int *ntrees,
934 got_pack_progress_cb progress_cb, void *progress_arg,
935 struct got_ratelimit *rl)
937 const struct got_error *err;
938 struct got_pack_meta *m = NULL;
940 if (loose_obj_only) {
941 int is_packed;
942 err = search_packidx(&is_packed, id, repo);
943 if (err)
944 return err;
945 if (is_packed && want_meta)
946 return NULL;
949 if (want_meta) {
950 err = alloc_meta(&m, id, path, obj_type, mtime, seed);
951 if (err)
952 return err;
954 (*nfound)++;
955 err = report_progress(progress_cb, progress_arg, rl,
956 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
957 if (err) {
958 clear_meta(m);
959 free(m);
960 return err;
964 err = got_object_idset_add(idset, id, m);
965 if (err) {
966 clear_meta(m);
967 free(m);
969 return err;
972 static const struct got_error *
973 load_tree_entries(struct got_object_id_queue *ids, int want_meta,
974 struct got_object_idset *idset, struct got_object_idset *idset_exclude,
975 struct got_tree_object *tree,
976 const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo,
977 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
978 got_pack_progress_cb progress_cb, void *progress_arg,
979 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
981 const struct got_error *err;
982 char *p = NULL;
983 int i;
985 (*ntrees)++;
986 err = report_progress(progress_cb, progress_arg, rl,
987 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
988 if (err)
989 return err;
991 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
992 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
993 struct got_object_id *id = got_tree_entry_get_id(e);
994 mode_t mode = got_tree_entry_get_mode(e);
996 if (cancel_cb) {
997 err = (*cancel_cb)(cancel_arg);
998 if (err)
999 break;
1002 if (got_object_tree_entry_is_submodule(e) ||
1003 got_object_idset_contains(idset, id) ||
1004 got_object_idset_contains(idset_exclude, id))
1005 continue;
1008 * If got-read-pack is crawling trees for us then
1009 * we are only here to collect blob IDs.
1011 if (ids == NULL && S_ISDIR(mode))
1012 continue;
1014 if (asprintf(&p, "%s%s%s", dpath,
1015 got_path_is_root_dir(dpath) ? "" : "/",
1016 got_tree_entry_get_name(e)) == -1) {
1017 err = got_error_from_errno("asprintf");
1018 break;
1021 if (S_ISDIR(mode)) {
1022 struct got_object_qid *qid;
1023 err = got_object_qid_alloc(&qid, id);
1024 if (err)
1025 break;
1026 qid->data = p;
1027 p = NULL;
1028 STAILQ_INSERT_TAIL(ids, qid, entry);
1029 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
1030 err = add_object(want_meta,
1031 want_meta ? idset : idset_exclude, id, p,
1032 GOT_OBJ_TYPE_BLOB, mtime, seed, loose_obj_only,
1033 repo, ncolored, nfound, ntrees,
1034 progress_cb, progress_arg, rl);
1035 if (err)
1036 break;
1037 free(p);
1038 p = NULL;
1039 } else {
1040 free(p);
1041 p = NULL;
1045 free(p);
1046 return err;
1049 static const struct got_error *
1050 load_tree(int want_meta, struct got_object_idset *idset,
1051 struct got_object_idset *idset_exclude,
1052 struct got_object_id *tree_id, const char *dpath, time_t mtime,
1053 uint32_t seed, struct got_repository *repo, int loose_obj_only,
1054 int *ncolored, int *nfound, int *ntrees,
1055 got_pack_progress_cb progress_cb, void *progress_arg,
1056 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1058 const struct got_error *err = NULL;
1059 struct got_object_id_queue tree_ids;
1060 struct got_object_qid *qid;
1061 struct got_tree_object *tree = NULL;
1063 if (got_object_idset_contains(idset, tree_id) ||
1064 got_object_idset_contains(idset_exclude, tree_id))
1065 return NULL;
1067 err = got_object_qid_alloc(&qid, tree_id);
1068 if (err)
1069 return err;
1070 qid->data = strdup(dpath);
1071 if (qid->data == NULL) {
1072 err = got_error_from_errno("strdup");
1073 got_object_qid_free(qid);
1074 return err;
1077 STAILQ_INIT(&tree_ids);
1078 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
1080 while (!STAILQ_EMPTY(&tree_ids)) {
1081 const char *path;
1082 if (cancel_cb) {
1083 err = (*cancel_cb)(cancel_arg);
1084 if (err)
1085 break;
1088 qid = STAILQ_FIRST(&tree_ids);
1089 STAILQ_REMOVE_HEAD(&tree_ids, entry);
1090 path = qid->data;
1092 if (got_object_idset_contains(idset, &qid->id) ||
1093 got_object_idset_contains(idset_exclude, &qid->id)) {
1094 free(qid->data);
1095 got_object_qid_free(qid);
1096 continue;
1099 err = add_object(want_meta, want_meta ? idset : idset_exclude,
1100 &qid->id, path, GOT_OBJ_TYPE_TREE,
1101 mtime, seed, loose_obj_only, repo,
1102 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1103 if (err) {
1104 free(qid->data);
1105 got_object_qid_free(qid);
1106 break;
1109 err = got_object_open_as_tree(&tree, repo, &qid->id);
1110 if (err) {
1111 free(qid->data);
1112 got_object_qid_free(qid);
1113 break;
1116 err = load_tree_entries(&tree_ids, want_meta, idset,
1117 idset_exclude, tree, path, mtime, seed, repo,
1118 loose_obj_only, ncolored, nfound, ntrees,
1119 progress_cb, progress_arg, rl,
1120 cancel_cb, cancel_arg);
1121 free(qid->data);
1122 got_object_qid_free(qid);
1123 if (err)
1124 break;
1126 got_object_tree_close(tree);
1127 tree = NULL;
1130 STAILQ_FOREACH(qid, &tree_ids, entry)
1131 free(qid->data);
1132 got_object_id_queue_free(&tree_ids);
1133 if (tree)
1134 got_object_tree_close(tree);
1135 return err;
1138 static const struct got_error *
1139 load_commit(int want_meta, struct got_object_idset *idset,
1140 struct got_object_idset *idset_exclude,
1141 struct got_object_id *id, struct got_repository *repo, uint32_t seed,
1142 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
1143 got_pack_progress_cb progress_cb, void *progress_arg,
1144 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1146 const struct got_error *err;
1147 struct got_commit_object *commit;
1149 if (got_object_idset_contains(idset, id) ||
1150 got_object_idset_contains(idset_exclude, id))
1151 return NULL;
1153 if (loose_obj_only) {
1154 int is_packed;
1155 err = search_packidx(&is_packed, id, repo);
1156 if (err)
1157 return err;
1158 if (is_packed && want_meta)
1159 return NULL;
1162 err = got_object_open_as_commit(&commit, repo, id);
1163 if (err)
1164 return err;
1166 err = add_object(want_meta, want_meta ? idset : idset_exclude,
1167 id, "", GOT_OBJ_TYPE_COMMIT,
1168 got_object_commit_get_committer_time(commit), seed,
1169 loose_obj_only, repo,
1170 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1171 if (err)
1172 goto done;
1174 err = load_tree(want_meta, idset, idset_exclude,
1175 got_object_commit_get_tree_id(commit),
1176 "", got_object_commit_get_committer_time(commit), seed,
1177 repo, loose_obj_only, ncolored, nfound, ntrees,
1178 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1179 done:
1180 got_object_commit_close(commit);
1181 return err;
1184 static const struct got_error *
1185 load_tag(int want_meta, struct got_object_idset *idset,
1186 struct got_object_idset *idset_exclude,
1187 struct got_object_id *id, struct got_repository *repo, uint32_t seed,
1188 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
1189 got_pack_progress_cb progress_cb, void *progress_arg,
1190 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1192 const struct got_error *err;
1193 struct got_tag_object *tag = NULL;
1195 if (got_object_idset_contains(idset, id) ||
1196 got_object_idset_contains(idset_exclude, id))
1197 return NULL;
1199 if (loose_obj_only) {
1200 int is_packed;
1201 err = search_packidx(&is_packed, id, repo);
1202 if (err)
1203 return err;
1204 if (is_packed && want_meta)
1205 return NULL;
1208 err = got_object_open_as_tag(&tag, repo, id);
1209 if (err)
1210 return err;
1212 err = add_object(want_meta, want_meta ? idset : idset_exclude,
1213 id, "", GOT_OBJ_TYPE_TAG,
1214 got_object_tag_get_tagger_time(tag), seed, loose_obj_only, repo,
1215 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1216 if (err)
1217 goto done;
1219 switch (got_object_tag_get_object_type(tag)) {
1220 case GOT_OBJ_TYPE_COMMIT:
1221 err = load_commit(want_meta, idset, idset_exclude,
1222 got_object_tag_get_object_id(tag), repo, seed,
1223 loose_obj_only, ncolored, nfound, ntrees,
1224 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1225 break;
1226 case GOT_OBJ_TYPE_TREE:
1227 err = load_tree(want_meta, idset, idset_exclude,
1228 got_object_tag_get_object_id(tag), "",
1229 got_object_tag_get_tagger_time(tag), seed, repo,
1230 loose_obj_only, ncolored, nfound, ntrees,
1231 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1232 break;
1233 default:
1234 break;
1237 done:
1238 got_object_tag_close(tag);
1239 return err;
1242 enum findtwixt_color {
1243 COLOR_KEEP = 0,
1244 COLOR_DROP,
1245 COLOR_SKIP,
1246 COLOR_MAX,
1249 static const struct got_error *
1250 paint_commit(struct got_object_qid *qid, intptr_t color)
1252 if (color < 0 || color >= COLOR_MAX)
1253 return got_error(GOT_ERR_RANGE);
1255 qid->data = (void *)color;
1256 return NULL;
1259 static const struct got_error *
1260 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
1261 intptr_t color, struct got_repository *repo)
1263 const struct got_error *err;
1264 struct got_object_qid *qid;
1266 err = got_object_qid_alloc(&qid, id);
1267 if (err)
1268 return err;
1270 STAILQ_INSERT_TAIL(ids, qid, entry);
1271 return paint_commit(qid, color);
1274 struct append_id_arg {
1275 struct got_object_id **array;
1276 int idx;
1277 struct got_object_idset *drop;
1278 struct got_object_idset *skip;
1281 static const struct got_error *
1282 append_id(struct got_object_id *id, void *data, void *arg)
1284 struct append_id_arg *a = arg;
1286 if (got_object_idset_contains(a->skip, id) ||
1287 got_object_idset_contains(a->drop, id))
1288 return NULL;
1290 a->array[++a->idx] = got_object_id_dup(id);
1291 if (a->array[a->idx] == NULL)
1292 return got_error_from_errno("got_object_id_dup");
1294 return NULL;
1297 static const struct got_error *
1298 queue_commit_or_tag_id(struct got_object_id *id, intptr_t color,
1299 struct got_object_id_queue *ids, struct got_repository *repo)
1301 const struct got_error *err;
1302 struct got_tag_object *tag = NULL;
1303 int obj_type;
1305 err = got_object_get_type(&obj_type, repo, id);
1306 if (err)
1307 return err;
1309 if (obj_type == GOT_OBJ_TYPE_TAG) {
1310 err = got_object_open_as_tag(&tag, repo, id);
1311 if (err)
1312 return err;
1313 obj_type = got_object_tag_get_object_type(tag);
1314 id = got_object_tag_get_object_id(tag);
1317 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1318 err = queue_commit_id(ids, id, color, repo);
1319 if (err)
1320 goto done;
1322 done:
1323 if (tag)
1324 got_object_tag_close(tag);
1325 return err;
1328 struct recv_painted_commit_arg {
1329 int *ncolored;
1330 int *nqueued;
1331 int *nskip;
1332 struct got_object_id_queue *ids;
1333 struct got_object_idset *keep;
1334 struct got_object_idset *drop;
1335 struct got_object_idset *skip;
1336 got_pack_progress_cb progress_cb;
1337 void *progress_arg;
1338 struct got_ratelimit *rl;
1339 got_cancel_cb cancel_cb;
1340 void *cancel_arg;
1343 static const struct got_error *
1344 recv_painted_commit(void *arg, struct got_object_id *id, intptr_t color)
1346 const struct got_error *err = NULL;
1347 struct recv_painted_commit_arg *a = arg;
1348 struct got_object_qid *qid, *tmp;
1350 if (a->cancel_cb) {
1351 err = a->cancel_cb(a->cancel_arg);
1352 if (err)
1353 return err;
1356 switch (color) {
1357 case COLOR_KEEP:
1358 err = got_object_idset_add(a->keep, id, NULL);
1359 if (err)
1360 return err;
1361 (*a->ncolored)++;
1362 break;
1363 case COLOR_DROP:
1364 err = got_object_idset_add(a->drop, id, NULL);
1365 if (err)
1366 return err;
1367 (*a->ncolored)++;
1368 break;
1369 case COLOR_SKIP:
1370 err = got_object_idset_add(a->skip, id, NULL);
1371 if (err)
1372 return err;
1373 break;
1374 default:
1375 /* should not happen */
1376 return got_error_fmt(GOT_ERR_NOT_IMPL,
1377 "%s invalid commit color %"PRIdPTR, __func__, color);
1380 STAILQ_FOREACH_SAFE(qid, a->ids, entry, tmp) {
1381 if (got_object_id_cmp(&qid->id, id) != 0)
1382 continue;
1383 STAILQ_REMOVE(a->ids, qid, got_object_qid, entry);
1384 color = (intptr_t)qid->data;
1385 got_object_qid_free(qid);
1386 (*a->nqueued)--;
1387 if (color == COLOR_SKIP)
1388 (*a->nskip)--;
1389 break;
1392 return report_progress(a->progress_cb, a->progress_arg, a->rl,
1393 *a->ncolored, 0, 0, 0L, 0, 0, 0, 0);
1396 static const struct got_error *
1397 paint_packed_commits(struct got_pack *pack, struct got_object_id *id,
1398 int idx, intptr_t color, int *ncolored, int *nqueued, int *nskip,
1399 struct got_object_id_queue *ids,
1400 struct got_object_idset *keep, struct got_object_idset *drop,
1401 struct got_object_idset *skip, struct got_repository *repo,
1402 got_pack_progress_cb progress_cb, void *progress_arg,
1403 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1405 const struct got_error *err = NULL;
1406 struct got_object_id_queue next_ids;
1407 struct got_object_qid *qid, *tmp;
1408 struct recv_painted_commit_arg arg;
1410 STAILQ_INIT(&next_ids);
1412 err = got_privsep_send_painting_request(pack->privsep_child->ibuf,
1413 idx, id, color);
1414 if (err)
1415 return err;
1417 arg.ncolored = ncolored;
1418 arg.nqueued = nqueued;
1419 arg.nskip = nskip;
1420 arg.ids = ids;
1421 arg.keep = keep;
1422 arg.drop = drop;
1423 arg.skip = skip;
1424 arg.progress_cb = progress_cb;
1425 arg.progress_arg = progress_arg;
1426 arg.rl = rl;
1427 arg.cancel_cb = cancel_cb;
1428 arg.cancel_arg = cancel_arg;
1429 err = got_privsep_recv_painted_commits(&next_ids,
1430 recv_painted_commit, &arg, pack->privsep_child->ibuf);
1431 if (err)
1432 return err;
1434 STAILQ_FOREACH_SAFE(qid, &next_ids, entry, tmp) {
1435 struct got_object_qid *old_id;
1436 intptr_t qcolor, ocolor;
1437 STAILQ_FOREACH(old_id, ids, entry) {
1438 if (got_object_id_cmp(&qid->id, &old_id->id))
1439 continue;
1440 qcolor = (intptr_t)qid->data;
1441 ocolor = (intptr_t)old_id->data;
1442 STAILQ_REMOVE(&next_ids, qid, got_object_qid, entry);
1443 got_object_qid_free(qid);
1444 qid = NULL;
1445 if (qcolor != ocolor) {
1446 paint_commit(old_id, qcolor);
1447 if (ocolor == COLOR_SKIP)
1448 (*nskip)--;
1449 else if (qcolor == COLOR_SKIP)
1450 (*nskip)++;
1452 break;
1455 while (!STAILQ_EMPTY(&next_ids)) {
1456 qid = STAILQ_FIRST(&next_ids);
1457 STAILQ_REMOVE_HEAD(&next_ids, entry);
1458 paint_commit(qid, color);
1459 STAILQ_INSERT_TAIL(ids, qid, entry);
1460 (*nqueued)++;
1461 if (color == COLOR_SKIP)
1462 (*nskip)++;
1465 return err;
1468 static const struct got_error *
1469 find_pack_for_commit_painting(struct got_packidx **best_packidx,
1470 struct got_object_id_queue *ids, int nids, struct got_repository *repo)
1472 const struct got_error *err = NULL;
1473 struct got_pathlist_entry *pe;
1474 const char *best_packidx_path = NULL;
1475 int nobj_max = 0;
1476 int ncommits_max = 0;
1478 *best_packidx = NULL;
1481 * Find the largest pack which contains at least some of the
1482 * commits we are interested in.
1484 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
1485 const char *path_packidx = pe->path;
1486 struct got_packidx *packidx;
1487 int nobj, idx, ncommits = 0;
1488 struct got_object_qid *qid;
1490 err = got_repo_get_packidx(&packidx, path_packidx, repo);
1491 if (err)
1492 break;
1494 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
1495 if (nobj <= nobj_max)
1496 continue;
1498 STAILQ_FOREACH(qid, ids, entry) {
1499 idx = got_packidx_get_object_idx(packidx, &qid->id);
1500 if (idx != -1)
1501 ncommits++;
1503 if (ncommits > ncommits_max) {
1504 best_packidx_path = path_packidx;
1505 nobj_max = nobj;
1506 ncommits_max = ncommits;
1510 if (best_packidx_path && err == NULL) {
1511 err = got_repo_get_packidx(best_packidx, best_packidx_path,
1512 repo);
1515 return err;
1518 static const struct got_error *
1519 paint_commits(int *ncolored, struct got_object_id_queue *ids, int nids,
1520 struct got_object_idset *keep, struct got_object_idset *drop,
1521 struct got_object_idset *skip, struct got_repository *repo,
1522 got_pack_progress_cb progress_cb, void *progress_arg,
1523 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1525 const struct got_error *err = NULL;
1526 struct got_commit_object *commit = NULL;
1527 struct got_packidx *packidx = NULL;
1528 struct got_pack *pack = NULL;
1529 const struct got_object_id_queue *parents;
1530 struct got_object_qid *qid = NULL;
1531 int nqueued = nids, nskip = 0;
1532 int idx;
1534 while (!STAILQ_EMPTY(ids) && nskip != nqueued) {
1535 intptr_t color;
1537 if (cancel_cb) {
1538 err = cancel_cb(cancel_arg);
1539 if (err)
1540 break;
1543 qid = STAILQ_FIRST(ids);
1544 STAILQ_REMOVE_HEAD(ids, entry);
1545 nqueued--;
1546 color = (intptr_t)qid->data;
1547 if (color == COLOR_SKIP)
1548 nskip--;
1550 if (got_object_idset_contains(skip, &qid->id)) {
1551 got_object_qid_free(qid);
1552 qid = NULL;
1553 continue;
1555 if (color == COLOR_KEEP &&
1556 got_object_idset_contains(keep, &qid->id)) {
1557 got_object_qid_free(qid);
1558 qid = NULL;
1559 continue;
1561 if (color == COLOR_DROP &&
1562 got_object_idset_contains(drop, &qid->id)) {
1563 got_object_qid_free(qid);
1564 qid = NULL;
1565 continue;
1568 /* Pinned pack may have moved to different cache slot. */
1569 pack = got_repo_get_pinned_pack(repo);
1571 if (packidx && pack) {
1572 idx = got_packidx_get_object_idx(packidx, &qid->id);
1573 if (idx != -1) {
1574 err = paint_packed_commits(pack, &qid->id,
1575 idx, color, ncolored, &nqueued, &nskip,
1576 ids, keep, drop, skip, repo,
1577 progress_cb, progress_arg, rl,
1578 cancel_cb, cancel_arg);
1579 if (err)
1580 break;
1581 got_object_qid_free(qid);
1582 qid = NULL;
1583 continue;
1587 switch (color) {
1588 case COLOR_KEEP:
1589 if (got_object_idset_contains(drop, &qid->id)) {
1590 err = paint_commit(qid, COLOR_SKIP);
1591 if (err)
1592 goto done;
1593 } else
1594 (*ncolored)++;
1595 err = got_object_idset_add(keep, &qid->id, NULL);
1596 if (err)
1597 goto done;
1598 break;
1599 case COLOR_DROP:
1600 if (got_object_idset_contains(keep, &qid->id)) {
1601 err = paint_commit(qid, COLOR_SKIP);
1602 if (err)
1603 goto done;
1604 } else
1605 (*ncolored)++;
1606 err = got_object_idset_add(drop, &qid->id, NULL);
1607 if (err)
1608 goto done;
1609 break;
1610 case COLOR_SKIP:
1611 if (!got_object_idset_contains(skip, &qid->id)) {
1612 err = got_object_idset_add(skip, &qid->id,
1613 NULL);
1614 if (err)
1615 goto done;
1617 break;
1618 default:
1619 /* should not happen */
1620 err = got_error_fmt(GOT_ERR_NOT_IMPL,
1621 "%s invalid commit color %"PRIdPTR, __func__,
1622 color);
1623 goto done;
1626 err = report_progress(progress_cb, progress_arg, rl,
1627 *ncolored, 0, 0, 0L, 0, 0, 0, 0);
1628 if (err)
1629 break;
1631 err = got_object_open_as_commit(&commit, repo, &qid->id);
1632 if (err)
1633 break;
1635 parents = got_object_commit_get_parent_ids(commit);
1636 if (parents) {
1637 struct got_object_qid *pid;
1638 color = (intptr_t)qid->data;
1639 STAILQ_FOREACH(pid, parents, entry) {
1640 err = queue_commit_id(ids, &pid->id,
1641 color, repo);
1642 if (err)
1643 break;
1644 nqueued++;
1645 if (color == COLOR_SKIP)
1646 nskip++;
1650 if (pack == NULL && (commit->flags & GOT_COMMIT_FLAG_PACKED)) {
1651 if (packidx == NULL) {
1652 err = find_pack_for_commit_painting(&packidx,
1653 ids, nqueued, repo);
1654 if (err)
1655 goto done;
1657 if (packidx != NULL) {
1658 err = cache_pack_for_packidx(&pack, packidx,
1659 repo);
1660 if (err)
1661 goto done;
1662 err = got_privsep_init_commit_painting(
1663 pack->privsep_child->ibuf);
1664 if (err)
1665 goto done;
1666 err = send_idset(pack->privsep_child->ibuf,
1667 keep);
1668 if (err)
1669 goto done;
1670 err = send_idset(pack->privsep_child->ibuf, drop);
1671 if (err)
1672 goto done;
1673 err = send_idset(pack->privsep_child->ibuf, skip);
1674 if (err)
1675 goto done;
1676 err = got_repo_pin_pack(repo, packidx, pack);
1677 if (err)
1678 goto done;
1682 got_object_commit_close(commit);
1683 commit = NULL;
1685 got_object_qid_free(qid);
1686 qid = NULL;
1688 done:
1689 if (pack) {
1690 const struct got_error *pack_err;
1691 pack_err = got_privsep_send_painting_commits_done(
1692 pack->privsep_child->ibuf);
1693 if (err == NULL)
1694 err = pack_err;
1696 if (commit)
1697 got_object_commit_close(commit);
1698 got_object_qid_free(qid);
1699 got_repo_unpin_pack(repo);
1700 return err;
1703 static const struct got_error *
1704 findtwixt(struct got_object_id ***res, int *nres, int *ncolored,
1705 struct got_object_id **head, int nhead,
1706 struct got_object_id **tail, int ntail,
1707 struct got_repository *repo,
1708 got_pack_progress_cb progress_cb, void *progress_arg,
1709 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1711 const struct got_error *err = NULL;
1712 struct got_object_id_queue ids;
1713 struct got_object_idset *keep, *drop, *skip = NULL;
1714 int i, nkeep;
1716 STAILQ_INIT(&ids);
1717 *res = NULL;
1718 *nres = 0;
1719 *ncolored = 0;
1721 keep = got_object_idset_alloc();
1722 if (keep == NULL)
1723 return got_error_from_errno("got_object_idset_alloc");
1725 drop = got_object_idset_alloc();
1726 if (drop == NULL) {
1727 err = got_error_from_errno("got_object_idset_alloc");
1728 goto done;
1731 skip = got_object_idset_alloc();
1732 if (skip == NULL) {
1733 err = got_error_from_errno("got_object_idset_alloc");
1734 goto done;
1737 for (i = 0; i < nhead; i++) {
1738 struct got_object_id *id = head[i];
1739 if (id == NULL)
1740 continue;
1741 err = queue_commit_or_tag_id(id, COLOR_KEEP, &ids, repo);
1742 if (err)
1743 goto done;
1746 for (i = 0; i < ntail; i++) {
1747 struct got_object_id *id = tail[i];
1748 if (id == NULL)
1749 continue;
1750 err = queue_commit_or_tag_id(id, COLOR_DROP, &ids, repo);
1751 if (err)
1752 goto done;
1755 err = paint_commits(ncolored, &ids, nhead + ntail,
1756 keep, drop, skip, repo, progress_cb, progress_arg, rl,
1757 cancel_cb, cancel_arg);
1758 if (err)
1759 goto done;
1761 nkeep = got_object_idset_num_elements(keep);
1762 if (nkeep > 0) {
1763 struct append_id_arg arg;
1764 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
1765 if (arg.array == NULL) {
1766 err = got_error_from_errno("calloc");
1767 goto done;
1769 arg.idx = -1;
1770 arg.skip = skip;
1771 arg.drop = drop;
1772 err = got_object_idset_for_each(keep, append_id, &arg);
1773 if (err) {
1774 free(arg.array);
1775 goto done;
1777 *res = arg.array;
1778 *nres = arg.idx + 1;
1780 done:
1781 got_object_idset_free(keep);
1782 got_object_idset_free(drop);
1783 if (skip)
1784 got_object_idset_free(skip);
1785 got_object_id_queue_free(&ids);
1786 return err;
1789 struct load_packed_obj_arg {
1790 /* output parameters: */
1791 struct got_object_id *id;
1792 char *dpath;
1793 time_t mtime;
1795 /* input parameters: */
1796 uint32_t seed;
1797 int want_meta;
1798 struct got_object_idset *idset;
1799 struct got_object_idset *idset_exclude;
1800 int loose_obj_only;
1801 int *ncolored;
1802 int *nfound;
1803 int *ntrees;
1804 got_pack_progress_cb progress_cb;
1805 void *progress_arg;
1806 struct got_ratelimit *rl;
1807 got_cancel_cb cancel_cb;
1808 void *cancel_arg;
1811 static const struct got_error *
1812 load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id,
1813 struct got_repository *repo)
1815 struct load_packed_obj_arg *a = arg;
1817 if (got_object_idset_contains(a->idset, id) ||
1818 got_object_idset_contains(a->idset_exclude, id))
1819 return NULL;
1821 return add_object(a->want_meta,
1822 a->want_meta ? a->idset : a->idset_exclude,
1823 id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->seed, a->loose_obj_only,
1824 repo, a->ncolored, a->nfound, a->ntrees,
1825 a->progress_cb, a->progress_arg, a->rl);
1828 static const struct got_error *
1829 load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime,
1830 struct got_object_id *id, const char *dpath, struct got_repository *repo)
1832 const struct got_error *err;
1833 struct load_packed_obj_arg *a = arg;
1834 const char *relpath;
1837 * When we receive a tree's ID and path but not the tree itself,
1838 * this tree object was not found in the pack file. This is the
1839 * last time we are being called for this optimized traversal.
1840 * Return from here and switch to loading objects the slow way.
1842 if (tree == NULL) {
1843 free(a->id);
1844 a->id = got_object_id_dup(id);
1845 if (a->id == NULL) {
1846 err = got_error_from_errno("got_object_id_dup");
1847 free(a->dpath);
1848 a->dpath = NULL;
1849 return err;
1852 free(a->dpath);
1853 a->dpath = strdup(dpath);
1854 if (a->dpath == NULL) {
1855 err = got_error_from_errno("strdup");
1856 free(a->id);
1857 a->id = NULL;
1858 return err;
1861 a->mtime = mtime;
1862 return NULL;
1865 if (got_object_idset_contains(a->idset, id) ||
1866 got_object_idset_contains(a->idset_exclude, id))
1867 return NULL;
1869 relpath = dpath;
1870 while (relpath[0] == '/')
1871 relpath++;
1873 err = add_object(a->want_meta,
1874 a->want_meta ? a->idset : a->idset_exclude,
1875 id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->seed,
1876 a->loose_obj_only, repo, a->ncolored, a->nfound, a->ntrees,
1877 a->progress_cb, a->progress_arg, a->rl);
1878 if (err)
1879 return err;
1881 return load_tree_entries(NULL, a->want_meta, a->idset,
1882 a->idset_exclude, tree, dpath, mtime, a->seed, repo,
1883 a->loose_obj_only, a->ncolored, a->nfound, a->ntrees,
1884 a->progress_cb, a->progress_arg, a->rl,
1885 a->cancel_cb, a->cancel_arg);
1888 static const struct got_error *
1889 load_packed_object_ids(int *found_all_objects,
1890 struct got_object_id **ours, int nours,
1891 struct got_object_id **theirs, int ntheirs,
1892 int want_meta, uint32_t seed, struct got_object_idset *idset,
1893 struct got_object_idset *idset_exclude, int loose_obj_only,
1894 struct got_repository *repo, struct got_packidx *packidx,
1895 int *ncolored, int *nfound, int *ntrees,
1896 got_pack_progress_cb progress_cb, void *progress_arg,
1897 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1899 const struct got_error *err = NULL;
1900 struct load_packed_obj_arg lpa;
1902 memset(&lpa, 0, sizeof(lpa));
1903 lpa.seed = seed;
1904 lpa.want_meta = want_meta;
1905 lpa.idset = idset;
1906 lpa.idset_exclude = idset_exclude;
1907 lpa.loose_obj_only = loose_obj_only;
1908 lpa.ncolored = ncolored;
1909 lpa.nfound = nfound;
1910 lpa.ntrees = ntrees;
1911 lpa.progress_cb = progress_cb;
1912 lpa.progress_arg = progress_arg;
1913 lpa.rl = rl;
1914 lpa.cancel_cb = cancel_cb;
1915 lpa.cancel_arg = cancel_arg;
1917 /* Attempt to load objects via got-read-pack, as far as possible. */
1918 err = got_object_enumerate(found_all_objects, load_packed_commit_id,
1919 load_packed_tree_ids, &lpa, ours, nours, theirs, ntheirs,
1920 packidx, repo);
1921 if (err)
1922 return err;
1924 if (lpa.id == NULL)
1925 return NULL;
1928 * An incomplete tree hierarchy was present in the pack file
1929 * and caused loading to be aborted.
1930 * Continue loading trees the slow way.
1932 err = load_tree(want_meta, idset, idset_exclude,
1933 lpa.id, lpa.dpath, lpa.mtime, seed, repo, loose_obj_only,
1934 ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
1935 cancel_cb, cancel_arg);
1936 free(lpa.id);
1937 free(lpa.dpath);
1938 return err;
1941 static const struct got_error *
1942 find_pack_for_enumeration(struct got_packidx **best_packidx,
1943 struct got_object_id **ids, int nids, struct got_repository *repo)
1945 const struct got_error *err = NULL;
1946 struct got_pathlist_entry *pe;
1947 const char *best_packidx_path = NULL;
1948 int nobj_max = 0;
1949 int ncommits_max = 0;
1951 *best_packidx = NULL;
1954 * Find the largest pack which contains at least some of the
1955 * commits and tags we are interested in.
1957 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
1958 const char *path_packidx = pe->path;
1959 struct got_packidx *packidx;
1960 int nobj, i, idx, ncommits = 0;
1962 err = got_repo_get_packidx(&packidx, path_packidx, repo);
1963 if (err)
1964 break;
1966 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
1967 if (nobj <= nobj_max)
1968 continue;
1970 for (i = 0; i < nids; i++) {
1971 idx = got_packidx_get_object_idx(packidx, ids[i]);
1972 if (idx != -1)
1973 ncommits++;
1975 if (ncommits > ncommits_max) {
1976 best_packidx_path = path_packidx;
1977 nobj_max = nobj;
1978 ncommits_max = ncommits;
1982 if (best_packidx_path && err == NULL) {
1983 err = got_repo_get_packidx(best_packidx, best_packidx_path,
1984 repo);
1987 return err;
1990 static const struct got_error *
1991 load_object_ids(int *ncolored, int *nfound, int *ntrees,
1992 struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
1993 struct got_object_id **ours, int nours, struct got_repository *repo,
1994 uint32_t seed, int loose_obj_only, got_pack_progress_cb progress_cb,
1995 void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb,
1996 void *cancel_arg)
1998 const struct got_error *err = NULL;
1999 struct got_object_id **ids = NULL;
2000 struct got_packidx *packidx = NULL;
2001 int i, nobj = 0, obj_type, found_all_objects = 0;
2002 struct got_object_idset *idset_exclude;
2004 idset_exclude = got_object_idset_alloc();
2005 if (idset_exclude == NULL)
2006 return got_error_from_errno("got_object_idset_alloc");
2008 *ncolored = 0;
2009 *nfound = 0;
2010 *ntrees = 0;
2012 err = findtwixt(&ids, &nobj, ncolored, ours, nours, theirs, ntheirs,
2013 repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
2014 if (err)
2015 goto done;
2017 err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo);
2018 if (err)
2019 goto done;
2020 if (packidx) {
2021 err = load_packed_object_ids(&found_all_objects,
2022 theirs, ntheirs, NULL, 0, 0, seed, idset, idset_exclude,
2023 loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
2024 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
2025 if (err)
2026 goto done;
2029 for (i = 0; i < ntheirs; i++) {
2030 struct got_object_id *id = theirs[i];
2031 if (id == NULL)
2032 continue;
2033 err = got_object_get_type(&obj_type, repo, id);
2034 if (err)
2035 return err;
2036 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
2037 if (!found_all_objects) {
2038 err = load_commit(0, idset, idset_exclude,
2039 id, repo, seed, loose_obj_only,
2040 ncolored, nfound, ntrees,
2041 progress_cb, progress_arg, rl,
2042 cancel_cb, cancel_arg);
2043 if (err)
2044 goto done;
2046 } else if (obj_type == GOT_OBJ_TYPE_TAG) {
2047 err = load_tag(0, idset, idset_exclude, id, repo,
2048 seed, loose_obj_only, ncolored, nfound, ntrees,
2049 progress_cb, progress_arg, rl,
2050 cancel_cb, cancel_arg);
2051 if (err)
2052 goto done;
2056 found_all_objects = 0;
2057 err = find_pack_for_enumeration(&packidx, ids, nobj, repo);
2058 if (err)
2059 goto done;
2060 if (packidx) {
2061 err = load_packed_object_ids(&found_all_objects, ids,
2062 nobj, theirs, ntheirs, 1, seed, idset, idset_exclude,
2063 loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
2064 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
2065 if (err)
2066 goto done;
2069 if (!found_all_objects) {
2070 for (i = 0; i < nobj; i++) {
2071 err = load_commit(1, idset, idset_exclude, ids[i],
2072 repo, seed, loose_obj_only, ncolored, nfound,
2073 ntrees, progress_cb, progress_arg, rl,
2074 cancel_cb, cancel_arg);
2075 if (err)
2076 goto done;
2080 for (i = 0; i < nours; i++) {
2081 struct got_object_id *id = ours[i];
2082 struct got_pack_meta *m;
2083 if (id == NULL)
2084 continue;
2085 m = got_object_idset_get(idset, id);
2086 if (m == NULL) {
2087 err = got_object_get_type(&obj_type, repo, id);
2088 if (err)
2089 goto done;
2090 } else
2091 obj_type = m->obj_type;
2092 if (obj_type != GOT_OBJ_TYPE_TAG)
2093 continue;
2094 err = load_tag(1, idset, idset_exclude, id, repo,
2095 seed, loose_obj_only, ncolored, nfound, ntrees,
2096 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
2097 if (err)
2098 goto done;
2100 done:
2101 for (i = 0; i < nobj; i++) {
2102 free(ids[i]);
2104 free(ids);
2105 got_object_idset_free(idset_exclude);
2106 return err;
2109 static const struct got_error *
2110 hwrite(FILE *f, const void *buf, off_t len, SHA1_CTX *ctx)
2112 size_t n;
2114 SHA1Update(ctx, buf, len);
2115 n = fwrite(buf, 1, len, f);
2116 if (n != len)
2117 return got_ferror(f, GOT_ERR_IO);
2118 return NULL;
2121 static const struct got_error *
2122 hcopy(FILE *fsrc, FILE *fdst, off_t len, SHA1_CTX *ctx)
2124 unsigned char buf[65536];
2125 off_t remain = len;
2126 size_t n;
2128 while (remain > 0) {
2129 size_t copylen = MIN(sizeof(buf), remain);
2130 n = fread(buf, 1, copylen, fsrc);
2131 if (n != copylen)
2132 return got_ferror(fsrc, GOT_ERR_IO);
2133 SHA1Update(ctx, buf, copylen);
2134 n = fwrite(buf, 1, copylen, fdst);
2135 if (n != copylen)
2136 return got_ferror(fdst, GOT_ERR_IO);
2137 remain -= copylen;
2140 return NULL;
2143 static const struct got_error *
2144 hcopy_mmap(uint8_t *src, off_t src_offset, size_t src_size,
2145 FILE *fdst, off_t len, SHA1_CTX *ctx)
2147 size_t n;
2149 if (src_offset + len > src_size)
2150 return got_error(GOT_ERR_RANGE);
2152 SHA1Update(ctx, src + src_offset, len);
2153 n = fwrite(src + src_offset, 1, len, fdst);
2154 if (n != len)
2155 return got_ferror(fdst, GOT_ERR_IO);
2157 return NULL;
2160 static void
2161 putbe32(char *b, uint32_t n)
2163 b[0] = n >> 24;
2164 b[1] = n >> 16;
2165 b[2] = n >> 8;
2166 b[3] = n >> 0;
2169 static int
2170 write_order_cmp(const void *pa, const void *pb)
2172 struct got_pack_meta *a, *b, *ahd, *bhd;
2174 a = *(struct got_pack_meta **)pa;
2175 b = *(struct got_pack_meta **)pb;
2176 ahd = (a->head == NULL) ? a : a->head;
2177 bhd = (b->head == NULL) ? b : b->head;
2178 if (bhd->mtime < ahd->mtime)
2179 return -1;
2180 if (bhd->mtime > ahd->mtime)
2181 return 1;
2182 if (bhd < ahd)
2183 return -1;
2184 if (bhd > ahd)
2185 return 1;
2186 if (a->nchain != b->nchain)
2187 return a->nchain - b->nchain;
2188 if (a->mtime < b->mtime)
2189 return -1;
2190 if (a->mtime > b->mtime)
2191 return 1;
2192 return got_object_id_cmp(&a->id, &b->id);
2195 static int
2196 reuse_write_order_cmp(const void *pa, const void *pb)
2198 struct got_pack_meta *a, *b;
2200 a = *(struct got_pack_meta **)pa;
2201 b = *(struct got_pack_meta **)pb;
2203 if (a->reused_delta_offset < b->reused_delta_offset)
2204 return -1;
2205 if (a->reused_delta_offset > b->reused_delta_offset)
2206 return 1;
2207 return 0;
2210 static const struct got_error *
2211 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
2213 size_t i;
2215 *hdrlen = 0;
2217 hdr[0] = obj_type << 4;
2218 hdr[0] |= len & 0xf;
2219 len >>= 4;
2220 for (i = 1; len != 0; i++){
2221 if (i >= bufsize)
2222 return got_error(GOT_ERR_NO_SPACE);
2223 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
2224 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
2225 len >>= GOT_DELTA_SIZE_SHIFT;
2228 *hdrlen = i;
2229 return NULL;
2232 static int
2233 packoff(char *hdr, off_t off)
2235 int i, j;
2236 char rbuf[8];
2238 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
2239 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
2240 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
2241 GOT_DELTA_SIZE_MORE;
2244 j = 0;
2245 while (i > 0)
2246 hdr[j++] = rbuf[--i];
2247 return j;
2250 static const struct got_error *
2251 deltahdr(off_t *packfile_size, SHA1_CTX *ctx, FILE *packfile,
2252 struct got_pack_meta *m)
2254 const struct got_error *err;
2255 char buf[32];
2256 int nh;
2258 if (m->prev->off != 0) {
2259 err = packhdr(&nh, buf, sizeof(buf),
2260 GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
2261 if (err)
2262 return err;
2263 nh += packoff(buf + nh, m->off - m->prev->off);
2264 err = hwrite(packfile, buf, nh, ctx);
2265 if (err)
2266 return err;
2267 *packfile_size += nh;
2268 } else {
2269 err = packhdr(&nh, buf, sizeof(buf),
2270 GOT_OBJ_TYPE_REF_DELTA, m->delta_len);
2271 if (err)
2272 return err;
2273 err = hwrite(packfile, buf, nh, ctx);
2274 if (err)
2275 return err;
2276 *packfile_size += nh;
2277 err = hwrite(packfile, m->prev->id.sha1,
2278 sizeof(m->prev->id.sha1), ctx);
2279 if (err)
2280 return err;
2281 *packfile_size += sizeof(m->prev->id.sha1);
2284 return NULL;
2287 static const struct got_error *
2288 write_packed_object(off_t *packfile_size, FILE *packfile,
2289 FILE *delta_cache, uint8_t *delta_cache_map, size_t delta_cache_size,
2290 struct got_pack_meta *m, int *outfd, SHA1_CTX *ctx,
2291 struct got_repository *repo)
2293 const struct got_error *err = NULL;
2294 struct got_deflate_checksum csum;
2295 char buf[32];
2296 int nh;
2297 struct got_raw_object *raw = NULL;
2298 off_t outlen;
2300 csum.output_sha1 = ctx;
2301 csum.output_crc = NULL;
2303 m->off = ftello(packfile);
2304 if (m->delta_len == 0) {
2305 err = got_object_raw_open(&raw, outfd, repo, &m->id);
2306 if (err)
2307 goto done;
2308 err = packhdr(&nh, buf, sizeof(buf),
2309 m->obj_type, raw->size);
2310 if (err)
2311 goto done;
2312 err = hwrite(packfile, buf, nh, ctx);
2313 if (err)
2314 goto done;
2315 *packfile_size += nh;
2316 if (raw->f == NULL) {
2317 err = got_deflate_to_file_mmap(&outlen,
2318 raw->data + raw->hdrlen, 0, raw->size,
2319 packfile, &csum);
2320 if (err)
2321 goto done;
2322 } else {
2323 if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
2324 == -1) {
2325 err = got_error_from_errno("fseeko");
2326 goto done;
2328 err = got_deflate_to_file(&outlen, raw->f,
2329 raw->size, packfile, &csum);
2330 if (err)
2331 goto done;
2333 *packfile_size += outlen;
2334 got_object_raw_close(raw);
2335 raw = NULL;
2336 } else if (m->delta_buf) {
2337 err = deltahdr(packfile_size, ctx, packfile, m);
2338 if (err)
2339 goto done;
2340 err = hwrite(packfile, m->delta_buf,
2341 m->delta_compressed_len, ctx);
2342 if (err)
2343 goto done;
2344 *packfile_size += m->delta_compressed_len;
2345 free(m->delta_buf);
2346 m->delta_buf = NULL;
2347 } else if (delta_cache_map) {
2348 err = deltahdr(packfile_size, ctx, packfile, m);
2349 if (err)
2350 goto done;
2351 err = hcopy_mmap(delta_cache_map, m->delta_offset,
2352 delta_cache_size, packfile, m->delta_compressed_len,
2353 ctx);
2354 if (err)
2355 goto done;
2356 *packfile_size += m->delta_compressed_len;
2357 } else {
2358 if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
2359 == -1) {
2360 err = got_error_from_errno("fseeko");
2361 goto done;
2363 err = deltahdr(packfile_size, ctx, packfile, m);
2364 if (err)
2365 goto done;
2366 err = hcopy(delta_cache, packfile,
2367 m->delta_compressed_len, ctx);
2368 if (err)
2369 goto done;
2370 *packfile_size += m->delta_compressed_len;
2372 done:
2373 if (raw)
2374 got_object_raw_close(raw);
2375 return err;
2378 static const struct got_error *
2379 genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delta_cache,
2380 struct got_pack_meta **deltify, int ndeltify,
2381 struct got_pack_meta **reuse, int nreuse,
2382 int ncolored, int nfound, int ntrees, int nours,
2383 struct got_repository *repo,
2384 got_pack_progress_cb progress_cb, void *progress_arg,
2385 struct got_ratelimit *rl,
2386 got_cancel_cb cancel_cb, void *cancel_arg)
2388 const struct got_error *err = NULL;
2389 int i;
2390 SHA1_CTX ctx;
2391 struct got_pack_meta *m;
2392 char buf[32];
2393 size_t n;
2394 off_t packfile_size = 0;
2395 int outfd = -1;
2396 int delta_cache_fd = -1;
2397 uint8_t *delta_cache_map = NULL;
2398 size_t delta_cache_size = 0;
2400 SHA1Init(&ctx);
2402 #ifndef GOT_PACK_NO_MMAP
2403 delta_cache_fd = dup(fileno(delta_cache));
2404 if (delta_cache_fd != -1) {
2405 struct stat sb;
2406 if (fstat(delta_cache_fd, &sb) == -1) {
2407 err = got_error_from_errno("fstat");
2408 goto done;
2410 if (sb.st_size > 0 && sb.st_size <= SIZE_MAX) {
2411 delta_cache_map = mmap(NULL, sb.st_size,
2412 PROT_READ, MAP_PRIVATE, delta_cache_fd, 0);
2413 if (delta_cache_map == MAP_FAILED) {
2414 if (errno != ENOMEM) {
2415 err = got_error_from_errno("mmap");
2416 goto done;
2418 delta_cache_map = NULL; /* fallback on stdio */
2419 } else
2420 delta_cache_size = (size_t)sb.st_size;
2423 #endif
2424 err = hwrite(packfile, "PACK", 4, &ctx);
2425 if (err)
2426 goto done;
2427 putbe32(buf, GOT_PACKFILE_VERSION);
2428 err = hwrite(packfile, buf, 4, &ctx);
2429 if (err)
2430 goto done;
2431 putbe32(buf, ndeltify + nreuse);
2432 err = hwrite(packfile, buf, 4, &ctx);
2433 if (err)
2434 goto done;
2436 qsort(deltify, ndeltify, sizeof(struct got_pack_meta *),
2437 write_order_cmp);
2438 for (i = 0; i < ndeltify; i++) {
2439 err = report_progress(progress_cb, progress_arg, rl,
2440 ncolored, nfound, ntrees, packfile_size, nours,
2441 ndeltify + nreuse, ndeltify + nreuse, i);
2442 if (err)
2443 goto done;
2444 m = deltify[i];
2445 err = write_packed_object(&packfile_size, packfile,
2446 delta_cache, delta_cache_map, delta_cache_size,
2447 m, &outfd, &ctx, repo);
2448 if (err)
2449 goto done;
2452 qsort(reuse, nreuse, sizeof(struct got_pack_meta *),
2453 reuse_write_order_cmp);
2454 for (i = 0; i < nreuse; i++) {
2455 err = report_progress(progress_cb, progress_arg, rl,
2456 ncolored, nfound, ntrees, packfile_size, nours,
2457 ndeltify + nreuse, ndeltify + nreuse, ndeltify + i);
2458 if (err)
2459 goto done;
2460 m = reuse[i];
2461 err = write_packed_object(&packfile_size, packfile,
2462 delta_cache, delta_cache_map, delta_cache_size,
2463 m, &outfd, &ctx, repo);
2464 if (err)
2465 goto done;
2468 SHA1Final(pack_sha1, &ctx);
2469 n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
2470 if (n != SHA1_DIGEST_LENGTH)
2471 err = got_ferror(packfile, GOT_ERR_IO);
2472 packfile_size += SHA1_DIGEST_LENGTH;
2473 packfile_size += sizeof(struct got_packfile_hdr);
2474 if (progress_cb) {
2475 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
2476 packfile_size, nours, ndeltify + nreuse,
2477 ndeltify + nreuse, ndeltify + nreuse);
2478 if (err)
2479 goto done;
2481 done:
2482 if (outfd != -1 && close(outfd) == -1 && err == NULL)
2483 err = got_error_from_errno("close");
2484 if (delta_cache_map && munmap(delta_cache_map, delta_cache_size) == -1)
2485 err = got_error_from_errno("munmap");
2486 if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL)
2487 err = got_error_from_errno("close");
2488 return err;
2491 static const struct got_error *
2492 add_meta_idset_cb(struct got_object_id *id, void *data, void *arg)
2494 struct got_pack_meta *m = data;
2495 struct got_pack_metavec *v = arg;
2497 if (m->reused_delta_offset != 0)
2498 return NULL;
2500 return add_meta(m, v);
2503 const struct got_error *
2504 got_pack_create(uint8_t *packsha1, FILE *packfile,
2505 struct got_object_id **theirs, int ntheirs,
2506 struct got_object_id **ours, int nours,
2507 struct got_repository *repo, int loose_obj_only, int allow_empty,
2508 got_pack_progress_cb progress_cb, void *progress_arg,
2509 got_cancel_cb cancel_cb, void *cancel_arg)
2511 const struct got_error *err;
2512 int delta_cache_fd = -1;
2513 FILE *delta_cache = NULL;
2514 struct got_object_idset *idset;
2515 struct got_ratelimit rl;
2516 struct got_pack_metavec deltify, reuse;
2517 int ncolored = 0, nfound = 0, ntrees = 0;
2518 size_t ndeltify;
2519 uint32_t seed;
2521 seed = arc4random();
2523 memset(&deltify, 0, sizeof(deltify));
2524 memset(&reuse, 0, sizeof(reuse));
2526 got_ratelimit_init(&rl, 0, 500);
2528 idset = got_object_idset_alloc();
2529 if (idset == NULL)
2530 return got_error_from_errno("got_object_idset_alloc");
2532 err = load_object_ids(&ncolored, &nfound, &ntrees, idset, theirs,
2533 ntheirs, ours, nours, repo, seed, loose_obj_only,
2534 progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
2535 if (err)
2536 goto done;
2538 if (progress_cb) {
2539 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
2540 0L, nours, got_object_idset_num_elements(idset), 0, 0);
2541 if (err)
2542 goto done;
2545 if (got_object_idset_num_elements(idset) == 0 && !allow_empty) {
2546 err = got_error(GOT_ERR_CANNOT_PACK);
2547 goto done;
2550 delta_cache_fd = got_opentempfd();
2551 if (delta_cache_fd == -1) {
2552 err = got_error_from_errno("got_opentemp");
2553 goto done;
2556 reuse.metasz = 64;
2557 reuse.meta = calloc(reuse.metasz,
2558 sizeof(struct got_pack_meta *));
2559 if (reuse.meta == NULL) {
2560 err = got_error_from_errno("calloc");
2561 goto done;
2564 err = search_deltas(&reuse, idset, delta_cache_fd, ncolored, nfound,
2565 ntrees, nours, repo, progress_cb, progress_arg, &rl,
2566 cancel_cb, cancel_arg);
2567 if (err)
2568 goto done;
2570 delta_cache = fdopen(delta_cache_fd, "a+");
2571 if (delta_cache == NULL) {
2572 err = got_error_from_errno("fdopen");
2573 goto done;
2575 delta_cache_fd = -1;
2577 if (fseeko(delta_cache, 0L, SEEK_END) == -1) {
2578 err = got_error_from_errno("fseeko");
2579 goto done;
2582 ndeltify = got_object_idset_num_elements(idset) - reuse.nmeta;
2583 if (ndeltify > 0) {
2584 deltify.meta = calloc(ndeltify, sizeof(struct got_pack_meta *));
2585 if (deltify.meta == NULL) {
2586 err = got_error_from_errno("calloc");
2587 goto done;
2589 deltify.metasz = ndeltify;
2591 err = got_object_idset_for_each(idset, add_meta_idset_cb,
2592 &deltify);
2593 if (err)
2594 goto done;
2595 if (deltify.nmeta > 0) {
2596 err = pick_deltas(deltify.meta, deltify.nmeta,
2597 ncolored, nfound, ntrees, nours, reuse.nmeta,
2598 delta_cache, repo, progress_cb, progress_arg, &rl,
2599 cancel_cb, cancel_arg);
2600 if (err)
2601 goto done;
2605 if (fflush(delta_cache) == EOF) {
2606 err = got_error_from_errno("fflush");
2607 goto done;
2609 err = genpack(packsha1, packfile, delta_cache, deltify.meta,
2610 deltify.nmeta, reuse.meta, reuse.nmeta, ncolored, nfound, ntrees,
2611 nours, repo, progress_cb, progress_arg, &rl,
2612 cancel_cb, cancel_arg);
2613 if (err)
2614 goto done;
2615 done:
2616 free_nmeta(deltify.meta, deltify.nmeta);
2617 free_nmeta(reuse.meta, reuse.nmeta);
2618 got_object_idset_free(idset);
2619 if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL)
2620 err = got_error_from_errno("close");
2621 if (delta_cache && fclose(delta_cache) == EOF && err == NULL)
2622 err = got_error_from_errno("fclose");
2623 return err;