Blob


1 /*
2 * Copyright (c) 2019 Ori Bernstein <ori@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19 #include <sys/syslimits.h>
20 #include <sys/time.h>
21 #include <sys/types.h>
22 #include <sys/uio.h>
24 #include <stdint.h>
25 #include <errno.h>
26 #include <imsg.h>
27 #include <limits.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <sha1.h>
34 #include <fcntl.h>
35 #include <zlib.h>
36 #include <err.h>
37 #include <assert.h>
38 #include <dirent.h>
40 #include "got_error.h"
41 #include "got_object.h"
43 #include "got_lib_sha1.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_inflate.h"
46 #include "got_lib_object.h"
47 #include "got_lib_object_parse.h"
48 #include "got_lib_object_idset.h"
49 #include "got_lib_privsep.h"
/*
 * Short type names for the structures used in this file.  They are
 * declared up front so that the structures can refer to one another.
 */
typedef struct Cinfo	Cinfo;
typedef struct Tinfo	Tinfo;
typedef struct Object	Object;
typedef struct Pack	Pack;
typedef struct Buf	Buf;
typedef struct Dirent	Dirent;
typedef struct Idxent	Idxent;
typedef struct Ols	Ols;
/* Fixed limits used throughout this file. */
enum {
	Cachemax	= 5 * 1024,	/* 5k objects should be enough */
	Pathmax		= 512,
	Hashsz		= 20,		/* bytes in a SHA1 digest */
	Pktmax		= 65536,

	Nproto		= 16,
	Nport		= 16,
	Nhost		= 256,
	Npath		= 128,
	Nrepo		= 64,
	Nbranch		= 32,
};
/*
 * Object types.  readpacked() compares these directly against the 3-bit
 * type field of a packed object header, so the numeric values matter.
 * Value 5 is not used.
 */
typedef enum Type {
	GNone	= 0,
	GCommit	= 1,
	GTree	= 2,
	GBlob	= 3,
	GTag	= 4,
	GOdelta	= 6,	/* delta against a base at an offset in the same pack */
	GRdelta	= 7,	/* delta against a base named by its hash */
} Type;
/* Bits stored in Object.flag.  Bit 4 is not assigned. */
enum {
	Cloaded	= 1 << 0,	/* object data is in memory */
	Cidx	= 1 << 1,	/* object can be reloaded from its pack offset */
	Ccache	= 1 << 2,	/* object is linked into the LRU list */
	Cexist	= 1 << 3,	/* object has been entered into objcache */
	Cparsed	= 1 << 5,	/* parseobject() has run */
};
93 struct Dirent {
94 char *name;
95 int modref;
96 int mode;
97 struct got_object_id h;
98 };
100 struct Object {
101 /* Git data */
102 struct got_object_id hash;
103 Type type;
105 /* Cache */
106 int id;
107 int flag;
108 int refs;
109 Object *next;
110 Object *prev;
112 /* For indexing */
113 off_t off;
114 off_t len;
115 uint32_t crc;
117 /* Everything below here gets cleared */
118 char *all;
119 char *data;
120 /* size excludes header */
121 off_t size;
123 union {
124 Cinfo *commit;
125 Tinfo *tree;
126 };
127 };
129 struct Tinfo {
130 /* Tree */
131 Dirent *ent;
132 int nent;
133 };
135 struct Cinfo {
136 /* Commit */
137 struct got_object_id *parent;
138 int nparent;
139 struct got_object_id tree;
140 char *author;
141 char *committer;
142 char *msg;
143 int nmsg;
144 off_t ctime;
145 off_t mtime;
146 };
/*
 * Simple sized buffer.  The Buf typedef is already declared with the
 * other typedefs near the top of the file; repeating it here is an
 * error before C11, so only the structure itself is defined.
 */
struct Buf {
	int	 len;	/* bytes in use */
	int	 sz;	/* bytes allocated */
	char	*data;
};
156 static int readpacked(FILE *, Object *, int);
157 static Object *readidxobject(FILE *, struct got_object_id, int);
159 struct got_object_idset *objcache;
160 int next_object_id;
161 Object *lruhead;
162 Object *lrutail;
163 int ncache;
/* Decode a big-endian 16-bit value from the first two bytes of b. */
#define GETBE16(b) \
	(((unsigned long)((b)[0] & 0xFF) << 8) | \
	 ((unsigned long)((b)[1] & 0xFF)))
/* Decode a big-endian 32-bit value from the first four bytes of b. */
#define GETBE32(b) \
	(((unsigned long)((b)[0] & 0xFF) << 24) | \
	 ((unsigned long)((b)[1] & 0xFF) << 16) | \
	 ((unsigned long)((b)[2] & 0xFF) << 8) | \
	 ((unsigned long)((b)[3] & 0xFF)))
/* Decode a big-endian 64-bit value from the first eight bytes of b. */
#define GETBE64(b) \
	(((unsigned long long)((b)[0] & 0xFF) << 56) | \
	 ((unsigned long long)((b)[1] & 0xFF) << 48) | \
	 ((unsigned long long)((b)[2] & 0xFF) << 40) | \
	 ((unsigned long long)((b)[3] & 0xFF) << 32) | \
	 ((unsigned long long)((b)[4] & 0xFF) << 24) | \
	 ((unsigned long long)((b)[5] & 0xFF) << 16) | \
	 ((unsigned long long)((b)[6] & 0xFF) << 8) | \
	 ((unsigned long long)((b)[7] & 0xFF)))
/*
 * Store the low 16 bits of n into b[0..1] in big-endian order.
 * n is evaluated exactly once, so arguments with side effects are safe.
 */
#define PUTBE16(b, n) \
	do { \
		uint16_t putbe16_v_ = (uint16_t)(n); \
		(b)[0] = putbe16_v_ >> 8; \
		(b)[1] = putbe16_v_; \
	} while (0)
/*
 * Store the low 32 bits of n into b[0..3] in big-endian order.
 * n is evaluated exactly once, so arguments with side effects (such as
 * "x | counter++") are applied a single time.
 */
#define PUTBE32(b, n) \
	do { \
		uint32_t putbe32_v_ = (uint32_t)(n); \
		(b)[0] = putbe32_v_ >> 24; \
		(b)[1] = putbe32_v_ >> 16; \
		(b)[2] = putbe32_v_ >> 8; \
		(b)[3] = putbe32_v_; \
	} while (0)
/*
 * Store n into b[0..7] in big-endian order.
 * n is evaluated exactly once, so arguments with side effects are safe.
 */
#define PUTBE64(b, n) \
	do { \
		uint64_t putbe64_v_ = (uint64_t)(n); \
		(b)[0] = putbe64_v_ >> 56; \
		(b)[1] = putbe64_v_ >> 48; \
		(b)[2] = putbe64_v_ >> 40; \
		(b)[3] = putbe64_v_ >> 32; \
		(b)[4] = putbe64_v_ >> 24; \
		(b)[5] = putbe64_v_ >> 16; \
		(b)[6] = putbe64_v_ >> 8; \
		(b)[7] = putbe64_v_; \
	} while (0)
/*
 * Convert one hexadecimal digit to its value.
 * Returns 0-15 for a valid digit.  For any other character, sets *err
 * to 1 and returns -1; *err is left untouched on success.
 */
static int
charval(int c, int *err)
{
	int v = -1;

	if ('0' <= c && c <= '9')
		v = c - '0';
	else if ('a' <= c && c <= 'f')
		v = 10 + (c - 'a');
	else if ('A' <= c && c <= 'F')
		v = 10 + (c - 'A');
	else
		*err = 1;
	return v;
}
223 static int
224 hparse(struct got_object_id *h, char *b)
226 int i, err;
228 err = 0;
229 for(i = 0; i < sizeof(h->sha1); i++){
230 err = 0;
231 h->sha1[i] = 0;
232 h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4);
233 h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0);
234 if(err)
235 return -1;
237 return 0;
/*
 * Allocate n zero-filled bytes.  Never returns NULL: the process exits
 * through err() if the allocation fails.
 */
static void *
emalloc(size_t n)
{
	void *mem = calloc(n, 1);

	if (mem == NULL)
		err(1, "malloc:");
	return mem;
}
/*
 * Resize the allocation p to n bytes, preserving its existing contents.
 * Never returns NULL: the process exits through err() on failure.
 *
 * Bytes beyond the old size are NOT initialised; the old size is not
 * known here, so callers must clear any new tail themselves.  (Zeroing
 * the whole block, as was done previously, destroyed the data that
 * realloc() had just preserved.)
 */
static void *
erealloc(void *p, size_t n)
{
	void *v;

	/* realloc(p, 0) may legitimately return NULL; always ask for a byte. */
	v = realloc(p, n ? n : 1);
	if (v == NULL)
		err(1, "realloc:");
	return v;
}
263 static int
264 hasheq(struct got_object_id *a, struct got_object_id *b)
266 return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0;
/*
 * Return the printable name of an object type.
 *
 * The table is indexed by the numeric Type value.  Value 5 is not
 * assigned, so it needs a placeholder; without one "odelta" and
 * "rdelta" sit one slot too low and type 7 falls off the table.
 * Aborts on a value outside 0..7.
 */
static char *
typestr(int t)
{
	static char *types[] = {
		"???",		/* 0: GNone */
		"commit",	/* 1: GCommit */
		"tree",		/* 2: GTree */
		"blob",		/* 3: GBlob */
		"tag",		/* 4: GTag */
		"???",		/* 5: unassigned */
		"odelta",	/* 6: GOdelta */
		"rdelta",	/* 7: GRdelta */
	};

	if (t < 0 || (size_t)t >= sizeof(types)/sizeof(types[0]))
		abort();
	return types[t];
}
286 static char *
287 hashfmt(char *out, size_t nout, struct got_object_id *h)
289 int i, n, c0, c1;
290 char *p;
292 if (nout < 2*sizeof(h->sha1) + 1)
293 return NULL;
294 p = out;
295 for(i = 0; i < sizeof(h->sha1); i++){
296 n = (h->sha1[i] >> 4) & 0xf;
297 c0 = (n >= 10) ? n-10 + 'a' : n + '0';
298 n = h->sha1[i] & 0xf;
299 c1 = (n >= 10) ? n-10 + 'a' : n + '0';
300 *p++ = c0;
301 *p++ = c1;
303 *p++ = 0;
304 return out;
307 static void
308 clear(Object *o)
310 if(!o)
311 return;
313 assert(o->refs == 0);
314 assert((o->flag & Ccache) == 0);
315 assert(o->flag & Cloaded);
316 switch(o->type){
317 case GCommit:
318 if(!o->commit)
319 break;
320 free(o->commit->parent);
321 free(o->commit->author);
322 free(o->commit->committer);
323 free(o->commit);
324 o->commit = NULL;
325 break;
326 case GTree:
327 if(!o->tree)
328 break;
329 free(o->tree->ent);
330 free(o->tree);
331 o->tree = NULL;
332 break;
333 default:
334 break;
337 free(o->all);
338 o->all = NULL;
339 o->data = NULL;
340 o->flag &= ~Cloaded;
343 static void
344 unref(Object *o)
346 if(!o)
347 return;
348 o->refs--;
349 if(!o->refs)
350 clear(o);
353 static Object*
354 ref(Object *o)
356 o->refs++;
357 return o;
360 static void
361 cache(Object *o)
363 char buf[41];
364 Object *p;
366 hashfmt(buf, sizeof(buf), &o->hash);
367 if(o == lruhead)
368 return;
369 if(o == lrutail)
370 lrutail = lrutail->prev;
371 if(!(o->flag & Cexist)){
372 got_object_idset_add(objcache, &o->hash, o);
373 o->id = next_object_id++;
374 o->flag |= Cexist;
376 if(o->prev)
377 o->prev->next = o->next;
378 if(o->next)
379 o->next->prev = o->prev;
380 if(lrutail == o){
381 lrutail = o->prev;
382 lrutail->next = NULL;
383 }else if(!lrutail)
384 lrutail = o;
385 if(lruhead)
386 lruhead->prev = o;
387 o->next = lruhead;
388 o->prev = NULL;
389 lruhead = o;
391 if(!(o->flag & Ccache)){
392 o->flag |= Ccache;
393 ref(o);
394 ncache++;
396 while(ncache > Cachemax){
397 p = lrutail;
398 lrutail = p->prev;
399 lrutail->next = NULL;
400 p->flag &= ~Ccache;
401 p->prev = NULL;
402 p->next = NULL;
403 unref(p);
404 ncache--;
/*
 * Read a big-endian 32-bit value located at byte offset off of b into *v.
 * Returns 0 on success, or -1 if the seek fails or fewer than four
 * bytes could be read.  (fread() reports short reads through its count,
 * never through -1.)
 */
static int
preadbe32(FILE *b, int *v, off_t off)
{
	unsigned char buf[4];

	if (fseeko(b, off, SEEK_SET) == -1)
		return -1;
	if (fread(buf, 1, sizeof(buf), b) != sizeof(buf))
		return -1;
	*v = GETBE32(buf);
	return 0;
}
/*
 * Read a big-endian 64-bit value located at byte offset off of b into *v.
 * Returns 0 on success, or -1 if the seek fails or fewer than eight
 * bytes could be read.  (fread() reports short reads through its count,
 * never through -1.)
 */
static int
preadbe64(FILE *b, off_t *v, off_t off)
{
	unsigned char buf[8];

	if (fseeko(b, off, SEEK_SET) == -1)
		return -1;
	if (fread(buf, 1, sizeof(buf), b) != sizeof(buf))
		return -1;
	*v = GETBE64(buf);
	return 0;
}
/*
 * Decode a little-endian base-128 variable length integer starting at
 * p: each byte contributes its low seven bits, and a set high bit means
 * another byte follows.  *pp is set to the first byte after the number.
 *
 * Bits are accumulated in an unsigned value and anything beyond its
 * width is discarded, so an over-long encoding cannot trigger an
 * undefined shift.  The caller must ensure the encoding terminates
 * inside its buffer; no length is available here.
 */
static int
readvint(char *p, char **pp)
{
	unsigned int n, c;
	int shift;

	n = 0;
	shift = 0;
	do {
		c = (unsigned char)*p++;
		if (shift < (int)(sizeof(n) * 8)) {
			n |= (c & 0x7f) << shift;
			shift += 7;
		}
	} while (c & 0x80);
	*pp = p;

	return (int)n;
}
451 static int
452 applydelta(Object *dst, Object *base, char *d, int nd)
454 char *r, *b, *ed, *er;
455 int n, nr, c;
456 off_t o, l;
458 ed = d + nd;
459 b = base->data;
460 n = readvint(d, &d);
461 if(n != base->size){
462 fprintf(stderr, "mismatched source size");
463 return -1;
466 nr = readvint(d, &d);
467 r = emalloc(nr + 64);
468 n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1;
469 dst->all = r;
470 dst->type = base->type;
471 dst->data = r + n;
472 dst->size = nr;
473 er = dst->data + nr;
474 r = dst->data;
476 while(1){
477 if(d == ed)
478 break;
479 c = *d++;
480 if(!c){
481 fprintf(stderr, "bad delta encoding");
482 return -1;
484 /* copy from base */
485 if(c & 0x80){
486 o = 0;
487 l = 0;
488 /* Offset in base */
489 if(c & 0x01 && d != ed) o |= (*d++ << 0) & 0x000000ff;
490 if(c & 0x02 && d != ed) o |= (*d++ << 8) & 0x0000ff00;
491 if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000;
492 if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000;
494 /* Length to copy */
495 if(c & 0x10 && d != ed) l |= (*d++ << 0) & 0x0000ff;
496 if(c & 0x20 && d != ed) l |= (*d++ << 8) & 0x00ff00;
497 if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000;
498 if(l == 0) l = 0x10000;
500 assert(o + l <= base->size);
501 memmove(r, b + o, l);
502 r += l;
503 /* inline data */
504 }else{
505 memmove(r, d, c);
506 d += c;
507 r += c;
511 if(r != er){
512 fprintf(stderr, "truncated delta (%zd)", er - r);
513 return -1;
516 return nr;
519 static int
520 readrdelta(FILE *f, Object *o, int nd, int flag)
522 const struct got_error *e;
523 struct got_object_id h;
524 Object *b;
525 uint8_t *d;
526 size_t n;
528 d = NULL;
529 if(fread(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1))
530 goto error;
531 if(hasheq(&o->hash, &h))
532 goto error;
533 if ((e = got_inflate_to_mem(&d, &n, f)) != NULL)
534 goto error;
535 o->len = ftello(f) - o->off;
536 if(d == NULL || n != nd)
537 goto error;
538 if((b = readidxobject(f, h, flag)) == NULL)
539 goto error;
540 if(applydelta(o, b, d, n) == -1)
541 goto error;
542 free(d);
543 return 0;
544 error:
545 free(d);
546 return -1;
549 static int
550 readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag)
552 Object b;
553 uint8_t *d;
554 off_t r;
555 size_t n;
556 int c;
558 r = 0;
559 d = NULL;
560 while(1){
561 if((c = fgetc(f)) == -1)
562 goto error;
563 r |= c & 0x7f;
564 if (!(c & 0x80))
565 break;
566 r++;
567 r <<= 7;
568 }while(c & 0x80);
570 if(r > p){
571 fprintf(stderr, "junk offset -%lld (from %lld)", r, p);
572 goto error;
575 if (got_inflate_to_mem(&d, &n, f) == NULL)
576 goto error;
577 o->len = ftello(f) - o->off;
578 if(d == NULL || n != nd)
579 goto error;
580 if(fseek(f, p - r, 0) == -1)
581 goto error;
582 if(readpacked(f, &b, flag) == -1)
583 goto error;
584 if(applydelta(o, &b, d, nd) == -1)
585 goto error;
586 free(d);
587 return 0;
588 error:
589 free(d);
590 return -1;
593 static int
594 readpacked(FILE *f, Object *o, int flag)
596 const struct got_error *e;
597 int c, s, n;
598 off_t l, p;
599 size_t ndata;
600 uint8_t *data;
601 Type t;
602 Buf b;
604 p = ftello(f);
605 c = fgetc(f);
606 if(c == -1)
607 return -1;
608 l = c & 0xf;
609 s = 4;
610 t = (c >> 4) & 0x7;
611 if(!t){
612 fprintf(stderr, "unknown type for byte %x", c);
613 return -1;
615 while(c & 0x80){
616 if((c = fgetc(f)) == -1)
617 return -1;
618 l |= (c & 0x7f) << s;
619 s += 7;
622 switch(t){
623 default:
624 fprintf(stderr, "invalid object at %lld", ftello(f));
625 return -1;
626 case GCommit:
627 case GTree:
628 case GTag:
629 case GBlob:
630 b.sz = 64 + l;
632 b.data = emalloc(b.sz);
633 n = snprintf(b.data, 64, "%s %lld", typestr(t), l) + 1;
634 b.len = n;
635 e = got_inflate_to_mem(&data, &ndata, f);
636 if (e != NULL || n + ndata >= b.sz) {
637 free(b.data);
638 return -1;
640 memcpy(b.data + n, data, ndata);
641 o->len = ftello(f) - o->off;
642 o->type = t;
643 o->all = b.data;
644 o->data = b.data + n;
645 o->size = ndata;
646 free(data);
647 break;
648 case GOdelta:
649 if(readodelta(f, o, l, p, flag) == -1)
650 return -1;
651 break;
652 case GRdelta:
653 if(readrdelta(f, o, l, flag) == -1)
654 return -1;
655 break;
657 o->flag |= Cloaded|flag;
658 return 0;
661 static int
662 readloose(FILE *f, Object *o, int flag)
664 struct { char *tag; int type; } *p, types[] = {
665 {"blob", GBlob},
666 {"tree", GTree},
667 {"commit", GCommit},
668 {"tag", GTag},
669 {NULL},
670 };
671 char *s, *e;
672 uint8_t *d;
673 off_t sz;
674 size_t n;
675 int l;
677 if (got_inflate_to_mem(&d, &n, f) != NULL)
678 return -1;
680 s = (char *)d;
681 o->type = GNone;
682 for(p = types; p->tag; p++){
683 l = strlen(p->tag);
684 if(strncmp(s, p->tag, l) == 0){
685 s += l;
686 o->type = p->type;
687 while(!isspace(*s))
688 s++;
689 break;
692 if(o->type == GNone){
693 free(o->data);
694 return -1;
696 sz = strtol(s, &e, 0);
697 if(e == s || *e++ != 0){
698 fprintf(stderr, "malformed object header");
699 goto error;
701 if(sz != n - (e - (char *)d)){
702 fprintf(stderr, "mismatched sizes");
703 goto error;
705 o->size = sz;
706 o->data = e;
707 o->all = d;
708 o->flag |= Cloaded|flag;
709 return 0;
711 error:
712 free(d);
713 return -1;
716 static off_t
717 searchindex(FILE *f, struct got_object_id h)
719 int lo, hi, idx, i, nent;
720 off_t o, oo;
721 struct got_object_id hh;
723 o = 8;
724 /*
725 * Read the fanout table. The fanout table
726 * contains 256 entries, corresponsding to
727 * the first byte of the hash. Each entry
728 * is a 4 byte big endian integer, containing
729 * the total number of entries with a leading
730 * byte <= the table index, allowing us to
731 * rapidly do a binary search on them.
732 */
733 if (h.sha1[0] == 0){
734 lo = 0;
735 if(preadbe32(f, &hi, o) == -1)
736 goto err;
737 } else {
738 o += h.sha1[0]*4 - 4;
739 if(preadbe32(f, &lo, o + 0) == -1)
740 goto err;
741 if(preadbe32(f, &hi, o + 4) == -1)
742 goto err;
744 if(hi == lo)
745 goto notfound;
746 if(preadbe32(f, &nent, 8 + 255*4) == -1)
747 goto err;
749 /*
750 * Now that we know the range of hashes that the
751 * entry may exist in, read them in so we can do
752 * a bsearch.
753 */
754 idx = -1;
755 fseek(f, Hashsz*lo + 8 + 256*4, 0);
756 for(i = 0; i < hi - lo; i++){
757 if(fread(hh.sha1, 1, sizeof(hh.sha1), f) == -1)
758 goto err;
759 if(hasheq(&hh, &h))
760 idx = lo + i;
762 if(idx == -1)
763 goto notfound;
766 /*
767 * We found the entry. If it's 32 bits, then we
768 * can just return the oset, otherwise the 32
769 * bit entry contains the oset to the 64 bit
770 * entry.
771 */
772 oo = 8; /* Header */
773 oo += 256*4; /* Fanout table */
774 oo += Hashsz*nent; /* Hashes */
775 oo += 4*nent; /* Checksums */
776 oo += 4*idx; /* Offset offset */
777 if(preadbe32(f, &i, oo) == -1)
778 goto err;
779 o = i & 0xffffffff;
780 if(o & (1ull << 31)){
781 o &= 0x7fffffff;
782 if(preadbe64(f, &o, o) == -1)
783 goto err;
785 return o;
787 err:
788 fprintf(stderr, "unable to read packfile\n");
789 return -1;
790 notfound:
792 char hstr[41];
793 hashfmt(hstr, sizeof(hstr), &h);
794 fprintf(stdout, "could not find object %s\n", hstr);
796 return -1;
/*
 * Scans for a non-empty word, copying it into buf (NUL terminated).
 * On success the word together with its leading and trailing blanks
 * is stripped from the input: *str is advanced and *nstr reduced.
 *
 * Returns 0 on success.  Returns -1 if there is no word before the
 * end of the line or input, or if the word does not fit in buf with
 * its terminator; the input (*str, *nstr) is left unmodified then.
 */
static int
scanword(char **str, int *nstr, char *buf, int nbuf)
{
	char *p;
	int n, found;

	found = 0;
	p = *str;
	n = *nstr;
	/* ctype functions need unsigned char values; the data may be binary */
	while (n && isblank((unsigned char)*p)) {
		n--;
		p++;
	}

	for (; n && *p && !isspace((unsigned char)*p); p++, n--) {
		found = 1;
		*buf++ = *p;
		if (--nbuf == 0)
			return -1;
	}
	if (!found)
		return -1;

	while (n && isblank((unsigned char)*p)) {
		n--;
		p++;
	}
	*buf = 0;
	*str = p;
	*nstr = n;
	return 0;
}
/*
 * Advance *str past the next newline and reduce *nstr by the number of
 * bytes skipped.  Only the *nstr bytes that remain are searched, so the
 * buffer does not need to be NUL terminated.  Nothing changes if no
 * newline is found.
 */
static void
nextline(char **str, int *nstr)
{
	char *nl;

	if (*nstr <= 0)
		return;
	nl = memchr(*str, '\n', *nstr);
	if (nl != NULL) {
		*nstr -= nl - *str + 1;
		*str = nl + 1;
	}
}
/*
 * Placeholder for parsing an author or committer line.  Nothing is
 * parsed yet: the input and both outputs are left untouched and 0 is
 * always returned.
 */
static int
parseauthor(char **str, int *nstr, char **name, off_t *time)
{
	(void)str;
	(void)nstr;
	(void)name;
	(void)time;
	return 0;
}
855 static void
856 parsecommit(Object *o)
858 char *p, *t, buf[128];
859 int np;
861 p = o->data;
862 np = o->size;
863 o->commit = emalloc(sizeof(Cinfo));
864 while(1){
865 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
866 break;
867 if(strcmp(buf, "tree") == 0){
868 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
869 errx(1, "invalid commit: tree missing");
870 if(hparse(&o->commit->tree, buf) == -1)
871 errx(1, "invalid commit: garbled tree");
872 }else if(strcmp(buf, "parent") == 0){
873 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
874 errx(1, "invalid commit: missing parent");
875 o->commit->parent = realloc(o->commit->parent, ++o->commit->nparent * sizeof(struct got_object_id));
876 if(!o->commit->parent)
877 err(1, "unable to malloc: ");
878 if(hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1)
879 errx(1, "invalid commit: garbled parent");
880 }else if(strcmp(buf, "author") == 0){
881 parseauthor(&p, &np, &o->commit->author, &o->commit->mtime);
882 }else if(strcmp(buf, "committer") == 0){
883 parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime);
884 }else if(strcmp(buf, "gpgsig") == 0){
885 /* just drop it */
886 if((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL)
887 errx(1, "malformed gpg signature");
888 np -= t - p;
889 p = t;
891 nextline(&p, &np);
893 while (np && isspace(*p)) {
894 p++;
895 np--;
897 o->commit->msg = p;
898 o->commit->nmsg = np;
901 static void
902 parsetree(Object *o)
904 char *p, buf[256];
905 int np, nn, m;
906 Dirent *t;
908 p = o->data;
909 np = o->size;
910 o->tree = emalloc(sizeof(Tinfo));
911 while(np > 0){
912 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
913 break;
914 o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent));
915 t = &o->tree->ent[o->tree->nent - 1];
916 memset(t, 0, sizeof(Dirent));
917 m = strtol(buf, NULL, 8);
918 /* FIXME: symlinks and other BS */
919 if(m == 0160000){
920 t->mode |= S_IFDIR;
921 t->modref = 1;
923 t->mode = m & 0777;
924 if(m & 0040000)
925 t->mode |= S_IFDIR;
926 t->name = p;
927 nn = strlen(p) + 1;
928 p += nn;
929 np -= nn;
930 if(np < sizeof(t->h.sha1))
931 errx(1, "malformed tree, remaining %d (%s)", np, p);
932 memcpy(t->h.sha1, p, sizeof(t->h.sha1));
933 p += sizeof(t->h.sha1);
934 np -= sizeof(t->h.sha1);
938 void
939 parseobject(Object *o)
941 if(o->flag & Cparsed)
942 return;
943 switch(o->type){
944 case GTree: parsetree(o); break;
945 case GCommit: parsecommit(o); break;
946 //case GTag: parsetag(o); break;
947 default: break;
949 o->flag |= Cparsed;
952 static Object*
953 readidxobject(FILE *idx, struct got_object_id h, int flag)
955 char path[Pathmax];
956 char hbuf[41];
957 FILE *f;
958 Object *obj;
959 int l, n;
960 off_t o;
961 struct dirent *ent;
962 DIR *d;
965 if ((obj = got_object_idset_lookup_data(objcache, &h))) {
966 if(obj->flag & Cloaded)
967 return obj;
968 if(obj->flag & Cidx){
969 assert(idx != NULL);
970 o = ftello(idx);
971 if(fseek(idx, obj->off, 0) == -1)
972 errx(1, "could not seek to object offset");
973 if(readpacked(idx, obj, flag) == -1)
974 errx(1, "could not reload object");
975 if(fseek(idx, o, 0) == -1)
976 errx(1, "could not restore offset");
977 cache(obj);
978 return obj;
982 d = NULL;
983 /* We're not putting it in the cache yet... */
984 obj = emalloc(sizeof(Object));
985 obj->id = next_object_id + 1;
986 obj->hash = h;
988 hashfmt(hbuf, sizeof(hbuf), &h);
989 snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2);
990 if((f = fopen(path, "r")) != NULL){
991 if(readloose(f, obj, flag) == -1)
992 goto error;
993 fclose(f);
994 parseobject(obj);
995 hashfmt(hbuf, sizeof(hbuf), &obj->hash);
996 fprintf(stderr, "object %s cached", hbuf);
997 cache(obj);
998 return obj;
1001 o = -1;
1002 if ((d = opendir(".git/objects/pack")) == NULL)
1003 err(1, "open pack dir");
1004 while ((ent = readdir(d)) != NULL) {
1005 l = strlen(ent->d_name);
1006 if(l > 4 && strcmp(ent->d_name + l - 4, ".idx") != 0)
1007 continue;
1008 snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name);
1009 if((f = fopen(path, "r")) == NULL)
1010 continue;
1011 o = searchindex(f, h);
1012 fclose(f);
1013 if(o == -1)
1014 continue;
1015 break;
1017 closedir(d);
1019 if (o == -1)
1020 goto error;
1022 if((n = snprintf(path, sizeof(path), "%s", path)) >= sizeof(path) - 4)
1023 goto error;
1024 memcpy(path + n - 4, ".pack", 6);
1025 if((f = fopen(path, "r")) == NULL)
1026 goto error;
1027 if(fseek(f, o, 0) == -1)
1028 goto error;
1029 if(readpacked(f, obj, flag) == -1)
1030 goto error;
1031 fclose(f);
1032 parseobject(obj);
1033 cache(obj);
1034 return obj;
1035 error:
1036 free(obj);
1037 return NULL;
1040 Object*
1041 readobject(struct got_object_id h)
1043 Object *o;
1045 o = readidxobject(NULL, h, 0);
1046 if(o)
1047 ref(o);
1048 return o;
1051 int
1052 objcmp(const void *pa, const void *pb)
1054 Object *a, *b;
1056 a = *(Object**)pa;
1057 b = *(Object**)pb;
1058 return memcmp(a->hash.sha1, b->hash.sha1, sizeof(a->hash.sha1));
1061 static int
1062 hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx)
1064 SHA1Update(ctx, buf, len);
1065 return fwrite(buf, 1, len, b);
1068 static uint32_t
1069 objectcrc(FILE *f, Object *o)
1071 char buf[8096];
1072 int n, r;
1074 o->crc = 0;
1075 fseek(f, o->off, 0);
1076 for(n = o->len; n > 0; n -= r){
1077 r = fread(buf, 1, n > sizeof(buf) ? sizeof(buf) : n, f);
1078 if(r == -1)
1079 return -1;
1080 if(r == 0)
1081 return 0;
1082 o->crc = crc32(o->crc, buf, r);
1084 return 0;
1087 int
1088 indexpack(int packfd, int idxfd, struct got_object_id *packhash)
1090 char hdr[4*3], buf[8];
1091 int nobj, nvalid, nbig, n, i, step;
1092 Object *o, **objects;
1093 char *valid;
1094 SHA1_CTX ctx, objctx;
1095 FILE *f;
1096 struct got_object_id h;
1097 int c;
1099 if ((f = fdopen(packfd, "r")) == NULL)
1100 return -1;
1101 if (fseek(f, 0, SEEK_SET) == -1)
1102 return -1;
1103 if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) {
1104 fprintf(stderr, "short read on header");
1105 return -1;
1107 if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) {
1108 fprintf(stderr, "invalid header");
1109 return -1;
1112 nvalid = 0;
1113 nobj = GETBE32(hdr + 8);
1114 objects = calloc(nobj, sizeof(Object*));
1115 valid = calloc(nobj, sizeof(char));
1116 step = nobj/100;
1117 if(!step)
1118 step++;
1119 while (nvalid != nobj) {
1120 fprintf(stderr, "indexing (%d/%d):", nvalid, nobj);
1121 n = 0;
1122 for (i = 0; i < nobj; i++) {
1123 if (valid[i]) {
1124 n++;
1125 continue;
1127 if (i % step == 0)
1128 fprintf(stderr, ".");
1129 if (!objects[i]) {
1130 o = emalloc(sizeof(Object));
1131 o->off = ftello(f);
1132 objects[i] = o;
1134 o = objects[i];
1135 fseek(f, o->off, 0);
1136 if (readpacked(f, o, Cidx) == 0){
1137 SHA1Init(&objctx);
1138 SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1);
1139 SHA1Final(o->hash.sha1, &objctx);
1140 cache(o);
1141 valid[i] = 1;
1142 n++;
1144 if(objectcrc(f, o) == -1)
1145 return -1;
1147 fprintf(stderr, "\n");
1148 if (n == nvalid) {
1149 errx(1, "fix point reached too early: %d/%d", nvalid, nobj);
1150 goto error;
1152 nvalid = n;
1154 fclose(f);
1156 SHA1Init(&ctx);
1157 qsort(objects, nobj, sizeof(Object*), objcmp);
1158 if((f = fdopen(idxfd, "w")) == NULL)
1159 return -1;
1160 if(hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8)
1161 goto error;
1162 /* fanout table */
1163 c = 0;
1164 for(i = 0; i < 256; i++){
1165 while(c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i)
1166 c++;
1167 PUTBE32(buf, c);
1168 hwrite(f, buf, 4, &ctx);
1170 for(i = 0; i < nobj; i++){
1171 o = objects[i];
1172 hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx);
1175 /* pointless, nothing uses this */
1176 for(i = 0; i < nobj; i++){
1177 PUTBE32(buf, objects[i]->crc);
1178 hwrite(f, buf, 4, &ctx);
1181 nbig = 0;
1182 for(i = 0; i < nobj; i++){
1183 if(objects[i]->off <= (1ull<<31))
1184 PUTBE32(buf, objects[i]->off);
1185 else
1186 PUTBE32(buf, (1ull << 31) | nbig++);
1187 hwrite(f, buf, 4, &ctx);
1189 for(i = 0; i < nobj; i++){
1190 if(objects[i]->off > (1ull<<31)){
1191 PUTBE64(buf, objects[i]->off);
1192 hwrite(f, buf, 8, &ctx);
1195 hwrite(f, packhash->sha1, sizeof(packhash->sha1), &ctx);
1196 SHA1Final(h.sha1, &ctx);
1197 fwrite(h.sha1, 1, sizeof(h.sha1), f);
1199 free(objects);
1200 free(valid);
1201 fclose(f);
1202 return 0;
1204 error:
1205 free(objects);
1206 free(valid);
1207 fclose(f);
1208 return -1;
1211 int
1212 main(int argc, char **argv)
1214 const struct got_error *err = NULL;
1215 struct got_object_id packhash;
1216 struct imsgbuf ibuf;
1217 struct imsg imsg;
1218 int packfd, idxfd;
1220 objcache = got_object_idset_alloc();
1221 imsg_init(&ibuf, GOT_IMSG_FD_CHILD);
1222 if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) {
1223 if (err->code == GOT_ERR_PRIVSEP_PIPE)
1224 err = NULL;
1225 goto done;
1227 if (imsg.hdr.type == GOT_IMSG_STOP)
1228 goto done;
1229 if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) {
1230 err = got_error(GOT_ERR_PRIVSEP_MSG);
1231 goto done;
1233 if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) {
1234 err = got_error(GOT_ERR_PRIVSEP_LEN);
1235 goto done;
1237 packfd = imsg.fd;
1238 memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH);
1240 if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) {
1241 if (err->code == GOT_ERR_PRIVSEP_PIPE)
1242 err = NULL;
1243 goto done;
1245 if (imsg.hdr.type == GOT_IMSG_STOP)
1246 goto done;
1247 if (imsg.hdr.type != GOT_IMSG_TMPFD) {
1248 err = got_error(GOT_ERR_PRIVSEP_MSG);
1249 goto done;
1251 if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) {
1252 err = got_error(GOT_ERR_PRIVSEP_LEN);
1253 goto done;
1255 idxfd = imsg.fd;
1257 indexpack(packfd, idxfd, &packhash);
1258 done:
1259 if(err != NULL)
1260 got_privsep_send_error(&ibuf, err);
1261 else
1262 err = got_privsep_send_index_pack_done(&ibuf);
1263 if(err != NULL) {
1264 fprintf(stderr, "%s: %s\n", getprogname(), err->msg);
1265 got_privsep_send_error(&ibuf, err);
1268 exit(0);