2 * Copyright (c) 2019 Ori Bernstein <ori@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/queue.h>
19 #include <sys/syslimits.h>
21 #include <sys/types.h>
40 #include "got_error.h"
41 #include "got_object.h"
43 #include "got_lib_sha1.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_inflate.h"
46 #include "got_lib_object.h"
47 #include "got_lib_object_parse.h"
48 #include "got_lib_object_idset.h"
49 #include "got_lib_privsep.h"
51 typedef struct Cinfo Cinfo;
52 typedef struct Tinfo Tinfo;
53 typedef struct Object Object;
54 typedef struct Pack Pack;
55 typedef struct Buf Buf;
56 typedef struct Dirent Dirent;
57 typedef struct Idxent Idxent;
58 typedef struct Ols Ols;
61 /* 5k objects should be enough */
97 struct got_object_id h;
102 struct got_object_id hash;
117 /* Everything below here gets cleared */
120 /* size excludes header */
137 struct got_object_id *parent;
139 struct got_object_id tree;
148 typedef struct Buf Buf;
156 static int readpacked(FILE *, Object *, int);
157 static Object *readidxobject(FILE *, struct got_object_id, int);
159 struct got_object_idset *objcache;
166 ((((b)[0] & 0xFFul) << 8) | \
167 (((b)[1] & 0xFFul) << 0))
170 ((((b)[0] & 0xFFul) << 24) | \
171 (((b)[1] & 0xFFul) << 16) | \
172 (((b)[2] & 0xFFul) << 8) | \
173 (((b)[3] & 0xFFul) << 0))
175 ((((b)[0] & 0xFFull) << 56) | \
176 (((b)[1] & 0xFFull) << 48) | \
177 (((b)[2] & 0xFFull) << 40) | \
178 (((b)[3] & 0xFFull) << 32) | \
179 (((b)[4] & 0xFFull) << 24) | \
180 (((b)[5] & 0xFFull) << 16) | \
181 (((b)[6] & 0xFFull) << 8) | \
182 (((b)[7] & 0xFFull) << 0))
184 #define PUTBE16(b, n)\
190 #define PUTBE32(b, n)\
192 (b)[0] = (n) >> 24; \
193 (b)[1] = (n) >> 16; \
198 #define PUTBE64(b, n)\
200 (b)[0] = (n) >> 56; \
201 (b)[1] = (n) >> 48; \
202 (b)[2] = (n) >> 40; \
203 (b)[3] = (n) >> 32; \
204 (b)[4] = (n) >> 24; \
205 (b)[5] = (n) >> 16; \
211 charval(int c, int *err)
213 if(c >= '0' && c <= '9')
215 if(c >= 'a' && c <= 'f')
217 if(c >= 'A' && c <= 'F')
224 hparse(struct got_object_id *h, char *b)
229 for(i = 0; i < sizeof(h->sha1); i++){
232 h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4);
233 h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0);
252 erealloc(void *p, ulong n)
264 hasheq(struct got_object_id *a, struct got_object_id *b)
266 return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0;
281 if (t < 0 || t >= sizeof(types)/sizeof(types[0]))
287 hashfmt(char *out, size_t nout, struct got_object_id *h)
292 if (nout < 2*sizeof(h->sha1) + 1)
295 for(i = 0; i < sizeof(h->sha1); i++){
296 n = (h->sha1[i] >> 4) & 0xf;
297 c0 = (n >= 10) ? n-10 + 'a' : n + '0';
298 n = h->sha1[i] & 0xf;
299 c1 = (n >= 10) ? n-10 + 'a' : n + '0';
313 assert(o->refs == 0);
314 assert((o->flag & Ccache) == 0);
315 assert(o->flag & Cloaded);
320 free(o->commit->parent);
321 free(o->commit->author);
322 free(o->commit->committer);
366 hashfmt(buf, sizeof(buf), &o->hash);
370 lrutail = lrutail->prev;
371 if(!(o->flag & Cexist)){
372 got_object_idset_add(objcache, &o->hash, o);
373 o->id = next_object_id++;
377 o->prev->next = o->next;
379 o->next->prev = o->prev;
382 lrutail->next = NULL;
391 if(!(o->flag & Ccache)){
396 while(ncache > Cachemax){
399 lrutail->next = NULL;
409 preadbe32(FILE *b, int *v, off_t off)
413 if(fseek(b, off, 0) == -1)
415 if(fread(buf, 1, sizeof(buf), b) == -1)
422 preadbe64(FILE *b, off_t *v, off_t off)
426 if(fseek(b, off, 0) == -1)
428 if(fread(buf, 1, sizeof(buf), b) == -1)
435 readvint(char *p, char **pp)
443 n |= (c & 0x7f) << i;
452 applydelta(Object *dst, Object *base, char *d, int nd)
454 char *r, *b, *ed, *er;
462 fprintf(stderr, "mismatched source size");
466 nr = readvint(d, &d);
467 r = emalloc(nr + 64);
468 n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1;
470 dst->type = base->type;
481 fprintf(stderr, "bad delta encoding");
489 if(c & 0x01 && d != ed) o |= (*d++ << 0) & 0x000000ff;
490 if(c & 0x02 && d != ed) o |= (*d++ << 8) & 0x0000ff00;
491 if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000;
492 if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000;
495 if(c & 0x10 && d != ed) l |= (*d++ << 0) & 0x0000ff;
496 if(c & 0x20 && d != ed) l |= (*d++ << 8) & 0x00ff00;
497 if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000;
498 if(l == 0) l = 0x10000;
500 assert(o + l <= base->size);
501 memmove(r, b + o, l);
512 fprintf(stderr, "truncated delta (%zd)", er - r);
520 readrdelta(FILE *f, Object *o, int nd, int flag)
522 const struct got_error *e;
523 struct got_object_id h;
529 if(fread(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1))
531 if(hasheq(&o->hash, &h))
533 if ((e = got_inflate_to_mem(&d, &n, f)) != NULL)
535 o->len = ftello(f) - o->off;
536 if(d == NULL || n != nd)
538 if((b = readidxobject(f, h, flag)) == NULL)
540 if(applydelta(o, b, d, n) == -1)
550 readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag)
561 if((c = fgetc(f)) == -1)
571 fprintf(stderr, "junk offset -%lld (from %lld)", r, p);
575 if (got_inflate_to_mem(&d, &n, f) == NULL)
577 o->len = ftello(f) - o->off;
578 if(d == NULL || n != nd)
580 if(fseek(f, p - r, 0) == -1)
582 if(readpacked(f, &b, flag) == -1)
584 if(applydelta(o, &b, d, nd) == -1)
594 readpacked(FILE *f, Object *o, int flag)
596 const struct got_error *e;
612 fprintf(stderr, "unknown type for byte %x", c);
616 if((c = fgetc(f)) == -1)
618 l |= (c & 0x7f) << s;
624 fprintf(stderr, "invalid object at %lld", ftello(f));
632 b.data = emalloc(b.sz);
633 n = snprintf(b.data, 64, "%s %lld", typestr(t), l) + 1;
635 e = got_inflate_to_mem(&data, &ndata, f);
636 if (e != NULL || n + ndata >= b.sz) {
640 memcpy(b.data + n, data, ndata);
641 o->len = ftello(f) - o->off;
644 o->data = b.data + n;
649 if(readodelta(f, o, l, p, flag) == -1)
653 if(readrdelta(f, o, l, flag) == -1)
657 o->flag |= Cloaded|flag;
662 readloose(FILE *f, Object *o, int flag)
664 struct { char *tag; int type; } *p, types[] = {
677 if (got_inflate_to_mem(&d, &n, f) != NULL)
682 for(p = types; p->tag; p++){
684 if(strncmp(s, p->tag, l) == 0){
692 if(o->type == GNone){
696 sz = strtol(s, &e, 0);
697 if(e == s || *e++ != 0){
698 fprintf(stderr, "malformed object header");
701 if(sz != n - (e - (char *)d)){
702 fprintf(stderr, "mismatched sizes");
708 o->flag |= Cloaded|flag;
717 searchindex(FILE *f, struct got_object_id h)
719 int lo, hi, idx, i, nent;
721 struct got_object_id hh;
725 * Read the fanout table. The fanout table
726 * contains 256 entries, corresponsding to
727 * the first byte of the hash. Each entry
728 * is a 4 byte big endian integer, containing
729 * the total number of entries with a leading
730 * byte <= the table index, allowing us to
731 * rapidly do a binary search on them.
735 if(preadbe32(f, &hi, o) == -1)
738 o += h.sha1[0]*4 - 4;
739 if(preadbe32(f, &lo, o + 0) == -1)
741 if(preadbe32(f, &hi, o + 4) == -1)
746 if(preadbe32(f, &nent, 8 + 255*4) == -1)
750 * Now that we know the range of hashes that the
751 * entry may exist in, read them in so we can do
755 fseek(f, Hashsz*lo + 8 + 256*4, 0);
756 for(i = 0; i < hi - lo; i++){
757 if(fread(hh.sha1, 1, sizeof(hh.sha1), f) == -1)
767 * We found the entry. If it's 32 bits, then we
768 * can just return the oset, otherwise the 32
769 * bit entry contains the oset to the 64 bit
773 oo += 256*4; /* Fanout table */
774 oo += Hashsz*nent; /* Hashes */
775 oo += 4*nent; /* Checksums */
776 oo += 4*idx; /* Offset offset */
777 if(preadbe32(f, &i, oo) == -1)
780 if(o & (1ull << 31)){
782 if(preadbe64(f, &o, o) == -1)
788 fprintf(stderr, "unable to read packfile\n");
793 hashfmt(hstr, sizeof(hstr), &h);
794 fprintf(stdout, "could not find object %s\n", hstr);
800 * Scans for non-empty word, copying it into buf.
801 * Strips off word, leading, and trailing space
804 * Returns -1 on empty string or error, leaving
808 scanword(char **str, int *nstr, char *buf, int nbuf)
816 while(n && isblank(*p)){
821 for(; n && *p && !isspace(*p); p++, n--){
828 while(n && isblank(*p)){
839 nextline(char **str, int *nstr)
843 if((s = strchr(*str, '\n')) != NULL){
844 *nstr -= s - *str + 1;
850 parseauthor(char **str, int *nstr, char **name, off_t *time)
856 parsecommit(Object *o)
858 char *p, *t, buf[128];
863 o->commit = emalloc(sizeof(Cinfo));
865 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
867 if(strcmp(buf, "tree") == 0){
868 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
869 errx(1, "invalid commit: tree missing");
870 if(hparse(&o->commit->tree, buf) == -1)
871 errx(1, "invalid commit: garbled tree");
872 }else if(strcmp(buf, "parent") == 0){
873 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
874 errx(1, "invalid commit: missing parent");
875 o->commit->parent = realloc(o->commit->parent, ++o->commit->nparent * sizeof(struct got_object_id));
876 if(!o->commit->parent)
877 err(1, "unable to malloc: ");
878 if(hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1)
879 errx(1, "invalid commit: garbled parent");
880 }else if(strcmp(buf, "author") == 0){
881 parseauthor(&p, &np, &o->commit->author, &o->commit->mtime);
882 }else if(strcmp(buf, "committer") == 0){
883 parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime);
884 }else if(strcmp(buf, "gpgsig") == 0){
886 if((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL)
887 errx(1, "malformed gpg signature");
893 while (np && isspace(*p)) {
898 o->commit->nmsg = np;
910 o->tree = emalloc(sizeof(Tinfo));
912 if(scanword(&p, &np, buf, sizeof(buf)) == -1)
914 o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent));
915 t = &o->tree->ent[o->tree->nent - 1];
916 memset(t, 0, sizeof(Dirent));
917 m = strtol(buf, NULL, 8);
918 /* FIXME: symlinks and other BS */
930 if(np < sizeof(t->h.sha1))
931 errx(1, "malformed tree, remaining %d (%s)", np, p);
932 memcpy(t->h.sha1, p, sizeof(t->h.sha1));
933 p += sizeof(t->h.sha1);
934 np -= sizeof(t->h.sha1);
939 parseobject(Object *o)
941 if(o->flag & Cparsed)
944 case GTree: parsetree(o); break;
945 case GCommit: parsecommit(o); break;
946 //case GTag: parsetag(o); break;
953 readidxobject(FILE *idx, struct got_object_id h, int flag)
965 if ((obj = got_object_idset_lookup_data(objcache, &h))) {
966 if(obj->flag & Cloaded)
968 if(obj->flag & Cidx){
971 if(fseek(idx, obj->off, 0) == -1)
972 errx(1, "could not seek to object offset");
973 if(readpacked(idx, obj, flag) == -1)
974 errx(1, "could not reload object");
975 if(fseek(idx, o, 0) == -1)
976 errx(1, "could not restore offset");
983 /* We're not putting it in the cache yet... */
984 obj = emalloc(sizeof(Object));
985 obj->id = next_object_id + 1;
988 hashfmt(hbuf, sizeof(hbuf), &h);
989 snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2);
990 if((f = fopen(path, "r")) != NULL){
991 if(readloose(f, obj, flag) == -1)
995 hashfmt(hbuf, sizeof(hbuf), &obj->hash);
996 fprintf(stderr, "object %s cached", hbuf);
1002 if ((d = opendir(".git/objects/pack")) == NULL)
1003 err(1, "open pack dir");
1004 while ((ent = readdir(d)) != NULL) {
1005 l = strlen(ent->d_name);
1006 if(l > 4 && strcmp(ent->d_name + l - 4, ".idx") != 0)
1008 snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name);
1009 if((f = fopen(path, "r")) == NULL)
1011 o = searchindex(f, h);
1022 if((n = snprintf(path, sizeof(path), "%s", path)) >= sizeof(path) - 4)
1024 memcpy(path + n - 4, ".pack", 6);
1025 if((f = fopen(path, "r")) == NULL)
1027 if(fseek(f, o, 0) == -1)
1029 if(readpacked(f, obj, flag) == -1)
1041 readobject(struct got_object_id h)
1045 o = readidxobject(NULL, h, 0);
1052 objcmp(const void *pa, const void *pb)
1058 return memcmp(a->hash.sha1, b->hash.sha1, sizeof(a->hash.sha1));
1062 hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx)
1064 SHA1Update(ctx, buf, len);
1065 return fwrite(buf, 1, len, b);
1069 objectcrc(FILE *f, Object *o)
1075 fseek(f, o->off, 0);
1076 for(n = o->len; n > 0; n -= r){
1077 r = fread(buf, 1, n > sizeof(buf) ? sizeof(buf) : n, f);
1082 o->crc = crc32(o->crc, buf, r);
1088 indexpack(int packfd, int idxfd, struct got_object_id *packhash)
1090 char hdr[4*3], buf[8];
1091 int nobj, nvalid, nbig, n, i, step;
1092 Object *o, **objects;
1094 SHA1_CTX ctx, objctx;
1096 struct got_object_id h;
1099 if ((f = fdopen(packfd, "r")) == NULL)
1101 if (fseek(f, 0, SEEK_SET) == -1)
1103 if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) {
1104 fprintf(stderr, "short read on header");
1107 if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) {
1108 fprintf(stderr, "invalid header");
1113 nobj = GETBE32(hdr + 8);
1114 objects = calloc(nobj, sizeof(Object*));
1115 valid = calloc(nobj, sizeof(char));
1119 while (nvalid != nobj) {
1120 fprintf(stderr, "indexing (%d/%d):", nvalid, nobj);
1122 for (i = 0; i < nobj; i++) {
1128 fprintf(stderr, ".");
1130 o = emalloc(sizeof(Object));
1135 fseek(f, o->off, 0);
1136 if (readpacked(f, o, Cidx) == 0){
1138 SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1);
1139 SHA1Final(o->hash.sha1, &objctx);
1144 if(objectcrc(f, o) == -1)
1147 fprintf(stderr, "\n");
1149 errx(1, "fix point reached too early: %d/%d", nvalid, nobj);
1157 qsort(objects, nobj, sizeof(Object*), objcmp);
1158 if((f = fdopen(idxfd, "w")) == NULL)
1160 if(hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8)
1164 for(i = 0; i < 256; i++){
1165 while(c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i)
1168 hwrite(f, buf, 4, &ctx);
1170 for(i = 0; i < nobj; i++){
1172 hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx);
1175 /* pointless, nothing uses this */
1176 for(i = 0; i < nobj; i++){
1177 PUTBE32(buf, objects[i]->crc);
1178 hwrite(f, buf, 4, &ctx);
1182 for(i = 0; i < nobj; i++){
1183 if(objects[i]->off <= (1ull<<31))
1184 PUTBE32(buf, objects[i]->off);
1186 PUTBE32(buf, (1ull << 31) | nbig++);
1187 hwrite(f, buf, 4, &ctx);
1189 for(i = 0; i < nobj; i++){
1190 if(objects[i]->off > (1ull<<31)){
1191 PUTBE64(buf, objects[i]->off);
1192 hwrite(f, buf, 8, &ctx);
1195 hwrite(f, packhash->sha1, sizeof(packhash->sha1), &ctx);
1196 SHA1Final(h.sha1, &ctx);
1197 fwrite(h.sha1, 1, sizeof(h.sha1), f);
1212 main(int argc, char **argv)
1214 const struct got_error *err = NULL;
1215 struct got_object_id packhash;
1216 struct imsgbuf ibuf;
1220 objcache = got_object_idset_alloc();
1221 imsg_init(&ibuf, GOT_IMSG_FD_CHILD);
1222 if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) {
1223 if (err->code == GOT_ERR_PRIVSEP_PIPE)
1227 if (imsg.hdr.type == GOT_IMSG_STOP)
1229 if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) {
1230 err = got_error(GOT_ERR_PRIVSEP_MSG);
1233 if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) {
1234 err = got_error(GOT_ERR_PRIVSEP_LEN);
1238 memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH);
1240 if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) {
1241 if (err->code == GOT_ERR_PRIVSEP_PIPE)
1245 if (imsg.hdr.type == GOT_IMSG_STOP)
1247 if (imsg.hdr.type != GOT_IMSG_TMPFD) {
1248 err = got_error(GOT_ERR_PRIVSEP_MSG);
1251 if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) {
1252 err = got_error(GOT_ERR_PRIVSEP_LEN);
1257 indexpack(packfd, idxfd, &packhash);
1260 got_privsep_send_error(&ibuf, err);
1262 err = got_privsep_send_index_pack_done(&ibuf);
1264 fprintf(stderr, "%s: %s\n", getprogname(), err->msg);
1265 got_privsep_send_error(&ibuf, err);