commit d746408514803a55d763495c6a567c4142718232 from: Stefan Sperling date: Mon Jul 09 13:30:16 2018 UTC read pack files with mmap commit - 1828273a5cb2731da90afc4d82058a176fd71b84 commit + d746408514803a55d763495c6a567c4142718232 blob - ed55c453befa6d0c887220b11547b64dc3377ba6 blob + ac420c8b27bdfa56508ea4e25573f453564c686f --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -18,10 +18,11 @@ struct got_pack { char *path_packfile; int fd; + uint8_t *map; size_t filesize; }; -void got_pack_close(struct got_pack *); +const struct got_error *got_pack_close(struct got_pack *); /* See Documentation/technical/pack-format.txt in Git. */ blob - 55eb6bb8c0ba084834a8b92595cbbbba72818c43 blob + 45fce72d13ba62cbd804455e3befe0d050f112be --- lib/got_lib_zbuf.h +++ lib/got_lib_zbuf.h @@ -33,9 +33,15 @@ const struct got_error *got_inflate_read(struct got_zs size_t *); const struct got_error *got_inflate_read_fd(struct got_zstream_buf *, int, size_t *); +const struct got_error *got_inflate_read_mmap(struct got_zstream_buf *, + uint8_t *, size_t, size_t, size_t *, size_t *); void got_inflate_end(struct got_zstream_buf *); const struct got_error *got_inflate_to_mem(uint8_t **, size_t *, FILE *); const struct got_error *got_inflate_to_mem_fd(uint8_t **, size_t *, int); +const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, uint8_t *, + size_t, size_t); const struct got_error *got_inflate_to_file(size_t *, FILE *, FILE *); const struct got_error *got_inflate_to_file_fd(size_t *, int, FILE *); const struct got_error *got_inflate_to_fd(size_t *, FILE *, int); +const struct got_error *got_inflate_to_file_mmap(size_t *, uint8_t *, size_t, + size_t, FILE *); blob - 2636e0c5b0597693ac457b1e087f27f0fd48e0de blob + 2c8c7be7e19a77bc23760c44f9ea4458d0a7be6d --- lib/pack.c +++ lib/pack.c @@ -482,7 +482,7 @@ get_packfile_path(char **path_packfile, struct got_rep return NULL; } -const struct got_error * +static const struct got_error * read_packfile_hdr(int fd, struct got_packidx *packidx) { const struct got_error *err = NULL; @@ -524,14 +524,20 @@ open_packfile(int *fd, const char *path_packfile, return err; } -void +const struct got_error * got_pack_close(struct got_pack *pack) { + const struct got_error *err = NULL; + + if (munmap(pack->map, pack->filesize) == -1) + err = got_error_from_errno(); close(pack->fd); pack->fd = -1; free(pack->path_packfile); pack->path_packfile = NULL; pack->filesize = 0; + + return err; } static const struct got_error * @@ -554,7 +560,9 @@ cache_pack(struct got_pack **packp, const char *path_p } if (i == nitems(repo->packs) - 1) { - got_pack_close(&repo->packs[i - 1]); + err = got_pack_close(&repo->packs[i - 1]); + if (err) + return err; memmove(&repo->packs[1], &repo->packs[0], sizeof(repo->packs) - sizeof(repo->packs[0])); i = 0; @@ -573,6 +581,15 @@ cache_pack(struct got_pack **packp, const char *path_p goto done; err = get_packfile_size(&pack->filesize, path_packfile); + if (err) + goto done; + + pack->map = mmap(NULL, pack->filesize, PROT_READ, MAP_PRIVATE, + pack->fd, 0); + if (pack->map == MAP_FAILED) { + err = got_error_from_errno(); + pack->map = NULL; + } done: if (err) { if (pack) { @@ -602,24 +619,43 @@ get_cached_pack(const char *path_packfile, struct got_ } static const struct got_error * -parse_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len, int fd) +parse_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len, + struct got_pack *pack, off_t offset) { uint8_t t = 0; uint64_t s = 0; uint8_t sizeN; - ssize_t n; + size_t mapoff = 0; int i = 0; + + *len = 0; + + if (offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); + + if (pack->map) { + mapoff = (size_t)offset; + } else { + if (lseek(pack->fd, offset, SEEK_SET) == -1) + return got_error_from_errno(); + } do { /* We do not support size values which don't fit in 64 bit. */ if (i > 9) return got_error(GOT_ERR_NO_SPACE); - n = read(fd, &sizeN, sizeof(sizeN)); - if (n < 0) - return got_error_from_errno(); - if (n != sizeof(sizeN)) - return got_error(GOT_ERR_BAD_PACKFILE); + if (pack->map) { + sizeN = *(pack->map + mapoff); + mapoff += sizeof(sizeN); + } else { + ssize_t n = read(pack->fd, &sizeN, sizeof(sizeN)); + if (n < 0) + return got_error_from_errno(); + if (n != sizeof(sizeN)) + return got_error(GOT_ERR_BAD_PACKFILE); + } + *len += sizeof(sizeN); if (i == 0) { t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >> @@ -634,7 +670,6 @@ parse_object_type_and_size(uint8_t *type, uint64_t *si *type = t; *size = s; - *len = i * sizeof(sizeN); return NULL; } @@ -665,23 +700,35 @@ open_plain_object(struct got_object **obj, const char } static const struct got_error * -parse_negative_offset(int64_t *offset, size_t *len, int fd) +parse_negative_offset(int64_t *offset, size_t *len, struct got_pack *pack, + off_t delta_offset) { int64_t o = 0; uint8_t offN; - ssize_t n; int i = 0; + *len = 0; + do { /* We do not support offset values which don't fit in 64 bit. */ if (i > 8) return got_error(GOT_ERR_NO_SPACE); - n = read(fd, &offN, sizeof(offN)); - if (n < 0) - return got_error_from_errno(); - if (n != sizeof(offN)) - return got_error(GOT_ERR_BAD_PACKFILE); + if (pack->map) { + size_t mapoff; + if (delta_offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); + mapoff = (size_t)delta_offset + *len; + offN = *(pack->map + mapoff); + } else { + ssize_t n; + n = read(pack->fd, &offN, sizeof(offN)); + if (n < 0) + return got_error_from_errno(); + if (n != sizeof(offN)) + return got_error(GOT_ERR_BAD_PACKFILE); + } + *len += sizeof(offN); if (i == 0) o = (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK); @@ -694,18 +741,21 @@ parse_negative_offset(int64_t *offset, size_t *len, in } while (offN & GOT_PACK_OBJ_DELTA_OFF_MORE); *offset = o; - *len = i * sizeof(offN); return NULL; } static const struct got_error * -parse_offset_delta(off_t *base_offset, int fd, off_t offset) +parse_offset_delta(off_t *base_offset, size_t *len, struct got_pack *pack, + off_t offset, int tslen) { const struct got_error *err; int64_t negoffset; size_t negofflen; - err = parse_negative_offset(&negoffset, &negofflen, fd); + *len = 0; + + err = parse_negative_offset(&negoffset, &negofflen, pack, + offset + tslen); if (err) return err; @@ -714,6 +764,7 @@ parse_offset_delta(off_t *base_offset, int fd, off_t o if (*base_offset <= 0) return got_error(GOT_ERR_BAD_PACKFILE); + *len = negofflen; return NULL; } @@ -754,20 +805,36 @@ resolve_offset_delta(struct got_delta_chain *deltas, size_t base_tslen; off_t delta_data_offset; uint8_t *delta_buf; - size_t delta_len; + size_t delta_len, consumed; - err = parse_offset_delta(&base_offset, pack->fd, delta_offset); + err = parse_offset_delta(&base_offset, &consumed, pack, + delta_offset, tslen); if (err) return err; - delta_data_offset = lseek(pack->fd, 0, SEEK_CUR); - if (delta_data_offset == -1) - return got_error_from_errno(); + delta_data_offset = delta_offset + tslen + consumed; + if (delta_data_offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); - err = got_inflate_to_mem_fd(&delta_buf, &delta_len, pack->fd); - if (err) - return err; + if (pack->map == NULL) { + delta_data_offset = lseek(pack->fd, 0, SEEK_CUR); + if (delta_data_offset == -1) + return got_error_from_errno(); + } + if (pack->map) { + size_t mapoff = (size_t)delta_data_offset; + err = got_inflate_to_mem_mmap(&delta_buf, &delta_len, pack->map, + mapoff, pack->filesize - mapoff); + if (err) + return err; + } else { + + err = got_inflate_to_mem_fd(&delta_buf, &delta_len, pack->fd); + if (err) + return err; + } + err = add_delta(deltas, pack->path_packfile, delta_offset, tslen, delta_type, delta_size, delta_data_offset, delta_buf, delta_len); if (err) @@ -776,11 +843,9 @@ resolve_offset_delta(struct got_delta_chain *deltas, /* An offset delta must be in the same packfile. */ if (base_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); - if (lseek(pack->fd, base_offset, SEEK_SET) == -1) - return got_error_from_errno(); err = parse_object_type_and_size(&base_type, &base_size, &base_tslen, - pack->fd); + pack, base_offset); if (err) return err; @@ -801,25 +866,42 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru uint8_t base_type; uint64_t base_size; size_t base_tslen; - ssize_t n; off_t delta_data_offset; uint8_t *delta_buf; size_t delta_len; - n = read(pack->fd, &id, sizeof(id)); - if (n < 0) - return got_error_from_errno(); - if (n != sizeof(id)) - return got_error(GOT_ERR_BAD_PACKFILE); - - delta_data_offset = lseek(pack->fd, 0, SEEK_CUR); - if (delta_data_offset == -1) - return got_error_from_errno(); + if (delta_offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); + delta_data_offset = delta_offset + tslen + sizeof(id); + if (delta_data_offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); - err = got_inflate_to_mem_fd(&delta_buf, &delta_len, pack->fd); - if (err) - return err; + if (pack->map == NULL) { + delta_data_offset = lseek(pack->fd, 0, SEEK_CUR); + if (delta_data_offset == -1) + return got_error_from_errno(); + } + + if (pack->map) { + size_t mapoff = (size_t)delta_offset; + memcpy(&id, pack->map + mapoff, sizeof(id)); + mapoff += sizeof(id); + err = got_inflate_to_mem_mmap(&delta_buf, &delta_len, pack->map, + mapoff, pack->filesize - delta_data_offset); + if (err) + return err; + } else { + ssize_t n = read(pack->fd, &id, sizeof(id)); + if (n < 0) + return got_error_from_errno(); + if (n != sizeof(id)) + return got_error(GOT_ERR_BAD_PACKFILE); + err = got_inflate_to_mem_fd(&delta_buf, &delta_len, pack->fd); + if (err) + return err; + } + err = add_delta(deltas, pack->path_packfile, delta_offset, tslen, delta_type, delta_size, delta_data_offset, delta_buf, delta_len); if (err) @@ -839,13 +921,9 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru err = got_error(GOT_ERR_PACK_OFFSET); goto done; } - if (lseek(pack->fd, base_offset, SEEK_SET) == -1) { - err = got_error_from_errno(); - goto done; - } err = parse_object_type_and_size(&base_type, &base_size, &base_tslen, - pack->fd); + pack, base_offset); if (err) goto done; @@ -965,16 +1043,7 @@ open_packed_object(struct got_object **obj, struct got goto done; } - if (offset >= pack->filesize) { - err = got_error(GOT_ERR_PACK_OFFSET); - goto done; - } - if (lseek(pack->fd, offset, SEEK_SET) == -1) { - err = got_error_from_errno(); - goto done; - } - - err = parse_object_type_and_size(&type, &size, &tslen, pack->fd); + err = parse_object_type_and_size(&type, &size, &tslen, pack, offset); if (err) goto done; @@ -1093,7 +1162,7 @@ dump_delta_chain_to_file(size_t *result_size, struct g SIMPLEQ_FOREACH(delta, &deltas->entries, entry) { if (n == 0) { struct got_pack *pack; - size_t base_len; + size_t base_len, mapoff; off_t delta_data_offset; /* Plain object types are the delta base. */ @@ -1116,17 +1185,31 @@ dump_delta_chain_to_file(size_t *result_size, struct g err = got_error(GOT_ERR_PACK_OFFSET); goto done; } - if (lseek(pack->fd, delta_data_offset, SEEK_SET) - == -1) { - err = got_error_from_errno(); - goto done; + if (pack->map == NULL) { + if (lseek(pack->fd, delta_data_offset, SEEK_SET) + == -1) { + err = got_error_from_errno(); + goto done; + } } - if (base_file) - err = got_inflate_to_file_fd(&base_len, - pack->fd, base_file); - else { - err = got_inflate_to_mem_fd(&base_buf, - &base_len, pack->fd); + if (base_file) { + if (pack->map) { + mapoff = (size_t)delta_data_offset; + err = got_inflate_to_file_mmap( + &base_len, pack->map, mapoff, + pack->filesize - mapoff, base_file); + } else + err = got_inflate_to_file_fd(&base_len, + pack->fd, base_file); + } else { + if (pack->map) { + mapoff = (size_t)delta_data_offset; + err = got_inflate_to_mem_mmap(&base_buf, + &base_len, pack->map, mapoff, + pack->filesize - mapoff); + } else + err = got_inflate_to_mem_fd(&base_buf, + &base_len, pack->fd); if (base_len < max_size) { uint8_t *p; p = reallocarray(base_buf, 1, max_size); @@ -1244,13 +1327,22 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outl err = got_error(GOT_ERR_PACK_OFFSET); goto done; } - if (lseek(pack->fd, delta_data_offset, SEEK_SET) - == -1) { - err = got_error_from_errno(); - goto done; + if (pack->map) { + size_t mapoff = (size_t)delta_data_offset; + err = got_inflate_to_mem_mmap(&base_buf, + &base_len, pack->map, mapoff, + pack->filesize - mapoff); + } else { + if (lseek(pack->fd, delta_data_offset, SEEK_SET) + == -1) { + err = got_error_from_errno(); + goto done; + } + err = got_inflate_to_mem_fd(&base_buf, + &base_len, pack->fd); } - err = got_inflate_to_mem_fd(&base_buf, &base_len, - pack->fd); + if (err) + goto done; if (base_len < max_size) { uint8_t *p; p = reallocarray(base_buf, 1, max_size); @@ -1260,8 +1352,6 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outl } base_buf = p; } - if (err) - goto done; n++; continue; } @@ -1324,12 +1414,18 @@ got_packfile_extract_object(FILE **f, struct got_objec err = got_error(GOT_ERR_PACK_OFFSET); goto done; } - if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) { - err = got_error_from_errno(); - goto done; - } - err = got_inflate_to_file_fd(&obj->size, pack->fd, *f); + if (pack->map) { + size_t mapoff = (size_t)obj->pack_offset; + err = got_inflate_to_file_mmap(&obj->size, pack->map, + mapoff, pack->filesize - mapoff, *f); + } else { + if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) { + err = got_error_from_errno(); + goto done; + } + err = got_inflate_to_file_fd(&obj->size, pack->fd, *f); + } } else err = dump_delta_chain_to_file(&obj->size, &obj->deltas, *f, repo); @@ -1364,12 +1460,17 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size err = got_error(GOT_ERR_PACK_OFFSET); goto done; } - if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) { - err = got_error_from_errno(); - goto done; + if (pack->map) { + size_t mapoff = (size_t)obj->pack_offset; + err = got_inflate_to_mem_mmap(buf, len, pack->map, + mapoff, pack->filesize - mapoff); + } else { + if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) { + err = got_error_from_errno(); + goto done; + } + err = got_inflate_to_mem_fd(buf, len, pack->fd); } - - err = got_inflate_to_mem_fd(buf, len, pack->fd); } else err = dump_delta_chain_to_mem(buf, len, &obj->deltas, repo); done: blob - cd8dc7d1925b20202d7dd0d3f89e37bcc75806da blob + 85e1798e7ce36e945d837778bafed18dbe0dbd92 --- lib/zbuf.c +++ lib/zbuf.c @@ -29,6 +29,10 @@ #include "got_lib_path.h" #include "got_lib_zbuf.h" +#ifndef MIN +#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b)) +#endif + const struct got_error * got_inflate_init(struct got_zstream_buf *zb, uint8_t *outbuf, size_t bufsize) { @@ -145,6 +149,48 @@ got_inflate_read_fd(struct got_zstream_buf *zb, int fd return NULL; } +const struct got_error * +got_inflate_read_mmap(struct got_zstream_buf *zb, uint8_t *map, size_t offset, + size_t len, size_t *outlenp, size_t *consumed) +{ + size_t last_total_out = zb->z.total_out; + z_stream *z = &zb->z; + int ret = Z_ERRNO; + + z->next_out = zb->outbuf; + z->avail_out = zb->outlen; + + *outlenp = 0; + *consumed = 0; + + do { + if (z->avail_in == 0) { + if (len == 0) { + /* EOF */ + ret = Z_STREAM_END; + break; + } + z->next_in = map + offset; + z->avail_in = MIN(zb->inlen, len); + *consumed += z->avail_in; + offset += z->avail_in; + len -= z->avail_in; + } + ret = inflate(z, Z_SYNC_FLUSH); + } while (ret == Z_OK && z->avail_out > 0); + + if (ret == Z_OK) { + zb->flags |= GOT_ZSTREAM_F_HAVE_MORE; + } else { + if (ret != Z_STREAM_END) + return got_error(GOT_ERR_DECOMPRESSION); + zb->flags &= ~GOT_ZSTREAM_F_HAVE_MORE; + } + + *outlenp = z->total_out - last_total_out; + return NULL; +} + void got_inflate_end(struct got_zstream_buf *zb) { @@ -216,9 +262,59 @@ got_inflate_to_mem_fd(uint8_t **outbuf, size_t *outlen do { err = got_inflate_read_fd(&zb, infd, &avail); + if (err) + return err; + *outlen += avail; + if (zb.flags & GOT_ZSTREAM_F_HAVE_MORE) { + newbuf = reallocarray(*outbuf, 1, + *outlen + GOT_ZSTREAM_BUFSIZE); + if (newbuf == NULL) { + err = got_error_from_errno(); + free(*outbuf); + *outbuf = NULL; + *outlen = 0; + goto done; + } + *outbuf = newbuf; + zb.outbuf = newbuf + *outlen; + zb.outlen = GOT_ZSTREAM_BUFSIZE; + } + } while (zb.flags & GOT_ZSTREAM_F_HAVE_MORE); + +done: + got_inflate_end(&zb); + return err; +} + +const struct got_error * +got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outlen, uint8_t *map, + size_t offset, size_t len) +{ + const struct got_error *err; + size_t avail; + struct got_zstream_buf zb; + void *newbuf; + size_t consumed; + + *outbuf = calloc(1, GOT_ZSTREAM_BUFSIZE); + if (*outbuf == NULL) + return got_error_from_errno(); + err = got_inflate_init(&zb, *outbuf, GOT_ZSTREAM_BUFSIZE); + if (err) + return err; + + *outlen = 0; + + do { + err = got_inflate_read_mmap(&zb, map, offset, len, &avail, + &consumed); if (err) return err; + offset += consumed; + len -= consumed; *outlen += avail; + if (len == 0) + break; if (zb.flags & GOT_ZSTREAM_F_HAVE_MORE) { newbuf = reallocarray(*outbuf, 1, *outlen + GOT_ZSTREAM_BUFSIZE); @@ -346,3 +442,43 @@ done: got_inflate_end(&zb); return err; } + +const struct got_error * +got_inflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset, + size_t len, FILE *outfile) +{ + const struct got_error *err; + size_t avail; + struct got_zstream_buf zb; + size_t consumed; + + err = got_inflate_init(&zb, NULL, GOT_ZSTREAM_BUFSIZE); + if (err) + goto done; + + *outlen = 0; + + do { + err = got_inflate_read_mmap(&zb, map, offset, len, &avail, + &consumed); + if (err) + return err; + offset += consumed; + len -= consumed; + if (avail > 0) { + size_t n; + n = fwrite(zb.outbuf, avail, 1, outfile); + if (n != 1) { + err = got_ferror(outfile, GOT_ERR_IO); + goto done; + } + *outlen += avail; + } + } while (zb.flags & GOT_ZSTREAM_F_HAVE_MORE); + +done: + if (err == NULL) + rewind(outfile); + got_inflate_end(&zb); + return err; +}