commit bd1223b9e402164e437d56f0fc72d208a4f354c7
from: Stefan Sperling
date: Wed Mar 14 00:21:09 2018 UTC

add a delta cache; saves us from reading + decompressing some deltas

commit - 4834ca9697f0ad310b1608d2ca08a667742c5d7c
commit + bd1223b9e402164e437d56f0fc72d208a4f354c7
blob - 2eecc5f80f379b929f5aceb980431c7b171e883f
blob + a8e4cedf77becb8285b6605ada60fbe549208fbb
--- lib/got_repository_lib.h
+++ lib/got_repository_lib.h
@@ -14,6 +14,21 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+/* A cached copy of one inflated delta, keyed by its data offset. */
+struct got_delta_cache_entry {
+	off_t data_offset;	/* 0 marks an unused slot */
+	uint8_t *delta_buf;	/* owned by the cache */
+	size_t delta_len;
+};
+
+#define GOT_DELTA_CACHE_SIZE	1024
+
+/* Cached deltas of one pack file; used slots are contiguous from slot 0. */
+struct got_delta_cache {
+	char *path_packfile;
+	struct got_delta_cache_entry deltas[GOT_DELTA_CACHE_SIZE];
+};
+
 #define GOT_PACKIDX_CACHE_SIZE	64
 
 struct got_repository {
@@ -22,5 +37,8 @@ struct got_repository {
 
 	/* The pack index cache speeds up search for packed objects. */
 	struct got_packidx_v2_hdr *packidx_cache[GOT_PACKIDX_CACHE_SIZE];
+
+	/* The delta cache speeds up reconstruction of packed objects. */
+	struct got_delta_cache delta_cache[GOT_PACKIDX_CACHE_SIZE];
 };
 
blob - 52d060271b4c99be982509935b482e278279e203
blob + d3af74a3fb9e36cb7d3086ac9cb4edb9859644a2
--- lib/pack.c
+++ lib/pack.c
@@ -974,10 +974,163 @@ get_delta_chain_max_size(uint64_t *max_size, struct go
 	}
 
 	return NULL;
+}
+
+/* Release the delta buffer held by a cache slot and mark the slot unused. */
+static void
+clear_delta_cache_entry(struct got_delta_cache_entry *entry)
+{
+	entry->data_offset = 0;
+	free(entry->delta_buf);
+	entry->delta_buf = NULL;
+	entry->delta_len = 0;
+}
+
+/*
+ * Store a private copy of a delta in the cache of one pack file.
+ * If no slot is free, the entry in the last slot is evicted and the new
+ * entry is placed in slot 0. Caching is best-effort: if the copy cannot
+ * be allocated the cache is left unmodified.
+ */
+static void
+add_delta_cache_entry(struct got_delta_cache *cache, off_t data_offset,
+    uint8_t *delta_buf, size_t delta_len)
+{
+	struct got_delta_cache_entry *entry;
+	uint8_t *buf;
+	int i;
+
+	/* Offset zero marks unused slots and cannot serve as a key. */
+	if (delta_buf == NULL || data_offset == 0)
+		return;
+
+	/* Allocate first so that a failure does not cost an eviction. */
+	buf = malloc(delta_len);
+	if (buf == NULL)
+		return;
+	memcpy(buf, delta_buf, delta_len);
+
+	for (i = 0; i < nitems(cache->deltas); i++) {
+		if (cache->deltas[i].data_offset == 0)
+			break;
+	}
+
+	if (i == nitems(cache->deltas)) {
+		clear_delta_cache_entry(&cache->deltas[i - 1]);
+		memmove(&cache->deltas[1], &cache->deltas[0],
+		    sizeof(cache->deltas) - sizeof(cache->deltas[0]));
+		i = 0;
+	}
+
+	/*
+	 * Slot i is either unused or, after the shift above, a stale copy
+	 * of slot 1. Overwrite every field without freeing anything.
+	 */
+	entry = &cache->deltas[i];
+	entry->data_offset = data_offset;
+	entry->delta_buf = buf;
+	entry->delta_len = delta_len;
 }
 
+/*
+ * Cache a delta which was read from the pack file at path_packfile.
+ * The cache for this pack file is created on first use. If all cache
+ * slots are taken, the cache in the last slot is discarded and the new
+ * cache is placed in slot 0. Caching is best-effort; failures are ignored.
+ */
+static void
+cache_delta(off_t data_offset, uint8_t *delta_buf, size_t delta_len,
+    const char *path_packfile, struct got_repository *repo)
+{
+	struct got_delta_cache *cache;
+	char *path;
+	int i, j;
+
+	for (i = 0; i < nitems(repo->delta_cache); i++) {
+		cache = &repo->delta_cache[i];
+		if (cache->path_packfile == NULL)
+			break;
+		if (strcmp(cache->path_packfile, path_packfile) == 0) {
+			add_delta_cache_entry(cache, data_offset, delta_buf,
+			    delta_len);
+			return;
+		}
+	}
+
+	/* Allocate first; a failure must not leave a gap in the array. */
+	path = strdup(path_packfile);
+	if (path == NULL)
+		return;
+
+	if (i == nitems(repo->delta_cache)) {
+		cache = &repo->delta_cache[i - 1];
+		free(cache->path_packfile);
+		cache->path_packfile = NULL;
+		for (j = 0; j < nitems(cache->deltas); j++) {
+			if (cache->deltas[j].data_offset == 0)
+				break;
+			clear_delta_cache_entry(&cache->deltas[j]);
+		}
+		memmove(&repo->delta_cache[1], &repo->delta_cache[0],
+		    sizeof(repo->delta_cache) - sizeof(repo->delta_cache[0]));
+		/*
+		 * Slot 0 is now a byte-wise copy of slot 1. Reset it so the
+		 * new cache does not alias the path and delta buffers owned
+		 * by slot 1, which would cause bogus cache hits and double
+		 * frees when the repository is closed.
+		 */
+		memset(&repo->delta_cache[0], 0, sizeof(repo->delta_cache[0]));
+		i = 0;
+	}
+
+	cache = &repo->delta_cache[i];
+	cache->path_packfile = path;
+	add_delta_cache_entry(cache, data_offset, delta_buf, delta_len);
+}
+
+/*
+ * Look up a cached delta by pack file path and data offset.
+ * On a hit, *delta_buf and *delta_len describe a buffer owned by the
+ * cache which the caller must not free. On a miss, *delta_buf is NULL
+ * and *delta_len is zero.
+ */
+static void
+get_cached_delta(uint8_t **delta_buf, size_t *delta_len,
+    off_t data_offset, const char *path_packfile, struct got_repository *repo)
+{
+	struct got_delta_cache *cache;
+	struct got_delta_cache_entry *entry;
+	int i;
+
+	*delta_buf = NULL;
+	*delta_len = 0;
+
+	for (i = 0; i < nitems(repo->delta_cache); i++) {
+		cache = &repo->delta_cache[i];
+		if (cache->path_packfile == NULL)
+			return;
+		if (strcmp(cache->path_packfile, path_packfile) == 0)
+			break;
+	}
+
+	if (i == nitems(repo->delta_cache))
+		return;
+
+	for (i = 0; i < nitems(cache->deltas); i++) {
+		entry = &cache->deltas[i];
+		if (entry->data_offset == 0)
+			break;
+		if (entry->data_offset == data_offset) {
+			*delta_buf = entry->delta_buf;
+			*delta_len = entry->delta_len;
+			break;
+		}
+	}
+}
+
 static const struct got_error *
-dump_delta_chain(struct got_delta_chain *deltas, FILE *outfile)
+dump_delta_chain(struct got_delta_chain *deltas, FILE *outfile,
+    const char *path_packfile, struct got_repository *repo)
 {
 	const struct got_error *err = NULL;
 	struct got_delta *delta;
@@ -1014,6 +1167,7 @@ dump_delta_chain(struct got_delta_chain *deltas, FILE
 		uint8_t *delta_buf = NULL;
 		size_t delta_len = 0;
 		FILE *delta_file;
+		int is_cached = 0;
 
 		delta_file = fopen(delta->path_packfile, "rb");
 		if (delta_file == NULL) {
@@ -1047,22 +1201,38 @@ dump_delta_chain(struct got_delta_chain *deltas, FILE
 			continue;
 		}
 
-		if (fseeko(delta_file, delta->data_offset, SEEK_CUR) != 0) {
+		get_cached_delta(&delta_buf, &delta_len, delta->data_offset,
+		    path_packfile, repo);
+
+		if (delta_buf == NULL) {
+			if (fseeko(delta_file, delta->data_offset, SEEK_CUR)
+			    != 0) {
+				fclose(delta_file);
+				err = got_error_from_errno();
+				goto done;
+			}
+
+			/* Delta streams should always fit in memory. */
+			err = got_inflate_to_mem(&delta_buf, &delta_len,
+			    delta_file);
 			fclose(delta_file);
-			err = got_error_from_errno();
-			goto done;
-		}
+			if (err)
+				goto done;
 
-		/* Delta streams should always fit in memory. */
-		err = got_inflate_to_mem(&delta_buf, &delta_len, delta_file);
-		fclose(delta_file);
-		if (err)
-			goto done;
+			cache_delta(delta->data_offset, delta_buf, delta_len,
+			    path_packfile, repo);
+		} else {
+			/* Cache hit; nothing needs to be read from the pack. */
+			fclose(delta_file);
+			is_cached = 1;
+		}
 
 		err = got_delta_apply(base_file, delta_buf, delta_len,
 		    /* Final delta application writes to the output file. */
 		    ++n < deltas->nentries ? accum_file : outfile);
-		free(delta_buf);
+		/* Cached buffers are owned by the delta cache. */
+		if (!is_cached)
+			free(delta_buf);
 		if (err)
 			goto done;
 
@@ -1113,7 +1283,8 @@ got_packfile_extract_object(FILE **f, struct got_objec
 
 		err = got_inflate_to_file(&obj->size, packfile, *f);
 	} else
-		err = dump_delta_chain(&obj->deltas, *f);
+		err = dump_delta_chain(&obj->deltas, *f, obj->path_packfile,
+		    repo);
 done:
 	if (packfile)
 		fclose(packfile);
blob - 1dc1c5a51c6686f80d8a7431f508604a99e48c30
blob + c19c01392d63487dbce490ca2de4bbd2576af1d0
--- lib/repository.c
+++ lib/repository.c
@@ -207,6 +207,27 @@ got_repo_close(struct got_repository *repo)
 			break;
 		got_packidx_close(repo->packidx_cache[i]);
 	}
+
+	for (i = 0; i < nitems(repo->delta_cache); i++) {
+		struct got_delta_cache *cache = &repo->delta_cache[i];
+		int j;
+
+		if (cache->path_packfile == NULL)
+			break;
+		free(cache->path_packfile);
+		cache->path_packfile = NULL;
+
+		for (j = 0; j < nitems(cache->deltas); j++) {
+			struct got_delta_cache_entry *entry = &cache->deltas[j];
+			if (entry->data_offset == 0)
+				break;
+			entry->data_offset = 0;
+			free(entry->delta_buf);
+			entry->delta_buf = NULL;
+			entry->delta_len = 0;
+		}
+	}
+
 	free(repo->path);
 	free(repo->path_git_dir);
 	free(repo);