commit d23a6c95c554b90f2f8d0ecaeb09b73e4433576c from: Stefan Sperling via: Thomas Adam date: Tue May 31 23:14:47 2022 UTC store a path hash instead of a verbatim path in pack meta data This reduces memory use by gotadmin pack. The goal is to sort files which share a path next to each other for deltification. A hash of the path is good enough for this purpose and consumes less memory than a verbatim copy of the path. Git does something similar. ok op@ commit - eee114b41cf7ab91bbbd07d50d4d496161c901b7 commit + d23a6c95c554b90f2f8d0ecaeb09b73e4433576c blob - 72ca2f999a2f5bb3eda4f7075e6c1d7959be3faa blob + 70683bf242e8e041d19e21fdd6a8dd1eddc6421f --- lib/pack_create.c +++ lib/pack_create.c @@ -55,6 +55,8 @@ #include "got_lib_ratelimit.h" #include "got_lib_inflate.h" +#include "murmurhash2.h" + #ifndef MIN #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b)) #endif @@ -69,7 +71,7 @@ struct got_pack_meta { struct got_object_id id; - char *path; + uint32_t path_hash; int obj_type; off_t size; time_t mtime; @@ -104,7 +106,6 @@ static const struct got_error * alloc_meta(struct got_pack_meta **new, struct got_object_id *id, const char *path, int obj_type, time_t mtime) { - const struct got_error *err = NULL; struct got_pack_meta *m; *new = NULL; @@ -114,14 +115,8 @@ alloc_meta(struct got_pack_meta **new, struct got_obje return got_error_from_errno("calloc"); memcpy(&m->id, id, sizeof(m->id)); - - m->path = strdup(path); - if (m->path == NULL) { - err = got_error_from_errno("strdup"); - free(m); - return err; - } + m->path_hash = murmurhash2(path, strlen(path), 0xd70af26a); m->obj_type = obj_type; m->mtime = mtime; *new = m; @@ -133,8 +128,7 @@ clear_meta(struct got_pack_meta *meta) { if (meta == NULL) return; - free(meta->path); - meta->path = NULL; + meta->path_hash = 0; free(meta->delta_buf); meta->delta_buf = NULL; free(meta->base_obj_id); @@ -155,16 +149,16 @@ static int delta_order_cmp(const void *pa, const void *pb) { struct got_pack_meta *a, *b; - int cmp; a = *(struct got_pack_meta **)pa; b = *(struct got_pack_meta **)pb; if (a->obj_type != b->obj_type) return a->obj_type - b->obj_type; - cmp = strcmp(a->path, b->path); - if (cmp != 0) - return cmp; + if (a->path_hash < b->path_hash) + return -1; + if (a->path_hash > b->path_hash) + return 1; if (a->mtime < b->mtime) return -1; if (a->mtime > b->mtime)