commit 4eaf43871217ec0d5ddda0840af42350cd4048fd from: Stefan Sperling via: Thomas Adam date: Sat Oct 16 11:09:45 2021 UTC use a bloom filter to avoid pointless pack index searches commit - 37fdd5cc735366ad64a9becadd0ca4aab44d744f commit + 4eaf43871217ec0d5ddda0840af42350cd4048fd blob - 831cb967da0e76447d97b70ff058416fbf492de4 blob + 7005ceff4f2d553de11c1201ae67f4e97388b4be --- lib/got_lib_repository.h +++ lib/got_lib_repository.h @@ -31,21 +31,14 @@ #define GOT_PACK_CACHE_SIZE 64 struct got_packidx_bloom_filter { - RB_ENTRY(got_packidx_bloom_filter) entry; - char path[PATH_MAX]; /* on-disk path */ - size_t path_len; + char path_packidx[PATH_MAX]; /* on-disk path */ + size_t path_packidx_len; struct bloom *bloom; + STAILQ_ENTRY(got_packidx_bloom_filter) entry; }; -RB_HEAD(got_packidx_bloom_filter_tree, got_packidx_bloom_filter); +STAILQ_HEAD(got_packidx_bloom_filter_head, got_packidx_bloom_filter); -static inline int -got_packidx_bloom_filter_cmp(const struct got_packidx_bloom_filter *f1, - const struct got_packidx_bloom_filter *f2) -{ - return got_path_cmp(f1->path, f2->path, f1->path_len, f2->path_len); -} - struct got_repository { char *path; char *path_git_dir; @@ -59,7 +52,7 @@ struct got_repository { * Used to avoid opening a pack index in search of an * object ID which is not contained in this pack index. */ - struct got_packidx_bloom_filter_tree packidx_bloom_filters; + struct got_packidx_bloom_filter_head packidx_bloom_filters; /* Open file handles for pack files. */ struct got_pack packs[GOT_PACK_CACHE_SIZE]; blob - d3a67547fa3ddbd12660f688766bd387d6ed905b blob + ac869f866068f2b1050c52779f2539890a8c877f --- lib/murmurhash2.c +++ lib/murmurhash2.c @@ -5,12 +5,11 @@ /* Obtained from https://github.com/aappleby/smhasher */ #include -#include #include "murmurhash2.h" uint32_t -murmurhash2(const unsigned char * key, int len, uint32_t seed) +murmurhash2(const void * key, int len, uint32_t seed) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. @@ -24,14 +23,12 @@ murmurhash2(const unsigned char * key, int len, uint32 // Mix 4 bytes at a time into the hash - const unsigned char *data = key; + const unsigned char *data = (const unsigned char *)key; while(len >= 4) { - uint32_t k; + uint32_t k = *(uint32_t*)data; - memcpy(&k, data, sizeof(k)); - k *= m; k ^= k >> r; k *= m; @@ -61,4 +58,4 @@ murmurhash2(const unsigned char * key, int len, uint32 h ^= h >> 15; return h; -} +} blob - 8fa42cdb7d310218bbaff43e40157c42561bdebd blob + bbd2bf3ef0309efebdfbb79dd75aa100d4fb0b74 --- lib/murmurhash2.h +++ lib/murmurhash2.h @@ -4,4 +4,4 @@ /* Obtained from https://github.com/aappleby/smhasher */ -uint32_t murmurhash2(const unsigned char *key, int len, uint32_t seed); +uint32_t murmurhash2(const void *key, int len, uint32_t seed); blob - a5d6593a9f5cdcacb01699ad5efa1bbfc2aa78f8 blob + c6f5465ad763c15f1e6aad591500253352468c39 --- lib/repository.c +++ lib/repository.c @@ -663,7 +663,7 @@ got_repo_open(struct got_repository **repop, const cha goto done; } - RB_INIT(&repo->packidx_bloom_filters); + STAILQ_INIT(&repo->packidx_bloom_filters); for (i = 0; i < nitems(repo->privsep_children); i++) { memset(&repo->privsep_children[i], 0, @@ -766,10 +766,10 @@ got_repo_close(struct got_repository *repo) got_packidx_close(repo->packidx_cache[i]); } - while ((bf = RB_MIN(got_packidx_bloom_filter_tree, - &repo->packidx_bloom_filters))) { - RB_REMOVE(got_packidx_bloom_filter_tree, - &repo->packidx_bloom_filters, bf); + while (!STAILQ_EMPTY(&repo->packidx_bloom_filters)) { + struct got_packidx_bloom_filter *bf; + bf = STAILQ_FIRST(&repo->packidx_bloom_filters); + STAILQ_REMOVE_HEAD(&repo->packidx_bloom_filters, entry); free(bf->bloom); free(bf); } @@ -997,20 +997,6 @@ got_repo_is_packidx_filename(const char *name, size_t return 0; return 1; -} - -static struct got_packidx_bloom_filter * -get_packidx_bloom_filter(struct got_repository *repo, - const char *path, size_t path_len) -{ - struct got_packidx_bloom_filter key; - - if (strlcpy(key.path, path, sizeof(key.path)) >= sizeof(key.path)) - return NULL; /* XXX */ - key.path_len = path_len; - - return RB_FIND(got_packidx_bloom_filter_tree, - &repo->packidx_bloom_filters, &key); } static int @@ -1019,9 +1005,13 @@ check_packidx_bloom_filter(struct got_repository *repo { struct got_packidx_bloom_filter *bf; - bf = get_packidx_bloom_filter(repo, path_packidx, strlen(path_packidx)); - if (bf) - return bloom_check(bf->bloom, id->sha1, sizeof(id->sha1)); + STAILQ_FOREACH(bf, &repo->packidx_bloom_filters, entry) { + if (got_path_cmp(bf->path_packidx, path_packidx, + bf->path_packidx_len, strlen(path_packidx)) == 0) { + return bloom_check(bf->bloom, id->sha1, + sizeof(id->sha1)); + } + } /* No bloom filter means this pack index must be searched. */ return 1; @@ -1047,9 +1037,11 @@ add_packidx_bloom_filter(struct got_repository *repo, return NULL; /* Do we already have a filter for this pack index? */ - if (get_packidx_bloom_filter(repo, path_packidx, - strlen(path_packidx)) != NULL) - return NULL; + STAILQ_FOREACH(bf, &repo->packidx_bloom_filters, entry) { + if (got_path_cmp(bf->path_packidx, path_packidx, + bf->path_packidx_len, strlen(path_packidx)) == 0) + return NULL; + } bf = calloc(1, sizeof(*bf)); if (bf == NULL) @@ -1060,13 +1052,14 @@ add_packidx_bloom_filter(struct got_repository *repo, return got_error_from_errno("calloc"); } - len = strlcpy(bf->path, path_packidx, sizeof(bf->path)); - if (len >= sizeof(bf->path)) { + + len = strlcpy(bf->path_packidx, path_packidx, sizeof(bf->path_packidx)); + if (len >= sizeof(bf->path_packidx)) { free(bf->bloom); free(bf); return got_error(GOT_ERR_NO_SPACE); } - bf->path_len = len; + bf->path_packidx_len = len; /* Minimum size supported by our bloom filter is 1000 entries. */ bloom_init(bf->bloom, nobjects < 1000 ? 1000 : nobjects, 0.1); @@ -1076,8 +1069,7 @@ add_packidx_bloom_filter(struct got_repository *repo, bloom_add(bf->bloom, id->sha1, sizeof(id->sha1)); } - RB_INSERT(got_packidx_bloom_filter_tree, - &repo->packidx_bloom_filters, bf); + STAILQ_INSERT_TAIL(&repo->packidx_bloom_filters, bf, entry); return NULL; }