2 b343c297 2021-10-11 stsp * Copyright (c) 2012-2019, Jyri J. Virkki
3 b343c297 2021-10-11 stsp * All rights reserved.
5 b343c297 2021-10-11 stsp * Redistribution and use in source and binary forms, with or without
6 b343c297 2021-10-11 stsp * modification, are permitted provided that the following conditions are
9 b343c297 2021-10-11 stsp * 1. Redistributions of source code must retain the above copyright
10 b343c297 2021-10-11 stsp * notice, this list of conditions and the following disclaimer.
12 b343c297 2021-10-11 stsp * 2. Redistributions in binary form must reproduce the above copyright
13 b343c297 2021-10-11 stsp * notice, this list of conditions and the following disclaimer in the
14 b343c297 2021-10-11 stsp * documentation and/or other materials provided with the distribution.
16 b343c297 2021-10-11 stsp * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 b343c297 2021-10-11 stsp * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 b343c297 2021-10-11 stsp * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 b343c297 2021-10-11 stsp * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 b343c297 2021-10-11 stsp * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 b343c297 2021-10-11 stsp * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 b343c297 2021-10-11 stsp * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 b343c297 2021-10-11 stsp * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 b343c297 2021-10-11 stsp * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 b343c297 2021-10-11 stsp * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 b343c297 2021-10-11 stsp * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 b343c297 2021-10-11 stsp /* Obtained from https://github.com/jvirkki/libbloom */
32 b343c297 2021-10-11 stsp * Refer to bloom.h for documentation on the public interfaces.
35 b343c297 2021-10-11 stsp #include <assert.h>
36 b343c297 2021-10-11 stsp #include <fcntl.h>
37 b343c297 2021-10-11 stsp #include <math.h>
38 b343c297 2021-10-11 stsp #include <stdint.h>
39 b343c297 2021-10-11 stsp #include <stdio.h>
40 b343c297 2021-10-11 stsp #include <stdlib.h>
41 b343c297 2021-10-11 stsp #include <string.h>
42 b343c297 2021-10-11 stsp #include <sys/stat.h>
43 b343c297 2021-10-11 stsp #include <sys/types.h>
44 b343c297 2021-10-11 stsp #include <unistd.h>
46 b343c297 2021-10-11 stsp #include "bloom.h"
47 b343c297 2021-10-11 stsp #include "murmurhash2.h"
49 b343c297 2021-10-11 stsp #define MAKESTRING(n) STRING(n)
50 b343c297 2021-10-11 stsp #define STRING(n) #n
53 b343c297 2021-10-11 stsp inline static int test_bit_set_bit(unsigned char * buf,
54 b343c297 2021-10-11 stsp unsigned int x, int set_bit)
56 b343c297 2021-10-11 stsp unsigned int byte = x >> 3;
57 b343c297 2021-10-11 stsp unsigned char c = buf[byte]; // expensive memory access
58 b343c297 2021-10-11 stsp unsigned int mask = 1 << (x % 8);
60 b343c297 2021-10-11 stsp if (c & mask) {
63 b343c297 2021-10-11 stsp if (set_bit) {
64 b343c297 2021-10-11 stsp buf[byte] = c | mask;
71 b343c297 2021-10-11 stsp static int bloom_check_add(struct bloom * bloom,
72 b343c297 2021-10-11 stsp const void * buffer, int len, int add)
74 b343c297 2021-10-11 stsp if (bloom->ready == 0) {
75 b343c297 2021-10-11 stsp printf("bloom at %p not initialized!\n", (void *)bloom);
79 b343c297 2021-10-11 stsp int hits = 0;
80 d6a28ffe 2022-05-20 op register unsigned int a = murmurhash2(buffer, len, bloom->seed);
81 b343c297 2021-10-11 stsp register unsigned int b = murmurhash2(buffer, len, a);
82 b343c297 2021-10-11 stsp register unsigned int x;
83 b343c297 2021-10-11 stsp register unsigned int i;
85 b343c297 2021-10-11 stsp for (i = 0; i < bloom->hashes; i++) {
86 b343c297 2021-10-11 stsp x = (a + i*b) % bloom->bits;
87 b343c297 2021-10-11 stsp if (test_bit_set_bit(bloom->bf, x, add)) {
89 b343c297 2021-10-11 stsp } else if (!add) {
90 b343c297 2021-10-11 stsp // Don't care about the presence of all the bits. Just our own.
95 b343c297 2021-10-11 stsp if (hits == bloom->hashes) {
96 b343c297 2021-10-11 stsp return 1; // 1 == element already in (or collision)
103 b343c297 2021-10-11 stsp int bloom_init_size(struct bloom * bloom, int entries, double error,
104 b343c297 2021-10-11 stsp unsigned int cache_size)
106 b343c297 2021-10-11 stsp return bloom_init(bloom, entries, error);
110 b343c297 2021-10-11 stsp int bloom_init(struct bloom * bloom, int entries, double error)
112 b343c297 2021-10-11 stsp bloom->ready = 0;
114 d6a28ffe 2022-05-20 op bloom->seed = arc4random();
116 b343c297 2021-10-11 stsp if (entries < 1000 || error == 0) {
120 b343c297 2021-10-11 stsp bloom->entries = entries;
121 b343c297 2021-10-11 stsp bloom->error = error;
123 b343c297 2021-10-11 stsp double num = log(bloom->error);
124 b343c297 2021-10-11 stsp double denom = 0.480453013918201; // ln(2)^2
125 b343c297 2021-10-11 stsp bloom->bpe = -(num / denom);
127 b343c297 2021-10-11 stsp double dentries = (double)entries;
128 b343c297 2021-10-11 stsp bloom->bits = (int)(dentries * bloom->bpe);
130 b343c297 2021-10-11 stsp if (bloom->bits % 8) {
131 b343c297 2021-10-11 stsp bloom->bytes = (bloom->bits / 8) + 1;
133 b343c297 2021-10-11 stsp bloom->bytes = bloom->bits / 8;
136 b343c297 2021-10-11 stsp bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
138 b343c297 2021-10-11 stsp bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
139 b343c297 2021-10-11 stsp if (bloom->bf == NULL) { // LCOV_EXCL_START
141 b343c297 2021-10-11 stsp } // LCOV_EXCL_STOP
143 b343c297 2021-10-11 stsp bloom->ready = 1;
148 b343c297 2021-10-11 stsp int bloom_check(struct bloom * bloom, const void * buffer, int len)
150 b343c297 2021-10-11 stsp return bloom_check_add(bloom, buffer, len, 0);
154 b343c297 2021-10-11 stsp int bloom_add(struct bloom * bloom, const void * buffer, int len)
156 b343c297 2021-10-11 stsp return bloom_check_add(bloom, buffer, len, 1);
160 b343c297 2021-10-11 stsp void bloom_print(struct bloom * bloom)
162 b343c297 2021-10-11 stsp printf("bloom at %p\n", (void *)bloom);
163 b343c297 2021-10-11 stsp printf(" ->entries = %d\n", bloom->entries);
164 b343c297 2021-10-11 stsp printf(" ->error = %f\n", bloom->error);
165 b343c297 2021-10-11 stsp printf(" ->bits = %d\n", bloom->bits);
166 b343c297 2021-10-11 stsp printf(" ->bits per elem = %f\n", bloom->bpe);
167 b343c297 2021-10-11 stsp printf(" ->bytes = %d\n", bloom->bytes);
168 b343c297 2021-10-11 stsp printf(" ->hash functions = %d\n", bloom->hashes);
172 b343c297 2021-10-11 stsp void bloom_free(struct bloom * bloom)
174 b343c297 2021-10-11 stsp if (bloom->ready) {
175 b343c297 2021-10-11 stsp free(bloom->bf);
177 b343c297 2021-10-11 stsp bloom->ready = 0;
181 b343c297 2021-10-11 stsp int bloom_reset(struct bloom * bloom)
183 b343c297 2021-10-11 stsp if (!bloom->ready) return 1;
184 b343c297 2021-10-11 stsp memset(bloom->bf, 0, bloom->bytes);