Blob


1 /*
2 * Copyright (c) 2012-2019, Jyri J. Virkki
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
29 /* Obtained from https://github.com/jvirkki/libbloom */
31 /*
32 * Refer to bloom.h for documentation on the public interfaces.
33 */
35 #include <assert.h>
36 #include <fcntl.h>
37 #include <math.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44 #include <unistd.h>
46 #include "bloom.h"
47 #include "murmurhash2.h"
49 #define MAKESTRING(n) STRING(n)
50 #define STRING(n) #n
53 inline static int test_bit_set_bit(unsigned char * buf,
54 unsigned int x, int set_bit)
55 {
56 unsigned int byte = x >> 3;
57 unsigned char c = buf[byte]; // expensive memory access
58 unsigned int mask = 1 << (x % 8);
60 if (c & mask) {
61 return 1;
62 } else {
63 if (set_bit) {
64 buf[byte] = c | mask;
65 }
66 return 0;
67 }
68 }
71 static int bloom_check_add(struct bloom * bloom,
72 const void * buffer, int len, int add)
73 {
74 if (bloom->ready == 0) {
75 printf("bloom at %p not initialized!\n", (void *)bloom);
76 return -1;
77 }
79 int hits = 0;
80 register unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
81 register unsigned int b = murmurhash2(buffer, len, a);
82 register unsigned int x;
83 register unsigned int i;
85 for (i = 0; i < bloom->hashes; i++) {
86 x = (a + i*b) % bloom->bits;
87 if (test_bit_set_bit(bloom->bf, x, add)) {
88 hits++;
89 } else if (!add) {
90 // Don't care about the presence of all the bits. Just our own.
91 return 0;
92 }
93 }
95 if (hits == bloom->hashes) {
96 return 1; // 1 == element already in (or collision)
97 }
99 return 0;
103 int bloom_init_size(struct bloom * bloom, int entries, double error,
104 unsigned int cache_size)
106 return bloom_init(bloom, entries, error);
110 int bloom_init(struct bloom * bloom, int entries, double error)
112 bloom->ready = 0;
114 if (entries < 1000 || error == 0) {
115 return 1;
118 bloom->entries = entries;
119 bloom->error = error;
121 double num = log(bloom->error);
122 double denom = 0.480453013918201; // ln(2)^2
123 bloom->bpe = -(num / denom);
125 double dentries = (double)entries;
126 bloom->bits = (int)(dentries * bloom->bpe);
128 if (bloom->bits % 8) {
129 bloom->bytes = (bloom->bits / 8) + 1;
130 } else {
131 bloom->bytes = bloom->bits / 8;
134 bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
136 bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
137 if (bloom->bf == NULL) { // LCOV_EXCL_START
138 return 1;
139 } // LCOV_EXCL_STOP
141 bloom->ready = 1;
142 return 0;
146 int bloom_check(struct bloom * bloom, const void * buffer, int len)
148 return bloom_check_add(bloom, buffer, len, 0);
152 int bloom_add(struct bloom * bloom, const void * buffer, int len)
154 return bloom_check_add(bloom, buffer, len, 1);
158 void bloom_print(struct bloom * bloom)
160 printf("bloom at %p\n", (void *)bloom);
161 printf(" ->entries = %d\n", bloom->entries);
162 printf(" ->error = %f\n", bloom->error);
163 printf(" ->bits = %d\n", bloom->bits);
164 printf(" ->bits per elem = %f\n", bloom->bpe);
165 printf(" ->bytes = %d\n", bloom->bytes);
166 printf(" ->hash functions = %d\n", bloom->hashes);
170 void bloom_free(struct bloom * bloom)
172 if (bloom->ready) {
173 free(bloom->bf);
175 bloom->ready = 0;
179 int bloom_reset(struct bloom * bloom)
181 if (!bloom->ready) return 1;
182 memset(bloom->bf, 0, bloom->bytes);
183 return 0;