Blame


1 4545b700 2021-10-15 thomas /*
2 4545b700 2021-10-15 thomas * Copyright (c) 2012-2017, Jyri J. Virkki
3 4545b700 2021-10-15 thomas * All rights reserved.
4 4545b700 2021-10-15 thomas *
5 4545b700 2021-10-15 thomas * Redistribution and use in source and binary forms, with or without
6 4545b700 2021-10-15 thomas * modification, are permitted provided that the following conditions are
7 4545b700 2021-10-15 thomas * met:
8 4545b700 2021-10-15 thomas *
9 4545b700 2021-10-15 thomas * 1. Redistributions of source code must retain the above copyright
10 4545b700 2021-10-15 thomas * notice, this list of conditions and the following disclaimer.
11 4545b700 2021-10-15 thomas *
12 4545b700 2021-10-15 thomas * 2. Redistributions in binary form must reproduce the above copyright
13 4545b700 2021-10-15 thomas * notice, this list of conditions and the following disclaimer in the
14 4545b700 2021-10-15 thomas * documentation and/or other materials provided with the distribution.
15 4545b700 2021-10-15 thomas *
16 4545b700 2021-10-15 thomas * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 4545b700 2021-10-15 thomas * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 4545b700 2021-10-15 thomas * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 4545b700 2021-10-15 thomas * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 4545b700 2021-10-15 thomas * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 4545b700 2021-10-15 thomas * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 4545b700 2021-10-15 thomas * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 4545b700 2021-10-15 thomas * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 4545b700 2021-10-15 thomas * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 4545b700 2021-10-15 thomas * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 4545b700 2021-10-15 thomas * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 4545b700 2021-10-15 thomas */
28 4545b700 2021-10-15 thomas
29 4545b700 2021-10-15 thomas /* Obtained from https://github.com/jvirkki/libbloom */
30 4545b700 2021-10-15 thomas
31 4545b700 2021-10-15 thomas #ifndef _BLOOM_H
32 4545b700 2021-10-15 thomas #define _BLOOM_H
33 4545b700 2021-10-15 thomas
34 4545b700 2021-10-15 thomas #ifdef __cplusplus
35 4545b700 2021-10-15 thomas extern "C" {
36 4545b700 2021-10-15 thomas #endif
37 4545b700 2021-10-15 thomas
38 4545b700 2021-10-15 thomas
39 4545b700 2021-10-15 thomas /** ***************************************************************************
40 4545b700 2021-10-15 thomas * Structure to keep track of one bloom filter. Caller needs to
41 4545b700 2021-10-15 thomas * allocate this and pass it to the functions below. First call for
42 4545b700 2021-10-15 thomas * every struct must be to bloom_init().
43 4545b700 2021-10-15 thomas *
44 4545b700 2021-10-15 thomas */
45 4545b700 2021-10-15 thomas struct bloom
46 4545b700 2021-10-15 thomas {
47 4545b700 2021-10-15 thomas // These fields are part of the public interface of this structure.
48 4545b700 2021-10-15 thomas // Client code may read these values if desired. Client code MUST NOT
49 4545b700 2021-10-15 thomas // modify any of these.
50 4545b700 2021-10-15 thomas int entries;
51 4545b700 2021-10-15 thomas double error;
52 4545b700 2021-10-15 thomas int bits;
53 4545b700 2021-10-15 thomas int bytes;
54 4545b700 2021-10-15 thomas int hashes;
55 cc524d36 2022-05-31 thomas uint32_t seed;
56 4545b700 2021-10-15 thomas
57 4545b700 2021-10-15 thomas // Fields below are private to the implementation. These may go away or
58 4545b700 2021-10-15 thomas // change incompatibly at any moment. Client code MUST NOT access or rely
59 4545b700 2021-10-15 thomas // on these.
60 4545b700 2021-10-15 thomas double bpe;
61 4545b700 2021-10-15 thomas unsigned char * bf;
62 4545b700 2021-10-15 thomas int ready;
63 4545b700 2021-10-15 thomas };
64 4545b700 2021-10-15 thomas
65 4545b700 2021-10-15 thomas
66 4545b700 2021-10-15 thomas /** ***************************************************************************
67 4545b700 2021-10-15 thomas * Initialize the bloom filter for use.
68 4545b700 2021-10-15 thomas *
69 4545b700 2021-10-15 thomas * The filter is initialized with a bit field and number of hash functions
70 4545b700 2021-10-15 thomas * according to the computations from the wikipedia entry:
71 4545b700 2021-10-15 thomas * http://en.wikipedia.org/wiki/Bloom_filter
72 4545b700 2021-10-15 thomas *
73 4545b700 2021-10-15 thomas * Optimal number of bits is:
74 4545b700 2021-10-15 thomas * bits = (entries * ln(error)) / ln(2)^2
75 4545b700 2021-10-15 thomas *
76 4545b700 2021-10-15 thomas * Optimal number of hash functions is:
77 4545b700 2021-10-15 thomas * hashes = bpe * ln(2)
78 4545b700 2021-10-15 thomas *
79 4545b700 2021-10-15 thomas * Parameters:
80 4545b700 2021-10-15 thomas * -----------
81 4545b700 2021-10-15 thomas * bloom - Pointer to an allocated struct bloom (see above).
82 4545b700 2021-10-15 thomas * entries - The expected number of entries which will be inserted.
83 4545b700 2021-10-15 thomas * Must be at least 1000 (in practice, likely much larger).
84 4545b700 2021-10-15 thomas * error - Probability of collision (as long as entries are not
85 4545b700 2021-10-15 thomas * exceeded).
86 4545b700 2021-10-15 thomas *
87 4545b700 2021-10-15 thomas * Return:
88 4545b700 2021-10-15 thomas * -------
89 4545b700 2021-10-15 thomas * 0 - on success
90 4545b700 2021-10-15 thomas * 1 - on failure
91 4545b700 2021-10-15 thomas *
92 4545b700 2021-10-15 thomas */
93 4545b700 2021-10-15 thomas int bloom_init(struct bloom * bloom, int entries, double error);
94 4545b700 2021-10-15 thomas
95 4545b700 2021-10-15 thomas
96 4545b700 2021-10-15 thomas /** ***************************************************************************
97 4545b700 2021-10-15 thomas * Deprecated, use bloom_init()
98 4545b700 2021-10-15 thomas *
99 4545b700 2021-10-15 thomas */
100 4545b700 2021-10-15 thomas int bloom_init_size(struct bloom * bloom, int entries, double error,
101 4545b700 2021-10-15 thomas unsigned int cache_size);
102 4545b700 2021-10-15 thomas
103 4545b700 2021-10-15 thomas
104 4545b700 2021-10-15 thomas /** ***************************************************************************
105 4545b700 2021-10-15 thomas * Check if the given element is in the bloom filter. Remember this may
106 4545b700 2021-10-15 thomas * return false positive if a collision occurred.
107 4545b700 2021-10-15 thomas *
108 4545b700 2021-10-15 thomas * Parameters:
109 4545b700 2021-10-15 thomas * -----------
110 4545b700 2021-10-15 thomas * bloom - Pointer to an allocated struct bloom (see above).
111 4545b700 2021-10-15 thomas * buffer - Pointer to buffer containing element to check.
112 4545b700 2021-10-15 thomas * len - Size of 'buffer'.
113 4545b700 2021-10-15 thomas *
114 4545b700 2021-10-15 thomas * Return:
115 4545b700 2021-10-15 thomas * -------
116 4545b700 2021-10-15 thomas * 0 - element is not present
117 4545b700 2021-10-15 thomas * 1 - element is present (or false positive due to collision)
118 4545b700 2021-10-15 thomas * -1 - bloom not initialized
119 4545b700 2021-10-15 thomas *
120 4545b700 2021-10-15 thomas */
121 4545b700 2021-10-15 thomas int bloom_check(struct bloom * bloom, const void * buffer, int len);
122 4545b700 2021-10-15 thomas
123 4545b700 2021-10-15 thomas
124 4545b700 2021-10-15 thomas /** ***************************************************************************
125 4545b700 2021-10-15 thomas * Add the given element to the bloom filter.
126 4545b700 2021-10-15 thomas * The return code indicates if the element (or a collision) was already in,
127 4545b700 2021-10-15 thomas * so for the common check+add use case, no need to call check separately.
128 4545b700 2021-10-15 thomas *
129 4545b700 2021-10-15 thomas * Parameters:
130 4545b700 2021-10-15 thomas * -----------
131 4545b700 2021-10-15 thomas * bloom - Pointer to an allocated struct bloom (see above).
132 4545b700 2021-10-15 thomas * buffer - Pointer to buffer containing element to add.
133 4545b700 2021-10-15 thomas * len - Size of 'buffer'.
134 4545b700 2021-10-15 thomas *
135 4545b700 2021-10-15 thomas * Return:
136 4545b700 2021-10-15 thomas * -------
137 4545b700 2021-10-15 thomas * 0 - element was not present and was added
138 4545b700 2021-10-15 thomas * 1 - element (or a collision) had already been added previously
139 4545b700 2021-10-15 thomas * -1 - bloom not initialized
140 4545b700 2021-10-15 thomas *
141 4545b700 2021-10-15 thomas */
142 4545b700 2021-10-15 thomas int bloom_add(struct bloom * bloom, const void * buffer, int len);
143 4545b700 2021-10-15 thomas
144 4545b700 2021-10-15 thomas
145 4545b700 2021-10-15 thomas /** ***************************************************************************
146 4545b700 2021-10-15 thomas * Print (to stdout) info about this bloom filter. Debugging aid.
147 4545b700 2021-10-15 thomas *
148 4545b700 2021-10-15 thomas */
149 4545b700 2021-10-15 thomas void bloom_print(struct bloom * bloom);
150 4545b700 2021-10-15 thomas
151 4545b700 2021-10-15 thomas
152 4545b700 2021-10-15 thomas /** ***************************************************************************
153 4545b700 2021-10-15 thomas * Deallocate internal storage.
154 4545b700 2021-10-15 thomas *
155 4545b700 2021-10-15 thomas * Upon return, the bloom struct is no longer usable. You may call bloom_init
156 4545b700 2021-10-15 thomas * again on the same struct to reinitialize it again.
157 4545b700 2021-10-15 thomas *
158 4545b700 2021-10-15 thomas * Parameters:
159 4545b700 2021-10-15 thomas * -----------
160 4545b700 2021-10-15 thomas * bloom - Pointer to an allocated struct bloom (see above).
161 4545b700 2021-10-15 thomas *
162 4545b700 2021-10-15 thomas * Return: none
163 4545b700 2021-10-15 thomas *
164 4545b700 2021-10-15 thomas */
165 4545b700 2021-10-15 thomas void bloom_free(struct bloom * bloom);
166 4545b700 2021-10-15 thomas
167 4545b700 2021-10-15 thomas /** ***************************************************************************
168 4545b700 2021-10-15 thomas * Erase internal storage.
169 4545b700 2021-10-15 thomas *
170 4545b700 2021-10-15 thomas * Erases all elements. Upon return, the bloom struct returns to its initial
171 4545b700 2021-10-15 thomas * (initialized) state.
172 4545b700 2021-10-15 thomas *
173 4545b700 2021-10-15 thomas * Parameters:
174 4545b700 2021-10-15 thomas * -----------
175 4545b700 2021-10-15 thomas * bloom - Pointer to an allocated struct bloom (see above).
176 4545b700 2021-10-15 thomas *
177 4545b700 2021-10-15 thomas * Return:
178 4545b700 2021-10-15 thomas * 0 - on success
179 4545b700 2021-10-15 thomas * 1 - on failure
180 4545b700 2021-10-15 thomas *
181 4545b700 2021-10-15 thomas */
182 4545b700 2021-10-15 thomas int bloom_reset(struct bloom * bloom);
183 4545b700 2021-10-15 thomas
184 4545b700 2021-10-15 thomas #ifdef __cplusplus
185 4545b700 2021-10-15 thomas }
186 4545b700 2021-10-15 thomas #endif
187 4545b700 2021-10-15 thomas
188 4545b700 2021-10-15 thomas #endif