Blame


1 ab2f42e7 2019-11-10 stsp /*
2 5aa81393 2020-01-06 stsp * Copyright (c) 2019, 2020 Stefan Sperling <stsp@openbsd.org>
3 ab2f42e7 2019-11-10 stsp *
4 ab2f42e7 2019-11-10 stsp * Permission to use, copy, modify, and distribute this software for any
5 ab2f42e7 2019-11-10 stsp * purpose with or without fee is hereby granted, provided that the above
6 ab2f42e7 2019-11-10 stsp * copyright notice and this permission notice appear in all copies.
7 ab2f42e7 2019-11-10 stsp *
8 ab2f42e7 2019-11-10 stsp * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 ab2f42e7 2019-11-10 stsp * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 ab2f42e7 2019-11-10 stsp * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ab2f42e7 2019-11-10 stsp * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 ab2f42e7 2019-11-10 stsp * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ab2f42e7 2019-11-10 stsp * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 ab2f42e7 2019-11-10 stsp * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 ab2f42e7 2019-11-10 stsp */
16 ab2f42e7 2019-11-10 stsp
17 8b925c6c 2022-07-16 thomas #include <sys/queue.h>
18 ab2f42e7 2019-11-10 stsp
19 ab2f42e7 2019-11-10 stsp #include <stdlib.h>
20 ab2f42e7 2019-11-10 stsp #include <string.h>
21 26498a8b 2022-07-06 thomas #include <stdint.h>
22 588a8092 2023-02-23 thomas #include <sha1.h>
23 588a8092 2023-02-23 thomas #include <sha2.h>
24 ab2f42e7 2019-11-10 stsp #include <stdio.h>
25 ab2f42e7 2019-11-10 stsp #include <zlib.h>
26 ab2f42e7 2019-11-10 stsp #include <limits.h>
27 ab2f42e7 2019-11-10 stsp #include <time.h>
28 a5061f77 2022-06-13 thomas #include <errno.h>
29 ab2f42e7 2019-11-10 stsp
30 dd038bc6 2021-09-21 thomas.ad #include "got_compat.h"
31 dd038bc6 2021-09-21 thomas.ad
32 ab2f42e7 2019-11-10 stsp #include "got_object.h"
33 ab2f42e7 2019-11-10 stsp #include "got_error.h"
34 ab2f42e7 2019-11-10 stsp
35 ab2f42e7 2019-11-10 stsp #include "got_lib_delta.h"
36 ab2f42e7 2019-11-10 stsp #include "got_lib_inflate.h"
37 ab2f42e7 2019-11-10 stsp #include "got_lib_object.h"
38 ab2f42e7 2019-11-10 stsp #include "got_lib_delta_cache.h"
39 ab2f42e7 2019-11-10 stsp
40 ab2f42e7 2019-11-10 stsp #ifndef nitems
41 ab2f42e7 2019-11-10 stsp #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
42 ab2f42e7 2019-11-10 stsp #endif
43 ab2f42e7 2019-11-10 stsp
44 a5061f77 2022-06-13 thomas #define GOT_DELTA_CACHE_MIN_BUCKETS 64
45 345f6509 2022-11-08 thomas #define GOT_DELTA_CACHE_MAX_BUCKETS 2048
46 a5061f77 2022-06-13 thomas #define GOT_DELTA_CACHE_MAX_CHAIN 2
47 345f6509 2022-11-08 thomas #define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024
48 a5061f77 2022-06-13 thomas
49 a5061f77 2022-06-13 thomas struct got_cached_delta {
50 a5061f77 2022-06-13 thomas off_t offset;
51 a5061f77 2022-06-13 thomas uint8_t *data;
52 a5061f77 2022-06-13 thomas size_t len;
53 ab2f42e7 2019-11-10 stsp };
54 ab2f42e7 2019-11-10 stsp
55 a5061f77 2022-06-13 thomas struct got_delta_cache_head {
56 a5061f77 2022-06-13 thomas struct got_cached_delta entries[GOT_DELTA_CACHE_MAX_CHAIN];
57 a5061f77 2022-06-13 thomas unsigned int nchain;
58 a5061f77 2022-06-13 thomas };
59 ab2f42e7 2019-11-10 stsp
60 ab2f42e7 2019-11-10 stsp struct got_delta_cache {
61 a5061f77 2022-06-13 thomas struct got_delta_cache_head *buckets;
62 a5061f77 2022-06-13 thomas unsigned int nbuckets;
63 a5061f77 2022-06-13 thomas unsigned int totelem;
64 c3b318d0 2019-11-10 stsp int cache_search;
65 c3b318d0 2019-11-10 stsp int cache_hit;
66 c3b318d0 2019-11-10 stsp int cache_miss;
67 c3b318d0 2019-11-10 stsp int cache_evict;
68 c3b318d0 2019-11-10 stsp int cache_toolarge;
69 a5061f77 2022-06-13 thomas unsigned int flags;
70 a5061f77 2022-06-13 thomas #define GOT_DELTA_CACHE_F_NOMEM 0x01
71 a5061f77 2022-06-13 thomas SIPHASH_KEY key;
72 ab2f42e7 2019-11-10 stsp };
73 ab2f42e7 2019-11-10 stsp
74 a5061f77 2022-06-13 thomas const struct got_error *
75 a5061f77 2022-06-13 thomas got_delta_cache_alloc(struct got_delta_cache **new)
76 ab2f42e7 2019-11-10 stsp {
77 a5061f77 2022-06-13 thomas const struct got_error *err;
78 ab2f42e7 2019-11-10 stsp struct got_delta_cache *cache;
79 ab2f42e7 2019-11-10 stsp
80 a5061f77 2022-06-13 thomas *new = NULL;
81 a5061f77 2022-06-13 thomas
82 ab2f42e7 2019-11-10 stsp cache = calloc(1, sizeof(*cache));
83 ab2f42e7 2019-11-10 stsp if (cache == NULL)
84 a5061f77 2022-06-13 thomas return got_error_from_errno("calloc");
85 7c7a66bd 2022-06-23 thomas
86 a5061f77 2022-06-13 thomas cache->buckets = calloc(GOT_DELTA_CACHE_MIN_BUCKETS,
87 a5061f77 2022-06-13 thomas sizeof(cache->buckets[0]));
88 a5061f77 2022-06-13 thomas if (cache->buckets == NULL) {
89 a5061f77 2022-06-13 thomas err = got_error_from_errno("calloc");
90 a5061f77 2022-06-13 thomas free(cache);
91 a5061f77 2022-06-13 thomas return err;
92 a5061f77 2022-06-13 thomas }
93 a5061f77 2022-06-13 thomas cache->nbuckets = GOT_DELTA_CACHE_MIN_BUCKETS;
94 ab2f42e7 2019-11-10 stsp
95 a5061f77 2022-06-13 thomas arc4random_buf(&cache->key, sizeof(cache->key));
96 a5061f77 2022-06-13 thomas *new = cache;
97 a5061f77 2022-06-13 thomas return NULL;
98 ab2f42e7 2019-11-10 stsp }
99 ab2f42e7 2019-11-10 stsp
100 ab2f42e7 2019-11-10 stsp void
101 ab2f42e7 2019-11-10 stsp got_delta_cache_free(struct got_delta_cache *cache)
102 ab2f42e7 2019-11-10 stsp {
103 a5061f77 2022-06-13 thomas struct got_cached_delta *delta;
104 a5061f77 2022-06-13 thomas unsigned int i;
105 ab2f42e7 2019-11-10 stsp
106 cd9fd5e0 2022-12-01 thomas #ifdef GOT_DELTA_CACHE_DEBUG
107 a5061f77 2022-06-13 thomas fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, "
108 a5061f77 2022-06-13 thomas "%d missed, %d evicted, %d too large\n", getprogname(),
109 a5061f77 2022-06-13 thomas cache->totelem, cache->cache_search, cache->cache_hit,
110 a5061f77 2022-06-13 thomas cache->cache_miss, cache->cache_evict, cache->cache_toolarge);
111 c3b318d0 2019-11-10 stsp #endif
112 a5061f77 2022-06-13 thomas for (i = 0; i < cache->nbuckets; i++) {
113 a5061f77 2022-06-13 thomas struct got_delta_cache_head *head;
114 a5061f77 2022-06-13 thomas int j;
115 a5061f77 2022-06-13 thomas head = &cache->buckets[i];
116 a5061f77 2022-06-13 thomas for (j = 0; j < head->nchain; j++) {
117 a5061f77 2022-06-13 thomas delta = &head->entries[j];
118 a5061f77 2022-06-13 thomas free(delta->data);
119 a5061f77 2022-06-13 thomas }
120 ab2f42e7 2019-11-10 stsp }
121 a5061f77 2022-06-13 thomas free(cache->buckets);
122 ab2f42e7 2019-11-10 stsp free(cache);
123 ab2f42e7 2019-11-10 stsp }
124 ab2f42e7 2019-11-10 stsp
125 a5061f77 2022-06-13 thomas static uint64_t
126 a5061f77 2022-06-13 thomas delta_cache_hash(struct got_delta_cache *cache, off_t delta_offset)
127 ab2f42e7 2019-11-10 stsp {
128 a5061f77 2022-06-13 thomas return SipHash24(&cache->key, &delta_offset, sizeof(delta_offset));
129 a5061f77 2022-06-13 thomas }
130 ab2f42e7 2019-11-10 stsp
131 b5a8f744 2022-11-08 thomas #ifndef GOT_NO_DELTA_CACHE
132 a5061f77 2022-06-13 thomas static const struct got_error *
133 a5061f77 2022-06-13 thomas delta_cache_resize(struct got_delta_cache *cache, unsigned int nbuckets)
134 a5061f77 2022-06-13 thomas {
135 a5061f77 2022-06-13 thomas struct got_delta_cache_head *buckets;
136 a5061f77 2022-06-13 thomas size_t i;
137 ab2f42e7 2019-11-10 stsp
138 a5061f77 2022-06-13 thomas buckets = calloc(nbuckets, sizeof(buckets[0]));
139 a5061f77 2022-06-13 thomas if (buckets == NULL) {
140 a5061f77 2022-06-13 thomas if (errno != ENOMEM)
141 a5061f77 2022-06-13 thomas return got_error_from_errno("calloc");
142 a5061f77 2022-06-13 thomas /* Proceed with our current amount of hash buckets. */
143 a5061f77 2022-06-13 thomas cache->flags |= GOT_DELTA_CACHE_F_NOMEM;
144 a5061f77 2022-06-13 thomas return NULL;
145 a5061f77 2022-06-13 thomas }
146 a5061f77 2022-06-13 thomas
147 a5061f77 2022-06-13 thomas arc4random_buf(&cache->key, sizeof(cache->key));
148 a5061f77 2022-06-13 thomas
149 a5061f77 2022-06-13 thomas for (i = 0; i < cache->nbuckets; i++) {
150 a5061f77 2022-06-13 thomas unsigned int j;
151 a5061f77 2022-06-13 thomas for (j = 0; j < cache->buckets[i].nchain; j++) {
152 a5061f77 2022-06-13 thomas struct got_delta_cache_head *head;
153 a5061f77 2022-06-13 thomas struct got_cached_delta *delta;
154 a5061f77 2022-06-13 thomas uint64_t idx;
155 a5061f77 2022-06-13 thomas delta = &cache->buckets[i].entries[j];
156 a5061f77 2022-06-13 thomas idx = delta_cache_hash(cache, delta->offset) % nbuckets;
157 a5061f77 2022-06-13 thomas head = &buckets[idx];
158 a5061f77 2022-06-13 thomas if (head->nchain < nitems(head->entries)) {
159 a5061f77 2022-06-13 thomas struct got_cached_delta *new_delta;
160 a5061f77 2022-06-13 thomas new_delta = &head->entries[head->nchain];
161 a5061f77 2022-06-13 thomas memcpy(new_delta, delta, sizeof(*new_delta));
162 a5061f77 2022-06-13 thomas head->nchain++;
163 c94b2859 2022-11-08 thomas } else {
164 a5061f77 2022-06-13 thomas free(delta->data);
165 c94b2859 2022-11-08 thomas cache->totelem--;
166 c94b2859 2022-11-08 thomas }
167 a5061f77 2022-06-13 thomas }
168 a5061f77 2022-06-13 thomas }
169 a5061f77 2022-06-13 thomas
170 a5061f77 2022-06-13 thomas free(cache->buckets);
171 a5061f77 2022-06-13 thomas cache->buckets = buckets;
172 a5061f77 2022-06-13 thomas cache->nbuckets = nbuckets;
173 a5061f77 2022-06-13 thomas return NULL;
174 ab2f42e7 2019-11-10 stsp }
175 ab2f42e7 2019-11-10 stsp
176 a5061f77 2022-06-13 thomas static const struct got_error *
177 a5061f77 2022-06-13 thomas delta_cache_grow(struct got_delta_cache *cache)
178 a5061f77 2022-06-13 thomas {
179 a5061f77 2022-06-13 thomas unsigned int nbuckets;
180 a5061f77 2022-06-13 thomas
181 a5061f77 2022-06-13 thomas if ((cache->flags & GOT_DELTA_CACHE_F_NOMEM) ||
182 a5061f77 2022-06-13 thomas cache->nbuckets == GOT_DELTA_CACHE_MAX_BUCKETS)
183 a5061f77 2022-06-13 thomas return NULL;
184 a5061f77 2022-06-13 thomas
185 a5061f77 2022-06-13 thomas if (cache->nbuckets >= GOT_DELTA_CACHE_MAX_BUCKETS / 2)
186 a5061f77 2022-06-13 thomas nbuckets = GOT_DELTA_CACHE_MAX_BUCKETS;
187 a5061f77 2022-06-13 thomas else
188 a5061f77 2022-06-13 thomas nbuckets = cache->nbuckets * 2;
189 a5061f77 2022-06-13 thomas
190 a5061f77 2022-06-13 thomas return delta_cache_resize(cache, nbuckets);
191 a5061f77 2022-06-13 thomas }
192 7c7a66bd 2022-06-23 thomas #endif
193 a5061f77 2022-06-13 thomas
194 ab2f42e7 2019-11-10 stsp const struct got_error *
195 ab2f42e7 2019-11-10 stsp got_delta_cache_add(struct got_delta_cache *cache,
196 ab2f42e7 2019-11-10 stsp off_t delta_data_offset, uint8_t *delta_data, size_t delta_len)
197 ab2f42e7 2019-11-10 stsp {
198 b5a8f744 2022-11-08 thomas #ifdef GOT_NO_DELTA_CACHE
199 fa7a529e 2020-01-06 stsp return got_error(GOT_ERR_NO_SPACE);
200 fa7a529e 2020-01-06 stsp #else
201 a5061f77 2022-06-13 thomas const struct got_error *err = NULL;
202 a5061f77 2022-06-13 thomas struct got_cached_delta *delta;
203 a5061f77 2022-06-13 thomas struct got_delta_cache_head *head;
204 a5061f77 2022-06-13 thomas uint64_t idx;
205 ab2f42e7 2019-11-10 stsp
206 88b1b490 2022-06-13 thomas if (delta_len > GOT_DELTA_CACHE_MAX_DELTA_SIZE) {
207 c3b318d0 2019-11-10 stsp cache->cache_toolarge++;
208 ab2f42e7 2019-11-10 stsp return got_error(GOT_ERR_NO_SPACE);
209 c3b318d0 2019-11-10 stsp }
210 ab2f42e7 2019-11-10 stsp
211 a5061f77 2022-06-13 thomas if (cache->nbuckets * 3 < cache->totelem * 4) {
212 a5061f77 2022-06-13 thomas err = delta_cache_grow(cache);
213 a5061f77 2022-06-13 thomas if (err)
214 a5061f77 2022-06-13 thomas return err;
215 a5061f77 2022-06-13 thomas }
216 ab2f42e7 2019-11-10 stsp
217 a5061f77 2022-06-13 thomas idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
218 a5061f77 2022-06-13 thomas head = &cache->buckets[idx];
219 a5061f77 2022-06-13 thomas if (head->nchain >= nitems(head->entries)) {
220 a5061f77 2022-06-13 thomas delta = &head->entries[head->nchain - 1];
221 a5061f77 2022-06-13 thomas free(delta->data);
222 a5061f77 2022-06-13 thomas memset(delta, 0, sizeof(*delta));
223 a5061f77 2022-06-13 thomas head->nchain--;
224 c94b2859 2022-11-08 thomas cache->totelem--;
225 c94b2859 2022-11-08 thomas cache->cache_evict++;
226 a5061f77 2022-06-13 thomas }
227 ab2f42e7 2019-11-10 stsp
228 a5061f77 2022-06-13 thomas delta = &head->entries[head->nchain];
229 a5061f77 2022-06-13 thomas delta->offset = delta_data_offset;
230 a5061f77 2022-06-13 thomas delta->data = delta_data;
231 a5061f77 2022-06-13 thomas delta->len = delta_len;
232 a5061f77 2022-06-13 thomas head->nchain++;
233 a5061f77 2022-06-13 thomas cache->totelem++;
234 ab2f42e7 2019-11-10 stsp
235 ab2f42e7 2019-11-10 stsp return NULL;
236 fa7a529e 2020-01-06 stsp #endif
237 ab2f42e7 2019-11-10 stsp }
238 ab2f42e7 2019-11-10 stsp
239 ab2f42e7 2019-11-10 stsp void
240 ab2f42e7 2019-11-10 stsp got_delta_cache_get(uint8_t **delta_data, size_t *delta_len,
241 ab2f42e7 2019-11-10 stsp struct got_delta_cache *cache, off_t delta_data_offset)
242 ab2f42e7 2019-11-10 stsp {
243 a5061f77 2022-06-13 thomas uint64_t idx;
244 a5061f77 2022-06-13 thomas struct got_delta_cache_head *head;
245 a5061f77 2022-06-13 thomas struct got_cached_delta *delta;
246 a5061f77 2022-06-13 thomas int i;
247 ab2f42e7 2019-11-10 stsp
248 a5061f77 2022-06-13 thomas idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
249 a5061f77 2022-06-13 thomas head = &cache->buckets[idx];
250 a5061f77 2022-06-13 thomas
251 c3b318d0 2019-11-10 stsp cache->cache_search++;
252 ab2f42e7 2019-11-10 stsp *delta_data = NULL;
253 ab2f42e7 2019-11-10 stsp *delta_len = 0;
254 a5061f77 2022-06-13 thomas for (i = 0; i < head->nchain; i++) {
255 a5061f77 2022-06-13 thomas delta = &head->entries[i];
256 a5061f77 2022-06-13 thomas if (delta->offset != delta_data_offset)
257 ab2f42e7 2019-11-10 stsp continue;
258 c3b318d0 2019-11-10 stsp cache->cache_hit++;
259 a5061f77 2022-06-13 thomas if (i > 0) {
260 a5061f77 2022-06-13 thomas struct got_cached_delta tmp;
261 a5061f77 2022-06-13 thomas memcpy(&tmp, &head->entries[0], sizeof(tmp));
262 a5061f77 2022-06-13 thomas memcpy(&head->entries[0], &head->entries[i],
263 a5061f77 2022-06-13 thomas sizeof(head->entries[0]));
264 a5061f77 2022-06-13 thomas memcpy(&head->entries[i], &tmp,
265 a5061f77 2022-06-13 thomas sizeof(head->entries[i]));
266 a5061f77 2022-06-13 thomas delta = &head->entries[0];
267 ab2f42e7 2019-11-10 stsp }
268 a5061f77 2022-06-13 thomas *delta_data = delta->data;
269 a5061f77 2022-06-13 thomas *delta_len = delta->len;
270 ab2f42e7 2019-11-10 stsp return;
271 ab2f42e7 2019-11-10 stsp }
272 c3b318d0 2019-11-10 stsp
273 c3b318d0 2019-11-10 stsp cache->cache_miss++;
274 ab2f42e7 2019-11-10 stsp }