Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include "got_compat.h"
20 #include <sys/types.h>
21 #include <sys/queue.h>
23 #include <assert.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "got_error.h"
33 #include "got_lib_deltify.h"
34 #include "murmurhash2.h"
36 #ifndef MIN
37 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
38 #endif
40 /*
41 * The algorihm used here is FastCDC (Fast Content-Defined Chunking)
42 * https://www.usenix.org/conference/atc16/technical-sessions/presentation/xia
43 */
45 static const uint32_t geartab[256] = {
46 0x67ed26b7, 0x32da500c, 0x53d0fee0, 0xce387dc7, 0xcd406d90, 0x2e83a4d4,
47 0x9fc9a38d, 0xb67259dc, 0xca6b1722, 0x6d2ea08c, 0x235cea2e, 0x3149bb5f,
48 0x1beda787, 0x2a6b77d5, 0x2f22d9ac, 0x91fc0544, 0xe413acfa, 0x5a30ff7a,
49 0xad6fdde0, 0x444fd0f5, 0x7ad87864, 0x58c5ff05, 0x8d2ec336, 0x2371f853,
50 0x550f8572, 0x6aa448dd, 0x7c9ddbcf, 0x95221e14, 0x2a82ec33, 0xcbec5a78,
51 0xc6795a0d, 0x243995b7, 0x1c909a2f, 0x4fded51c, 0x635d334b, 0x0e2b9999,
52 0x2702968d, 0x856de1d5, 0x3325d60e, 0xeb6a7502, 0xec2a9844, 0x0905835a,
53 0xa1820375, 0xa4be5cab, 0x96a6c058, 0x2c2ccd70, 0xba40fce3, 0xd794c46b,
54 0x8fbae83e, 0xc3aa7899, 0x3d3ff8ed, 0xa0d42b5b, 0x571c0c97, 0xd2811516,
55 0xf7e7b96c, 0x4fd2fcbd, 0xe2fdec94, 0x282cc436, 0x78e8e95c, 0x80a3b613,
56 0xcfbee20c, 0xd4a32d1c, 0x2a12ff13, 0x6af82936, 0xe5630258, 0x8efa6a98,
57 0x294fb2d1, 0xdeb57086, 0x5f0fddb3, 0xeceda7ce, 0x4c87305f, 0x3a6d3307,
58 0xe22d2942, 0x9d060217, 0x1e42ed02, 0xb6f63b52, 0x4367f39f, 0x055cf262,
59 0x03a461b2, 0x5ef9e382, 0x386bc03a, 0x2a1e79c7, 0xf1a0058b, 0xd4d2dea9,
60 0x56baf37d, 0x5daff6cc, 0xf03a951d, 0xaef7de45, 0xa8f4581e, 0x3960b555,
61 0xffbfff6d, 0xbe702a23, 0x8f5b6d6f, 0x061739fb, 0x98696f47, 0x3fd596d4,
62 0x151eac6b, 0xa9fcc4f5, 0x69181a12, 0x3ac5a107, 0xb5198fe7, 0x96bcb1da,
63 0x1b5ddf8e, 0xc757d650, 0x65865c3a, 0x8fc0a41a, 0x87435536, 0x99eda6f2,
64 0x41874794, 0x29cff4e8, 0xb70efd9a, 0x3103f6e7, 0x84d2453b, 0x15a450bd,
65 0x74f49af1, 0x60f664b1, 0xa1c86935, 0xfdafbce1, 0xe36353e3, 0x5d9ba739,
66 0xbc0559ba, 0x708b0054, 0xd41d808c, 0xb2f31723, 0x9027c41f, 0xf136d165,
67 0xb5374b12, 0x9420a6ac, 0x273958b6, 0xe6c2fad0, 0xebdc1f21, 0xfb33af8b,
68 0xc71c25cd, 0xe9a2d8e5, 0xbeb38a50, 0xbceb7cc2, 0x4e4e73f0, 0xcd6c251d,
69 0xde4c032c, 0x4b04ac30, 0x725b8b21, 0x4eb8c33b, 0x20d07b75, 0x0567aa63,
70 0xb56b2bb7, 0xc1f5fd3a, 0xcafd35ca, 0x470dd4da, 0xfe4f94cd, 0xfb8de424,
71 0xe8dbcf40, 0xfe50a37a, 0x62db5b5d, 0xf32f4ab6, 0x2c4a8a51, 0x18473dc0,
72 0xfe0cbb6e, 0xfe399efd, 0xdf34ecc9, 0x6ccd5055, 0x46097073, 0x139135c2,
73 0x721c76f6, 0x1c6a94b4, 0x6eee014d, 0x8a508e02, 0x3da538f5, 0x280d394f,
74 0x5248a0c4, 0x3ce94c6c, 0x9a71ad3a, 0x8493dd05, 0xe43f0ab6, 0x18e4ed42,
75 0x6c5c0e09, 0x42b06ec9, 0x8d330343, 0xa45b6f59, 0x2a573c0c, 0xd7fd3de6,
76 0xeedeab68, 0x5c84dafc, 0xbbd1b1a8, 0xa3ce1ad1, 0x85b70bed, 0xb6add07f,
77 0xa531309c, 0x8f8ab852, 0x564de332, 0xeac9ed0c, 0x73da402c, 0x3ec52761,
78 0x43af2f4d, 0xd6ff45c8, 0x4c367462, 0xd553bd6a, 0x44724855, 0x3b2aa728,
79 0x56e5eb65, 0xeaf16173, 0x33fa42ff, 0xd714bb5d, 0xfbd0a3b9, 0xaf517134,
80 0x9416c8cd, 0x534cf94f, 0x548947c2, 0x34193569, 0x32f4389a, 0xfe7028bc,
81 0xed73b1ed, 0x9db95770, 0x468e3922, 0x0440c3cd, 0x60059a62, 0x33504562,
82 0x2b229fbd, 0x5174dca5, 0xf7028752, 0xd63c6aa8, 0x31276f38, 0x0646721c,
83 0xb0191da8, 0xe00e6de0, 0x9eac1a6e, 0x9f7628a5, 0xed6c06ea, 0x0bb8af15,
84 0xf119fb12, 0x38693c1c, 0x732bc0fe, 0x84953275, 0xb82ec888, 0x33a4f1b3,
85 0x3099835e, 0x028a8782, 0x5fdd51d7, 0xc6c717b3, 0xb06caf71, 0x17c8c111,
86 0x61bad754, 0x9fd03061, 0xe09df1af, 0x3bc9eb73, 0x85878413, 0x9889aaf2,
87 0x3f5a9e46, 0x42c9f01f, 0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2,
88 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
89 };
91 static const struct got_error *addblk(struct got_delta_table *, FILE *,
92 uint8_t *, off_t, off_t, off_t, uint32_t);
94 static uint32_t
95 hashblk(const unsigned char *p, off_t n, uint32_t seed)
96 {
97 return murmurhash2(p, n, seed);
98 }
100 static const struct got_error *
101 lookup(int *n, struct got_delta_table *dt, FILE *f, off_t file_offset0,
102 off_t len, off_t offset, uint32_t h)
104 struct got_delta_block *block;
105 uint8_t buf[GOT_DELTIFY_MAXCHUNK];
106 uint8_t buf2[GOT_DELTIFY_MAXCHUNK];
107 size_t r = 0;
108 int i;
110 for (i = h % dt->size; dt->offs[i] != 0; i = (i + 1) % dt->size) {
111 block = &dt->blocks[dt->offs[i] - 1];
112 if (block->len != len || block->hash != h)
113 continue;
115 if (r == 0) {
116 if (fseeko(f, file_offset0 + offset, SEEK_SET) == -1)
117 return got_error_from_errno("fseeko");
118 r = fread(buf, 1, len, f);
119 if (r != len) {
120 if (!ferror(f))
121 break; /* why? */
122 return got_ferror(f, GOT_ERR_IO);
126 if (fseeko(f, file_offset0 + block->offset, SEEK_SET) == -1)
127 return got_error_from_errno("fseeko");
128 if (fread(buf2, 1, len, f) != len)
129 return got_ferror(f, GOT_ERR_IO);
130 if (memcmp(buf, buf2, len) == 0)
131 break;
134 *n = i;
135 return NULL;
138 static const struct got_error *
139 lookup_mem(int *n, struct got_delta_table *dt, uint8_t *basedata,
140 off_t basefile_offset0, uint8_t *p, off_t len, uint32_t h)
142 struct got_delta_block *block;
143 uint8_t *d;
144 int i;
146 for (i = h % dt->size; dt->offs[i] != 0; i = (i + 1) % dt->size) {
147 block = &dt->blocks[dt->offs[i] - 1];
148 if (len == block->len && h == block->hash) {
149 d = basedata + basefile_offset0 + block->offset;
150 if (memcmp(d, p, len) == 0)
151 break;
155 *n = i;
156 return NULL;
159 static const struct got_error *
160 resizeht(struct got_delta_table *dt, FILE *f, off_t offset0, uint8_t *data,
161 off_t len)
163 const struct got_error *err;
164 struct got_delta_block *b;
165 size_t newsize, oldsize;
166 int i, n;
168 if (dt->nblocks == dt->nalloc) {
169 newsize = dt->nalloc + 256;
170 b = reallocarray(dt->blocks, newsize, sizeof(*dt->blocks));
171 if (b == NULL)
172 return got_error_from_errno("reallocarray");
173 dt->blocks = b;
174 dt->nalloc = newsize;
177 if (dt->blocks == NULL || dt->len * 100 / dt->size > 70) {
178 oldsize = dt->size;
179 dt->size *= 2;
180 free(dt->offs);
181 dt->offs = calloc(dt->size, sizeof(*dt->offs));
182 if (dt->offs == NULL) {
183 dt->size = oldsize;
184 return got_error_from_errno("calloc");
187 for (i = 0; i < dt->nblocks; ++i) {
188 b = &dt->blocks[i];
189 if (f)
190 err = lookup(&n, dt, f, offset0, b->len,
191 b->offset, b->hash);
192 else
193 err = lookup_mem(&n, dt, data, offset0,
194 data + b->offset, b->len, b->hash);
195 if (err) {
196 free(dt->offs);
197 dt->offs = NULL;
198 dt->size = oldsize;
199 return err;
201 assert(dt->offs[n] == 0);
202 dt->offs[n] = i + 1;
206 return NULL;
209 static const struct got_error *
210 addblk(struct got_delta_table *dt, FILE *f, uint8_t *data, off_t file_offset0,
211 off_t len, off_t offset, uint32_t h)
213 const struct got_error *err;
214 struct got_delta_block *block;
215 int i;
217 if (len == 0)
218 return NULL;
220 err = resizeht(dt, f, file_offset0, data, len);
221 if (err)
222 return err;
224 if (f)
225 err = lookup(&i, dt, f, file_offset0, len, offset, h);
226 else
227 err = lookup_mem(&i, dt, data, file_offset0, data + offset,
228 len, h);
229 if (err)
230 return err;
232 if (dt->offs[i] != 0)
233 return NULL; /* found */
235 dt->offs[i] = dt->nblocks + 1;
236 block = &dt->blocks[dt->nblocks];
237 block->len = len;
238 block->offset = offset;
239 block->hash = h;
241 dt->nblocks++;
242 dt->len++;
244 return NULL;
247 static const struct got_error *
248 lookupblk(struct got_delta_block **block, struct got_delta_table *dt,
249 unsigned char *p, off_t len, uint32_t seed, FILE *basefile,
250 off_t basefile_offset0)
252 int i;
253 uint32_t h;
254 uint8_t buf[GOT_DELTIFY_MAXCHUNK];
255 struct got_delta_block *b;
256 size_t r;
258 *block = NULL;
260 h = hashblk(p, len, seed);
262 for (i = h % dt->size; dt->offs[i] != 0; i = (i + 1) % dt->size) {
263 b = &dt->blocks[dt->offs[i] - 1];
264 if (b->hash != h || b->len != len)
265 continue;
266 if (fseeko(basefile, basefile_offset0 + b->offset,
267 SEEK_SET) == -1)
268 return got_error_from_errno("fseeko");
269 r = fread(buf, 1, len, basefile);
270 if (r != len)
271 return got_ferror(basefile, GOT_ERR_IO);
272 if (memcmp(p, buf, len) == 0) {
273 *block = b;
274 break;
277 return NULL;
280 static const struct got_error *
281 lookupblk_mem(struct got_delta_block **block, struct got_delta_table *dt,
282 unsigned char *p, off_t len, uint32_t seed, uint8_t *basedata,
283 off_t basefile_offset0)
285 const struct got_error *err;
286 int n;
287 uint32_t h;
289 *block = NULL;
290 h = hashblk(p, len, seed);
291 err = lookup_mem(&n, dt, basedata, basefile_offset0, p, len, h);
292 if (err)
293 return err;
294 if (dt->offs[n] != 0)
295 *block = &dt->blocks[dt->offs[n] - 1];
296 return NULL;
299 static const struct got_error *
300 nextblk(uint8_t *buf, off_t *blocklen, FILE *f)
302 uint32_t gh;
303 const unsigned char *p;
304 size_t r;
305 off_t pos = ftello(f);
307 *blocklen = 0;
309 r = fread(buf, 1, GOT_DELTIFY_MAXCHUNK, f);
310 if (r == 0 && ferror(f))
311 return got_ferror(f, GOT_ERR_IO);
312 if (r < GOT_DELTIFY_MINCHUNK)
313 return NULL; /* no more delta-worthy blocks left */
315 /* Got a deltifiable block. Find the split-point where it ends. */
316 p = buf + GOT_DELTIFY_MINCHUNK;
317 gh = 0;
318 while (p != buf + r) {
319 gh = (gh << 1) + geartab[*p++];
320 if ((gh & GOT_DELTIFY_SPLITMASK) == 0)
321 break;
324 *blocklen = (p - buf);
325 if (fseeko(f, pos + *blocklen, SEEK_SET) == -1)
326 return got_error_from_errno("fseeko");
328 return NULL;
331 static const struct got_error *
332 nextblk_mem(off_t *blocklen, uint8_t *data, off_t fileoffset, off_t filesize)
334 uint32_t gh;
335 const unsigned char *p;
337 *blocklen = 0;
339 if (fileoffset >= filesize ||
340 filesize - fileoffset < GOT_DELTIFY_MINCHUNK)
341 return NULL; /* no more delta-worthy blocks left */
343 /* Got a deltifiable block. Find the split-point where it ends. */
344 p = data + fileoffset + GOT_DELTIFY_MINCHUNK;
345 gh = 0;
346 while (p != data + MIN(fileoffset + GOT_DELTIFY_MAXCHUNK, filesize)) {
347 gh = (gh << 1) + geartab[*p++];
348 if ((gh & GOT_DELTIFY_SPLITMASK) == 0)
349 break;
352 *blocklen = (p - (data + fileoffset));
353 return NULL;
356 const struct got_error *
357 got_deltify_init(struct got_delta_table **dt, FILE *f, off_t fileoffset,
358 off_t filesize, uint32_t seed)
360 const struct got_error *err = NULL;
361 uint32_t h;
362 const off_t offset0 = fileoffset;
364 *dt = calloc(1, sizeof(**dt));
365 if (*dt == NULL)
366 return got_error_from_errno("calloc");
368 (*dt)->nblocks = 0;
369 (*dt)->nalloc = 128;
370 (*dt)->blocks = calloc((*dt)->nalloc, sizeof(struct got_delta_block));
371 if ((*dt)->blocks == NULL) {
372 err = got_error_from_errno("calloc");
373 goto done;
376 (*dt)->size = 128;
377 (*dt)->offs = calloc((*dt)->size, sizeof(*(*dt)->offs));
378 if ((*dt)->offs == NULL) {
379 err = got_error_from_errno("calloc");
380 goto done;
383 if (fseeko(f, fileoffset, SEEK_SET) == -1)
384 return got_error_from_errno("fseeko");
386 while (fileoffset < filesize) {
387 uint8_t buf[GOT_DELTIFY_MAXCHUNK];
388 off_t blocklen;
389 err = nextblk(buf, &blocklen, f);
390 if (err)
391 goto done;
392 if (blocklen == 0)
393 break;
394 h = hashblk(buf, blocklen, seed);
395 err = addblk(*dt, f, NULL, offset0, blocklen,
396 fileoffset - offset0, h);
397 if (err)
398 goto done;
399 fileoffset += blocklen;
400 if (fseeko(f, fileoffset, SEEK_SET) == -1)
401 return got_error_from_errno("fseeko");
403 done:
404 if (err) {
405 free((*dt)->blocks);
406 free(*dt);
407 *dt = NULL;
410 return err;
413 const struct got_error *
414 got_deltify_init_mem(struct got_delta_table **dt, uint8_t *data,
415 off_t fileoffset, off_t filesize, uint32_t seed)
417 const struct got_error *err = NULL;
418 uint32_t h;
419 const off_t offset0 = fileoffset;
421 *dt = calloc(1, sizeof(**dt));
422 if (*dt == NULL)
423 return got_error_from_errno("calloc");
425 (*dt)->nblocks = 0;
426 (*dt)->nalloc = 128;
427 (*dt)->blocks = calloc((*dt)->nalloc, sizeof(struct got_delta_block));
428 if ((*dt)->blocks == NULL) {
429 err = got_error_from_errno("calloc");
430 goto done;
433 (*dt)->size = 128;
434 (*dt)->offs = calloc((*dt)->size, sizeof(*(*dt)->offs));
435 if ((*dt)->offs == NULL) {
436 err = got_error_from_errno("calloc");
437 goto done;
440 while (fileoffset < filesize) {
441 off_t blocklen;
442 err = nextblk_mem(&blocklen, data, fileoffset, filesize);
443 if (err)
444 goto done;
445 if (blocklen == 0)
446 break;
447 h = hashblk(data + fileoffset, blocklen, seed);
448 err = addblk(*dt, NULL, data, offset0, blocklen,
449 fileoffset - offset0, h);
450 if (err)
451 goto done;
452 fileoffset += blocklen;
454 done:
455 if (err) {
456 free((*dt)->blocks);
457 free(*dt);
458 *dt = NULL;
461 return err;
464 void
465 got_deltify_free(struct got_delta_table *dt)
467 if (dt == NULL)
468 return;
469 free(dt->blocks);
470 free(dt->offs);
471 free(dt);
474 static const struct got_error *
475 emitdelta(struct got_delta_instruction **deltas, size_t *nalloc, int *ndeltas,
476 const size_t alloc_chunk_size, int copy, off_t offset, off_t len)
478 struct got_delta_instruction *d, *p;
480 if (*nalloc < *ndeltas + alloc_chunk_size) {
481 p = reallocarray(*deltas, *nalloc + alloc_chunk_size,
482 sizeof(struct got_delta_instruction));
483 if (p == NULL)
484 return got_error_from_errno("reallocarray");
485 *deltas = p;
486 *nalloc += alloc_chunk_size;
488 *ndeltas += 1;
489 d = &(*deltas)[*ndeltas - 1];
490 d->copy = copy;
491 d->offset = offset;
492 d->len = len;
493 return NULL;
496 static const struct got_error *
497 stretchblk(FILE *basefile, off_t base_offset0, struct got_delta_block *block,
498 FILE *f, off_t filesize, off_t *blocklen)
500 uint8_t basebuf[GOT_DELTIFY_MAXCHUNK], buf[GOT_DELTIFY_MAXCHUNK];
501 size_t base_r, r, i;
502 int buf_equal = 1;
504 if (fseeko(basefile, base_offset0 + block->offset + *blocklen,
505 SEEK_SET) == -1)
506 return got_error_from_errno("fseeko");
508 while (buf_equal && *blocklen < (1 << 24) - 1) {
509 base_r = fread(basebuf, 1, sizeof(basebuf), basefile);
510 if (base_r == 0) {
511 if (ferror(basefile))
512 return got_ferror(basefile, GOT_ERR_IO);
513 break;
515 r = fread(buf, 1, sizeof(buf), f);
516 if (r == 0) {
517 if (ferror(f))
518 return got_ferror(f, GOT_ERR_IO);
519 break;
521 for (i = 0; i < MIN(base_r, r); i++) {
522 if (buf[i] != basebuf[i]) {
523 buf_equal = 0;
524 break;
526 (*blocklen)++;
530 return NULL;
533 static const struct got_error *
534 stretchblk_file_mem(uint8_t *basedata, off_t base_offset0, off_t basefile_size,
535 struct got_delta_block *block, FILE *f, off_t filesize, off_t *blocklen)
537 uint8_t buf[GOT_DELTIFY_MAXCHUNK];
538 size_t r, i;
539 int buf_equal = 1;
540 off_t base_offset = base_offset0 + block->offset + *blocklen;
542 if (base_offset > basefile_size) {
543 return got_error_fmt(GOT_ERR_RANGE,
544 "read beyond the size of delta base at offset %lld",
545 (long long)base_offset);
548 while (buf_equal && *blocklen < (1 << 24) - 1) {
549 if (base_offset + *blocklen >= basefile_size)
550 break;
551 r = fread(buf, 1, sizeof(buf), f);
552 if (r == 0) {
553 if (ferror(f))
554 return got_ferror(f, GOT_ERR_IO);
555 break;
557 for (i = 0; i < MIN(basefile_size - base_offset, r); i++) {
558 if (buf[i] != *(basedata + base_offset + i)) {
559 buf_equal = 0;
560 break;
562 (*blocklen)++;
566 return NULL;
569 static const struct got_error *
570 stretchblk_mem_file(FILE *basefile, off_t base_offset0,
571 struct got_delta_block *block, uint8_t *data, off_t fileoffset,
572 off_t filesize, off_t *blocklen)
574 uint8_t basebuf[GOT_DELTIFY_MAXCHUNK];
575 size_t base_r, i;
576 int buf_equal = 1;
578 if (fileoffset > filesize) {
579 return got_error_fmt(GOT_ERR_RANGE,
580 "read beyond the size of deltify file at offset %lld",
581 (long long)fileoffset);
584 if (fseeko(basefile, base_offset0 + block->offset + *blocklen,
585 SEEK_SET) == -1)
586 return got_error_from_errno("fseeko");
588 while (buf_equal && *blocklen < (1 << 24) - 1) {
589 if (fileoffset + *blocklen >= filesize)
590 break;
591 base_r = fread(basebuf, 1, sizeof(basebuf), basefile);
592 if (base_r == 0) {
593 if (ferror(basefile))
594 return got_ferror(basefile, GOT_ERR_IO);
595 break;
597 for (i = 0; i < MIN(base_r, filesize - fileoffset); i++) {
598 if (*(data + fileoffset + i) != basebuf[i]) {
599 buf_equal = 0;
600 break;
602 (*blocklen)++;
606 return NULL;
609 static const struct got_error *
610 stretchblk_mem_mem(uint8_t *basedata, off_t base_offset0, off_t basefile_size,
611 struct got_delta_block *block, uint8_t *data, off_t fileoffset,
612 off_t filesize, off_t *blocklen)
614 off_t i, maxlen;
615 off_t base_offset = base_offset0 + block->offset + *blocklen;
616 uint8_t *p, *q;
618 if (base_offset > basefile_size) {
619 return got_error_fmt(GOT_ERR_RANGE,
620 "read beyond the size of delta base at offset %lld",
621 (long long)base_offset);
624 if (fileoffset > filesize) {
625 return got_error_fmt(GOT_ERR_RANGE,
626 "read beyond the size of deltify file at offset %lld",
627 (long long)fileoffset);
630 p = data + fileoffset;
631 q = basedata + base_offset;
632 maxlen = MIN(basefile_size - base_offset, filesize - fileoffset);
633 for (i = 0; i < maxlen && *blocklen < (1 << 24) - 1; i++) {
634 if (p[i] != q[i])
635 break;
636 (*blocklen)++;
639 return NULL;
642 const struct got_error *
643 got_deltify(struct got_delta_instruction **deltas, int *ndeltas,
644 FILE *f, off_t fileoffset, off_t filesize, uint32_t seed,
645 struct got_delta_table *dt, FILE *basefile,
646 off_t basefile_offset0, off_t basefile_size)
648 const struct got_error *err = NULL;
649 const off_t offset0 = fileoffset;
650 size_t nalloc = 0;
651 const size_t alloc_chunk_size = 64;
653 *deltas = NULL;
654 *ndeltas = 0;
656 /*
657 * offset0 indicates where data to be deltified begins.
658 * For example, we want to avoid deltifying a Git object header at
659 * the beginning of the file.
660 */
661 if (fseeko(f, offset0, SEEK_SET) == -1)
662 return got_error_from_errno("fseeko");
664 *deltas = reallocarray(NULL, alloc_chunk_size,
665 sizeof(struct got_delta_instruction));
666 if (*deltas == NULL)
667 return got_error_from_errno("reallocarray");
668 nalloc = alloc_chunk_size;
670 while (fileoffset < filesize) {
671 uint8_t buf[GOT_DELTIFY_MAXCHUNK];
672 off_t blocklen;
673 struct got_delta_block *block;
674 err = nextblk(buf, &blocklen, f);
675 if (err)
676 break;
677 if (blocklen == 0) {
678 /* Source remainder from the file itself. */
679 if (fileoffset < filesize) {
680 err = emitdelta(deltas, &nalloc, ndeltas,
681 alloc_chunk_size, 0, fileoffset - offset0,
682 filesize - fileoffset);
684 break;
686 err = lookupblk(&block, dt, buf, blocklen, seed, basefile,
687 basefile_offset0);
688 if (err)
689 break;
690 if (block != NULL) {
691 /*
692 * We have found a matching block in the delta base.
693 * Attempt to stretch the block as far as possible and
694 * generate a copy instruction.
695 */
696 err = stretchblk(basefile, basefile_offset0, block,
697 f, filesize, &blocklen);
698 if (err)
699 break;
700 err = emitdelta(deltas, &nalloc, ndeltas,
701 alloc_chunk_size, 1, block->offset, blocklen);
702 if (err)
703 break;
704 } else {
705 /*
706 * No match.
707 * This block needs to be sourced from the file itself.
708 */
709 err = emitdelta(deltas, &nalloc, ndeltas,
710 alloc_chunk_size, 0, fileoffset - offset0, blocklen);
711 if (err)
712 break;
714 fileoffset += blocklen;
715 if (fseeko(f, fileoffset, SEEK_SET) == -1) {
716 err = got_error_from_errno("fseeko");
717 break;
721 if (err) {
722 free(*deltas);
723 *deltas = NULL;
724 *ndeltas = 0;
726 return err;
729 const struct got_error *
730 got_deltify_file_mem(struct got_delta_instruction **deltas, int *ndeltas,
731 FILE *f, off_t fileoffset, off_t filesize, uint32_t seed,
732 struct got_delta_table *dt, uint8_t *basedata,
733 off_t basefile_offset0, off_t basefile_size)
735 const struct got_error *err = NULL;
736 const off_t offset0 = fileoffset;
737 size_t nalloc = 0;
738 const size_t alloc_chunk_size = 64;
740 *deltas = NULL;
741 *ndeltas = 0;
743 /*
744 * offset0 indicates where data to be deltified begins.
745 * For example, we want to avoid deltifying a Git object header at
746 * the beginning of the file.
747 */
748 if (fseeko(f, offset0, SEEK_SET) == -1)
749 return got_error_from_errno("fseeko");
751 *deltas = reallocarray(NULL, alloc_chunk_size,
752 sizeof(struct got_delta_instruction));
753 if (*deltas == NULL)
754 return got_error_from_errno("reallocarray");
755 nalloc = alloc_chunk_size;
757 while (fileoffset < filesize) {
758 uint8_t buf[GOT_DELTIFY_MAXCHUNK];
759 off_t blocklen;
760 struct got_delta_block *block;
761 err = nextblk(buf, &blocklen, f);
762 if (err)
763 break;
764 if (blocklen == 0) {
765 /* Source remainder from the file itself. */
766 if (fileoffset < filesize) {
767 err = emitdelta(deltas, &nalloc, ndeltas,
768 alloc_chunk_size, 0, fileoffset - offset0,
769 filesize - fileoffset);
771 break;
773 err = lookupblk_mem(&block, dt, buf, blocklen, seed, basedata,
774 basefile_offset0);
775 if (err)
776 break;
777 if (block != NULL) {
778 /*
779 * We have found a matching block in the delta base.
780 * Attempt to stretch the block as far as possible and
781 * generate a copy instruction.
782 */
783 err = stretchblk_file_mem(basedata, basefile_offset0,
784 basefile_size, block, f, filesize, &blocklen);
785 if (err)
786 break;
787 err = emitdelta(deltas, &nalloc, ndeltas,
788 alloc_chunk_size, 1, block->offset, blocklen);
789 if (err)
790 break;
791 } else {
792 /*
793 * No match.
794 * This block needs to be sourced from the file itself.
795 */
796 err = emitdelta(deltas, &nalloc, ndeltas,
797 alloc_chunk_size, 0, fileoffset - offset0, blocklen);
798 if (err)
799 break;
801 fileoffset += blocklen;
802 if (fseeko(f, fileoffset, SEEK_SET) == -1) {
803 err = got_error_from_errno("fseeko");
804 break;
808 if (err) {
809 free(*deltas);
810 *deltas = NULL;
811 *ndeltas = 0;
813 return err;
816 const struct got_error *
817 got_deltify_mem_file(struct got_delta_instruction **deltas, int *ndeltas,
818 uint8_t *data, off_t fileoffset, off_t filesize, uint32_t seed,
819 struct got_delta_table *dt, FILE *basefile,
820 off_t basefile_offset0, off_t basefile_size)
822 const struct got_error *err = NULL;
823 const off_t offset0 = fileoffset;
824 size_t nalloc = 0;
825 const size_t alloc_chunk_size = 64;
827 *deltas = NULL;
828 *ndeltas = 0;
830 *deltas = reallocarray(NULL, alloc_chunk_size,
831 sizeof(struct got_delta_instruction));
832 if (*deltas == NULL)
833 return got_error_from_errno("reallocarray");
834 nalloc = alloc_chunk_size;
836 while (fileoffset < filesize) {
837 off_t blocklen;
838 struct got_delta_block *block;
839 err = nextblk_mem(&blocklen, data, fileoffset, filesize);
840 if (err)
841 break;
842 if (blocklen == 0) {
843 /* Source remainder from the file itself. */
844 if (fileoffset < filesize) {
845 err = emitdelta(deltas, &nalloc, ndeltas,
846 alloc_chunk_size, 0, fileoffset - offset0,
847 filesize - fileoffset);
849 break;
851 err = lookupblk(&block, dt, data + fileoffset, blocklen, seed,
852 basefile, basefile_offset0);
853 if (err)
854 break;
855 if (block != NULL) {
856 /*
857 * We have found a matching block in the delta base.
858 * Attempt to stretch the block as far as possible and
859 * generate a copy instruction.
860 */
861 err = stretchblk_mem_file(basefile, basefile_offset0,
862 block, data, fileoffset + blocklen, filesize,
863 &blocklen);
864 if (err)
865 break;
866 err = emitdelta(deltas, &nalloc, ndeltas,
867 alloc_chunk_size, 1, block->offset, blocklen);
868 if (err)
869 break;
870 } else {
871 /*
872 * No match.
873 * This block needs to be sourced from the file itself.
874 */
875 err = emitdelta(deltas, &nalloc, ndeltas,
876 alloc_chunk_size, 0, fileoffset - offset0, blocklen);
877 if (err)
878 break;
880 fileoffset += blocklen;
883 if (err) {
884 free(*deltas);
885 *deltas = NULL;
886 *ndeltas = 0;
888 return err;
891 const struct got_error *
892 got_deltify_mem_mem(struct got_delta_instruction **deltas, int *ndeltas,
893 uint8_t *data, off_t fileoffset, off_t filesize, uint32_t seed,
894 struct got_delta_table *dt, uint8_t *basedata,
895 off_t basefile_offset0, off_t basefile_size)
897 const struct got_error *err = NULL;
898 const off_t offset0 = fileoffset;
899 size_t nalloc = 0;
900 const size_t alloc_chunk_size = 64;
902 *deltas = NULL;
903 *ndeltas = 0;
905 *deltas = reallocarray(NULL, alloc_chunk_size,
906 sizeof(struct got_delta_instruction));
907 if (*deltas == NULL)
908 return got_error_from_errno("reallocarray");
909 nalloc = alloc_chunk_size;
911 while (fileoffset < filesize) {
912 off_t blocklen;
913 struct got_delta_block *block;
914 err = nextblk_mem(&blocklen, data, fileoffset, filesize);
915 if (err)
916 break;
917 if (blocklen == 0) {
918 /* Source remainder from the file itself. */
919 if (fileoffset < filesize) {
920 err = emitdelta(deltas, &nalloc, ndeltas,
921 alloc_chunk_size, 0, fileoffset - offset0,
922 filesize - fileoffset);
924 break;
926 err = lookupblk_mem(&block, dt, data + fileoffset, blocklen,
927 seed, basedata, basefile_offset0);
928 if (err)
929 break;
930 if (block != NULL) {
931 /*
932 * We have found a matching block in the delta base.
933 * Attempt to stretch the block as far as possible and
934 * generate a copy instruction.
935 */
936 err = stretchblk_mem_mem(basedata, basefile_offset0,
937 basefile_size, block, data, fileoffset + blocklen,
938 filesize, &blocklen);
939 if (err)
940 break;
941 err = emitdelta(deltas, &nalloc, ndeltas,
942 alloc_chunk_size, 1, block->offset, blocklen);
943 if (err)
944 break;
945 } else {
946 /*
947 * No match.
948 * This block needs to be sourced from the file itself.
949 */
950 err = emitdelta(deltas, &nalloc, ndeltas,
951 alloc_chunk_size, 0, fileoffset - offset0, blocklen);
952 if (err)
953 break;
955 fileoffset += blocklen;
958 if (err) {
959 free(*deltas);
960 *deltas = NULL;
961 *ndeltas = 0;
963 return err;