Blob


1 /* Split source by line breaks, and calculate a simplistic checksum. */
2 /*
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <diff/diff_main.h>
20 static int diff_data_atomize_text_lines(struct diff_data *d)
21 {
22 const uint8_t *pos = d->data;
23 const uint8_t *end = pos + d->len;
25 unsigned int array_size_estimate = d->len / 50;
26 unsigned int pow2 = 1;
27 while (array_size_estimate >>= 1)
28 pow2++;
30 ARRAYLIST_INIT(d->atoms, 1 << pow2);
32 while (pos < end) {
33 const uint8_t *line_end = pos;
34 unsigned int hash = 0;
36 while (line_end < end && *line_end != '\r' && *line_end != '\n') {
37 hash = hash * 23 + *line_end;
38 line_end++;
39 }
41 /* When not at the end of data, the line ending char ('\r' or '\n') must follow */
42 if (line_end < end)
43 line_end++;
44 /* If that was an '\r', also pull in any following '\n' */
45 if (line_end[0] == '\r' && line_end < end && line_end[1] == '\n')
46 line_end++;
48 /* Record the found line as diff atom */
49 struct diff_atom *atom;
50 ARRAYLIST_ADD(atom, d->atoms);
51 if (!atom)
52 return DIFF_RC_ENOMEM;
54 *atom = (struct diff_atom){
55 .at = pos,
56 .len = line_end - pos,
57 .hash = hash,
58 };
60 /* Starting point for next line: */
61 pos = line_end;
62 }
64 return DIFF_RC_OK;
65 }
67 enum diff_rc diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right)
68 {
69 enum diff_rc rc;
70 rc = diff_data_atomize_text_lines(left);
71 if (rc != DIFF_RC_OK)
72 return rc;
73 return diff_data_atomize_text_lines(right);
74 }