Blame


1 3b0f3d61 2020-01-22 neels /* Split source by line breaks, and calculate a simplistic checksum. */
2 3b0f3d61 2020-01-22 neels /*
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 3b0f3d61 2020-01-22 neels *
5 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
6 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
7 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
8 3b0f3d61 2020-01-22 neels *
9 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 3b0f3d61 2020-01-22 neels */
17 3b0f3d61 2020-01-22 neels
18 3b0f3d61 2020-01-22 neels #include <diff/diff_main.h>
19 3b0f3d61 2020-01-22 neels
20 61a7b578 2020-05-06 neels static int
21 61a7b578 2020-05-06 neels diff_data_atomize_text_lines(struct diff_data *d)
22 3b0f3d61 2020-01-22 neels {
23 3b0f3d61 2020-01-22 neels const uint8_t *pos = d->data;
24 3b0f3d61 2020-01-22 neels const uint8_t *end = pos + d->len;
25 3b0f3d61 2020-01-22 neels
26 3b0f3d61 2020-01-22 neels unsigned int array_size_estimate = d->len / 50;
27 3b0f3d61 2020-01-22 neels unsigned int pow2 = 1;
28 3b0f3d61 2020-01-22 neels while (array_size_estimate >>= 1)
29 3b0f3d61 2020-01-22 neels pow2++;
30 3b0f3d61 2020-01-22 neels
31 3b0f3d61 2020-01-22 neels ARRAYLIST_INIT(d->atoms, 1 << pow2);
32 3b0f3d61 2020-01-22 neels
33 3b0f3d61 2020-01-22 neels while (pos < end) {
34 3b0f3d61 2020-01-22 neels const uint8_t *line_end = pos;
35 3b0f3d61 2020-01-22 neels unsigned int hash = 0;
36 3b0f3d61 2020-01-22 neels
37 3b0f3d61 2020-01-22 neels while (line_end < end && *line_end != '\r' && *line_end != '\n') {
38 3b0f3d61 2020-01-22 neels hash = hash * 23 + *line_end;
39 3b0f3d61 2020-01-22 neels line_end++;
40 3b0f3d61 2020-01-22 neels }
41 3b0f3d61 2020-01-22 neels
42 3b0f3d61 2020-01-22 neels /* When not at the end of data, the line ending char ('\r' or '\n') must follow */
43 3b0f3d61 2020-01-22 neels if (line_end < end)
44 3b0f3d61 2020-01-22 neels line_end++;
45 3b0f3d61 2020-01-22 neels /* If that was an '\r', also pull in any following '\n' */
46 3b0f3d61 2020-01-22 neels if (line_end[0] == '\r' && line_end < end && line_end[1] == '\n')
47 3b0f3d61 2020-01-22 neels line_end++;
48 3b0f3d61 2020-01-22 neels
49 3b0f3d61 2020-01-22 neels /* Record the found line as diff atom */
50 3b0f3d61 2020-01-22 neels struct diff_atom *atom;
51 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(atom, d->atoms);
52 3b0f3d61 2020-01-22 neels if (!atom)
53 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
54 3b0f3d61 2020-01-22 neels
55 3b0f3d61 2020-01-22 neels *atom = (struct diff_atom){
56 3b0f3d61 2020-01-22 neels .at = pos,
57 3b0f3d61 2020-01-22 neels .len = line_end - pos,
58 3b0f3d61 2020-01-22 neels .hash = hash,
59 3b0f3d61 2020-01-22 neels };
60 3b0f3d61 2020-01-22 neels
61 3b0f3d61 2020-01-22 neels /* Starting point for next line: */
62 3b0f3d61 2020-01-22 neels pos = line_end;
63 3b0f3d61 2020-01-22 neels }
64 3b0f3d61 2020-01-22 neels
65 3b0f3d61 2020-01-22 neels return DIFF_RC_OK;
66 3b0f3d61 2020-01-22 neels }
67 3b0f3d61 2020-01-22 neels
68 61a7b578 2020-05-06 neels enum diff_rc
69 61a7b578 2020-05-06 neels diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right)
70 3b0f3d61 2020-01-22 neels {
71 3b0f3d61 2020-01-22 neels enum diff_rc rc;
72 3b0f3d61 2020-01-22 neels rc = diff_data_atomize_text_lines(left);
73 3b0f3d61 2020-01-22 neels if (rc != DIFF_RC_OK)
74 3b0f3d61 2020-01-22 neels return rc;
75 3b0f3d61 2020-01-22 neels return diff_data_atomize_text_lines(right);
76 3b0f3d61 2020-01-22 neels }