Blame


1 3b0f3d61 2020-01-22 neels /* Generic infrastructure to implement various diff algorithms (implementation). */
2 3b0f3d61 2020-01-22 neels /*
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 3b0f3d61 2020-01-22 neels *
5 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
6 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
7 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
8 3b0f3d61 2020-01-22 neels *
9 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 3b0f3d61 2020-01-22 neels */
17 3b0f3d61 2020-01-22 neels
18 3b0f3d61 2020-01-22 neels
19 3b0f3d61 2020-01-22 neels #include <sys/queue.h>
20 732e8ee0 2020-09-20 stsp #include <ctype.h>
21 e10a628a 2020-09-16 stsp #include <errno.h>
22 3b0f3d61 2020-01-22 neels #include <stdint.h>
23 3b0f3d61 2020-01-22 neels #include <stdlib.h>
24 3b0f3d61 2020-01-22 neels #include <stdbool.h>
25 3b0f3d61 2020-01-22 neels #include <stdio.h>
26 3b0f3d61 2020-01-22 neels #include <string.h>
27 3b0f3d61 2020-01-22 neels #include <limits.h>
28 c6eecea3 2020-07-26 stsp #include <unistd.h>
29 3b0f3d61 2020-01-22 neels
30 3b0f3d61 2020-01-22 neels #include <assert.h>
31 3b0f3d61 2020-01-22 neels
32 1dfba055 2020-10-07 stsp #include <arraylist.h>
33 1dfba055 2020-10-07 stsp #include <diff_main.h>
34 3b0f3d61 2020-01-22 neels
35 85ab4559 2020-09-22 stsp #include "diff_internal.h"
36 2a1b94d0 2020-09-26 stsp #include "diff_debug.h"
37 c6eecea3 2020-07-26 stsp
38 b3fb4686 2020-09-20 neels static int
39 7a54ad3a 2020-09-20 stsp read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len)
40 c6eecea3 2020-07-26 stsp {
41 b3fb4686 2020-09-20 neels int r;
42 7a54ad3a 2020-09-20 stsp if (fseeko(f, at_pos, SEEK_SET) == -1)
43 b3fb4686 2020-09-20 neels return errno;
44 7a54ad3a 2020-09-20 stsp r = fread(buf, sizeof(char), len, f);
45 7a54ad3a 2020-09-20 stsp if ((r == 0 || r < len) && ferror(f))
46 b3fb4686 2020-09-20 neels return errno;
47 b3fb4686 2020-09-20 neels if (r != len)
48 b3fb4686 2020-09-20 neels return EIO;
49 b3fb4686 2020-09-20 neels return 0;
50 b3fb4686 2020-09-20 neels }
51 b3fb4686 2020-09-20 neels
52 b3fb4686 2020-09-20 neels static int
53 b3fb4686 2020-09-20 neels buf_cmp(const unsigned char *left, size_t left_len,
54 732e8ee0 2020-09-20 stsp const unsigned char *right, size_t right_len,
55 732e8ee0 2020-09-20 stsp bool ignore_whitespace)
56 b3fb4686 2020-09-20 neels {
57 732e8ee0 2020-09-20 stsp int cmp;
58 732e8ee0 2020-09-20 stsp
59 732e8ee0 2020-09-20 stsp if (ignore_whitespace) {
60 732e8ee0 2020-09-20 stsp int il = 0, ir = 0;
61 732e8ee0 2020-09-20 stsp while (il < left_len && ir < right_len) {
62 732e8ee0 2020-09-20 stsp unsigned char cl = left[il];
63 732e8ee0 2020-09-20 stsp unsigned char cr = right[ir];
64 732e8ee0 2020-09-20 stsp
65 732e8ee0 2020-09-20 stsp if (isspace(cl) && il < left_len) {
66 732e8ee0 2020-09-20 stsp il++;
67 732e8ee0 2020-09-20 stsp continue;
68 732e8ee0 2020-09-20 stsp }
69 732e8ee0 2020-09-20 stsp if (isspace(cr) && ir < right_len) {
70 732e8ee0 2020-09-20 stsp ir++;
71 732e8ee0 2020-09-20 stsp continue;
72 732e8ee0 2020-09-20 stsp }
73 732e8ee0 2020-09-20 stsp
74 732e8ee0 2020-09-20 stsp if (cl > cr)
75 732e8ee0 2020-09-20 stsp return 1;
76 732e8ee0 2020-09-20 stsp if (cr > cl)
77 732e8ee0 2020-09-20 stsp return -1;
78 732e8ee0 2020-09-20 stsp il++;
79 732e8ee0 2020-09-20 stsp ir++;
80 732e8ee0 2020-09-20 stsp }
81 732e8ee0 2020-09-20 stsp while (il < left_len) {
82 732e8ee0 2020-09-20 stsp unsigned char cl = left[il++];
83 732e8ee0 2020-09-20 stsp if (!isspace(cl))
84 732e8ee0 2020-09-20 stsp return 1;
85 732e8ee0 2020-09-20 stsp }
86 732e8ee0 2020-09-20 stsp while (ir < right_len) {
87 732e8ee0 2020-09-20 stsp unsigned char cr = right[ir++];
88 732e8ee0 2020-09-20 stsp if (!isspace(cr))
89 732e8ee0 2020-09-20 stsp return -1;
90 732e8ee0 2020-09-20 stsp }
91 732e8ee0 2020-09-20 stsp
92 732e8ee0 2020-09-20 stsp return 0;
93 732e8ee0 2020-09-20 stsp }
94 732e8ee0 2020-09-20 stsp
95 732e8ee0 2020-09-20 stsp cmp = memcmp(left, right, MIN(left_len, right_len));
96 b3fb4686 2020-09-20 neels if (cmp)
97 b3fb4686 2020-09-20 neels return cmp;
98 b3fb4686 2020-09-20 neels if (left_len == right_len)
99 b3fb4686 2020-09-20 neels return 0;
100 b3fb4686 2020-09-20 neels return (left_len > right_len) ? 1 : -1;
101 b3fb4686 2020-09-20 neels }
102 b3fb4686 2020-09-20 neels
103 b3fb4686 2020-09-20 neels int
104 b3fb4686 2020-09-20 neels diff_atom_cmp(int *cmp,
105 b3fb4686 2020-09-20 neels const struct diff_atom *left,
106 b3fb4686 2020-09-20 neels const struct diff_atom *right)
107 b3fb4686 2020-09-20 neels {
108 c6eecea3 2020-07-26 stsp off_t remain_left, remain_right;
109 ad5b3f85 2020-10-12 neels int flags = (left->root->diff_flags | right->root->diff_flags);
110 00d5652b 2020-09-22 stsp bool ignore_whitespace = (flags & DIFF_FLAG_IGNORE_WHITESPACE);
111 c6eecea3 2020-07-26 stsp
112 74ad6a69 2020-10-22 neels if (!left->len && !right->len) {
113 74ad6a69 2020-10-22 neels *cmp = 0;
114 74ad6a69 2020-10-22 neels return 0;
115 74ad6a69 2020-10-22 neels }
116 732e8ee0 2020-09-20 stsp if (!ignore_whitespace) {
117 732e8ee0 2020-09-20 stsp if (!right->len) {
118 732e8ee0 2020-09-20 stsp *cmp = 1;
119 732e8ee0 2020-09-20 stsp return 0;
120 732e8ee0 2020-09-20 stsp }
121 732e8ee0 2020-09-20 stsp if (!left->len) {
122 732e8ee0 2020-09-20 stsp *cmp = -1;
123 732e8ee0 2020-09-20 stsp return 0;
124 732e8ee0 2020-09-20 stsp }
125 b3fb4686 2020-09-20 neels }
126 c6eecea3 2020-07-26 stsp
127 b3fb4686 2020-09-20 neels if (left->at != NULL && right->at != NULL) {
128 732e8ee0 2020-09-20 stsp *cmp = buf_cmp(left->at, left->len, right->at, right->len,
129 732e8ee0 2020-09-20 stsp ignore_whitespace);
130 b3fb4686 2020-09-20 neels return 0;
131 b3fb4686 2020-09-20 neels }
132 c6eecea3 2020-07-26 stsp
133 c6eecea3 2020-07-26 stsp remain_left = left->len;
134 c6eecea3 2020-07-26 stsp remain_right = right->len;
135 c6eecea3 2020-07-26 stsp while (remain_left > 0 || remain_right > 0) {
136 c6eecea3 2020-07-26 stsp const size_t chunksz = 8192;
137 c6eecea3 2020-07-26 stsp unsigned char buf_left[chunksz], buf_right[chunksz];
138 c6eecea3 2020-07-26 stsp const uint8_t *p_left, *p_right;
139 c6eecea3 2020-07-26 stsp off_t n_left, n_right;
140 c6eecea3 2020-07-26 stsp ssize_t r;
141 c6eecea3 2020-07-26 stsp
142 b3fb4686 2020-09-20 neels if (!remain_right) {
143 b3fb4686 2020-09-20 neels *cmp = 1;
144 b3fb4686 2020-09-20 neels return 0;
145 b3fb4686 2020-09-20 neels }
146 b3fb4686 2020-09-20 neels if (!remain_left) {
147 b3fb4686 2020-09-20 neels *cmp = -1;
148 b3fb4686 2020-09-20 neels return 0;
149 b3fb4686 2020-09-20 neels }
150 b3fb4686 2020-09-20 neels
151 c6eecea3 2020-07-26 stsp n_left = MIN(chunksz, remain_left);
152 c6eecea3 2020-07-26 stsp n_right = MIN(chunksz, remain_right);
153 c6eecea3 2020-07-26 stsp
154 c6eecea3 2020-07-26 stsp if (left->at == NULL) {
155 ad5b3f85 2020-10-12 neels r = read_at(left->root->f,
156 b3fb4686 2020-09-20 neels left->pos + (left->len - remain_left),
157 b3fb4686 2020-09-20 neels buf_left, n_left);
158 b3fb4686 2020-09-20 neels if (r) {
159 b3fb4686 2020-09-20 neels *cmp = 0;
160 b3fb4686 2020-09-20 neels return r;
161 b3fb4686 2020-09-20 neels }
162 c6eecea3 2020-07-26 stsp p_left = buf_left;
163 c6eecea3 2020-07-26 stsp } else {
164 c6eecea3 2020-07-26 stsp p_left = left->at + (left->len - remain_left);
165 c6eecea3 2020-07-26 stsp }
166 c6eecea3 2020-07-26 stsp
167 c6eecea3 2020-07-26 stsp if (right->at == NULL) {
168 ad5b3f85 2020-10-12 neels r = read_at(right->root->f,
169 b3fb4686 2020-09-20 neels right->pos + (right->len - remain_right),
170 b3fb4686 2020-09-20 neels buf_right, n_right);
171 b3fb4686 2020-09-20 neels if (r) {
172 b3fb4686 2020-09-20 neels *cmp = 0;
173 b3fb4686 2020-09-20 neels return r;
174 b3fb4686 2020-09-20 neels }
175 c6eecea3 2020-07-26 stsp p_right = buf_right;
176 c6eecea3 2020-07-26 stsp } else {
177 c6eecea3 2020-07-26 stsp p_right = right->at + (right->len - remain_right);
178 c6eecea3 2020-07-26 stsp }
179 b3fb4686 2020-09-20 neels
180 732e8ee0 2020-09-20 stsp r = buf_cmp(p_left, n_left, p_right, n_right,
181 732e8ee0 2020-09-20 stsp ignore_whitespace);
182 b3fb4686 2020-09-20 neels if (r) {
183 b3fb4686 2020-09-20 neels *cmp = r;
184 b3fb4686 2020-09-20 neels return 0;
185 c6eecea3 2020-07-26 stsp }
186 c6eecea3 2020-07-26 stsp
187 c6eecea3 2020-07-26 stsp remain_left -= n_left;
188 c6eecea3 2020-07-26 stsp remain_right -= n_right;
189 c6eecea3 2020-07-26 stsp }
190 3b0f3d61 2020-01-22 neels
191 b3fb4686 2020-09-20 neels *cmp = 0;
192 b3fb4686 2020-09-20 neels return 0;
193 b3fb4686 2020-09-20 neels }
194 b3fb4686 2020-09-20 neels
195 b3fb4686 2020-09-20 neels int
196 b3fb4686 2020-09-20 neels diff_atom_same(bool *same,
197 b3fb4686 2020-09-20 neels const struct diff_atom *left,
198 b3fb4686 2020-09-20 neels const struct diff_atom *right)
199 b3fb4686 2020-09-20 neels {
200 b3fb4686 2020-09-20 neels int cmp;
201 2fa08c64 2020-10-22 neels int r;
202 2fa08c64 2020-10-22 neels if (left->hash != right->hash) {
203 2fa08c64 2020-10-22 neels *same = false;
204 2fa08c64 2020-10-22 neels return 0;
205 2fa08c64 2020-10-22 neels }
206 2fa08c64 2020-10-22 neels r = diff_atom_cmp(&cmp, left, right);
207 b3fb4686 2020-09-20 neels if (r) {
208 b3fb4686 2020-09-20 neels *same = true;
209 b3fb4686 2020-09-20 neels return r;
210 b3fb4686 2020-09-20 neels }
211 b3fb4686 2020-09-20 neels *same = (cmp == 0);
212 b3fb4686 2020-09-20 neels return 0;
213 c6eecea3 2020-07-26 stsp }
214 c6eecea3 2020-07-26 stsp
215 93f8150a 2020-10-11 neels static struct diff_chunk *
216 93f8150a 2020-10-11 neels diff_state_add_solved_chunk(struct diff_state *state,
217 93f8150a 2020-10-11 neels const struct diff_chunk *chunk)
218 3b0f3d61 2020-01-22 neels {
219 93f8150a 2020-10-11 neels diff_chunk_arraylist_t *result;
220 8546b045 2020-09-20 neels struct diff_chunk *new_chunk;
221 8546b045 2020-09-20 neels enum diff_chunk_type last_t;
222 8546b045 2020-09-20 neels enum diff_chunk_type new_t;
223 8546b045 2020-09-20 neels
224 8546b045 2020-09-20 neels /* Append to solved chunks; make sure that adjacent chunks of same type are combined, and that a minus chunk
225 8546b045 2020-09-20 neels * never directly follows a plus chunk. */
226 8546b045 2020-09-20 neels result = &state->result->chunks;
227 8546b045 2020-09-20 neels
228 8546b045 2020-09-20 neels last_t = diff_chunk_type(&result->head[result->len - 1]);
229 93f8150a 2020-10-11 neels new_t = diff_chunk_type(chunk);
230 8546b045 2020-09-20 neels
231 93f8150a 2020-10-11 neels debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
232 93f8150a 2020-10-11 neels debug("L\n");
233 93f8150a 2020-10-11 neels debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
234 93f8150a 2020-10-11 neels debug("R\n");
235 93f8150a 2020-10-11 neels debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
236 93f8150a 2020-10-11 neels
237 8546b045 2020-09-20 neels if (new_t == last_t) {
238 8546b045 2020-09-20 neels new_chunk = &result->head[result->len - 1];
239 93f8150a 2020-10-11 neels new_chunk->left_count += chunk->left_count;
240 93f8150a 2020-10-11 neels new_chunk->right_count += chunk->right_count;
241 bb867e68 2020-10-11 neels debug(" - added chunk touches previous one of same type, joined:\n");
242 bb867e68 2020-10-11 neels debug("L\n");
243 bb867e68 2020-10-11 neels debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count);
244 bb867e68 2020-10-11 neels debug("R\n");
245 bb867e68 2020-10-11 neels debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count);
246 8546b045 2020-09-20 neels } else if (last_t == CHUNK_PLUS && new_t == CHUNK_MINUS) {
247 93f8150a 2020-10-11 neels enum diff_chunk_type prev_last_t =
248 93f8150a 2020-10-11 neels result->len > 1 ?
249 93f8150a 2020-10-11 neels diff_chunk_type(&result->head[result->len - 2])
250 93f8150a 2020-10-11 neels : CHUNK_EMPTY;
251 93f8150a 2020-10-11 neels /* If a minus-chunk follows a plus-chunk, place it above the plus-chunk->
252 8546b045 2020-09-20 neels * Is the one before that also a minus? combine. */
253 8546b045 2020-09-20 neels if (prev_last_t == CHUNK_MINUS) {
254 8546b045 2020-09-20 neels new_chunk = &result->head[result->len - 2];
255 93f8150a 2020-10-11 neels new_chunk->left_count += chunk->left_count;
256 93f8150a 2020-10-11 neels new_chunk->right_count += chunk->right_count;
257 bb867e68 2020-10-11 neels
258 bb867e68 2020-10-11 neels debug(" - added minus-chunk follows plus-chunk,"
259 bb867e68 2020-10-11 neels " put before that plus-chunk and joined"
260 bb867e68 2020-10-11 neels " with preceding minus-chunk:\n");
261 bb867e68 2020-10-11 neels debug("L\n");
262 bb867e68 2020-10-11 neels debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count);
263 bb867e68 2020-10-11 neels debug("R\n");
264 bb867e68 2020-10-11 neels debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count);
265 8546b045 2020-09-20 neels } else {
266 724967e9 2020-10-11 neels ARRAYLIST_INSERT(new_chunk, *result, result->len - 1);
267 8546b045 2020-09-20 neels if (!new_chunk)
268 8546b045 2020-09-20 neels return NULL;
269 93f8150a 2020-10-11 neels *new_chunk = *chunk;
270 bb867e68 2020-10-11 neels
271 bb867e68 2020-10-11 neels debug(" - added minus-chunk follows plus-chunk,"
272 bb867e68 2020-10-11 neels " put before that plus-chunk\n");
273 8546b045 2020-09-20 neels }
274 8546b045 2020-09-20 neels } else {
275 8546b045 2020-09-20 neels ARRAYLIST_ADD(new_chunk, *result);
276 8546b045 2020-09-20 neels if (!new_chunk)
277 8546b045 2020-09-20 neels return NULL;
278 93f8150a 2020-10-11 neels *new_chunk = *chunk;
279 93f8150a 2020-10-11 neels }
280 93f8150a 2020-10-11 neels return new_chunk;
281 93f8150a 2020-10-11 neels }
282 93f8150a 2020-10-11 neels
283 93f8150a 2020-10-11 neels /* Even if a left or right side is empty, diff output may need to know the
284 93f8150a 2020-10-11 neels * position in that file.
285 93f8150a 2020-10-11 neels * So left_start or right_start must never be NULL -- pass left_count or
286 93f8150a 2020-10-11 neels * right_count as zero to indicate staying at that position without consuming
287 93f8150a 2020-10-11 neels * any lines. */
288 93f8150a 2020-10-11 neels struct diff_chunk *
289 93f8150a 2020-10-11 neels diff_state_add_chunk(struct diff_state *state, bool solved,
290 93f8150a 2020-10-11 neels struct diff_atom *left_start, unsigned int left_count,
291 93f8150a 2020-10-11 neels struct diff_atom *right_start, unsigned int right_count)
292 93f8150a 2020-10-11 neels {
293 93f8150a 2020-10-11 neels diff_chunk_arraylist_t *result = NULL;
294 93f8150a 2020-10-11 neels struct diff_chunk *new_chunk;
295 93f8150a 2020-10-11 neels struct diff_chunk chunk = {
296 93f8150a 2020-10-11 neels .solved = solved,
297 93f8150a 2020-10-11 neels .left_start = left_start,
298 93f8150a 2020-10-11 neels .left_count = left_count,
299 93f8150a 2020-10-11 neels .right_start = right_start,
300 93f8150a 2020-10-11 neels .right_count = right_count,
301 93f8150a 2020-10-11 neels };
302 93f8150a 2020-10-11 neels
303 93f8150a 2020-10-11 neels if (!solved || state->temp_result.len) {
304 93f8150a 2020-10-11 neels /* Append to temp_result */
305 93f8150a 2020-10-11 neels result = &state->temp_result;
306 10ae3a65 2020-10-24 neels debug("append to temp_result %p L %u R %u\n", result,
307 10ae3a65 2020-10-24 neels left_count, right_count);
308 10ae3a65 2020-10-24 neels debug("L\n");
309 10ae3a65 2020-10-24 neels debug_dump_atoms(&state->left, left_start, left_count);
310 10ae3a65 2020-10-24 neels debug("R\n");
311 10ae3a65 2020-10-24 neels debug_dump_atoms(&state->right, right_start, right_count);
312 93f8150a 2020-10-11 neels } else if (!state->result->chunks.len) {
313 93f8150a 2020-10-11 neels /* Append to final result */
314 93f8150a 2020-10-11 neels result = &state->result->chunks;
315 acfce337 2020-10-12 neels debug("Add first chunk:\n");
316 acfce337 2020-10-12 neels debug("L\n");
317 acfce337 2020-10-12 neels debug_dump_atoms(&state->left, left_start, left_count);
318 acfce337 2020-10-12 neels debug("R\n");
319 acfce337 2020-10-12 neels debug_dump_atoms(&state->right, right_start, right_count);
320 93f8150a 2020-10-11 neels }
321 93f8150a 2020-10-11 neels if (result) {
322 93f8150a 2020-10-11 neels ARRAYLIST_ADD(new_chunk, *result);
323 93f8150a 2020-10-11 neels if (!new_chunk)
324 93f8150a 2020-10-11 neels return NULL;
325 8546b045 2020-09-20 neels *new_chunk = chunk;
326 93f8150a 2020-10-11 neels return new_chunk;
327 8546b045 2020-09-20 neels }
328 8546b045 2020-09-20 neels
329 93f8150a 2020-10-11 neels return diff_state_add_solved_chunk(state, &chunk);
330 3b0f3d61 2020-01-22 neels }
331 3b0f3d61 2020-01-22 neels
332 61a7b578 2020-05-06 neels void
333 7a54ad3a 2020-09-20 stsp diff_data_init_root(struct diff_data *d, FILE *f, const uint8_t *data,
334 00d5652b 2020-09-22 stsp unsigned long long len, int diff_flags)
335 3b0f3d61 2020-01-22 neels {
336 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
337 7a54ad3a 2020-09-20 stsp .f = f,
338 c6eecea3 2020-07-26 stsp .pos = 0,
339 3b0f3d61 2020-01-22 neels .data = data,
340 3b0f3d61 2020-01-22 neels .len = len,
341 3b0f3d61 2020-01-22 neels .root = d,
342 00d5652b 2020-09-22 stsp .diff_flags = diff_flags,
343 3b0f3d61 2020-01-22 neels };
344 3b0f3d61 2020-01-22 neels }
345 3b0f3d61 2020-01-22 neels
346 61a7b578 2020-05-06 neels void
347 61a7b578 2020-05-06 neels diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
348 61a7b578 2020-05-06 neels struct diff_atom *from_atom, unsigned int atoms_count)
349 3b0f3d61 2020-01-22 neels {
350 05b5f01f 2020-09-21 stsp struct diff_atom *last_atom;
351 05b5f01f 2020-09-21 stsp
352 de7a2939 2020-10-24 neels debug("diff_data %p parent %p from_atom %p atoms_count %u\n",
353 de7a2939 2020-10-24 neels d, parent, from_atom, atoms_count);
354 de7a2939 2020-10-24 neels debug(" from_atom ");
355 f5a254cc 2020-10-24 neels debug_dump_atom(parent, NULL, from_atom);
356 de7a2939 2020-10-24 neels
357 05b5f01f 2020-09-21 stsp if (atoms_count == 0) {
358 05b5f01f 2020-09-21 stsp *d = (struct diff_data){
359 05b5f01f 2020-09-21 stsp .f = NULL,
360 05b5f01f 2020-09-21 stsp .pos = 0,
361 34570dbe 2020-10-16 stsp .data = NULL,
362 05b5f01f 2020-09-21 stsp .len = 0,
363 05b5f01f 2020-09-21 stsp .root = parent->root,
364 05b5f01f 2020-09-21 stsp .atoms.head = NULL,
365 05b5f01f 2020-09-21 stsp .atoms.len = atoms_count,
366 05b5f01f 2020-09-21 stsp };
367 05b5f01f 2020-09-21 stsp
368 05b5f01f 2020-09-21 stsp return;
369 05b5f01f 2020-09-21 stsp }
370 05b5f01f 2020-09-21 stsp
371 05b5f01f 2020-09-21 stsp last_atom = from_atom + atoms_count - 1;
372 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
373 7a54ad3a 2020-09-20 stsp .f = NULL,
374 c6eecea3 2020-07-26 stsp .pos = from_atom->pos,
375 3b0f3d61 2020-01-22 neels .data = from_atom->at,
376 c6eecea3 2020-07-26 stsp .len = (last_atom->pos + last_atom->len) - from_atom->pos,
377 3b0f3d61 2020-01-22 neels .root = parent->root,
378 3b0f3d61 2020-01-22 neels .atoms.head = from_atom,
379 3b0f3d61 2020-01-22 neels .atoms.len = atoms_count,
380 3b0f3d61 2020-01-22 neels };
381 3b0f3d61 2020-01-22 neels
382 3b0f3d61 2020-01-22 neels debug("subsection:\n");
383 3b0f3d61 2020-01-22 neels debug_dump(d);
384 3b0f3d61 2020-01-22 neels }
385 3b0f3d61 2020-01-22 neels
386 61a7b578 2020-05-06 neels void
387 61a7b578 2020-05-06 neels diff_data_free(struct diff_data *diff_data)
388 3b0f3d61 2020-01-22 neels {
389 3b0f3d61 2020-01-22 neels if (!diff_data)
390 3b0f3d61 2020-01-22 neels return;
391 3b0f3d61 2020-01-22 neels if (diff_data->atoms.allocated)
392 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(diff_data->atoms);
393 3b0f3d61 2020-01-22 neels }
394 3b0f3d61 2020-01-22 neels
395 3e6cba3a 2020-08-13 stsp int
396 0d27172a 2020-05-06 neels diff_algo_none(const struct diff_algo_config *algo_config,
397 0d27172a 2020-05-06 neels struct diff_state *state)
398 3b0f3d61 2020-01-22 neels {
399 3b0f3d61 2020-01-22 neels debug("\n** %s\n", __func__);
400 3b0f3d61 2020-01-22 neels debug("left:\n");
401 3b0f3d61 2020-01-22 neels debug_dump(&state->left);
402 3b0f3d61 2020-01-22 neels debug("right:\n");
403 3b0f3d61 2020-01-22 neels debug_dump(&state->right);
404 0d27172a 2020-05-06 neels debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL,
405 0d27172a 2020-05-06 neels 0);
406 3b0f3d61 2020-01-22 neels
407 3b0f3d61 2020-01-22 neels /* Add a chunk of equal lines, if any */
408 467a993d 2020-10-11 neels struct diff_atom *l = state->left.atoms.head;
409 467a993d 2020-10-11 neels unsigned int l_len = state->left.atoms.len;
410 467a993d 2020-10-11 neels struct diff_atom *r = state->right.atoms.head;
411 467a993d 2020-10-11 neels unsigned int r_len = state->right.atoms.len;
412 41ff30f3 2020-10-11 neels unsigned int equal_atoms_start = 0;
413 2146cf12 2020-10-11 neels unsigned int equal_atoms_end = 0;
414 13497fff 2020-10-11 neels unsigned int l_idx = 0;
415 13497fff 2020-10-11 neels unsigned int r_idx = 0;
416 467a993d 2020-10-11 neels
417 41ff30f3 2020-10-11 neels while (equal_atoms_start < l_len
418 41ff30f3 2020-10-11 neels && equal_atoms_start < r_len) {
419 467a993d 2020-10-11 neels int err;
420 b3fb4686 2020-09-20 neels bool same;
421 41ff30f3 2020-10-11 neels err = diff_atom_same(&same, &l[equal_atoms_start],
422 e5447b81 2020-10-12 neels &r[equal_atoms_start]);
423 467a993d 2020-10-11 neels if (err)
424 467a993d 2020-10-11 neels return err;
425 b3fb4686 2020-09-20 neels if (!same)
426 b3fb4686 2020-09-20 neels break;
427 41ff30f3 2020-10-11 neels equal_atoms_start++;
428 b3fb4686 2020-09-20 neels }
429 2146cf12 2020-10-11 neels while (equal_atoms_end < (l_len - equal_atoms_start)
430 2146cf12 2020-10-11 neels && equal_atoms_end < (r_len - equal_atoms_start)) {
431 2146cf12 2020-10-11 neels int err;
432 2146cf12 2020-10-11 neels bool same;
433 2146cf12 2020-10-11 neels err = diff_atom_same(&same, &l[l_len - 1 - equal_atoms_end],
434 2146cf12 2020-10-11 neels &r[r_len - 1 - equal_atoms_end]);
435 2146cf12 2020-10-11 neels if (err)
436 2146cf12 2020-10-11 neels return err;
437 2146cf12 2020-10-11 neels if (!same)
438 2146cf12 2020-10-11 neels break;
439 2146cf12 2020-10-11 neels equal_atoms_end++;
440 2146cf12 2020-10-11 neels }
441 2146cf12 2020-10-11 neels
442 2146cf12 2020-10-11 neels /* Add a chunk of equal lines at the start */
443 41ff30f3 2020-10-11 neels if (equal_atoms_start) {
444 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
445 e5447b81 2020-10-12 neels l, equal_atoms_start,
446 e5447b81 2020-10-12 neels r, equal_atoms_start))
447 3e6cba3a 2020-08-13 stsp return ENOMEM;
448 13497fff 2020-10-11 neels l_idx += equal_atoms_start;
449 13497fff 2020-10-11 neels r_idx += equal_atoms_start;
450 3b0f3d61 2020-01-22 neels }
451 3b0f3d61 2020-01-22 neels
452 3b0f3d61 2020-01-22 neels /* Add a "minus" chunk with all lines from the left. */
453 2146cf12 2020-10-11 neels if (equal_atoms_start + equal_atoms_end < l_len) {
454 13497fff 2020-10-11 neels unsigned int add_len = l_len - equal_atoms_start - equal_atoms_end;
455 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
456 13497fff 2020-10-11 neels &l[l_idx], add_len,
457 13497fff 2020-10-11 neels &r[r_idx], 0))
458 13497fff 2020-10-11 neels return ENOMEM;
459 13497fff 2020-10-11 neels l_idx += add_len;
460 3b0f3d61 2020-01-22 neels }
461 3b0f3d61 2020-01-22 neels
462 3b0f3d61 2020-01-22 neels /* Add a "plus" chunk with all lines from the right. */
463 2146cf12 2020-10-11 neels if (equal_atoms_start + equal_atoms_end < r_len) {
464 13497fff 2020-10-11 neels unsigned int add_len = r_len - equal_atoms_start - equal_atoms_end;
465 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
466 9dc0554f 2020-10-12 neels &l[l_idx], 0,
467 9dc0554f 2020-10-12 neels &r[r_idx], add_len))
468 13497fff 2020-10-11 neels return ENOMEM;
469 13497fff 2020-10-11 neels r_idx += add_len;
470 3b0f3d61 2020-01-22 neels }
471 2146cf12 2020-10-11 neels
472 2146cf12 2020-10-11 neels /* Add a chunk of equal lines at the end */
473 2146cf12 2020-10-11 neels if (equal_atoms_end) {
474 2146cf12 2020-10-11 neels if (!diff_state_add_chunk(state, true,
475 13497fff 2020-10-11 neels &l[l_idx], equal_atoms_end,
476 13497fff 2020-10-11 neels &r[r_idx], equal_atoms_end))
477 2146cf12 2020-10-11 neels return ENOMEM;
478 2146cf12 2020-10-11 neels }
479 2146cf12 2020-10-11 neels
480 3b0f3d61 2020-01-22 neels return DIFF_RC_OK;
481 3b0f3d61 2020-01-22 neels }
482 3b0f3d61 2020-01-22 neels
483 3e6cba3a 2020-08-13 stsp int
484 0d27172a 2020-05-06 neels diff_run_algo(const struct diff_algo_config *algo_config,
485 0d27172a 2020-05-06 neels struct diff_state *state)
486 3b0f3d61 2020-01-22 neels {
487 3e6cba3a 2020-08-13 stsp int rc;
488 3b0f3d61 2020-01-22 neels
489 0d27172a 2020-05-06 neels if (!algo_config || !algo_config->impl
490 746d94df 2020-10-12 neels || !state->recursion_depth_left
491 746d94df 2020-10-12 neels || !state->left.atoms.len || !state->right.atoms.len) {
492 746d94df 2020-10-12 neels debug("Fall back to diff_algo_none():%s%s%s\n",
493 746d94df 2020-10-12 neels (!algo_config || !algo_config->impl) ? " no-cfg" : "",
494 746d94df 2020-10-12 neels (!state->recursion_depth_left) ? " max-depth" : "",
495 746d94df 2020-10-12 neels (!state->left.atoms.len || !state->right.atoms.len)?
496 746d94df 2020-10-12 neels " trivial" : "");
497 3b0f3d61 2020-01-22 neels return diff_algo_none(algo_config, state);
498 3b0f3d61 2020-01-22 neels }
499 3b0f3d61 2020-01-22 neels
500 746d94df 2020-10-12 neels ARRAYLIST_FREE(state->temp_result);
501 3b0f3d61 2020-01-22 neels ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
502 3b0f3d61 2020-01-22 neels rc = algo_config->impl(algo_config, state);
503 3b0f3d61 2020-01-22 neels switch (rc) {
504 3b0f3d61 2020-01-22 neels case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
505 0d27172a 2020-05-06 neels debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n",
506 0d27172a 2020-05-06 neels algo_config->fallback_algo);
507 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->fallback_algo, state);
508 3b0f3d61 2020-01-22 neels goto return_rc;
509 3b0f3d61 2020-01-22 neels
510 3b0f3d61 2020-01-22 neels case DIFF_RC_OK:
511 3b0f3d61 2020-01-22 neels /* continue below */
512 3b0f3d61 2020-01-22 neels break;
513 3b0f3d61 2020-01-22 neels
514 3b0f3d61 2020-01-22 neels default:
515 3b0f3d61 2020-01-22 neels /* some error happened */
516 3b0f3d61 2020-01-22 neels goto return_rc;
517 3b0f3d61 2020-01-22 neels }
518 3b0f3d61 2020-01-22 neels
519 0d27172a 2020-05-06 neels /* Pick up any diff chunks that are still unsolved and feed to
520 0d27172a 2020-05-06 neels * inner_algo. inner_algo will solve unsolved chunks and append to
521 0d27172a 2020-05-06 neels * result, and subsequent solved chunks on this level are then appended
522 0d27172a 2020-05-06 neels * to result afterwards. */
523 3b0f3d61 2020-01-22 neels int i;
524 3b0f3d61 2020-01-22 neels for (i = 0; i < state->temp_result.len; i++) {
525 3b0f3d61 2020-01-22 neels struct diff_chunk *c = &state->temp_result.head[i];
526 3b0f3d61 2020-01-22 neels if (c->solved) {
527 93f8150a 2020-10-11 neels diff_state_add_solved_chunk(state, c);
528 3b0f3d61 2020-01-22 neels continue;
529 3b0f3d61 2020-01-22 neels }
530 3b0f3d61 2020-01-22 neels
531 3b0f3d61 2020-01-22 neels /* c is an unsolved chunk, feed to inner_algo */
532 3b0f3d61 2020-01-22 neels struct diff_state inner_state = {
533 3b0f3d61 2020-01-22 neels .result = state->result,
534 3b0f3d61 2020-01-22 neels .recursion_depth_left = state->recursion_depth_left - 1,
535 8be88dfa 2020-10-21 stsp .kd_buf = state->kd_buf,
536 8be88dfa 2020-10-21 stsp .kd_buf_size = state->kd_buf_size,
537 3b0f3d61 2020-01-22 neels };
538 0d27172a 2020-05-06 neels diff_data_init_subsection(&inner_state.left, &state->left,
539 0d27172a 2020-05-06 neels c->left_start, c->left_count);
540 0d27172a 2020-05-06 neels diff_data_init_subsection(&inner_state.right, &state->right,
541 0d27172a 2020-05-06 neels c->right_start, c->right_count);
542 3b0f3d61 2020-01-22 neels
543 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->inner_algo, &inner_state);
544 8be88dfa 2020-10-21 stsp state->kd_buf = inner_state.kd_buf;
545 8be88dfa 2020-10-21 stsp state->kd_buf_size = inner_state.kd_buf_size;
546 3b0f3d61 2020-01-22 neels if (rc != DIFF_RC_OK)
547 3b0f3d61 2020-01-22 neels goto return_rc;
548 3b0f3d61 2020-01-22 neels }
549 3b0f3d61 2020-01-22 neels
550 3b0f3d61 2020-01-22 neels rc = DIFF_RC_OK;
551 3b0f3d61 2020-01-22 neels return_rc:
552 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
553 3b0f3d61 2020-01-22 neels return rc;
554 3b0f3d61 2020-01-22 neels }
555 c16dde50 2020-10-22 stsp
556 c16dde50 2020-10-22 stsp int
557 c16dde50 2020-10-22 stsp diff_atomize_file(struct diff_data *d,
558 c16dde50 2020-10-22 stsp const struct diff_config *config,
559 c16dde50 2020-10-22 stsp FILE *f, const uint8_t *data, off_t len, int diff_flags)
560 c16dde50 2020-10-22 stsp {
561 c16dde50 2020-10-22 stsp if (!config->atomize_func)
562 c16dde50 2020-10-22 stsp return EINVAL;
563 3b0f3d61 2020-01-22 neels
564 c16dde50 2020-10-22 stsp diff_data_init_root(d, f, data, len, diff_flags);
565 c16dde50 2020-10-22 stsp
566 c16dde50 2020-10-22 stsp return config->atomize_func(config->atomize_func_data, d);
567 c16dde50 2020-10-22 stsp
568 c16dde50 2020-10-22 stsp }
569 c16dde50 2020-10-22 stsp
570 61a7b578 2020-05-06 neels struct diff_result *
571 c16dde50 2020-10-22 stsp diff_main(const struct diff_config *config, struct diff_data *left,
572 c16dde50 2020-10-22 stsp struct diff_data *right)
573 3b0f3d61 2020-01-22 neels {
574 3b0f3d61 2020-01-22 neels struct diff_result *result = malloc(sizeof(struct diff_result));
575 3b0f3d61 2020-01-22 neels if (!result)
576 3b0f3d61 2020-01-22 neels return NULL;
577 3b0f3d61 2020-01-22 neels
578 3b0f3d61 2020-01-22 neels *result = (struct diff_result){};
579 c16dde50 2020-10-22 stsp result->left = left;
580 c16dde50 2020-10-22 stsp result->right = right;
581 3b0f3d61 2020-01-22 neels
582 3b0f3d61 2020-01-22 neels struct diff_state state = {
583 3b0f3d61 2020-01-22 neels .result = result,
584 00961134 2020-09-20 stsp .recursion_depth_left = config->max_recursion_depth ? : 32,
585 8be88dfa 2020-10-21 stsp .kd_buf = NULL,
586 8be88dfa 2020-10-21 stsp .kd_buf_size = 0,
587 3b0f3d61 2020-01-22 neels };
588 c16dde50 2020-10-22 stsp diff_data_init_subsection(&state.left, left,
589 c16dde50 2020-10-22 stsp left->atoms.head,
590 c16dde50 2020-10-22 stsp left->atoms.len);
591 c16dde50 2020-10-22 stsp diff_data_init_subsection(&state.right, right,
592 c16dde50 2020-10-22 stsp right->atoms.head,
593 c16dde50 2020-10-22 stsp right->atoms.len);
594 3b0f3d61 2020-01-22 neels
595 3b0f3d61 2020-01-22 neels result->rc = diff_run_algo(config->algo, &state);
596 8be88dfa 2020-10-21 stsp free(state.kd_buf);
597 3b0f3d61 2020-01-22 neels
598 3b0f3d61 2020-01-22 neels return result;
599 3b0f3d61 2020-01-22 neels }
600 3b0f3d61 2020-01-22 neels
601 61a7b578 2020-05-06 neels void
602 61a7b578 2020-05-06 neels diff_result_free(struct diff_result *result)
603 3b0f3d61 2020-01-22 neels {
604 3b0f3d61 2020-01-22 neels if (!result)
605 3b0f3d61 2020-01-22 neels return;
606 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(result->chunks);
607 3b0f3d61 2020-01-22 neels free(result);
608 3b0f3d61 2020-01-22 neels }