Blame


1 fe621944 2020-11-10 stsp /* Generic infrastructure to implement various diff algorithms (implementation). */
2 fe621944 2020-11-10 stsp /*
3 fe621944 2020-11-10 stsp * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 fe621944 2020-11-10 stsp *
5 fe621944 2020-11-10 stsp * Permission to use, copy, modify, and distribute this software for any
6 fe621944 2020-11-10 stsp * purpose with or without fee is hereby granted, provided that the above
7 fe621944 2020-11-10 stsp * copyright notice and this permission notice appear in all copies.
8 fe621944 2020-11-10 stsp *
9 fe621944 2020-11-10 stsp * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 fe621944 2020-11-10 stsp * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 fe621944 2020-11-10 stsp * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 fe621944 2020-11-10 stsp * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 fe621944 2020-11-10 stsp * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 fe621944 2020-11-10 stsp * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 fe621944 2020-11-10 stsp * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 fe621944 2020-11-10 stsp */
17 fe621944 2020-11-10 stsp
18 fe621944 2020-11-10 stsp
19 8b925c6c 2022-07-16 thomas #include <sys/queue.h>
20 fe621944 2020-11-10 stsp #include <ctype.h>
21 fe621944 2020-11-10 stsp #include <errno.h>
22 fe621944 2020-11-10 stsp #include <stdint.h>
23 fe621944 2020-11-10 stsp #include <stdlib.h>
24 fe621944 2020-11-10 stsp #include <stdbool.h>
25 fe621944 2020-11-10 stsp #include <stdio.h>
26 fe621944 2020-11-10 stsp #include <string.h>
27 fe621944 2020-11-10 stsp #include <limits.h>
28 fe621944 2020-11-10 stsp #include <unistd.h>
29 fe621944 2020-11-10 stsp
30 fe621944 2020-11-10 stsp #include <assert.h>
31 fe621944 2020-11-10 stsp
32 fe621944 2020-11-10 stsp #include <arraylist.h>
33 fe621944 2020-11-10 stsp #include <diff_main.h>
34 fe621944 2020-11-10 stsp
35 dd038bc6 2021-09-21 thomas.ad #include "got_compat.h"
36 dd038bc6 2021-09-21 thomas.ad
37 fe621944 2020-11-10 stsp #include "diff_internal.h"
38 fe621944 2020-11-10 stsp #include "diff_debug.h"
39 ae012d47 2023-02-20 thomas
40 ae012d47 2023-02-20 thomas inline enum diff_chunk_type
41 ae012d47 2023-02-20 thomas diff_chunk_type(const struct diff_chunk *chunk)
42 ae012d47 2023-02-20 thomas {
43 ae012d47 2023-02-20 thomas if (!chunk->left_count && !chunk->right_count)
44 ae012d47 2023-02-20 thomas return CHUNK_EMPTY;
45 ae012d47 2023-02-20 thomas if (!chunk->solved)
46 ae012d47 2023-02-20 thomas return CHUNK_ERROR;
47 ae012d47 2023-02-20 thomas if (!chunk->right_count)
48 ae012d47 2023-02-20 thomas return CHUNK_MINUS;
49 ae012d47 2023-02-20 thomas if (!chunk->left_count)
50 ae012d47 2023-02-20 thomas return CHUNK_PLUS;
51 ae012d47 2023-02-20 thomas if (chunk->left_count != chunk->right_count)
52 ae012d47 2023-02-20 thomas return CHUNK_ERROR;
53 ae012d47 2023-02-20 thomas return CHUNK_SAME;
54 ae012d47 2023-02-20 thomas }
55 fe621944 2020-11-10 stsp
56 fe621944 2020-11-10 stsp static int
57 fe621944 2020-11-10 stsp read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len)
58 fe621944 2020-11-10 stsp {
59 fe621944 2020-11-10 stsp int r;
60 fe621944 2020-11-10 stsp if (fseeko(f, at_pos, SEEK_SET) == -1)
61 fe621944 2020-11-10 stsp return errno;
62 fe621944 2020-11-10 stsp r = fread(buf, sizeof(char), len, f);
63 fe621944 2020-11-10 stsp if ((r == 0 || r < len) && ferror(f))
64 0ce85e2d 2022-10-12 thomas return EIO;
65 fe621944 2020-11-10 stsp if (r != len)
66 fe621944 2020-11-10 stsp return EIO;
67 fe621944 2020-11-10 stsp return 0;
68 fe621944 2020-11-10 stsp }
69 fe621944 2020-11-10 stsp
70 fe621944 2020-11-10 stsp static int
71 fe621944 2020-11-10 stsp buf_cmp(const unsigned char *left, size_t left_len,
72 fe621944 2020-11-10 stsp const unsigned char *right, size_t right_len,
73 fe621944 2020-11-10 stsp bool ignore_whitespace)
74 fe621944 2020-11-10 stsp {
75 fe621944 2020-11-10 stsp int cmp;
76 fe621944 2020-11-10 stsp
77 fe621944 2020-11-10 stsp if (ignore_whitespace) {
78 fe621944 2020-11-10 stsp int il = 0, ir = 0;
79 fe621944 2020-11-10 stsp while (il < left_len && ir < right_len) {
80 fe621944 2020-11-10 stsp unsigned char cl = left[il];
81 fe621944 2020-11-10 stsp unsigned char cr = right[ir];
82 fe621944 2020-11-10 stsp
83 924153d6 2022-11-17 thomas if (isspace((unsigned char)cl) && il < left_len) {
84 fe621944 2020-11-10 stsp il++;
85 fe621944 2020-11-10 stsp continue;
86 fe621944 2020-11-10 stsp }
87 924153d6 2022-11-17 thomas if (isspace((unsigned char)cr) && ir < right_len) {
88 fe621944 2020-11-10 stsp ir++;
89 fe621944 2020-11-10 stsp continue;
90 fe621944 2020-11-10 stsp }
91 fe621944 2020-11-10 stsp
92 fe621944 2020-11-10 stsp if (cl > cr)
93 fe621944 2020-11-10 stsp return 1;
94 fe621944 2020-11-10 stsp if (cr > cl)
95 fe621944 2020-11-10 stsp return -1;
96 fe621944 2020-11-10 stsp il++;
97 fe621944 2020-11-10 stsp ir++;
98 fe621944 2020-11-10 stsp }
99 fe621944 2020-11-10 stsp while (il < left_len) {
100 fe621944 2020-11-10 stsp unsigned char cl = left[il++];
101 924153d6 2022-11-17 thomas if (!isspace((unsigned char)cl))
102 fe621944 2020-11-10 stsp return 1;
103 fe621944 2020-11-10 stsp }
104 fe621944 2020-11-10 stsp while (ir < right_len) {
105 fe621944 2020-11-10 stsp unsigned char cr = right[ir++];
106 924153d6 2022-11-17 thomas if (!isspace((unsigned char)cr))
107 fe621944 2020-11-10 stsp return -1;
108 fe621944 2020-11-10 stsp }
109 fe621944 2020-11-10 stsp
110 fe621944 2020-11-10 stsp return 0;
111 fe621944 2020-11-10 stsp }
112 fe621944 2020-11-10 stsp
113 fe621944 2020-11-10 stsp cmp = memcmp(left, right, MIN(left_len, right_len));
114 fe621944 2020-11-10 stsp if (cmp)
115 fe621944 2020-11-10 stsp return cmp;
116 fe621944 2020-11-10 stsp if (left_len == right_len)
117 fe621944 2020-11-10 stsp return 0;
118 fe621944 2020-11-10 stsp return (left_len > right_len) ? 1 : -1;
119 fe621944 2020-11-10 stsp }
120 fe621944 2020-11-10 stsp
121 fe621944 2020-11-10 stsp int
122 fe621944 2020-11-10 stsp diff_atom_cmp(int *cmp,
123 fe621944 2020-11-10 stsp const struct diff_atom *left,
124 fe621944 2020-11-10 stsp const struct diff_atom *right)
125 fe621944 2020-11-10 stsp {
126 fe621944 2020-11-10 stsp off_t remain_left, remain_right;
127 fe621944 2020-11-10 stsp int flags = (left->root->diff_flags | right->root->diff_flags);
128 fe621944 2020-11-10 stsp bool ignore_whitespace = (flags & DIFF_FLAG_IGNORE_WHITESPACE);
129 fe621944 2020-11-10 stsp
130 fe621944 2020-11-10 stsp if (!left->len && !right->len) {
131 fe621944 2020-11-10 stsp *cmp = 0;
132 fe621944 2020-11-10 stsp return 0;
133 fe621944 2020-11-10 stsp }
134 fe621944 2020-11-10 stsp if (!ignore_whitespace) {
135 fe621944 2020-11-10 stsp if (!right->len) {
136 fe621944 2020-11-10 stsp *cmp = 1;
137 fe621944 2020-11-10 stsp return 0;
138 fe621944 2020-11-10 stsp }
139 fe621944 2020-11-10 stsp if (!left->len) {
140 fe621944 2020-11-10 stsp *cmp = -1;
141 fe621944 2020-11-10 stsp return 0;
142 fe621944 2020-11-10 stsp }
143 fe621944 2020-11-10 stsp }
144 fe621944 2020-11-10 stsp
145 fe621944 2020-11-10 stsp if (left->at != NULL && right->at != NULL) {
146 fe621944 2020-11-10 stsp *cmp = buf_cmp(left->at, left->len, right->at, right->len,
147 fe621944 2020-11-10 stsp ignore_whitespace);
148 fe621944 2020-11-10 stsp return 0;
149 fe621944 2020-11-10 stsp }
150 fe621944 2020-11-10 stsp
151 fe621944 2020-11-10 stsp remain_left = left->len;
152 fe621944 2020-11-10 stsp remain_right = right->len;
153 fe621944 2020-11-10 stsp while (remain_left > 0 || remain_right > 0) {
154 fe621944 2020-11-10 stsp const size_t chunksz = 8192;
155 fe621944 2020-11-10 stsp unsigned char buf_left[chunksz], buf_right[chunksz];
156 fe621944 2020-11-10 stsp const uint8_t *p_left, *p_right;
157 fe621944 2020-11-10 stsp off_t n_left, n_right;
158 fe621944 2020-11-10 stsp ssize_t r;
159 fe621944 2020-11-10 stsp
160 fe621944 2020-11-10 stsp if (!remain_right) {
161 fe621944 2020-11-10 stsp *cmp = 1;
162 fe621944 2020-11-10 stsp return 0;
163 fe621944 2020-11-10 stsp }
164 fe621944 2020-11-10 stsp if (!remain_left) {
165 fe621944 2020-11-10 stsp *cmp = -1;
166 fe621944 2020-11-10 stsp return 0;
167 fe621944 2020-11-10 stsp }
168 fe621944 2020-11-10 stsp
169 fe621944 2020-11-10 stsp n_left = MIN(chunksz, remain_left);
170 fe621944 2020-11-10 stsp n_right = MIN(chunksz, remain_right);
171 fe621944 2020-11-10 stsp
172 fe621944 2020-11-10 stsp if (left->at == NULL) {
173 fe621944 2020-11-10 stsp r = read_at(left->root->f,
174 fe621944 2020-11-10 stsp left->pos + (left->len - remain_left),
175 fe621944 2020-11-10 stsp buf_left, n_left);
176 fe621944 2020-11-10 stsp if (r) {
177 fe621944 2020-11-10 stsp *cmp = 0;
178 fe621944 2020-11-10 stsp return r;
179 fe621944 2020-11-10 stsp }
180 fe621944 2020-11-10 stsp p_left = buf_left;
181 fe621944 2020-11-10 stsp } else {
182 fe621944 2020-11-10 stsp p_left = left->at + (left->len - remain_left);
183 fe621944 2020-11-10 stsp }
184 fe621944 2020-11-10 stsp
185 fe621944 2020-11-10 stsp if (right->at == NULL) {
186 fe621944 2020-11-10 stsp r = read_at(right->root->f,
187 fe621944 2020-11-10 stsp right->pos + (right->len - remain_right),
188 fe621944 2020-11-10 stsp buf_right, n_right);
189 fe621944 2020-11-10 stsp if (r) {
190 fe621944 2020-11-10 stsp *cmp = 0;
191 fe621944 2020-11-10 stsp return r;
192 fe621944 2020-11-10 stsp }
193 fe621944 2020-11-10 stsp p_right = buf_right;
194 fe621944 2020-11-10 stsp } else {
195 fe621944 2020-11-10 stsp p_right = right->at + (right->len - remain_right);
196 fe621944 2020-11-10 stsp }
197 fe621944 2020-11-10 stsp
198 fe621944 2020-11-10 stsp r = buf_cmp(p_left, n_left, p_right, n_right,
199 fe621944 2020-11-10 stsp ignore_whitespace);
200 fe621944 2020-11-10 stsp if (r) {
201 fe621944 2020-11-10 stsp *cmp = r;
202 fe621944 2020-11-10 stsp return 0;
203 fe621944 2020-11-10 stsp }
204 fe621944 2020-11-10 stsp
205 fe621944 2020-11-10 stsp remain_left -= n_left;
206 fe621944 2020-11-10 stsp remain_right -= n_right;
207 fe621944 2020-11-10 stsp }
208 fe621944 2020-11-10 stsp
209 fe621944 2020-11-10 stsp *cmp = 0;
210 fe621944 2020-11-10 stsp return 0;
211 fe621944 2020-11-10 stsp }
212 fe621944 2020-11-10 stsp
213 fe621944 2020-11-10 stsp int
214 fe621944 2020-11-10 stsp diff_atom_same(bool *same,
215 fe621944 2020-11-10 stsp const struct diff_atom *left,
216 fe621944 2020-11-10 stsp const struct diff_atom *right)
217 fe621944 2020-11-10 stsp {
218 fe621944 2020-11-10 stsp int cmp;
219 fe621944 2020-11-10 stsp int r;
220 fe621944 2020-11-10 stsp if (left->hash != right->hash) {
221 fe621944 2020-11-10 stsp *same = false;
222 fe621944 2020-11-10 stsp return 0;
223 fe621944 2020-11-10 stsp }
224 fe621944 2020-11-10 stsp r = diff_atom_cmp(&cmp, left, right);
225 fe621944 2020-11-10 stsp if (r) {
226 fe621944 2020-11-10 stsp *same = true;
227 fe621944 2020-11-10 stsp return r;
228 fe621944 2020-11-10 stsp }
229 fe621944 2020-11-10 stsp *same = (cmp == 0);
230 fe621944 2020-11-10 stsp return 0;
231 fe621944 2020-11-10 stsp }
232 fe621944 2020-11-10 stsp
233 fe621944 2020-11-10 stsp static struct diff_chunk *
234 fe621944 2020-11-10 stsp diff_state_add_solved_chunk(struct diff_state *state,
235 fe621944 2020-11-10 stsp const struct diff_chunk *chunk)
236 fe621944 2020-11-10 stsp {
237 fe621944 2020-11-10 stsp diff_chunk_arraylist_t *result;
238 fe621944 2020-11-10 stsp struct diff_chunk *new_chunk;
239 fe621944 2020-11-10 stsp enum diff_chunk_type last_t;
240 fe621944 2020-11-10 stsp enum diff_chunk_type new_t;
241 fe621944 2020-11-10 stsp struct diff_chunk *last;
242 fe621944 2020-11-10 stsp
243 fe621944 2020-11-10 stsp /* Append to solved chunks; make sure that adjacent chunks of same type are combined, and that a minus chunk
244 fe621944 2020-11-10 stsp * never directly follows a plus chunk. */
245 fe621944 2020-11-10 stsp result = &state->result->chunks;
246 fe621944 2020-11-10 stsp
247 fe621944 2020-11-10 stsp last_t = result->len ? diff_chunk_type(&result->head[result->len - 1])
248 fe621944 2020-11-10 stsp : CHUNK_EMPTY;
249 fe621944 2020-11-10 stsp new_t = diff_chunk_type(chunk);
250 fe621944 2020-11-10 stsp
251 fe621944 2020-11-10 stsp debug("ADD %s chunk #%u:\n", chunk->solved ? "solved" : "UNSOLVED",
252 fe621944 2020-11-10 stsp result->len);
253 fe621944 2020-11-10 stsp debug("L\n");
254 fe621944 2020-11-10 stsp debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
255 fe621944 2020-11-10 stsp debug("R\n");
256 fe621944 2020-11-10 stsp debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
257 fe621944 2020-11-10 stsp
258 fe621944 2020-11-10 stsp if (result->len) {
259 fe621944 2020-11-10 stsp last = &result->head[result->len - 1];
260 fe621944 2020-11-10 stsp assert(chunk->left_start
261 fe621944 2020-11-10 stsp == last->left_start + last->left_count);
262 fe621944 2020-11-10 stsp assert(chunk->right_start
263 fe621944 2020-11-10 stsp == last->right_start + last->right_count);
264 fe621944 2020-11-10 stsp }
265 fe621944 2020-11-10 stsp
266 fe621944 2020-11-10 stsp if (new_t == last_t) {
267 fe621944 2020-11-10 stsp new_chunk = &result->head[result->len - 1];
268 fe621944 2020-11-10 stsp new_chunk->left_count += chunk->left_count;
269 fe621944 2020-11-10 stsp new_chunk->right_count += chunk->right_count;
270 fe621944 2020-11-10 stsp debug(" - added chunk touches previous one of same type, joined:\n");
271 fe621944 2020-11-10 stsp debug("L\n");
272 fe621944 2020-11-10 stsp debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count);
273 fe621944 2020-11-10 stsp debug("R\n");
274 fe621944 2020-11-10 stsp debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count);
275 fe621944 2020-11-10 stsp } else if (last_t == CHUNK_PLUS && new_t == CHUNK_MINUS) {
276 fe621944 2020-11-10 stsp enum diff_chunk_type prev_last_t =
277 fe621944 2020-11-10 stsp result->len > 1 ?
278 fe621944 2020-11-10 stsp diff_chunk_type(&result->head[result->len - 2])
279 fe621944 2020-11-10 stsp : CHUNK_EMPTY;
280 fe621944 2020-11-10 stsp /* If a minus-chunk follows a plus-chunk, place it above the plus-chunk->
281 fe621944 2020-11-10 stsp * Is the one before that also a minus? combine. */
282 fe621944 2020-11-10 stsp if (prev_last_t == CHUNK_MINUS) {
283 fe621944 2020-11-10 stsp new_chunk = &result->head[result->len - 2];
284 fe621944 2020-11-10 stsp new_chunk->left_count += chunk->left_count;
285 fe621944 2020-11-10 stsp new_chunk->right_count += chunk->right_count;
286 fe621944 2020-11-10 stsp
287 fe621944 2020-11-10 stsp debug(" - added minus-chunk follows plus-chunk,"
288 fe621944 2020-11-10 stsp " put before that plus-chunk and joined"
289 fe621944 2020-11-10 stsp " with preceding minus-chunk:\n");
290 fe621944 2020-11-10 stsp debug("L\n");
291 fe621944 2020-11-10 stsp debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count);
292 fe621944 2020-11-10 stsp debug("R\n");
293 fe621944 2020-11-10 stsp debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count);
294 fe621944 2020-11-10 stsp } else {
295 fe621944 2020-11-10 stsp ARRAYLIST_INSERT(new_chunk, *result, result->len - 1);
296 fe621944 2020-11-10 stsp if (!new_chunk)
297 fe621944 2020-11-10 stsp return NULL;
298 fe621944 2020-11-10 stsp *new_chunk = *chunk;
299 fe621944 2020-11-10 stsp
300 fe621944 2020-11-10 stsp /* The new minus chunk indicates to which position on
301 fe621944 2020-11-10 stsp * the right it corresponds, even though it doesn't add
302 fe621944 2020-11-10 stsp * any lines on the right. By moving above a plus chunk,
303 fe621944 2020-11-10 stsp * that position on the right has shifted. */
304 fe621944 2020-11-10 stsp last = &result->head[result->len - 1];
305 fe621944 2020-11-10 stsp new_chunk->right_start = last->right_start;
306 fe621944 2020-11-10 stsp
307 fe621944 2020-11-10 stsp debug(" - added minus-chunk follows plus-chunk,"
308 fe621944 2020-11-10 stsp " put before that plus-chunk\n");
309 fe621944 2020-11-10 stsp }
310 fe621944 2020-11-10 stsp
311 fe621944 2020-11-10 stsp /* That last_t == CHUNK_PLUS indicates to which position on the
312 fe621944 2020-11-10 stsp * left it corresponds, even though it doesn't add any lines on
313 fe621944 2020-11-10 stsp * the left. By inserting/extending the prev_last_t ==
314 fe621944 2020-11-10 stsp * CHUNK_MINUS, that position on the left has shifted. */
315 fe621944 2020-11-10 stsp last = &result->head[result->len - 1];
316 fe621944 2020-11-10 stsp last->left_start = new_chunk->left_start
317 fe621944 2020-11-10 stsp + new_chunk->left_count;
318 fe621944 2020-11-10 stsp
319 fe621944 2020-11-10 stsp } else {
320 fe621944 2020-11-10 stsp ARRAYLIST_ADD(new_chunk, *result);
321 fe621944 2020-11-10 stsp if (!new_chunk)
322 fe621944 2020-11-10 stsp return NULL;
323 fe621944 2020-11-10 stsp *new_chunk = *chunk;
324 fe621944 2020-11-10 stsp }
325 fe621944 2020-11-10 stsp return new_chunk;
326 fe621944 2020-11-10 stsp }
327 fe621944 2020-11-10 stsp
328 fe621944 2020-11-10 stsp /* Even if a left or right side is empty, diff output may need to know the
329 fe621944 2020-11-10 stsp * position in that file.
330 fe621944 2020-11-10 stsp * So left_start or right_start must never be NULL -- pass left_count or
331 fe621944 2020-11-10 stsp * right_count as zero to indicate staying at that position without consuming
332 fe621944 2020-11-10 stsp * any lines. */
333 fe621944 2020-11-10 stsp struct diff_chunk *
334 fe621944 2020-11-10 stsp diff_state_add_chunk(struct diff_state *state, bool solved,
335 fe621944 2020-11-10 stsp struct diff_atom *left_start, unsigned int left_count,
336 fe621944 2020-11-10 stsp struct diff_atom *right_start, unsigned int right_count)
337 fe621944 2020-11-10 stsp {
338 fe621944 2020-11-10 stsp struct diff_chunk *new_chunk;
339 fe621944 2020-11-10 stsp struct diff_chunk chunk = {
340 fe621944 2020-11-10 stsp .solved = solved,
341 fe621944 2020-11-10 stsp .left_start = left_start,
342 fe621944 2020-11-10 stsp .left_count = left_count,
343 fe621944 2020-11-10 stsp .right_start = right_start,
344 fe621944 2020-11-10 stsp .right_count = right_count,
345 fe621944 2020-11-10 stsp };
346 fe621944 2020-11-10 stsp
347 fe621944 2020-11-10 stsp /* An unsolved chunk means store as intermediate result for later
348 fe621944 2020-11-10 stsp * re-iteration.
349 fe621944 2020-11-10 stsp * If there already are intermediate results, that means even a
350 fe621944 2020-11-10 stsp * following solved chunk needs to go to intermediate results, so that
351 fe621944 2020-11-10 stsp * it is later put in the final correct position in solved chunks.
352 fe621944 2020-11-10 stsp */
353 fe621944 2020-11-10 stsp if (!solved || state->temp_result.len) {
354 fe621944 2020-11-10 stsp /* Append to temp_result */
355 fe621944 2020-11-10 stsp debug("ADD %s chunk to temp result:\n",
356 fe621944 2020-11-10 stsp chunk.solved ? "solved" : "UNSOLVED");
357 fe621944 2020-11-10 stsp debug("L\n");
358 fe621944 2020-11-10 stsp debug_dump_atoms(&state->left, left_start, left_count);
359 fe621944 2020-11-10 stsp debug("R\n");
360 fe621944 2020-11-10 stsp debug_dump_atoms(&state->right, right_start, right_count);
361 fe621944 2020-11-10 stsp ARRAYLIST_ADD(new_chunk, state->temp_result);
362 fe621944 2020-11-10 stsp if (!new_chunk)
363 fe621944 2020-11-10 stsp return NULL;
364 fe621944 2020-11-10 stsp *new_chunk = chunk;
365 fe621944 2020-11-10 stsp return new_chunk;
366 fe621944 2020-11-10 stsp }
367 fe621944 2020-11-10 stsp
368 fe621944 2020-11-10 stsp return diff_state_add_solved_chunk(state, &chunk);
369 fe621944 2020-11-10 stsp }
370 fe621944 2020-11-10 stsp
371 ef20f542 2022-06-26 thomas static void
372 fe621944 2020-11-10 stsp diff_data_init_root(struct diff_data *d, FILE *f, const uint8_t *data,
373 fe621944 2020-11-10 stsp unsigned long long len, int diff_flags)
374 fe621944 2020-11-10 stsp {
375 fe621944 2020-11-10 stsp *d = (struct diff_data){
376 fe621944 2020-11-10 stsp .f = f,
377 fe621944 2020-11-10 stsp .pos = 0,
378 fe621944 2020-11-10 stsp .data = data,
379 fe621944 2020-11-10 stsp .len = len,
380 fe621944 2020-11-10 stsp .root = d,
381 fe621944 2020-11-10 stsp .diff_flags = diff_flags,
382 fe621944 2020-11-10 stsp };
383 fe621944 2020-11-10 stsp }
384 fe621944 2020-11-10 stsp
385 fe621944 2020-11-10 stsp void
386 fe621944 2020-11-10 stsp diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
387 fe621944 2020-11-10 stsp struct diff_atom *from_atom, unsigned int atoms_count)
388 fe621944 2020-11-10 stsp {
389 fe621944 2020-11-10 stsp struct diff_atom *last_atom;
390 fe621944 2020-11-10 stsp
391 fe621944 2020-11-10 stsp debug("diff_data %p parent %p from_atom %p atoms_count %u\n",
392 fe621944 2020-11-10 stsp d, parent, from_atom, atoms_count);
393 fe621944 2020-11-10 stsp debug(" from_atom ");
394 fe621944 2020-11-10 stsp debug_dump_atom(parent, NULL, from_atom);
395 fe621944 2020-11-10 stsp
396 fe621944 2020-11-10 stsp if (atoms_count == 0) {
397 fe621944 2020-11-10 stsp *d = (struct diff_data){
398 fe621944 2020-11-10 stsp .f = NULL,
399 fe621944 2020-11-10 stsp .pos = 0,
400 fe621944 2020-11-10 stsp .data = NULL,
401 fe621944 2020-11-10 stsp .len = 0,
402 fe621944 2020-11-10 stsp .root = parent->root,
403 fe621944 2020-11-10 stsp .atoms.head = NULL,
404 fe621944 2020-11-10 stsp .atoms.len = atoms_count,
405 fe621944 2020-11-10 stsp };
406 fe621944 2020-11-10 stsp
407 fe621944 2020-11-10 stsp return;
408 fe621944 2020-11-10 stsp }
409 fe621944 2020-11-10 stsp
410 fe621944 2020-11-10 stsp last_atom = from_atom + atoms_count - 1;
411 fe621944 2020-11-10 stsp *d = (struct diff_data){
412 fe621944 2020-11-10 stsp .f = NULL,
413 fe621944 2020-11-10 stsp .pos = from_atom->pos,
414 fe621944 2020-11-10 stsp .data = from_atom->at,
415 fe621944 2020-11-10 stsp .len = (last_atom->pos + last_atom->len) - from_atom->pos,
416 fe621944 2020-11-10 stsp .root = parent->root,
417 fe621944 2020-11-10 stsp .atoms.head = from_atom,
418 fe621944 2020-11-10 stsp .atoms.len = atoms_count,
419 fe621944 2020-11-10 stsp };
420 fe621944 2020-11-10 stsp
421 fe621944 2020-11-10 stsp debug("subsection:\n");
422 fe621944 2020-11-10 stsp debug_dump(d);
423 fe621944 2020-11-10 stsp }
424 fe621944 2020-11-10 stsp
425 fe621944 2020-11-10 stsp void
426 fe621944 2020-11-10 stsp diff_data_free(struct diff_data *diff_data)
427 fe621944 2020-11-10 stsp {
428 fe621944 2020-11-10 stsp if (!diff_data)
429 fe621944 2020-11-10 stsp return;
430 fe621944 2020-11-10 stsp if (diff_data->atoms.allocated)
431 fe621944 2020-11-10 stsp ARRAYLIST_FREE(diff_data->atoms);
432 fe621944 2020-11-10 stsp }
433 fe621944 2020-11-10 stsp
434 fe621944 2020-11-10 stsp int
435 fe621944 2020-11-10 stsp diff_algo_none(const struct diff_algo_config *algo_config,
436 fe621944 2020-11-10 stsp struct diff_state *state)
437 fe621944 2020-11-10 stsp {
438 fe621944 2020-11-10 stsp debug("\n** %s\n", __func__);
439 fe621944 2020-11-10 stsp debug("left:\n");
440 fe621944 2020-11-10 stsp debug_dump(&state->left);
441 fe621944 2020-11-10 stsp debug("right:\n");
442 fe621944 2020-11-10 stsp debug_dump(&state->right);
443 fe621944 2020-11-10 stsp debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL,
444 fe621944 2020-11-10 stsp 0);
445 fe621944 2020-11-10 stsp
446 fe621944 2020-11-10 stsp /* Add a chunk of equal lines, if any */
447 fe621944 2020-11-10 stsp struct diff_atom *l = state->left.atoms.head;
448 fe621944 2020-11-10 stsp unsigned int l_len = state->left.atoms.len;
449 fe621944 2020-11-10 stsp struct diff_atom *r = state->right.atoms.head;
450 fe621944 2020-11-10 stsp unsigned int r_len = state->right.atoms.len;
451 fe621944 2020-11-10 stsp unsigned int equal_atoms_start = 0;
452 fe621944 2020-11-10 stsp unsigned int equal_atoms_end = 0;
453 fe621944 2020-11-10 stsp unsigned int l_idx = 0;
454 fe621944 2020-11-10 stsp unsigned int r_idx = 0;
455 fe621944 2020-11-10 stsp
456 fe621944 2020-11-10 stsp while (equal_atoms_start < l_len
457 fe621944 2020-11-10 stsp && equal_atoms_start < r_len) {
458 fe621944 2020-11-10 stsp int err;
459 fe621944 2020-11-10 stsp bool same;
460 fe621944 2020-11-10 stsp err = diff_atom_same(&same, &l[equal_atoms_start],
461 fe621944 2020-11-10 stsp &r[equal_atoms_start]);
462 fe621944 2020-11-10 stsp if (err)
463 fe621944 2020-11-10 stsp return err;
464 fe621944 2020-11-10 stsp if (!same)
465 fe621944 2020-11-10 stsp break;
466 fe621944 2020-11-10 stsp equal_atoms_start++;
467 fe621944 2020-11-10 stsp }
468 fe621944 2020-11-10 stsp while (equal_atoms_end < (l_len - equal_atoms_start)
469 fe621944 2020-11-10 stsp && equal_atoms_end < (r_len - equal_atoms_start)) {
470 fe621944 2020-11-10 stsp int err;
471 fe621944 2020-11-10 stsp bool same;
472 fe621944 2020-11-10 stsp err = diff_atom_same(&same, &l[l_len - 1 - equal_atoms_end],
473 fe621944 2020-11-10 stsp &r[r_len - 1 - equal_atoms_end]);
474 fe621944 2020-11-10 stsp if (err)
475 fe621944 2020-11-10 stsp return err;
476 fe621944 2020-11-10 stsp if (!same)
477 fe621944 2020-11-10 stsp break;
478 fe621944 2020-11-10 stsp equal_atoms_end++;
479 fe621944 2020-11-10 stsp }
480 fe621944 2020-11-10 stsp
481 fe621944 2020-11-10 stsp /* Add a chunk of equal lines at the start */
482 fe621944 2020-11-10 stsp if (equal_atoms_start) {
483 fe621944 2020-11-10 stsp if (!diff_state_add_chunk(state, true,
484 fe621944 2020-11-10 stsp l, equal_atoms_start,
485 fe621944 2020-11-10 stsp r, equal_atoms_start))
486 fe621944 2020-11-10 stsp return ENOMEM;
487 fe621944 2020-11-10 stsp l_idx += equal_atoms_start;
488 fe621944 2020-11-10 stsp r_idx += equal_atoms_start;
489 fe621944 2020-11-10 stsp }
490 fe621944 2020-11-10 stsp
491 fe621944 2020-11-10 stsp /* Add a "minus" chunk with all lines from the left. */
492 fe621944 2020-11-10 stsp if (equal_atoms_start + equal_atoms_end < l_len) {
493 fe621944 2020-11-10 stsp unsigned int add_len = l_len - equal_atoms_start - equal_atoms_end;
494 fe621944 2020-11-10 stsp if (!diff_state_add_chunk(state, true,
495 fe621944 2020-11-10 stsp &l[l_idx], add_len,
496 fe621944 2020-11-10 stsp &r[r_idx], 0))
497 fe621944 2020-11-10 stsp return ENOMEM;
498 fe621944 2020-11-10 stsp l_idx += add_len;
499 fe621944 2020-11-10 stsp }
500 fe621944 2020-11-10 stsp
501 fe621944 2020-11-10 stsp /* Add a "plus" chunk with all lines from the right. */
502 fe621944 2020-11-10 stsp if (equal_atoms_start + equal_atoms_end < r_len) {
503 fe621944 2020-11-10 stsp unsigned int add_len = r_len - equal_atoms_start - equal_atoms_end;
504 fe621944 2020-11-10 stsp if (!diff_state_add_chunk(state, true,
505 fe621944 2020-11-10 stsp &l[l_idx], 0,
506 fe621944 2020-11-10 stsp &r[r_idx], add_len))
507 fe621944 2020-11-10 stsp return ENOMEM;
508 fe621944 2020-11-10 stsp r_idx += add_len;
509 fe621944 2020-11-10 stsp }
510 fe621944 2020-11-10 stsp
511 fe621944 2020-11-10 stsp /* Add a chunk of equal lines at the end */
512 fe621944 2020-11-10 stsp if (equal_atoms_end) {
513 fe621944 2020-11-10 stsp if (!diff_state_add_chunk(state, true,
514 fe621944 2020-11-10 stsp &l[l_idx], equal_atoms_end,
515 fe621944 2020-11-10 stsp &r[r_idx], equal_atoms_end))
516 fe621944 2020-11-10 stsp return ENOMEM;
517 fe621944 2020-11-10 stsp }
518 fe621944 2020-11-10 stsp
519 fe621944 2020-11-10 stsp return DIFF_RC_OK;
520 fe621944 2020-11-10 stsp }
521 fe621944 2020-11-10 stsp
522 ef20f542 2022-06-26 thomas static int
523 fe621944 2020-11-10 stsp diff_run_algo(const struct diff_algo_config *algo_config,
524 fe621944 2020-11-10 stsp struct diff_state *state)
525 fe621944 2020-11-10 stsp {
526 fe621944 2020-11-10 stsp int rc;
527 fe621944 2020-11-10 stsp
528 fe621944 2020-11-10 stsp if (!algo_config || !algo_config->impl
529 fe621944 2020-11-10 stsp || !state->recursion_depth_left
530 fe621944 2020-11-10 stsp || !state->left.atoms.len || !state->right.atoms.len) {
531 fe621944 2020-11-10 stsp debug("Fall back to diff_algo_none():%s%s%s\n",
532 fe621944 2020-11-10 stsp (!algo_config || !algo_config->impl) ? " no-cfg" : "",
533 fe621944 2020-11-10 stsp (!state->recursion_depth_left) ? " max-depth" : "",
534 fe621944 2020-11-10 stsp (!state->left.atoms.len || !state->right.atoms.len)?
535 fe621944 2020-11-10 stsp " trivial" : "");
536 fe621944 2020-11-10 stsp return diff_algo_none(algo_config, state);
537 fe621944 2020-11-10 stsp }
538 fe621944 2020-11-10 stsp
539 fe621944 2020-11-10 stsp ARRAYLIST_FREE(state->temp_result);
540 fe621944 2020-11-10 stsp ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
541 fe621944 2020-11-10 stsp rc = algo_config->impl(algo_config, state);
542 fe621944 2020-11-10 stsp switch (rc) {
543 fe621944 2020-11-10 stsp case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
544 fe621944 2020-11-10 stsp debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n",
545 fe621944 2020-11-10 stsp algo_config->fallback_algo);
546 fe621944 2020-11-10 stsp rc = diff_run_algo(algo_config->fallback_algo, state);
547 fe621944 2020-11-10 stsp goto return_rc;
548 fe621944 2020-11-10 stsp
549 fe621944 2020-11-10 stsp case DIFF_RC_OK:
550 fe621944 2020-11-10 stsp /* continue below */
551 fe621944 2020-11-10 stsp break;
552 fe621944 2020-11-10 stsp
553 fe621944 2020-11-10 stsp default:
554 fe621944 2020-11-10 stsp /* some error happened */
555 fe621944 2020-11-10 stsp goto return_rc;
556 fe621944 2020-11-10 stsp }
557 fe621944 2020-11-10 stsp
558 fe621944 2020-11-10 stsp /* Pick up any diff chunks that are still unsolved and feed to
559 fe621944 2020-11-10 stsp * inner_algo. inner_algo will solve unsolved chunks and append to
560 fe621944 2020-11-10 stsp * result, and subsequent solved chunks on this level are then appended
561 fe621944 2020-11-10 stsp * to result afterwards. */
562 fe621944 2020-11-10 stsp int i;
563 fe621944 2020-11-10 stsp for (i = 0; i < state->temp_result.len; i++) {
564 fe621944 2020-11-10 stsp struct diff_chunk *c = &state->temp_result.head[i];
565 fe621944 2020-11-10 stsp if (c->solved) {
566 fe621944 2020-11-10 stsp diff_state_add_solved_chunk(state, c);
567 fe621944 2020-11-10 stsp continue;
568 fe621944 2020-11-10 stsp }
569 fe621944 2020-11-10 stsp
570 fe621944 2020-11-10 stsp /* c is an unsolved chunk, feed to inner_algo */
571 fe621944 2020-11-10 stsp struct diff_state inner_state = {
572 fe621944 2020-11-10 stsp .result = state->result,
573 fe621944 2020-11-10 stsp .recursion_depth_left = state->recursion_depth_left - 1,
574 fe621944 2020-11-10 stsp .kd_buf = state->kd_buf,
575 fe621944 2020-11-10 stsp .kd_buf_size = state->kd_buf_size,
576 fe621944 2020-11-10 stsp };
577 fe621944 2020-11-10 stsp diff_data_init_subsection(&inner_state.left, &state->left,
578 fe621944 2020-11-10 stsp c->left_start, c->left_count);
579 fe621944 2020-11-10 stsp diff_data_init_subsection(&inner_state.right, &state->right,
580 fe621944 2020-11-10 stsp c->right_start, c->right_count);
581 fe621944 2020-11-10 stsp
582 fe621944 2020-11-10 stsp rc = diff_run_algo(algo_config->inner_algo, &inner_state);
583 fe621944 2020-11-10 stsp state->kd_buf = inner_state.kd_buf;
584 fe621944 2020-11-10 stsp state->kd_buf_size = inner_state.kd_buf_size;
585 fe621944 2020-11-10 stsp if (rc != DIFF_RC_OK)
586 fe621944 2020-11-10 stsp goto return_rc;
587 fe621944 2020-11-10 stsp }
588 fe621944 2020-11-10 stsp
589 fe621944 2020-11-10 stsp rc = DIFF_RC_OK;
590 fe621944 2020-11-10 stsp return_rc:
591 fe621944 2020-11-10 stsp ARRAYLIST_FREE(state->temp_result);
592 fe621944 2020-11-10 stsp return rc;
593 fe621944 2020-11-10 stsp }
594 fe621944 2020-11-10 stsp
595 fe621944 2020-11-10 stsp int
596 fe621944 2020-11-10 stsp diff_atomize_file(struct diff_data *d,
597 fe621944 2020-11-10 stsp const struct diff_config *config,
598 fe621944 2020-11-10 stsp FILE *f, const uint8_t *data, off_t len, int diff_flags)
599 fe621944 2020-11-10 stsp {
600 fe621944 2020-11-10 stsp if (!config->atomize_func)
601 fe621944 2020-11-10 stsp return EINVAL;
602 fe621944 2020-11-10 stsp
603 fe621944 2020-11-10 stsp diff_data_init_root(d, f, data, len, diff_flags);
604 fe621944 2020-11-10 stsp
605 fe621944 2020-11-10 stsp return config->atomize_func(config->atomize_func_data, d);
606 fe621944 2020-11-10 stsp
607 fe621944 2020-11-10 stsp }
608 fe621944 2020-11-10 stsp
609 fe621944 2020-11-10 stsp struct diff_result *
610 fe621944 2020-11-10 stsp diff_main(const struct diff_config *config, struct diff_data *left,
611 fe621944 2020-11-10 stsp struct diff_data *right)
612 fe621944 2020-11-10 stsp {
613 fe621944 2020-11-10 stsp struct diff_result *result = malloc(sizeof(struct diff_result));
614 fe621944 2020-11-10 stsp if (!result)
615 fe621944 2020-11-10 stsp return NULL;
616 fe621944 2020-11-10 stsp
617 fe621944 2020-11-10 stsp *result = (struct diff_result){};
618 fe621944 2020-11-10 stsp result->left = left;
619 fe621944 2020-11-10 stsp result->right = right;
620 fe621944 2020-11-10 stsp
621 fe621944 2020-11-10 stsp struct diff_state state = {
622 fe621944 2020-11-10 stsp .result = result,
623 b15f45f4 2022-09-02 thomas .recursion_depth_left = config->max_recursion_depth ?
624 b15f45f4 2022-09-02 thomas config->max_recursion_depth : UINT_MAX,
625 fe621944 2020-11-10 stsp .kd_buf = NULL,
626 fe621944 2020-11-10 stsp .kd_buf_size = 0,
627 fe621944 2020-11-10 stsp };
628 fe621944 2020-11-10 stsp diff_data_init_subsection(&state.left, left,
629 fe621944 2020-11-10 stsp left->atoms.head,
630 fe621944 2020-11-10 stsp left->atoms.len);
631 fe621944 2020-11-10 stsp diff_data_init_subsection(&state.right, right,
632 fe621944 2020-11-10 stsp right->atoms.head,
633 fe621944 2020-11-10 stsp right->atoms.len);
634 fe621944 2020-11-10 stsp
635 fe621944 2020-11-10 stsp result->rc = diff_run_algo(config->algo, &state);
636 fe621944 2020-11-10 stsp free(state.kd_buf);
637 fe621944 2020-11-10 stsp
638 fe621944 2020-11-10 stsp return result;
639 fe621944 2020-11-10 stsp }
640 fe621944 2020-11-10 stsp
641 fe621944 2020-11-10 stsp void
642 fe621944 2020-11-10 stsp diff_result_free(struct diff_result *result)
643 fe621944 2020-11-10 stsp {
644 fe621944 2020-11-10 stsp if (!result)
645 fe621944 2020-11-10 stsp return;
646 fe621944 2020-11-10 stsp ARRAYLIST_FREE(result->chunks);
647 fe621944 2020-11-10 stsp free(result);
648 0ce85e2d 2022-10-12 thomas }
649 0ce85e2d 2022-10-12 thomas
650 0ce85e2d 2022-10-12 thomas int
651 0ce85e2d 2022-10-12 thomas diff_result_contains_printable_chunks(struct diff_result *result)
652 0ce85e2d 2022-10-12 thomas {
653 0ce85e2d 2022-10-12 thomas struct diff_chunk *c;
654 0ce85e2d 2022-10-12 thomas enum diff_chunk_type t;
655 76ab3c0a 2022-11-03 thomas int i;
656 0ce85e2d 2022-10-12 thomas
657 76ab3c0a 2022-11-03 thomas for (i = 0; i < result->chunks.len; i++) {
658 0ce85e2d 2022-10-12 thomas c = &result->chunks.head[i];
659 0ce85e2d 2022-10-12 thomas t = diff_chunk_type(c);
660 0ce85e2d 2022-10-12 thomas if (t == CHUNK_MINUS || t == CHUNK_PLUS)
661 0ce85e2d 2022-10-12 thomas return 1;
662 0ce85e2d 2022-10-12 thomas }
663 0ce85e2d 2022-10-12 thomas
664 0ce85e2d 2022-10-12 thomas return 0;
665 fe621944 2020-11-10 stsp }