2 fe621944 2020-11-10 stsp * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
3 fe621944 2020-11-10 stsp * Copyright (c) 2020 Stefan Sperling <stsp@openbsd.org>
5 fe621944 2020-11-10 stsp * Permission to use, copy, modify, and distribute this software for any
6 fe621944 2020-11-10 stsp * purpose with or without fee is hereby granted, provided that the above
7 fe621944 2020-11-10 stsp * copyright notice and this permission notice appear in all copies.
9 fe621944 2020-11-10 stsp * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 fe621944 2020-11-10 stsp * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 fe621944 2020-11-10 stsp * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 fe621944 2020-11-10 stsp * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 fe621944 2020-11-10 stsp * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 fe621944 2020-11-10 stsp * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 fe621944 2020-11-10 stsp * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 fe621944 2020-11-10 stsp #include <sys/mman.h>
19 574ed2c3 2017-11-29 stsp #include <sys/stat.h>
20 8b925c6c 2022-07-16 thomas #include <sys/queue.h>
22 574ed2c3 2017-11-29 stsp #include <errno.h>
23 588a8092 2023-02-23 thomas #include <sha1.h>
24 588a8092 2023-02-23 thomas #include <sha2.h>
25 574ed2c3 2017-11-29 stsp #include <stdio.h>
26 574ed2c3 2017-11-29 stsp #include <stdlib.h>
27 574ed2c3 2017-11-29 stsp #include <string.h>
29 dd038bc6 2021-09-21 thomas.ad #include "got_compat.h"
31 7d283eee 2017-11-29 stsp #include "got_object.h"
32 fe621944 2020-11-10 stsp #include "got_opentemp.h"
33 fe621944 2020-11-10 stsp #include "got_error.h"
34 25ec7006 2022-07-01 thomas #include "got_diff.h"
36 718b3ab0 2018-03-17 stsp #include "got_lib_diff.h"
/*
 * The algorithm configurations defined below point at one another, so all
 * of them are declared before the first initializer takes an address.
 */
extern const struct diff_algo_config myers_then_patience;
extern const struct diff_algo_config myers_then_myers_divide;
extern const struct diff_algo_config patience;
extern const struct diff_algo_config myers_divide;

43 db97f624 2022-11-03 thomas const struct diff_algo_config myers_then_patience = {
44 fe621944 2020-11-10 stsp .impl = diff_algo_myers,
45 fe621944 2020-11-10 stsp .permitted_state_size = 1024 * 1024 * sizeof(int),
46 fe621944 2020-11-10 stsp .fallback_algo = &patience,
49 db97f624 2022-11-03 thomas const struct diff_algo_config myers_then_myers_divide = {
50 fe621944 2020-11-10 stsp .impl = diff_algo_myers,
51 fe621944 2020-11-10 stsp .permitted_state_size = 1024 * 1024 * sizeof(int),
52 fe621944 2020-11-10 stsp .fallback_algo = &myers_divide,
55 db97f624 2022-11-03 thomas const struct diff_algo_config patience = {
56 fe621944 2020-11-10 stsp .impl = diff_algo_patience,
57 fe621944 2020-11-10 stsp /* After subdivision, do Patience again: */
58 fe621944 2020-11-10 stsp .inner_algo = &patience,
59 fe621944 2020-11-10 stsp /* If subdivision failed, do Myers Divide et Impera: */
60 fe621944 2020-11-10 stsp .fallback_algo = &myers_then_myers_divide,
63 db97f624 2022-11-03 thomas const struct diff_algo_config myers_divide = {
64 fe621944 2020-11-10 stsp .impl = diff_algo_myers_divide,
65 fe621944 2020-11-10 stsp /* When division succeeded, start from the top: */
66 fe621944 2020-11-10 stsp .inner_algo = &myers_then_myers_divide,
67 fe621944 2020-11-10 stsp /* (fallback_algo = NULL implies diff_algo_none). */
70 fe621944 2020-11-10 stsp /* If the state for a forward-Myers is small enough, use Myers, otherwise first
71 fe621944 2020-11-10 stsp * do a Myers-divide. */
72 fe621944 2020-11-10 stsp const struct diff_config diff_config_myers_then_myers_divide = {
73 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
74 fe621944 2020-11-10 stsp .algo = &myers_then_myers_divide,
77 fe621944 2020-11-10 stsp /* If the state for a forward-Myers is small enough, use Myers, otherwise first
78 fe621944 2020-11-10 stsp * do a Patience. */
79 fe621944 2020-11-10 stsp const struct diff_config diff_config_myers_then_patience = {
80 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
81 fe621944 2020-11-10 stsp .algo = &myers_then_patience,
84 fe621944 2020-11-10 stsp /* Directly force Patience as a first divider of the source file. */
85 fe621944 2020-11-10 stsp const struct diff_config diff_config_patience = {
86 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
87 fe621944 2020-11-10 stsp .algo = &patience,
90 fe621944 2020-11-10 stsp /* Directly force Patience as a first divider of the source file. */
91 fe621944 2020-11-10 stsp const struct diff_config diff_config_no_algo = {
92 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
/*
 * Unmap the mappings at p1 (size1 bytes) and p2 (size2 bytes). A NULL
 * pointer is skipped. Both unmaps are always attempted; when both fail,
 * the error from the first one is the one returned.
 */
const struct got_error *
got_diffreg_close(char *p1, size_t size1, char *p2, size_t size2)
{
	const struct got_error *result = NULL;

	if (p1 != NULL) {
		if (munmap(p1, size1) == -1)
			result = got_error_from_errno("munmap");
	}

	if (p2 != NULL) {
		if (munmap(p2, size2) == -1 && result == NULL)
			result = got_error_from_errno("munmap");
	}

	return result;
}

107 cca5682e 2020-11-18 stsp const struct got_error *
108 cca5682e 2020-11-18 stsp got_diff_get_config(struct diff_config **cfg,
109 cca5682e 2020-11-18 stsp enum got_diff_algorithm algorithm,
110 cca5682e 2020-11-18 stsp diff_atomize_func_t atomize_func, void *atomize_func_data)
112 cca5682e 2020-11-18 stsp *cfg = calloc(1, sizeof(**cfg));
113 cca5682e 2020-11-18 stsp if (*cfg == NULL)
114 cca5682e 2020-11-18 stsp return got_error_from_errno("calloc");
116 fe621944 2020-11-10 stsp switch (algorithm) {
117 fe621944 2020-11-10 stsp case GOT_DIFF_ALGORITHM_PATIENCE:
118 cca5682e 2020-11-18 stsp (*cfg)->algo = &patience;
120 fe621944 2020-11-10 stsp case GOT_DIFF_ALGORITHM_MYERS:
121 cca5682e 2020-11-18 stsp (*cfg)->algo = &myers_then_myers_divide;
124 cca5682e 2020-11-18 stsp return got_error_msg(GOT_ERR_NOT_IMPL, "bad diff algorithm");
127 cca5682e 2020-11-18 stsp if (atomize_func) {
128 cca5682e 2020-11-18 stsp (*cfg)->atomize_func = atomize_func;
129 cca5682e 2020-11-18 stsp (*cfg)->atomize_func_data = atomize_func_data;
131 cca5682e 2020-11-18 stsp (*cfg)->atomize_func = diff_atomize_text_by_line;
133 cca5682e 2020-11-18 stsp (*cfg)->max_recursion_depth = 0; /* use default recursion depth */
135 cca5682e 2020-11-18 stsp return NULL;
138 7d283eee 2017-11-29 stsp const struct got_error *
139 72254787 2020-11-18 stsp got_diff_prepare_file(FILE *f, char **p, size_t *size,
140 fe621944 2020-11-10 stsp struct diff_data *diff_data, const struct diff_config *cfg,
141 64453f7e 2020-11-21 stsp int ignore_whitespace, int force_text_diff)
143 7d283eee 2017-11-29 stsp const struct got_error *err = NULL;
144 fe621944 2020-11-10 stsp struct stat st;
145 fe621944 2020-11-10 stsp int diff_flags = 0, rc;
149 fe621944 2020-11-10 stsp diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
150 fe621944 2020-11-10 stsp if (ignore_whitespace)
151 fe621944 2020-11-10 stsp diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
152 64453f7e 2020-11-21 stsp if (force_text_diff)
153 64453f7e 2020-11-21 stsp diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
155 72254787 2020-11-18 stsp if (fstat(fileno(f), &st) == -1) {
156 72254787 2020-11-18 stsp err = got_error_from_errno("fstat");
159 72254787 2020-11-18 stsp #ifndef GOT_DIFF_NO_MMAP
160 72254787 2020-11-18 stsp *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE,
161 72254787 2020-11-18 stsp fileno(f), 0);
162 72254787 2020-11-18 stsp if (*p == MAP_FAILED)
164 72254787 2020-11-18 stsp *p = NULL; /* fall back on file I/O */
166 72254787 2020-11-18 stsp rc = diff_atomize_file(diff_data, cfg, f, *p, st.st_size, diff_flags);
168 fe621944 2020-11-10 stsp err = got_error_set_errno(rc, "diff_atomize_file");
173 fe621944 2020-11-10 stsp diff_data_free(diff_data);
175 fe621944 2020-11-10 stsp *size = st.st_size;
176 fe621944 2020-11-10 stsp return err;
179 fe621944 2020-11-10 stsp const struct got_error *
180 fe621944 2020-11-10 stsp got_diffreg(struct got_diffreg_result **diffreg_result, FILE *f1, FILE *f2,
181 64453f7e 2020-11-21 stsp enum got_diff_algorithm algorithm, int ignore_whitespace,
182 64453f7e 2020-11-21 stsp int force_text_diff)
184 fe621944 2020-11-10 stsp const struct got_error *err = NULL;
185 cca5682e 2020-11-18 stsp struct diff_config *cfg = NULL;
186 fe621944 2020-11-10 stsp char *p1 = NULL, *p2 = NULL;
187 fe621944 2020-11-10 stsp size_t size1, size2;
188 fe621944 2020-11-10 stsp struct diff_data d_left, d_right;
189 fe621944 2020-11-10 stsp struct diff_data *left, *right;
190 fe621944 2020-11-10 stsp struct diff_result *diff_result;
192 fe621944 2020-11-10 stsp if (diffreg_result) {
193 fe621944 2020-11-10 stsp *diffreg_result = calloc(1, sizeof(**diffreg_result));
194 fe621944 2020-11-10 stsp if (*diffreg_result == NULL)
195 fe621944 2020-11-10 stsp return got_error_from_errno("calloc");
196 fe621944 2020-11-10 stsp left = &(*diffreg_result)->left;
197 fe621944 2020-11-10 stsp right = &(*diffreg_result)->right;
199 fe621944 2020-11-10 stsp memset(&d_left, 0, sizeof(d_left));
200 fe621944 2020-11-10 stsp memset(&d_right, 0, sizeof(d_right));
201 fe621944 2020-11-10 stsp left = &d_left;
202 fe621944 2020-11-10 stsp right = &d_right;
205 cca5682e 2020-11-18 stsp err = got_diff_get_config(&cfg, algorithm, NULL, NULL);
209 72254787 2020-11-18 stsp err = got_diff_prepare_file(f1, &p1, &size1, left, cfg,
210 64453f7e 2020-11-21 stsp ignore_whitespace, force_text_diff);
214 72254787 2020-11-18 stsp err = got_diff_prepare_file(f2, &p2, &size2, right, cfg,
215 64453f7e 2020-11-21 stsp ignore_whitespace, force_text_diff);
219 fe621944 2020-11-10 stsp diff_result = diff_main(cfg, left, right);
220 fe621944 2020-11-10 stsp if (diff_result == NULL) {
221 fe621944 2020-11-10 stsp err = got_error_set_errno(ENOMEM, "malloc");
224 fe621944 2020-11-10 stsp if (diff_result->rc != DIFF_RC_OK) {
225 fe621944 2020-11-10 stsp err = got_error_set_errno(diff_result->rc, "diff");
229 fe621944 2020-11-10 stsp if (diffreg_result) {
230 fe621944 2020-11-10 stsp (*diffreg_result)->result = diff_result;
231 fe621944 2020-11-10 stsp (*diffreg_result)->map1 = p1;
232 fe621944 2020-11-10 stsp (*diffreg_result)->size1 = size1;
233 fe621944 2020-11-10 stsp (*diffreg_result)->map2 = p2;
234 fe621944 2020-11-10 stsp (*diffreg_result)->size2 = size2;
238 fe621944 2020-11-10 stsp if (diffreg_result == NULL) {
239 fe621944 2020-11-10 stsp diff_data_free(left);
240 fe621944 2020-11-10 stsp diff_data_free(right);
243 dd2e2f52 2022-07-01 thomas got_diffreg_close(p1, size1, p2, size2);
244 fe621944 2020-11-10 stsp if (diffreg_result) {
245 fe621944 2020-11-10 stsp diff_data_free(left);
246 fe621944 2020-11-10 stsp diff_data_free(right);
247 fe621944 2020-11-10 stsp free(*diffreg_result);
248 fe621944 2020-11-10 stsp *diffreg_result = NULL;
252 fe621944 2020-11-10 stsp return err;
255 fe621944 2020-11-10 stsp const struct got_error *
256 82c78e96 2022-08-06 thomas got_diffreg_output(struct got_diff_line **lines, size_t *nlines,
257 1cb46f00 2020-11-21 stsp struct got_diffreg_result *diff_result, int f1_exists, int f2_exists,
258 fe621944 2020-11-10 stsp const char *path1, const char *path2,
259 fe621944 2020-11-10 stsp enum got_diff_output_format output_format, int context_lines, FILE *outfile)
261 fe621944 2020-11-10 stsp struct diff_input_info info = {
262 fe621944 2020-11-10 stsp .left_path = path1,
263 fe621944 2020-11-10 stsp .right_path = path2,
264 1cb46f00 2020-11-21 stsp .flags = 0,
267 fe621944 2020-11-10 stsp struct diff_output_info *output_info;
269 1cb46f00 2020-11-21 stsp if (!f1_exists)
270 1cb46f00 2020-11-21 stsp info.flags |= DIFF_INPUT_LEFT_NONEXISTENT;
271 1cb46f00 2020-11-21 stsp if (!f2_exists)
272 1cb46f00 2020-11-21 stsp info.flags |= DIFF_INPUT_RIGHT_NONEXISTENT;
274 fe621944 2020-11-10 stsp switch (output_format) {
275 fe621944 2020-11-10 stsp case GOT_DIFF_OUTPUT_UNIDIFF:
276 fe621944 2020-11-10 stsp rc = diff_output_unidiff(
277 82c78e96 2022-08-06 thomas lines ? &output_info : NULL, outfile, &info,
278 fe621944 2020-11-10 stsp diff_result->result, context_lines);
279 fe621944 2020-11-10 stsp if (rc != DIFF_RC_OK)
280 fe621944 2020-11-10 stsp return got_error_set_errno(rc, "diff_output_unidiff");
282 a42e5f4f 2022-09-02 thomas case GOT_DIFF_OUTPUT_PLAIN:
283 a42e5f4f 2022-09-02 thomas rc = diff_output_plain(lines ? &output_info : NULL,
284 7a800a02 2022-09-03 thomas outfile, &info, diff_result->result, 1);
285 fe621944 2020-11-10 stsp if (rc != DIFF_RC_OK)
286 fe621944 2020-11-10 stsp return got_error_set_errno(rc, "diff_output_edscript");
291 82c78e96 2022-08-06 thomas if (lines && *lines) {
292 fe621944 2020-11-10 stsp if (output_info->line_offsets.len > 0) {
293 82c78e96 2022-08-06 thomas struct got_diff_line *p;
294 82c78e96 2022-08-06 thomas off_t prev_offset = 0, *o;
295 82c78e96 2022-08-06 thomas uint8_t *o2;
296 fe621944 2020-11-10 stsp int i, len;
297 fe621944 2020-11-10 stsp if (*nlines > 0) {
298 82c78e96 2022-08-06 thomas prev_offset = (*lines)[*nlines - 1].offset;
300 fe621944 2020-11-10 stsp * First line offset is always zero. Skip it
301 fe621944 2020-11-10 stsp * when appending to a pre-populated array.
303 fe621944 2020-11-10 stsp o = &output_info->line_offsets.head[1];
304 82c78e96 2022-08-06 thomas o2 = &output_info->line_types.head[1];
305 fe621944 2020-11-10 stsp len = output_info->line_offsets.len - 1;
307 fe621944 2020-11-10 stsp o = &output_info->line_offsets.head[0];
308 82c78e96 2022-08-06 thomas o2 = &output_info->line_types.head[0];
309 fe621944 2020-11-10 stsp len = output_info->line_offsets.len;
311 82c78e96 2022-08-06 thomas p = reallocarray(*lines, *nlines + len, sizeof(**lines));
312 fe621944 2020-11-10 stsp if (p == NULL)
313 fe621944 2020-11-10 stsp return got_error_from_errno("calloc");
314 82c78e96 2022-08-06 thomas for (i = 0; i < len; i++) {
315 82c78e96 2022-08-06 thomas p[*nlines + i].offset = o[i] + prev_offset;
316 82c78e96 2022-08-06 thomas p[*nlines + i].type = o2[i];
318 82c78e96 2022-08-06 thomas *lines = p;
319 fe621944 2020-11-10 stsp *nlines += len;
321 fe621944 2020-11-10 stsp diff_output_info_free(output_info);
324 fe621944 2020-11-10 stsp return NULL;
327 fe621944 2020-11-10 stsp const struct got_error *
328 fe621944 2020-11-10 stsp got_diffreg_result_free(struct got_diffreg_result *diffreg_result)
330 fe621944 2020-11-10 stsp const struct got_error *err;
332 fe621944 2020-11-10 stsp diff_result_free(diffreg_result->result);
333 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->left);
334 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->right);
335 dd2e2f52 2022-07-01 thomas err = got_diffreg_close(diffreg_result->map1, diffreg_result->size1,
336 fe621944 2020-11-10 stsp diffreg_result->map2, diffreg_result->size2);
337 fe621944 2020-11-10 stsp free(diffreg_result);
338 fe621944 2020-11-10 stsp return err;
341 fe621944 2020-11-10 stsp const struct got_error *
342 fe621944 2020-11-10 stsp got_diffreg_result_free_left(struct got_diffreg_result *diffreg_result)
344 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->left);
345 fe621944 2020-11-10 stsp memset(&diffreg_result->left, 0, sizeof(diffreg_result->left));
346 dd2e2f52 2022-07-01 thomas return got_diffreg_close(diffreg_result->map1, diffreg_result->size1,
347 dd2e2f52 2022-07-01 thomas NULL, 0);
350 fe621944 2020-11-10 stsp const struct got_error *
351 fe621944 2020-11-10 stsp got_diffreg_result_free_right(struct got_diffreg_result *diffreg_result)
353 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->right);
354 fe621944 2020-11-10 stsp memset(&diffreg_result->right, 0, sizeof(diffreg_result->right));
355 dd2e2f52 2022-07-01 thomas return got_diffreg_close(NULL, 0, diffreg_result->map2,
356 dd2e2f52 2022-07-01 thomas diffreg_result->size2);