2 fe621944 2020-11-10 stsp * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
3 fe621944 2020-11-10 stsp * Copyright (c) 2020 Stefan Sperling <stsp@openbsd.org>
5 fe621944 2020-11-10 stsp * Permission to use, copy, modify, and distribute this software for any
6 fe621944 2020-11-10 stsp * purpose with or without fee is hereby granted, provided that the above
7 fe621944 2020-11-10 stsp * copyright notice and this permission notice appear in all copies.
9 fe621944 2020-11-10 stsp * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 fe621944 2020-11-10 stsp * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 fe621944 2020-11-10 stsp * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 fe621944 2020-11-10 stsp * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 fe621944 2020-11-10 stsp * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 fe621944 2020-11-10 stsp * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 fe621944 2020-11-10 stsp * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 fe621944 2020-11-10 stsp #include <sys/mman.h>
19 574ed2c3 2017-11-29 stsp #include <sys/stat.h>
20 7d283eee 2017-11-29 stsp #include <sys/queue.h>
22 574ed2c3 2017-11-29 stsp #include <errno.h>
23 574ed2c3 2017-11-29 stsp #include <stdio.h>
24 574ed2c3 2017-11-29 stsp #include <stdlib.h>
25 574ed2c3 2017-11-29 stsp #include <string.h>
27 7d283eee 2017-11-29 stsp #include "got_object.h"
28 fe621944 2020-11-10 stsp #include "got_opentemp.h"
29 fe621944 2020-11-10 stsp #include "got_error.h"
31 718b3ab0 2018-03-17 stsp #include "got_lib_diff.h"
/*
 * Forward declarations. The algorithm configurations defined below point at
 * one another through their inner_algo and fallback_algo members, so each
 * name has to be visible before the first initializer that refers to it.
 */
const struct diff_algo_config myers_then_patience;
const struct diff_algo_config myers_then_myers_divide;
const struct diff_algo_config patience;
const struct diff_algo_config myers_divide;
38 fe621944 2020-11-10 stsp const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
39 fe621944 2020-11-10 stsp .impl = diff_algo_myers,
40 fe621944 2020-11-10 stsp .permitted_state_size = 1024 * 1024 * sizeof(int),
41 fe621944 2020-11-10 stsp .fallback_algo = &patience,
44 fe621944 2020-11-10 stsp const struct diff_algo_config myers_then_myers_divide =
45 fe621944 2020-11-10 stsp (struct diff_algo_config){
46 fe621944 2020-11-10 stsp .impl = diff_algo_myers,
47 fe621944 2020-11-10 stsp .permitted_state_size = 1024 * 1024 * sizeof(int),
48 fe621944 2020-11-10 stsp .fallback_algo = &myers_divide,
51 fe621944 2020-11-10 stsp const struct diff_algo_config patience = (struct diff_algo_config){
52 fe621944 2020-11-10 stsp .impl = diff_algo_patience,
53 fe621944 2020-11-10 stsp /* After subdivision, do Patience again: */
54 fe621944 2020-11-10 stsp .inner_algo = &patience,
55 fe621944 2020-11-10 stsp /* If subdivision failed, do Myers Divide et Impera: */
56 fe621944 2020-11-10 stsp .fallback_algo = &myers_then_myers_divide,
59 fe621944 2020-11-10 stsp const struct diff_algo_config myers_divide = (struct diff_algo_config){
60 fe621944 2020-11-10 stsp .impl = diff_algo_myers_divide,
61 fe621944 2020-11-10 stsp /* When division succeeded, start from the top: */
62 fe621944 2020-11-10 stsp .inner_algo = &myers_then_myers_divide,
63 fe621944 2020-11-10 stsp /* (fallback_algo = NULL implies diff_algo_none). */
66 fe621944 2020-11-10 stsp /* If the state for a forward-Myers is small enough, use Myers, otherwise first
67 fe621944 2020-11-10 stsp * do a Myers-divide. */
68 fe621944 2020-11-10 stsp const struct diff_config diff_config_myers_then_myers_divide = {
69 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
70 fe621944 2020-11-10 stsp .algo = &myers_then_myers_divide,
73 fe621944 2020-11-10 stsp /* If the state for a forward-Myers is small enough, use Myers, otherwise first
74 fe621944 2020-11-10 stsp * do a Patience. */
75 fe621944 2020-11-10 stsp const struct diff_config diff_config_myers_then_patience = {
76 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
77 fe621944 2020-11-10 stsp .algo = &myers_then_patience,
80 fe621944 2020-11-10 stsp /* Directly force Patience as a first divider of the source file. */
81 fe621944 2020-11-10 stsp const struct diff_config diff_config_patience = {
82 fe621944 2020-11-10 stsp .atomize_func = diff_atomize_text_by_line,
83 fe621944 2020-11-10 stsp .algo = &patience,
/*
 * Only the line atomizer is configured here; .algo is not set and is
 * therefore left NULL.
 * NOTE(review): presumably this makes the diff library apply no diff
 * algorithm at all (cf. diff_algo_none) -- confirm against the library.
 */
const struct diff_config diff_config_no_algo = {
	.atomize_func = diff_atomize_text_by_line,
};
/*
 * Release the resources backing both sides of a diff: unmap p1 and p2
 * (using size1 and size2) and close f1 and f2. NULL arguments are skipped.
 * Every step is attempted even if an earlier one failed; the first error
 * encountered is returned, or NULL if nothing failed.
 */
const struct got_error *
got_diffreg_close(FILE *f1, char *p1, size_t size1,
    FILE *f2, char *p2, size_t size2)
{
	const struct got_error *first_err = NULL;

	if (p1 != NULL) {
		if (munmap(p1, size1) == -1)
			first_err = got_error_from_errno("munmap");
	}
	if (p2 != NULL) {
		if (munmap(p2, size2) == -1 && first_err == NULL)
			first_err = got_error_from_errno("munmap");
	}
	if (f1 != NULL) {
		if (fclose(f1) != 0 && first_err == NULL)
			first_err = got_error_from_errno("fclose");
	}
	if (f2 != NULL) {
		if (fclose(f2) != 0 && first_err == NULL)
			first_err = got_error_from_errno("fclose");
	}

	return first_err;
}
108 fe621944 2020-11-10 stsp const struct diff_config *
109 fe621944 2020-11-10 stsp got_diff_get_config(enum got_diff_algorithm algorithm)
111 fe621944 2020-11-10 stsp switch (algorithm) {
112 fe621944 2020-11-10 stsp case GOT_DIFF_ALGORITHM_PATIENCE:
113 fe621944 2020-11-10 stsp return &diff_config_patience;
114 fe621944 2020-11-10 stsp case GOT_DIFF_ALGORITHM_MYERS:
115 fe621944 2020-11-10 stsp return &diff_config_myers_then_myers_divide;
117 fe621944 2020-11-10 stsp return NULL; /* should not happen */
120 7d283eee 2017-11-29 stsp const struct got_error *
121 fe621944 2020-11-10 stsp got_diff_prepare_file(FILE **f, char **p, int *f_created, size_t *size,
122 fe621944 2020-11-10 stsp struct diff_data *diff_data, const struct diff_config *cfg,
123 fe621944 2020-11-10 stsp int ignore_whitespace)
125 7d283eee 2017-11-29 stsp const struct got_error *err = NULL;
126 fe621944 2020-11-10 stsp struct stat st;
127 fe621944 2020-11-10 stsp int diff_flags = 0, rc;
131 fe621944 2020-11-10 stsp diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
132 fe621944 2020-11-10 stsp if (ignore_whitespace)
133 fe621944 2020-11-10 stsp diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
135 fe621944 2020-11-10 stsp if (f && *f) {
136 fe621944 2020-11-10 stsp if (fstat(fileno(*f), &st) == -1) {
137 fe621944 2020-11-10 stsp err = got_error_from_errno("fstat");
140 fe621944 2020-11-10 stsp #ifndef GOT_DIFF_NO_MMAP
141 fe621944 2020-11-10 stsp *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE,
142 fe621944 2020-11-10 stsp fileno(*f), 0);
143 fe621944 2020-11-10 stsp if (*p == MAP_FAILED)
145 fe621944 2020-11-10 stsp *p = NULL; /* fall back on file I/O */
147 fe621944 2020-11-10 stsp *f_created = 1;
148 fe621944 2020-11-10 stsp st.st_size = 0;
149 fe621944 2020-11-10 stsp *f = got_opentemp();
150 fe621944 2020-11-10 stsp if (*f == NULL) {
151 fe621944 2020-11-10 stsp err = got_error_from_errno("got_opentemp");
156 fe621944 2020-11-10 stsp rc = diff_atomize_file(diff_data, cfg, *f, *p, st.st_size, diff_flags);
158 fe621944 2020-11-10 stsp err = got_error_set_errno(rc, "diff_atomize_file");
163 fe621944 2020-11-10 stsp diff_data_free(diff_data);
165 fe621944 2020-11-10 stsp *size = st.st_size;
166 fe621944 2020-11-10 stsp return err;
169 fe621944 2020-11-10 stsp const struct got_error *
170 fe621944 2020-11-10 stsp got_diffreg_prepared_files(struct got_diffreg_result **diffreg_result,
171 fe621944 2020-11-10 stsp const struct diff_config *cfg,
172 fe621944 2020-11-10 stsp struct diff_data *left, FILE *f1, char *p1, size_t size1,
173 fe621944 2020-11-10 stsp struct diff_data *right, FILE *f2, char *p2, size_t size2)
175 fe621944 2020-11-10 stsp const struct got_error *err = NULL;
176 fe621944 2020-11-10 stsp struct diff_result *diff_result;
178 fe621944 2020-11-10 stsp *diffreg_result = calloc(1, sizeof(**diffreg_result));
179 fe621944 2020-11-10 stsp if (*diffreg_result == NULL)
180 fe621944 2020-11-10 stsp return got_error_from_errno("calloc");
182 fe621944 2020-11-10 stsp diff_result = diff_main(cfg, left, right);
183 fe621944 2020-11-10 stsp if (diff_result == NULL) {
184 fe621944 2020-11-10 stsp err = got_error_set_errno(ENOMEM, "malloc");
187 fe621944 2020-11-10 stsp if (diff_result->rc != DIFF_RC_OK) {
188 fe621944 2020-11-10 stsp err = got_error_set_errno(diff_result->rc, "diff");
192 fe621944 2020-11-10 stsp (*diffreg_result)->result = diff_result;
193 fe621944 2020-11-10 stsp (*diffreg_result)->f1 = f1;
194 fe621944 2020-11-10 stsp (*diffreg_result)->map1 = p1;
195 fe621944 2020-11-10 stsp (*diffreg_result)->size1 = size1;
196 fe621944 2020-11-10 stsp (*diffreg_result)->f2 = f2;
197 fe621944 2020-11-10 stsp (*diffreg_result)->map2 = p2;
198 fe621944 2020-11-10 stsp (*diffreg_result)->size2 = size2;
201 fe621944 2020-11-10 stsp if (diffreg_result) {
202 fe621944 2020-11-10 stsp free(*diffreg_result);
203 fe621944 2020-11-10 stsp *diffreg_result = NULL;
207 fe621944 2020-11-10 stsp return err;
210 fe621944 2020-11-10 stsp const struct got_error *
211 fe621944 2020-11-10 stsp got_diffreg(struct got_diffreg_result **diffreg_result, FILE *f1, FILE *f2,
212 fe621944 2020-11-10 stsp enum got_diff_algorithm algorithm, int ignore_whitespace)
214 fe621944 2020-11-10 stsp const struct got_error *err = NULL;
215 fe621944 2020-11-10 stsp const struct diff_config *cfg;
216 fe621944 2020-11-10 stsp char *p1 = NULL, *p2 = NULL;
217 fe621944 2020-11-10 stsp int f1_created = 0, f2_created = 0;
218 fe621944 2020-11-10 stsp size_t size1, size2;
219 fe621944 2020-11-10 stsp struct diff_data d_left, d_right;
220 fe621944 2020-11-10 stsp struct diff_data *left, *right;
221 fe621944 2020-11-10 stsp struct diff_result *diff_result;
223 fe621944 2020-11-10 stsp if (diffreg_result) {
224 fe621944 2020-11-10 stsp *diffreg_result = calloc(1, sizeof(**diffreg_result));
225 fe621944 2020-11-10 stsp if (*diffreg_result == NULL)
226 fe621944 2020-11-10 stsp return got_error_from_errno("calloc");
227 fe621944 2020-11-10 stsp left = &(*diffreg_result)->left;
228 fe621944 2020-11-10 stsp right = &(*diffreg_result)->right;
230 fe621944 2020-11-10 stsp memset(&d_left, 0, sizeof(d_left));
231 fe621944 2020-11-10 stsp memset(&d_right, 0, sizeof(d_right));
232 fe621944 2020-11-10 stsp left = &d_left;
233 fe621944 2020-11-10 stsp right = &d_right;
236 fe621944 2020-11-10 stsp cfg = got_diff_get_config(algorithm);
237 fe621944 2020-11-10 stsp if (cfg == NULL) {
238 fe621944 2020-11-10 stsp err = got_error(GOT_ERR_NOT_IMPL);
242 fe621944 2020-11-10 stsp err = got_diff_prepare_file(&f1, &p1, &f1_created, &size1,
243 fe621944 2020-11-10 stsp left, cfg, ignore_whitespace);
247 fe621944 2020-11-10 stsp err = got_diff_prepare_file(&f2, &p2, &f2_created, &size2,
248 fe621944 2020-11-10 stsp right, cfg, ignore_whitespace);
252 fe621944 2020-11-10 stsp diff_result = diff_main(cfg, left, right);
253 fe621944 2020-11-10 stsp if (diff_result == NULL) {
254 fe621944 2020-11-10 stsp err = got_error_set_errno(ENOMEM, "malloc");
257 fe621944 2020-11-10 stsp if (diff_result->rc != DIFF_RC_OK) {
258 fe621944 2020-11-10 stsp err = got_error_set_errno(diff_result->rc, "diff");
262 fe621944 2020-11-10 stsp if (diffreg_result) {
263 fe621944 2020-11-10 stsp (*diffreg_result)->result = diff_result;
264 fe621944 2020-11-10 stsp if (f1_created)
265 fe621944 2020-11-10 stsp (*diffreg_result)->f1 = f1;
266 fe621944 2020-11-10 stsp (*diffreg_result)->map1 = p1;
267 fe621944 2020-11-10 stsp (*diffreg_result)->size1 = size1;
268 fe621944 2020-11-10 stsp if (f2_created)
269 fe621944 2020-11-10 stsp (*diffreg_result)->f2 = f2;
270 fe621944 2020-11-10 stsp (*diffreg_result)->map2 = p2;
271 fe621944 2020-11-10 stsp (*diffreg_result)->size2 = size2;
274 fe621944 2020-11-10 stsp if (diffreg_result == NULL) {
275 fe621944 2020-11-10 stsp diff_data_free(left);
276 fe621944 2020-11-10 stsp diff_data_free(right);
279 fe621944 2020-11-10 stsp got_diffreg_close(f1_created ? f1 : NULL, p1, size1,
280 fe621944 2020-11-10 stsp f2_created ? f2 : NULL, p2, size2);
281 fe621944 2020-11-10 stsp if (diffreg_result) {
282 fe621944 2020-11-10 stsp diff_data_free(left);
283 fe621944 2020-11-10 stsp diff_data_free(right);
284 fe621944 2020-11-10 stsp free(*diffreg_result);
285 fe621944 2020-11-10 stsp *diffreg_result = NULL;
289 fe621944 2020-11-10 stsp return err;
292 fe621944 2020-11-10 stsp const struct got_error *
293 fe621944 2020-11-10 stsp got_diffreg_output(off_t **line_offsets, size_t *nlines,
294 fe621944 2020-11-10 stsp struct got_diffreg_result *diff_result, FILE *f1, FILE *f2,
295 fe621944 2020-11-10 stsp const char *path1, const char *path2,
296 fe621944 2020-11-10 stsp enum got_diff_output_format output_format, int context_lines, FILE *outfile)
298 fe621944 2020-11-10 stsp struct diff_input_info info = {
299 fe621944 2020-11-10 stsp .left_path = path1,
300 fe621944 2020-11-10 stsp .right_path = path2,
303 fe621944 2020-11-10 stsp struct diff_output_info *output_info;
305 fe621944 2020-11-10 stsp switch (output_format) {
306 fe621944 2020-11-10 stsp case GOT_DIFF_OUTPUT_UNIDIFF:
307 fe621944 2020-11-10 stsp rc = diff_output_unidiff(
308 fe621944 2020-11-10 stsp line_offsets ? &output_info : NULL, outfile, &info,
309 fe621944 2020-11-10 stsp diff_result->result, context_lines);
310 fe621944 2020-11-10 stsp if (rc != DIFF_RC_OK)
311 fe621944 2020-11-10 stsp return got_error_set_errno(rc, "diff_output_unidiff");
313 fe621944 2020-11-10 stsp case GOT_DIFF_OUTPUT_EDSCRIPT:
314 fe621944 2020-11-10 stsp rc = diff_output_edscript(line_offsets ? &output_info : NULL,
315 fe621944 2020-11-10 stsp outfile, &info, diff_result->result);
316 fe621944 2020-11-10 stsp if (rc != DIFF_RC_OK)
317 fe621944 2020-11-10 stsp return got_error_set_errno(rc, "diff_output_edscript");
322 fe621944 2020-11-10 stsp if (line_offsets && *line_offsets) {
323 fe621944 2020-11-10 stsp if (output_info->line_offsets.len > 0) {
324 fe621944 2020-11-10 stsp off_t prev_offset = 0, *p, *o;
325 fe621944 2020-11-10 stsp int i, len;
326 fe621944 2020-11-10 stsp if (*nlines > 0) {
327 fe621944 2020-11-10 stsp prev_offset = (*line_offsets)[*nlines - 1];
329 fe621944 2020-11-10 stsp * First line offset is always zero. Skip it
330 fe621944 2020-11-10 stsp * when appending to a pre-populated array.
332 fe621944 2020-11-10 stsp o = &output_info->line_offsets.head[1];
333 fe621944 2020-11-10 stsp len = output_info->line_offsets.len - 1;
335 fe621944 2020-11-10 stsp o = &output_info->line_offsets.head[0];
336 fe621944 2020-11-10 stsp len = output_info->line_offsets.len;
338 fe621944 2020-11-10 stsp p = reallocarray(*line_offsets, *nlines + len,
339 fe621944 2020-11-10 stsp sizeof(off_t));
340 fe621944 2020-11-10 stsp if (p == NULL)
341 fe621944 2020-11-10 stsp return got_error_from_errno("calloc");
342 fe621944 2020-11-10 stsp for (i = 0; i < len; i++)
343 fe621944 2020-11-10 stsp p[*nlines + i] = o[i] + prev_offset;
344 fe621944 2020-11-10 stsp *line_offsets = p;
345 fe621944 2020-11-10 stsp *nlines += len;
347 fe621944 2020-11-10 stsp diff_output_info_free(output_info);
350 fe621944 2020-11-10 stsp return NULL;
353 fe621944 2020-11-10 stsp const struct got_error *
354 fe621944 2020-11-10 stsp got_diffreg_result_free(struct got_diffreg_result *diffreg_result)
356 fe621944 2020-11-10 stsp const struct got_error *err;
358 fe621944 2020-11-10 stsp diff_result_free(diffreg_result->result);
359 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->left);
360 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->right);
361 fe621944 2020-11-10 stsp err = got_diffreg_close(diffreg_result->f1, diffreg_result->map1,
362 fe621944 2020-11-10 stsp diffreg_result->size1, diffreg_result->f2,
363 fe621944 2020-11-10 stsp diffreg_result->map2, diffreg_result->size2);
364 fe621944 2020-11-10 stsp free(diffreg_result);
365 fe621944 2020-11-10 stsp return err;
368 fe621944 2020-11-10 stsp const struct got_error *
369 fe621944 2020-11-10 stsp got_diffreg_result_free_left(struct got_diffreg_result *diffreg_result)
371 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->left);
372 fe621944 2020-11-10 stsp memset(&diffreg_result->left, 0, sizeof(diffreg_result->left));
373 fe621944 2020-11-10 stsp return got_diffreg_close(diffreg_result->f1, diffreg_result->map1,
374 fe621944 2020-11-10 stsp diffreg_result->size1, NULL, NULL, 0);
377 fe621944 2020-11-10 stsp const struct got_error *
378 fe621944 2020-11-10 stsp got_diffreg_result_free_right(struct got_diffreg_result *diffreg_result)
380 fe621944 2020-11-10 stsp diff_data_free(&diffreg_result->right);
381 fe621944 2020-11-10 stsp memset(&diffreg_result->right, 0, sizeof(diffreg_result->right));
382 fe621944 2020-11-10 stsp return got_diffreg_close(NULL, NULL, 0, diffreg_result->f2,
383 fe621944 2020-11-10 stsp diffreg_result->map2, diffreg_result->size2);
387 fe621944 2020-11-10 stsp got_diff_dump_change(FILE *outfile, struct diff_chunk *change,
388 fe621944 2020-11-10 stsp FILE *f1, FILE *f2)