Blame


1 3b0f3d61 2020-01-22 neels /* Generic infrastructure to implement various diff algorithms. */
2 3b0f3d61 2020-01-22 neels /*
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 3b0f3d61 2020-01-22 neels *
5 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
6 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
7 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
8 3b0f3d61 2020-01-22 neels *
9 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 3b0f3d61 2020-01-22 neels */
17 3b0f3d61 2020-01-22 neels
18 54fa8228 2020-01-27 neels #ifndef MAX
19 54fa8228 2020-01-27 neels #define MAX(A,B) ((A)>(B)?(A):(B))
20 54fa8228 2020-01-27 neels #endif
21 54fa8228 2020-01-27 neels #ifndef MIN
22 54fa8228 2020-01-27 neels #define MIN(A,B) ((A)<(B)?(A):(B))
23 54fa8228 2020-01-27 neels #endif
24 54fa8228 2020-01-27 neels
25 d362ea2e 2020-07-25 stsp struct diff_range {
26 54fa8228 2020-01-27 neels int start;
27 54fa8228 2020-01-27 neels int end;
28 54fa8228 2020-01-27 neels };
29 54fa8228 2020-01-27 neels
30 61a7b578 2020-05-06 neels static inline bool
31 d362ea2e 2020-07-25 stsp diff_range_empty(const struct diff_range *r)
32 54fa8228 2020-01-27 neels {
33 54fa8228 2020-01-27 neels return r->start == r->end;
34 54fa8228 2020-01-27 neels }
35 54fa8228 2020-01-27 neels
36 61a7b578 2020-05-06 neels static inline bool
37 d362ea2e 2020-07-25 stsp diff_ranges_touch(const struct diff_range *a, const struct diff_range *b)
38 54fa8228 2020-01-27 neels {
39 54fa8228 2020-01-27 neels return (a->end >= b->start) && (a->start <= b->end);
40 54fa8228 2020-01-27 neels }
41 54fa8228 2020-01-27 neels
42 61a7b578 2020-05-06 neels static inline void
43 d362ea2e 2020-07-25 stsp diff_ranges_merge(struct diff_range *a, const struct diff_range *b)
44 54fa8228 2020-01-27 neels {
45 d362ea2e 2020-07-25 stsp *a = (struct diff_range){
46 54fa8228 2020-01-27 neels .start = MIN(a->start, b->start),
47 54fa8228 2020-01-27 neels .end = MAX(a->end, b->end),
48 54fa8228 2020-01-27 neels };
49 54fa8228 2020-01-27 neels }
50 54fa8228 2020-01-27 neels
51 61a7b578 2020-05-06 neels static inline int
52 d362ea2e 2020-07-25 stsp diff_range_len(const struct diff_range *r)
53 54fa8228 2020-01-27 neels {
54 54fa8228 2020-01-27 neels if (!r)
55 54fa8228 2020-01-27 neels return 0;
56 54fa8228 2020-01-27 neels return r->end - r->start;
57 54fa8228 2020-01-27 neels }
58 54fa8228 2020-01-27 neels
59 3b0f3d61 2020-01-22 neels /* List of all possible return codes of a diff invocation. */
60 3e6cba3a 2020-08-13 stsp #define DIFF_RC_USE_DIFF_ALGO_FALLBACK -1
61 3e6cba3a 2020-08-13 stsp #define DIFF_RC_OK 0
62 3e6cba3a 2020-08-13 stsp /* Any positive return values are errno values from sys/errno.h */
63 3b0f3d61 2020-01-22 neels
64 c6eecea3 2020-07-26 stsp struct diff_data;
65 c6eecea3 2020-07-26 stsp
66 3b0f3d61 2020-01-22 neels struct diff_atom {
67 c6eecea3 2020-07-26 stsp struct diff_data *d; /* back pointer to associated diff data */
68 3b0f3d61 2020-01-22 neels
69 c6eecea3 2020-07-26 stsp off_t pos; /* if not memory-mapped */
70 c6eecea3 2020-07-26 stsp const uint8_t *at; /* if memory-mapped */
71 c6eecea3 2020-07-26 stsp off_t len;
72 c6eecea3 2020-07-26 stsp
73 0d27172a 2020-05-06 neels /* This hash is just a very cheap speed up for finding *mismatching*
74 0d27172a 2020-05-06 neels * atoms. When hashes match, we still need to compare entire atoms to
75 0d27172a 2020-05-06 neels * find out whether they are indeed identical or not. */
76 3b0f3d61 2020-01-22 neels unsigned int hash;
77 3b0f3d61 2020-01-22 neels
78 3b0f3d61 2020-01-22 neels /* State for the Patience Diff algorithm */
79 3b0f3d61 2020-01-22 neels /* TODO: keep a separate array for the patience state */
80 3b0f3d61 2020-01-22 neels struct {
81 3b0f3d61 2020-01-22 neels bool unique_here;
82 3b0f3d61 2020-01-22 neels bool unique_in_both;
83 3b0f3d61 2020-01-22 neels struct diff_atom *pos_in_other;
84 3b0f3d61 2020-01-22 neels struct diff_atom *prev_stack;
85 d362ea2e 2020-07-25 stsp struct diff_range identical_lines;
86 3b0f3d61 2020-01-22 neels } patience;
87 3b0f3d61 2020-01-22 neels };
88 3b0f3d61 2020-01-22 neels
89 0d27172a 2020-05-06 neels /* For each file, there is a "root" struct diff_data referencing the entire
90 0d27172a 2020-05-06 neels * file, which the atoms are parsed from. In recursion of diff algorithm, there
91 0d27172a 2020-05-06 neels * may be "child" struct diff_data only referencing a subsection of the file,
92 0d27172a 2020-05-06 neels * re-using the atoms parsing. For "root" structs, atoms_allocated will be
93 0d27172a 2020-05-06 neels * nonzero, indicating that the array of atoms is owned by that struct. For
94 0d27172a 2020-05-06 neels * "child" structs, atoms_allocated == 0, to indicate that the struct is
95 0d27172a 2020-05-06 neels * referencing a subset of atoms. */
96 3b0f3d61 2020-01-22 neels struct diff_data {
97 c6eecea3 2020-07-26 stsp int fd; /* if root diff_data and not memory-mapped */
98 c6eecea3 2020-07-26 stsp off_t pos; /* if not memory-mapped */
99 c6eecea3 2020-07-26 stsp const uint8_t *data; /* if memory-mapped */
100 c6eecea3 2020-07-26 stsp off_t len;
101 c6eecea3 2020-07-26 stsp
102 3b0f3d61 2020-01-22 neels ARRAYLIST(struct diff_atom) atoms;
103 3b0f3d61 2020-01-22 neels struct diff_data *root;
104 732e8ee0 2020-09-20 stsp
105 732e8ee0 2020-09-20 stsp bool ignore_whitespace;
106 3b0f3d61 2020-01-22 neels };
107 3b0f3d61 2020-01-22 neels
108 3b0f3d61 2020-01-22 neels void diff_data_free(struct diff_data *diff_data);
109 3b0f3d61 2020-01-22 neels
110 b3fb4686 2020-09-20 neels int
111 b3fb4686 2020-09-20 neels diff_atom_cmp(int *cmp,
112 b3fb4686 2020-09-20 neels const struct diff_atom *left,
113 b3fb4686 2020-09-20 neels const struct diff_atom *right);
114 b3fb4686 2020-09-20 neels
115 c6eecea3 2020-07-26 stsp /* Indicate whether two given diff atoms match. */
116 b3fb4686 2020-09-20 neels int
117 b3fb4686 2020-09-20 neels diff_atom_same(bool *same,
118 b3fb4686 2020-09-20 neels const struct diff_atom *left,
119 b3fb4686 2020-09-20 neels const struct diff_atom *right);
120 c6eecea3 2020-07-26 stsp
121 0d27172a 2020-05-06 neels /* The atom's index in the entire file. For atoms divided by lines of text, this
122 0d27172a 2020-05-06 neels * yields the line number (starting with 0). Also works for diff_data that
123 0d27172a 2020-05-06 neels * reference only a subsection of a file, always reflecting the global position
124 0d27172a 2020-05-06 neels * in the file (and not the relative position within the subsection). */
125 3b0f3d61 2020-01-22 neels #define diff_atom_root_idx(DIFF_DATA, ATOM) \
126 15ac50e2 2020-05-05 neels ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
127 15ac50e2 2020-05-05 neels ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
128 15ac50e2 2020-05-05 neels : (DIFF_DATA)->root->atoms.len)
129 3b0f3d61 2020-01-22 neels
130 0d27172a 2020-05-06 neels /* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
131 0d27172a 2020-05-06 neels * yields the line number (starting with 0). */
132 3b0f3d61 2020-01-22 neels #define diff_atom_idx(DIFF_DATA, ATOM) \
133 15ac50e2 2020-05-05 neels ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
134 15ac50e2 2020-05-05 neels ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
135 15ac50e2 2020-05-05 neels : (DIFF_DATA)->atoms.len)
136 3b0f3d61 2020-01-22 neels
137 3b0f3d61 2020-01-22 neels #define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
138 3b0f3d61 2020-01-22 neels for ((ATOM) = (FIRST_ATOM); \
139 0d27172a 2020-05-06 neels (ATOM) \
140 0d27172a 2020-05-06 neels && ((ATOM) >= (FIRST_ATOM)) \
141 0d27172a 2020-05-06 neels && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
142 3b0f3d61 2020-01-22 neels (ATOM)++)
143 3b0f3d61 2020-01-22 neels
144 3b0f3d61 2020-01-22 neels #define diff_data_foreach_atom(ATOM, DIFF_DATA) \
145 3b0f3d61 2020-01-22 neels foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
146 3b0f3d61 2020-01-22 neels
147 3b0f3d61 2020-01-22 neels #define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
148 3b0f3d61 2020-01-22 neels for ((ATOM) = (FROM); \
149 0d27172a 2020-05-06 neels (ATOM) \
150 0d27172a 2020-05-06 neels && ((ATOM) >= (DIFF_DATA)->atoms.head) \
151 0d27172a 2020-05-06 neels && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
152 3b0f3d61 2020-01-22 neels (ATOM)++)
153 3b0f3d61 2020-01-22 neels
154 0d27172a 2020-05-06 neels /* A diff chunk represents a set of atoms on the left and/or a set of atoms on
155 0d27172a 2020-05-06 neels * the right.
156 3b0f3d61 2020-01-22 neels *
157 3b0f3d61 2020-01-22 neels * If solved == false:
158 0d27172a 2020-05-06 neels * The diff algorithm has divided the source file, and this is a chunk that the
159 0d27172a 2020-05-06 neels * inner_algo should run on next.
160 3b0f3d61 2020-01-22 neels * The lines on the left should be diffed against the lines on the right.
161 0d27172a 2020-05-06 neels * (If there are no left lines or no right lines, it implies solved == true,
162 0d27172a 2020-05-06 neels * because there is nothing to diff.)
163 3b0f3d61 2020-01-22 neels *
164 3b0f3d61 2020-01-22 neels * If solved == true:
165 0d27172a 2020-05-06 neels * If there are only left atoms, it is a chunk removing atoms from the left ("a
166 0d27172a 2020-05-06 neels * minus chunk").
167 0d27172a 2020-05-06 neels * If there are only right atoms, it is a chunk adding atoms from the right ("a
168 0d27172a 2020-05-06 neels * plus chunk").
169 0d27172a 2020-05-06 neels * If there are both left and right lines, it is a chunk of equal content on
170 0d27172a 2020-05-06 neels * both sides, and left_count == right_count:
171 3b0f3d61 2020-01-22 neels *
172 3b0f3d61 2020-01-22 neels * - foo }
173 3b0f3d61 2020-01-22 neels * - bar }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3,
174 3b0f3d61 2020-01-22 neels * - baz } right_start = NULL, right_count = 0 }
175 3b0f3d61 2020-01-22 neels * moo }
176 3b0f3d61 2020-01-22 neels * goo }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3,
177 0d27172a 2020-05-06 neels * zoo } right_start = &right.atoms.head[0], right_count = 3 }
178 3b0f3d61 2020-01-22 neels * +loo }
179 3b0f3d61 2020-01-22 neels * +roo }-- diff_chunk{ left_start = NULL, left_count = 0,
180 0d27172a 2020-05-06 neels * +too } right_start = &right.atoms.head[3], right_count = 3 }
181 3b0f3d61 2020-01-22 neels *
182 3b0f3d61 2020-01-22 neels */
183 3b0f3d61 2020-01-22 neels struct diff_chunk {
184 3b0f3d61 2020-01-22 neels bool solved;
185 3b0f3d61 2020-01-22 neels struct diff_atom *left_start;
186 3b0f3d61 2020-01-22 neels unsigned int left_count;
187 3b0f3d61 2020-01-22 neels struct diff_atom *right_start;
188 3b0f3d61 2020-01-22 neels unsigned int right_count;
189 3b0f3d61 2020-01-22 neels };
190 3b0f3d61 2020-01-22 neels
191 3b0f3d61 2020-01-22 neels typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
192 3b0f3d61 2020-01-22 neels #define DIFF_RESULT_ALLOC_BLOCKSIZE 128
193 8546b045 2020-09-20 neels
194 8546b045 2020-09-20 neels enum diff_chunk_type {
195 8546b045 2020-09-20 neels CHUNK_EMPTY,
196 8546b045 2020-09-20 neels CHUNK_PLUS,
197 8546b045 2020-09-20 neels CHUNK_MINUS,
198 8546b045 2020-09-20 neels CHUNK_SAME,
199 8546b045 2020-09-20 neels CHUNK_ERROR,
200 8546b045 2020-09-20 neels };
201 8546b045 2020-09-20 neels
202 8546b045 2020-09-20 neels static inline enum diff_chunk_type
203 8546b045 2020-09-20 neels diff_chunk_type(const struct diff_chunk *chunk)
204 8546b045 2020-09-20 neels {
205 8546b045 2020-09-20 neels if (!chunk->left_count && !chunk->right_count)
206 8546b045 2020-09-20 neels return CHUNK_EMPTY;
207 8546b045 2020-09-20 neels if (!chunk->solved)
208 8546b045 2020-09-20 neels return CHUNK_ERROR;
209 8546b045 2020-09-20 neels if (!chunk->right_count)
210 8546b045 2020-09-20 neels return CHUNK_MINUS;
211 8546b045 2020-09-20 neels if (!chunk->left_count)
212 8546b045 2020-09-20 neels return CHUNK_PLUS;
213 8546b045 2020-09-20 neels if (chunk->left_count != chunk->right_count)
214 8546b045 2020-09-20 neels return CHUNK_ERROR;
215 8546b045 2020-09-20 neels return CHUNK_SAME;
216 8546b045 2020-09-20 neels }
217 3b0f3d61 2020-01-22 neels
218 3b0f3d61 2020-01-22 neels struct diff_result {
219 3e6cba3a 2020-08-13 stsp int rc;
220 3b0f3d61 2020-01-22 neels struct diff_data left;
221 3b0f3d61 2020-01-22 neels struct diff_data right;
222 3b0f3d61 2020-01-22 neels diff_chunk_arraylist_t chunks;
223 3b0f3d61 2020-01-22 neels };
224 3b0f3d61 2020-01-22 neels
225 3b0f3d61 2020-01-22 neels struct diff_state {
226 3b0f3d61 2020-01-22 neels /* The final result passed to the original diff caller. */
227 3b0f3d61 2020-01-22 neels struct diff_result *result;
228 3b0f3d61 2020-01-22 neels
229 0d27172a 2020-05-06 neels /* The root diff_data is in result->left,right, these are (possibly)
230 0d27172a 2020-05-06 neels * subsections of the root data. */
231 3b0f3d61 2020-01-22 neels struct diff_data left;
232 3b0f3d61 2020-01-22 neels struct diff_data right;
233 3b0f3d61 2020-01-22 neels
234 3b0f3d61 2020-01-22 neels unsigned int recursion_depth_left;
235 3b0f3d61 2020-01-22 neels
236 0d27172a 2020-05-06 neels /* Remaining chunks from one diff algorithm pass, if any solved == false
237 0d27172a 2020-05-06 neels * chunks came up. */
238 3b0f3d61 2020-01-22 neels diff_chunk_arraylist_t temp_result;
239 3b0f3d61 2020-01-22 neels };
240 3b0f3d61 2020-01-22 neels
241 3b0f3d61 2020-01-22 neels struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
242 0d27172a 2020-05-06 neels struct diff_atom *left_start,
243 0d27172a 2020-05-06 neels unsigned int left_count,
244 0d27172a 2020-05-06 neels struct diff_atom *right_start,
245 0d27172a 2020-05-06 neels unsigned int right_count);
246 3b0f3d61 2020-01-22 neels
247 3b0f3d61 2020-01-22 neels /* Signature of a utility function to divide both source files into diff atoms.
248 0d27172a 2020-05-06 neels * It is possible that a (future) algorithm requires both source files to decide
249 0d27172a 2020-05-06 neels * on atom split points, hence this gets both left and right to atomize at the
250 0d27172a 2020-05-06 neels * same time.
251 3b0f3d61 2020-01-22 neels * An example is diff_atomize_text_by_line() in diff_atomize_text.c.
252 3b0f3d61 2020-01-22 neels *
253 3b0f3d61 2020-01-22 neels * func_data: context pointer (free to be used by implementation).
254 0d27172a 2020-05-06 neels * left: struct diff_data with left->data and left->len already set up, and
255 0d27172a 2020-05-06 neels * left->atoms to be created.
256 0d27172a 2020-05-06 neels * right: struct diff_data with right->data and right->len already set up, and
257 0d27172a 2020-05-06 neels * right->atoms to be created.
258 3b0f3d61 2020-01-22 neels */
259 3e6cba3a 2020-08-13 stsp typedef int (*diff_atomize_func_t)(void *func_data,
260 0d27172a 2020-05-06 neels struct diff_data *left,
261 0d27172a 2020-05-06 neels struct diff_data *right);
262 3b0f3d61 2020-01-22 neels
263 3e6cba3a 2020-08-13 stsp extern int diff_atomize_text_by_line(void *func_data,
264 0d27172a 2020-05-06 neels struct diff_data *left,
265 0d27172a 2020-05-06 neels struct diff_data *right);
266 3b0f3d61 2020-01-22 neels
267 3b0f3d61 2020-01-22 neels struct diff_algo_config;
268 3e6cba3a 2020-08-13 stsp typedef int (*diff_algo_impl_t)(
269 0d27172a 2020-05-06 neels const struct diff_algo_config *algo_config, struct diff_state *state);
270 3b0f3d61 2020-01-22 neels
271 0d27172a 2020-05-06 neels /* Form a result with all left-side removed and all right-side added, i.e. no
272 0d27172a 2020-05-06 neels * actual diff algorithm involved. */
273 3e6cba3a 2020-08-13 stsp int diff_algo_none(const struct diff_algo_config *algo_config,
274 0d27172a 2020-05-06 neels struct diff_state *state);
275 3b0f3d61 2020-01-22 neels
276 0d27172a 2020-05-06 neels /* Myers Diff tracing from the start all the way through to the end, requiring
277 0d27172a 2020-05-06 neels * quadratic amounts of memory. This can fail if the required space surpasses
278 0d27172a 2020-05-06 neels * algo_config->permitted_state_size. */
279 3e6cba3a 2020-08-13 stsp extern int diff_algo_myers(const struct diff_algo_config *algo_config,
280 0d27172a 2020-05-06 neels struct diff_state *state);
281 3b0f3d61 2020-01-22 neels
282 0d27172a 2020-05-06 neels /* Myers "Divide et Impera": tracing forwards from the start and backwards from
283 0d27172a 2020-05-06 neels * the end to find a midpoint that divides the problem into smaller chunks.
284 0d27172a 2020-05-06 neels * Requires only linear amounts of memory. */
285 3e6cba3a 2020-08-13 stsp extern int diff_algo_myers_divide(
286 0d27172a 2020-05-06 neels const struct diff_algo_config *algo_config, struct diff_state *state);
287 3b0f3d61 2020-01-22 neels
288 0d27172a 2020-05-06 neels /* Patience Diff algorithm, which divides a larger diff into smaller chunks. For
289 0d27172a 2020-05-06 neels * very specific scenarios, it may lead to a complete diff result by itself, but
290 0d27172a 2020-05-06 neels * needs a fallback algo to solve chunks that don't have common-unique atoms. */
291 3e6cba3a 2020-08-13 stsp extern int diff_algo_patience(
292 0d27172a 2020-05-06 neels const struct diff_algo_config *algo_config, struct diff_state *state);
293 3b0f3d61 2020-01-22 neels
294 3b0f3d61 2020-01-22 neels /* Diff algorithms to use, possibly nested. For example:
295 3b0f3d61 2020-01-22 neels *
296 3b0f3d61 2020-01-22 neels * struct diff_algo_config myers, patience, myers_divide;
297 3b0f3d61 2020-01-22 neels *
298 3b0f3d61 2020-01-22 neels * myers = (struct diff_algo_config){
299 3b0f3d61 2020-01-22 neels * .impl = diff_algo_myers,
300 3b0f3d61 2020-01-22 neels * .permitted_state_size = 32 * 1024 * 1024,
301 0d27172a 2020-05-06 neels * // When too large, do diff_algo_patience:
302 0d27172a 2020-05-06 neels * .fallback_algo = &patience,
303 3b0f3d61 2020-01-22 neels * };
304 3b0f3d61 2020-01-22 neels *
305 0d27172a 2020-05-06 neels * const struct diff_algo_config patience = (struct diff_algo_config){
306 0d27172a 2020-05-06 neels * .impl = diff_algo_patience,
307 0d27172a 2020-05-06 neels * // After subdivision, do Patience again:
308 0d27172a 2020-05-06 neels * .inner_algo = &patience,
309 0d27172a 2020-05-06 neels * // If subdivision failed, do Myers Divide et Impera:
310 0d27172a 2020-05-06 neels * .fallback_algo = &myers_then_myers_divide,
311 3b0f3d61 2020-01-22 neels * };
312 0d27172a 2020-05-06 neels *
313 0d27172a 2020-05-06 neels * const struct diff_algo_config myers_divide = (struct diff_algo_config){
314 0d27172a 2020-05-06 neels * .impl = diff_algo_myers_divide,
315 0d27172a 2020-05-06 neels * // When division succeeded, start from the top:
316 0d27172a 2020-05-06 neels * .inner_algo = &myers_then_myers_divide,
317 0d27172a 2020-05-06 neels * // (fallback_algo = NULL implies diff_algo_none).
318 3b0f3d61 2020-01-22 neels * };
319 3b0f3d61 2020-01-22 neels * struct diff_config config = {
320 3b0f3d61 2020-01-22 neels * .algo = &myers,
321 3b0f3d61 2020-01-22 neels * ...
322 3b0f3d61 2020-01-22 neels * };
323 3b0f3d61 2020-01-22 neels * diff_main(&config, ...);
324 3b0f3d61 2020-01-22 neels */
325 3b0f3d61 2020-01-22 neels struct diff_algo_config {
326 3b0f3d61 2020-01-22 neels diff_algo_impl_t impl;
327 3b0f3d61 2020-01-22 neels
328 0d27172a 2020-05-06 neels /* Fail this algo if it would use more than this amount of memory, and
329 0d27172a 2020-05-06 neels * instead use fallback_algo (diff_algo_myers). permitted_state_size ==
330 0d27172a 2020-05-06 neels * 0 means no limitation. */
331 3b0f3d61 2020-01-22 neels size_t permitted_state_size;
332 3b0f3d61 2020-01-22 neels
333 0d27172a 2020-05-06 neels /* For algorithms that divide into smaller chunks, use this algorithm to
334 0d27172a 2020-05-06 neels * solve the divided chunks. */
335 3b0f3d61 2020-01-22 neels const struct diff_algo_config *inner_algo;
336 3b0f3d61 2020-01-22 neels
337 0d27172a 2020-05-06 neels /* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large
338 0d27172a 2020-05-06 neels * state, or diff_algo_patience can't find any common-unique atoms),
339 0d27172a 2020-05-06 neels * then use this algorithm instead. */
340 3b0f3d61 2020-01-22 neels const struct diff_algo_config *fallback_algo;
341 3b0f3d61 2020-01-22 neels };
342 3b0f3d61 2020-01-22 neels
343 3b0f3d61 2020-01-22 neels struct diff_config {
344 3b0f3d61 2020-01-22 neels diff_atomize_func_t atomize_func;
345 3b0f3d61 2020-01-22 neels void *atomize_func_data;
346 3b0f3d61 2020-01-22 neels
347 3b0f3d61 2020-01-22 neels const struct diff_algo_config *algo;
348 3b0f3d61 2020-01-22 neels
349 0d27172a 2020-05-06 neels /* How deep to step into subdivisions of a source file, a paranoia /
350 0d27172a 2020-05-06 neels * safety measure to guard against infinite loops through diff
351 0d27172a 2020-05-06 neels * algorithms. When the maximum recursion is reached, employ
352 0d27172a 2020-05-06 neels * diff_algo_none (i.e. remove all left atoms and add all right atoms).
353 0d27172a 2020-05-06 neels */
354 3b0f3d61 2020-01-22 neels unsigned int max_recursion_depth;
355 3b0f3d61 2020-01-22 neels };
356 3b0f3d61 2020-01-22 neels
357 3b0f3d61 2020-01-22 neels struct diff_result *diff_main(const struct diff_config *config,
358 c6eecea3 2020-07-26 stsp int left_fd, const uint8_t *left_data,
359 c6eecea3 2020-07-26 stsp off_t left_len,
360 c6eecea3 2020-07-26 stsp int right_fd, const uint8_t *right_data,
361 732e8ee0 2020-09-20 stsp off_t right_len, bool ignore_whitespace);
362 3b0f3d61 2020-01-22 neels void diff_result_free(struct diff_result *result);