Blame


1 3b0f3d61 2020-01-22 neels /* Generic infrastructure to implement various diff algorithms (implementation). */
2 3b0f3d61 2020-01-22 neels /*
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 3b0f3d61 2020-01-22 neels *
5 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
6 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
7 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
8 3b0f3d61 2020-01-22 neels *
9 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 3b0f3d61 2020-01-22 neels */
17 3b0f3d61 2020-01-22 neels
18 3b0f3d61 2020-01-22 neels
19 3b0f3d61 2020-01-22 neels #include <sys/queue.h>
20 3b0f3d61 2020-01-22 neels #include <stdint.h>
21 3b0f3d61 2020-01-22 neels #include <stdlib.h>
22 3b0f3d61 2020-01-22 neels #include <stdbool.h>
23 3b0f3d61 2020-01-22 neels #include <stdio.h>
24 3b0f3d61 2020-01-22 neels #include <string.h>
25 3b0f3d61 2020-01-22 neels #include <limits.h>
26 3b0f3d61 2020-01-22 neels
27 3b0f3d61 2020-01-22 neels #include <assert.h>
28 3b0f3d61 2020-01-22 neels
29 3b0f3d61 2020-01-22 neels #include <diff/diff_main.h>
30 3b0f3d61 2020-01-22 neels
31 3b0f3d61 2020-01-22 neels #include "debug.h"
32 3b0f3d61 2020-01-22 neels
33 3b0f3d61 2020-01-22 neels /* Even if a left or right side is empty, diff output may need to know the position in that file.
34 3b0f3d61 2020-01-22 neels * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that
35 3b0f3d61 2020-01-22 neels * position without consuming any lines. */
36 61a7b578 2020-05-06 neels struct diff_chunk *
37 61a7b578 2020-05-06 neels diff_state_add_chunk(struct diff_state *state, bool solved,
38 61a7b578 2020-05-06 neels struct diff_atom *left_start, unsigned int left_count,
39 61a7b578 2020-05-06 neels struct diff_atom *right_start, unsigned int right_count)
40 3b0f3d61 2020-01-22 neels {
41 3b0f3d61 2020-01-22 neels struct diff_chunk *chunk;
42 3b0f3d61 2020-01-22 neels diff_chunk_arraylist_t *result;
43 3b0f3d61 2020-01-22 neels
44 3b0f3d61 2020-01-22 neels if (solved && !state->temp_result.len)
45 3b0f3d61 2020-01-22 neels result = &state->result->chunks;
46 3b0f3d61 2020-01-22 neels else
47 3b0f3d61 2020-01-22 neels result = &state->temp_result;
48 3b0f3d61 2020-01-22 neels
49 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(chunk, *result);
50 3b0f3d61 2020-01-22 neels if (!chunk)
51 3b0f3d61 2020-01-22 neels return NULL;
52 3b0f3d61 2020-01-22 neels *chunk = (struct diff_chunk){
53 3b0f3d61 2020-01-22 neels .solved = solved,
54 3b0f3d61 2020-01-22 neels .left_start = left_start,
55 3b0f3d61 2020-01-22 neels .left_count = left_count,
56 3b0f3d61 2020-01-22 neels .right_start = right_start,
57 3b0f3d61 2020-01-22 neels .right_count = right_count,
58 3b0f3d61 2020-01-22 neels };
59 3b0f3d61 2020-01-22 neels
60 3b0f3d61 2020-01-22 neels debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
61 3b0f3d61 2020-01-22 neels debug("L\n");
62 3b0f3d61 2020-01-22 neels debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
63 3b0f3d61 2020-01-22 neels debug("R\n");
64 3b0f3d61 2020-01-22 neels debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
65 3b0f3d61 2020-01-22 neels return chunk;
66 3b0f3d61 2020-01-22 neels }
67 3b0f3d61 2020-01-22 neels
68 61a7b578 2020-05-06 neels void
69 61a7b578 2020-05-06 neels diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len)
70 3b0f3d61 2020-01-22 neels {
71 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
72 3b0f3d61 2020-01-22 neels .data = data,
73 3b0f3d61 2020-01-22 neels .len = len,
74 3b0f3d61 2020-01-22 neels .root = d,
75 3b0f3d61 2020-01-22 neels };
76 3b0f3d61 2020-01-22 neels }
77 3b0f3d61 2020-01-22 neels
78 61a7b578 2020-05-06 neels void
79 61a7b578 2020-05-06 neels diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
80 61a7b578 2020-05-06 neels struct diff_atom *from_atom, unsigned int atoms_count)
81 3b0f3d61 2020-01-22 neels {
82 3b0f3d61 2020-01-22 neels struct diff_atom *last_atom = from_atom + atoms_count - 1;
83 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
84 3b0f3d61 2020-01-22 neels .data = from_atom->at,
85 3b0f3d61 2020-01-22 neels .len = (last_atom->at + last_atom->len) - from_atom->at,
86 3b0f3d61 2020-01-22 neels .root = parent->root,
87 3b0f3d61 2020-01-22 neels .atoms.head = from_atom,
88 3b0f3d61 2020-01-22 neels .atoms.len = atoms_count,
89 3b0f3d61 2020-01-22 neels };
90 3b0f3d61 2020-01-22 neels
91 3b0f3d61 2020-01-22 neels debug("subsection:\n");
92 3b0f3d61 2020-01-22 neels debug_dump(d);
93 3b0f3d61 2020-01-22 neels }
94 3b0f3d61 2020-01-22 neels
95 61a7b578 2020-05-06 neels void
96 61a7b578 2020-05-06 neels diff_data_free(struct diff_data *diff_data)
97 3b0f3d61 2020-01-22 neels {
98 3b0f3d61 2020-01-22 neels if (!diff_data)
99 3b0f3d61 2020-01-22 neels return;
100 3b0f3d61 2020-01-22 neels if (diff_data->atoms.allocated)
101 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(diff_data->atoms);
102 3b0f3d61 2020-01-22 neels }
103 3b0f3d61 2020-01-22 neels
104 61a7b578 2020-05-06 neels enum diff_rc
105 61a7b578 2020-05-06 neels diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state)
106 3b0f3d61 2020-01-22 neels {
107 3b0f3d61 2020-01-22 neels debug("\n** %s\n", __func__);
108 3b0f3d61 2020-01-22 neels debug("left:\n");
109 3b0f3d61 2020-01-22 neels debug_dump(&state->left);
110 3b0f3d61 2020-01-22 neels debug("right:\n");
111 3b0f3d61 2020-01-22 neels debug_dump(&state->right);
112 50198b5f 2020-05-05 neels debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL, 0);
113 3b0f3d61 2020-01-22 neels
114 3b0f3d61 2020-01-22 neels /* Add a chunk of equal lines, if any */
115 3b0f3d61 2020-01-22 neels unsigned int equal_atoms = 0;
116 3b0f3d61 2020-01-22 neels while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len
117 3b0f3d61 2020-01-22 neels && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms]))
118 3b0f3d61 2020-01-22 neels equal_atoms++;
119 3b0f3d61 2020-01-22 neels if (equal_atoms) {
120 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
121 3b0f3d61 2020-01-22 neels &state->left.atoms.head[0], equal_atoms,
122 3b0f3d61 2020-01-22 neels &state->right.atoms.head[0], equal_atoms))
123 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
124 3b0f3d61 2020-01-22 neels }
125 3b0f3d61 2020-01-22 neels
126 3b0f3d61 2020-01-22 neels /* Add a "minus" chunk with all lines from the left. */
127 3b0f3d61 2020-01-22 neels if (equal_atoms < state->left.atoms.len) {
128 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
129 3b0f3d61 2020-01-22 neels &state->left.atoms.head[equal_atoms],
130 3b0f3d61 2020-01-22 neels state->left.atoms.len - equal_atoms,
131 3b0f3d61 2020-01-22 neels NULL, 0))
132 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
133 3b0f3d61 2020-01-22 neels }
134 3b0f3d61 2020-01-22 neels
135 3b0f3d61 2020-01-22 neels /* Add a "plus" chunk with all lines from the right. */
136 3b0f3d61 2020-01-22 neels if (equal_atoms < state->right.atoms.len) {
137 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
138 3b0f3d61 2020-01-22 neels NULL, 0,
139 3b0f3d61 2020-01-22 neels &state->right.atoms.head[equal_atoms],
140 3b0f3d61 2020-01-22 neels state->right.atoms.len - equal_atoms))
141 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
142 3b0f3d61 2020-01-22 neels }
143 3b0f3d61 2020-01-22 neels return DIFF_RC_OK;
144 3b0f3d61 2020-01-22 neels }
145 3b0f3d61 2020-01-22 neels
146 61a7b578 2020-05-06 neels enum diff_rc
147 61a7b578 2020-05-06 neels diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state)
148 3b0f3d61 2020-01-22 neels {
149 3b0f3d61 2020-01-22 neels enum diff_rc rc;
150 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
151 3b0f3d61 2020-01-22 neels
152 3b0f3d61 2020-01-22 neels if (!algo_config || !algo_config->impl || !state->recursion_depth_left) {
153 3b0f3d61 2020-01-22 neels debug("MAX RECURSION REACHED, just dumping diff chunks\n");
154 3b0f3d61 2020-01-22 neels return diff_algo_none(algo_config, state);
155 3b0f3d61 2020-01-22 neels }
156 3b0f3d61 2020-01-22 neels
157 3b0f3d61 2020-01-22 neels ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
158 3b0f3d61 2020-01-22 neels rc = algo_config->impl(algo_config, state);
159 3b0f3d61 2020-01-22 neels switch (rc) {
160 3b0f3d61 2020-01-22 neels case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
161 3b0f3d61 2020-01-22 neels debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo);
162 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->fallback_algo, state);
163 3b0f3d61 2020-01-22 neels goto return_rc;
164 3b0f3d61 2020-01-22 neels
165 3b0f3d61 2020-01-22 neels case DIFF_RC_OK:
166 3b0f3d61 2020-01-22 neels /* continue below */
167 3b0f3d61 2020-01-22 neels break;
168 3b0f3d61 2020-01-22 neels
169 3b0f3d61 2020-01-22 neels default:
170 3b0f3d61 2020-01-22 neels /* some error happened */
171 3b0f3d61 2020-01-22 neels goto return_rc;
172 3b0f3d61 2020-01-22 neels }
173 3b0f3d61 2020-01-22 neels
174 3b0f3d61 2020-01-22 neels /* Pick up any diff chunks that are still unsolved and feed to inner_algo.
175 3b0f3d61 2020-01-22 neels * inner_algo will solve unsolved chunks and append to result,
176 3b0f3d61 2020-01-22 neels * and subsequent solved chunks on this level are then appended to result afterwards. */
177 3b0f3d61 2020-01-22 neels int i;
178 3b0f3d61 2020-01-22 neels for (i = 0; i < state->temp_result.len; i++) {
179 3b0f3d61 2020-01-22 neels struct diff_chunk *c = &state->temp_result.head[i];
180 3b0f3d61 2020-01-22 neels if (c->solved) {
181 3b0f3d61 2020-01-22 neels struct diff_chunk *final_c;
182 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(final_c, state->result->chunks);
183 3b0f3d61 2020-01-22 neels if (!final_c) {
184 3b0f3d61 2020-01-22 neels rc = DIFF_RC_ENOMEM;
185 3b0f3d61 2020-01-22 neels goto return_rc;
186 3b0f3d61 2020-01-22 neels }
187 3b0f3d61 2020-01-22 neels *final_c = *c;
188 3b0f3d61 2020-01-22 neels continue;
189 3b0f3d61 2020-01-22 neels }
190 3b0f3d61 2020-01-22 neels
191 3b0f3d61 2020-01-22 neels /* c is an unsolved chunk, feed to inner_algo */
192 3b0f3d61 2020-01-22 neels struct diff_state inner_state = {
193 3b0f3d61 2020-01-22 neels .result = state->result,
194 3b0f3d61 2020-01-22 neels .recursion_depth_left = state->recursion_depth_left - 1,
195 3b0f3d61 2020-01-22 neels };
196 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count);
197 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count);
198 3b0f3d61 2020-01-22 neels
199 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->inner_algo, &inner_state);
200 3b0f3d61 2020-01-22 neels
201 3b0f3d61 2020-01-22 neels if (rc != DIFF_RC_OK)
202 3b0f3d61 2020-01-22 neels goto return_rc;
203 3b0f3d61 2020-01-22 neels }
204 3b0f3d61 2020-01-22 neels
205 3b0f3d61 2020-01-22 neels rc = DIFF_RC_OK;
206 3b0f3d61 2020-01-22 neels return_rc:
207 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
208 3b0f3d61 2020-01-22 neels return rc;
209 3b0f3d61 2020-01-22 neels }
210 3b0f3d61 2020-01-22 neels
211 61a7b578 2020-05-06 neels struct diff_result *
212 61a7b578 2020-05-06 neels diff_main(const struct diff_config *config,
213 61a7b578 2020-05-06 neels const uint8_t *left_data, size_t left_len,
214 61a7b578 2020-05-06 neels const uint8_t *right_data, size_t right_len)
215 3b0f3d61 2020-01-22 neels {
216 3b0f3d61 2020-01-22 neels struct diff_result *result = malloc(sizeof(struct diff_result));
217 3b0f3d61 2020-01-22 neels if (!result)
218 3b0f3d61 2020-01-22 neels return NULL;
219 3b0f3d61 2020-01-22 neels
220 3b0f3d61 2020-01-22 neels *result = (struct diff_result){};
221 3b0f3d61 2020-01-22 neels diff_data_init_root(&result->left, left_data, left_len);
222 3b0f3d61 2020-01-22 neels diff_data_init_root(&result->right, right_data, right_len);
223 3b0f3d61 2020-01-22 neels
224 3b0f3d61 2020-01-22 neels if (!config->atomize_func) {
225 3b0f3d61 2020-01-22 neels result->rc = DIFF_RC_EINVAL;
226 3b0f3d61 2020-01-22 neels return result;
227 3b0f3d61 2020-01-22 neels }
228 3b0f3d61 2020-01-22 neels
229 3b0f3d61 2020-01-22 neels result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right);
230 3b0f3d61 2020-01-22 neels if (result->rc != DIFF_RC_OK)
231 3b0f3d61 2020-01-22 neels return result;
232 3b0f3d61 2020-01-22 neels
233 3b0f3d61 2020-01-22 neels struct diff_state state = {
234 3b0f3d61 2020-01-22 neels .result = result,
235 3b0f3d61 2020-01-22 neels .recursion_depth_left = config->max_recursion_depth ? : 1024,
236 3b0f3d61 2020-01-22 neels };
237 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len);
238 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len);
239 3b0f3d61 2020-01-22 neels
240 3b0f3d61 2020-01-22 neels result->rc = diff_run_algo(config->algo, &state);
241 3b0f3d61 2020-01-22 neels
242 3b0f3d61 2020-01-22 neels return result;
243 3b0f3d61 2020-01-22 neels }
244 3b0f3d61 2020-01-22 neels
245 61a7b578 2020-05-06 neels void
246 61a7b578 2020-05-06 neels diff_result_free(struct diff_result *result)
247 3b0f3d61 2020-01-22 neels {
248 3b0f3d61 2020-01-22 neels if (!result)
249 3b0f3d61 2020-01-22 neels return;
250 3b0f3d61 2020-01-22 neels diff_data_free(&result->left);
251 3b0f3d61 2020-01-22 neels diff_data_free(&result->right);
252 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(result->chunks);
253 3b0f3d61 2020-01-22 neels free(result);
254 3b0f3d61 2020-01-22 neels }