Blame


1 3b0f3d61 2020-01-22 neels /* Generic infrastructure to implement various diff algorithms (implementation). */
2 3b0f3d61 2020-01-22 neels /*
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 3b0f3d61 2020-01-22 neels *
5 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
6 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
7 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
8 3b0f3d61 2020-01-22 neels *
9 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 3b0f3d61 2020-01-22 neels */
17 3b0f3d61 2020-01-22 neels
18 3b0f3d61 2020-01-22 neels
19 3b0f3d61 2020-01-22 neels #include <sys/queue.h>
20 3b0f3d61 2020-01-22 neels #include <stdint.h>
21 3b0f3d61 2020-01-22 neels #include <stdlib.h>
22 3b0f3d61 2020-01-22 neels #include <stdbool.h>
23 3b0f3d61 2020-01-22 neels #include <stdio.h>
24 3b0f3d61 2020-01-22 neels #include <string.h>
25 3b0f3d61 2020-01-22 neels #include <limits.h>
26 3b0f3d61 2020-01-22 neels
27 3b0f3d61 2020-01-22 neels #include <assert.h>
28 3b0f3d61 2020-01-22 neels
29 3b0f3d61 2020-01-22 neels #include <diff/diff_main.h>
30 3b0f3d61 2020-01-22 neels
31 3b0f3d61 2020-01-22 neels #include "debug.h"
32 3b0f3d61 2020-01-22 neels
33 3b0f3d61 2020-01-22 neels /* Even if a left or right side is empty, diff output may need to know the position in that file.
34 3b0f3d61 2020-01-22 neels * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that
35 3b0f3d61 2020-01-22 neels * position without consuming any lines. */
36 3b0f3d61 2020-01-22 neels struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
37 3b0f3d61 2020-01-22 neels struct diff_atom *left_start, unsigned int left_count,
38 3b0f3d61 2020-01-22 neels struct diff_atom *right_start, unsigned int right_count)
39 3b0f3d61 2020-01-22 neels {
40 3b0f3d61 2020-01-22 neels struct diff_chunk *chunk;
41 3b0f3d61 2020-01-22 neels diff_chunk_arraylist_t *result;
42 3b0f3d61 2020-01-22 neels
43 3b0f3d61 2020-01-22 neels if (solved && !state->temp_result.len)
44 3b0f3d61 2020-01-22 neels result = &state->result->chunks;
45 3b0f3d61 2020-01-22 neels else
46 3b0f3d61 2020-01-22 neels result = &state->temp_result;
47 3b0f3d61 2020-01-22 neels
48 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(chunk, *result);
49 3b0f3d61 2020-01-22 neels if (!chunk)
50 3b0f3d61 2020-01-22 neels return NULL;
51 3b0f3d61 2020-01-22 neels *chunk = (struct diff_chunk){
52 3b0f3d61 2020-01-22 neels .solved = solved,
53 3b0f3d61 2020-01-22 neels .left_start = left_start,
54 3b0f3d61 2020-01-22 neels .left_count = left_count,
55 3b0f3d61 2020-01-22 neels .right_start = right_start,
56 3b0f3d61 2020-01-22 neels .right_count = right_count,
57 3b0f3d61 2020-01-22 neels };
58 3b0f3d61 2020-01-22 neels
59 3b0f3d61 2020-01-22 neels debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
60 3b0f3d61 2020-01-22 neels debug("L\n");
61 3b0f3d61 2020-01-22 neels debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
62 3b0f3d61 2020-01-22 neels debug("R\n");
63 3b0f3d61 2020-01-22 neels debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
64 3b0f3d61 2020-01-22 neels return chunk;
65 3b0f3d61 2020-01-22 neels }
66 3b0f3d61 2020-01-22 neels
67 3b0f3d61 2020-01-22 neels void diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len)
68 3b0f3d61 2020-01-22 neels {
69 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
70 3b0f3d61 2020-01-22 neels .data = data,
71 3b0f3d61 2020-01-22 neels .len = len,
72 3b0f3d61 2020-01-22 neels .root = d,
73 3b0f3d61 2020-01-22 neels };
74 3b0f3d61 2020-01-22 neels }
75 3b0f3d61 2020-01-22 neels
76 3b0f3d61 2020-01-22 neels void diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
77 3b0f3d61 2020-01-22 neels struct diff_atom *from_atom, unsigned int atoms_count)
78 3b0f3d61 2020-01-22 neels {
79 3b0f3d61 2020-01-22 neels struct diff_atom *last_atom = from_atom + atoms_count - 1;
80 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
81 3b0f3d61 2020-01-22 neels .data = from_atom->at,
82 3b0f3d61 2020-01-22 neels .len = (last_atom->at + last_atom->len) - from_atom->at,
83 3b0f3d61 2020-01-22 neels .root = parent->root,
84 3b0f3d61 2020-01-22 neels .atoms.head = from_atom,
85 3b0f3d61 2020-01-22 neels .atoms.len = atoms_count,
86 3b0f3d61 2020-01-22 neels };
87 3b0f3d61 2020-01-22 neels
88 3b0f3d61 2020-01-22 neels debug("subsection:\n");
89 3b0f3d61 2020-01-22 neels debug_dump(d);
90 3b0f3d61 2020-01-22 neels }
91 3b0f3d61 2020-01-22 neels
92 3b0f3d61 2020-01-22 neels void diff_data_free(struct diff_data *diff_data)
93 3b0f3d61 2020-01-22 neels {
94 3b0f3d61 2020-01-22 neels if (!diff_data)
95 3b0f3d61 2020-01-22 neels return;
96 3b0f3d61 2020-01-22 neels if (diff_data->atoms.allocated)
97 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(diff_data->atoms);
98 3b0f3d61 2020-01-22 neels }
99 3b0f3d61 2020-01-22 neels
100 3b0f3d61 2020-01-22 neels enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state)
101 3b0f3d61 2020-01-22 neels {
102 3b0f3d61 2020-01-22 neels debug("\n** %s\n", __func__);
103 3b0f3d61 2020-01-22 neels debug("left:\n");
104 3b0f3d61 2020-01-22 neels debug_dump(&state->left);
105 3b0f3d61 2020-01-22 neels debug("right:\n");
106 3b0f3d61 2020-01-22 neels debug_dump(&state->right);
107 223979a9 2020-05-05 neels debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL, 0);
108 3b0f3d61 2020-01-22 neels
109 3b0f3d61 2020-01-22 neels /* Add a chunk of equal lines, if any */
110 3b0f3d61 2020-01-22 neels unsigned int equal_atoms = 0;
111 3b0f3d61 2020-01-22 neels while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len
112 3b0f3d61 2020-01-22 neels && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms]))
113 3b0f3d61 2020-01-22 neels equal_atoms++;
114 3b0f3d61 2020-01-22 neels if (equal_atoms) {
115 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
116 3b0f3d61 2020-01-22 neels &state->left.atoms.head[0], equal_atoms,
117 3b0f3d61 2020-01-22 neels &state->right.atoms.head[0], equal_atoms))
118 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
119 3b0f3d61 2020-01-22 neels }
120 3b0f3d61 2020-01-22 neels
121 3b0f3d61 2020-01-22 neels /* Add a "minus" chunk with all lines from the left. */
122 3b0f3d61 2020-01-22 neels if (equal_atoms < state->left.atoms.len) {
123 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
124 3b0f3d61 2020-01-22 neels &state->left.atoms.head[equal_atoms],
125 3b0f3d61 2020-01-22 neels state->left.atoms.len - equal_atoms,
126 3b0f3d61 2020-01-22 neels NULL, 0))
127 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
128 3b0f3d61 2020-01-22 neels }
129 3b0f3d61 2020-01-22 neels
130 3b0f3d61 2020-01-22 neels /* Add a "plus" chunk with all lines from the right. */
131 3b0f3d61 2020-01-22 neels if (equal_atoms < state->right.atoms.len) {
132 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
133 3b0f3d61 2020-01-22 neels NULL, 0,
134 3b0f3d61 2020-01-22 neels &state->right.atoms.head[equal_atoms],
135 3b0f3d61 2020-01-22 neels state->right.atoms.len - equal_atoms))
136 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
137 3b0f3d61 2020-01-22 neels }
138 3b0f3d61 2020-01-22 neels return DIFF_RC_OK;
139 3b0f3d61 2020-01-22 neels }
140 3b0f3d61 2020-01-22 neels
141 3b0f3d61 2020-01-22 neels enum diff_rc diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state)
142 3b0f3d61 2020-01-22 neels {
143 3b0f3d61 2020-01-22 neels enum diff_rc rc;
144 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
145 3b0f3d61 2020-01-22 neels
146 3b0f3d61 2020-01-22 neels if (!algo_config || !algo_config->impl || !state->recursion_depth_left) {
147 3b0f3d61 2020-01-22 neels debug("MAX RECURSION REACHED, just dumping diff chunks\n");
148 3b0f3d61 2020-01-22 neels return diff_algo_none(algo_config, state);
149 3b0f3d61 2020-01-22 neels }
150 3b0f3d61 2020-01-22 neels
151 3b0f3d61 2020-01-22 neels ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
152 3b0f3d61 2020-01-22 neels rc = algo_config->impl(algo_config, state);
153 3b0f3d61 2020-01-22 neels switch (rc) {
154 3b0f3d61 2020-01-22 neels case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
155 3b0f3d61 2020-01-22 neels debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo);
156 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->fallback_algo, state);
157 3b0f3d61 2020-01-22 neels goto return_rc;
158 3b0f3d61 2020-01-22 neels
159 3b0f3d61 2020-01-22 neels case DIFF_RC_OK:
160 3b0f3d61 2020-01-22 neels /* continue below */
161 3b0f3d61 2020-01-22 neels break;
162 3b0f3d61 2020-01-22 neels
163 3b0f3d61 2020-01-22 neels default:
164 3b0f3d61 2020-01-22 neels /* some error happened */
165 3b0f3d61 2020-01-22 neels goto return_rc;
166 3b0f3d61 2020-01-22 neels }
167 3b0f3d61 2020-01-22 neels
168 3b0f3d61 2020-01-22 neels /* Pick up any diff chunks that are still unsolved and feed to inner_algo.
169 3b0f3d61 2020-01-22 neels * inner_algo will solve unsolved chunks and append to result,
170 3b0f3d61 2020-01-22 neels * and subsequent solved chunks on this level are then appended to result afterwards. */
171 3b0f3d61 2020-01-22 neels int i;
172 3b0f3d61 2020-01-22 neels for (i = 0; i < state->temp_result.len; i++) {
173 3b0f3d61 2020-01-22 neels struct diff_chunk *c = &state->temp_result.head[i];
174 3b0f3d61 2020-01-22 neels if (c->solved) {
175 3b0f3d61 2020-01-22 neels struct diff_chunk *final_c;
176 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(final_c, state->result->chunks);
177 3b0f3d61 2020-01-22 neels if (!final_c) {
178 3b0f3d61 2020-01-22 neels rc = DIFF_RC_ENOMEM;
179 3b0f3d61 2020-01-22 neels goto return_rc;
180 3b0f3d61 2020-01-22 neels }
181 3b0f3d61 2020-01-22 neels *final_c = *c;
182 3b0f3d61 2020-01-22 neels continue;
183 3b0f3d61 2020-01-22 neels }
184 3b0f3d61 2020-01-22 neels
185 3b0f3d61 2020-01-22 neels /* c is an unsolved chunk, feed to inner_algo */
186 3b0f3d61 2020-01-22 neels struct diff_state inner_state = {
187 3b0f3d61 2020-01-22 neels .result = state->result,
188 3b0f3d61 2020-01-22 neels .recursion_depth_left = state->recursion_depth_left - 1,
189 3b0f3d61 2020-01-22 neels };
190 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count);
191 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count);
192 3b0f3d61 2020-01-22 neels
193 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->inner_algo, &inner_state);
194 3b0f3d61 2020-01-22 neels
195 3b0f3d61 2020-01-22 neels if (rc != DIFF_RC_OK)
196 3b0f3d61 2020-01-22 neels goto return_rc;
197 3b0f3d61 2020-01-22 neels }
198 3b0f3d61 2020-01-22 neels
199 3b0f3d61 2020-01-22 neels rc = DIFF_RC_OK;
200 3b0f3d61 2020-01-22 neels return_rc:
201 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
202 3b0f3d61 2020-01-22 neels return rc;
203 3b0f3d61 2020-01-22 neels }
204 3b0f3d61 2020-01-22 neels
205 3b0f3d61 2020-01-22 neels struct diff_result *diff_main(const struct diff_config *config,
206 3b0f3d61 2020-01-22 neels const uint8_t *left_data, size_t left_len,
207 3b0f3d61 2020-01-22 neels const uint8_t *right_data, size_t right_len)
208 3b0f3d61 2020-01-22 neels {
209 3b0f3d61 2020-01-22 neels struct diff_result *result = malloc(sizeof(struct diff_result));
210 3b0f3d61 2020-01-22 neels if (!result)
211 3b0f3d61 2020-01-22 neels return NULL;
212 3b0f3d61 2020-01-22 neels
213 3b0f3d61 2020-01-22 neels *result = (struct diff_result){};
214 3b0f3d61 2020-01-22 neels diff_data_init_root(&result->left, left_data, left_len);
215 3b0f3d61 2020-01-22 neels diff_data_init_root(&result->right, right_data, right_len);
216 3b0f3d61 2020-01-22 neels
217 3b0f3d61 2020-01-22 neels if (!config->atomize_func) {
218 3b0f3d61 2020-01-22 neels result->rc = DIFF_RC_EINVAL;
219 3b0f3d61 2020-01-22 neels return result;
220 3b0f3d61 2020-01-22 neels }
221 3b0f3d61 2020-01-22 neels
222 3b0f3d61 2020-01-22 neels result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right);
223 3b0f3d61 2020-01-22 neels if (result->rc != DIFF_RC_OK)
224 3b0f3d61 2020-01-22 neels return result;
225 3b0f3d61 2020-01-22 neels
226 3b0f3d61 2020-01-22 neels struct diff_state state = {
227 3b0f3d61 2020-01-22 neels .result = result,
228 3b0f3d61 2020-01-22 neels .recursion_depth_left = config->max_recursion_depth ? : 1024,
229 3b0f3d61 2020-01-22 neels };
230 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len);
231 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len);
232 3b0f3d61 2020-01-22 neels
233 3b0f3d61 2020-01-22 neels result->rc = diff_run_algo(config->algo, &state);
234 3b0f3d61 2020-01-22 neels
235 3b0f3d61 2020-01-22 neels return result;
236 3b0f3d61 2020-01-22 neels }
237 3b0f3d61 2020-01-22 neels
238 3b0f3d61 2020-01-22 neels void diff_result_free(struct diff_result *result)
239 3b0f3d61 2020-01-22 neels {
240 3b0f3d61 2020-01-22 neels if (!result)
241 3b0f3d61 2020-01-22 neels return;
242 3b0f3d61 2020-01-22 neels diff_data_free(&result->left);
243 3b0f3d61 2020-01-22 neels diff_data_free(&result->right);
244 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(result->chunks);
245 3b0f3d61 2020-01-22 neels free(result);
246 3b0f3d61 2020-01-22 neels }