Blob


1 /* Generic infrastructure to implement various diff algorithms (implementation). */
2 /*
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
19 #include <sys/queue.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <stdbool.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <limits.h>
27 #include <assert.h>
29 #include <diff/diff_main.h>
31 #include "debug.h"
33 /* Even if a left or right side is empty, diff output may need to know the position in that file.
34 * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that
35 * position without consuming any lines. */
36 struct diff_chunk *
37 diff_state_add_chunk(struct diff_state *state, bool solved,
38 struct diff_atom *left_start, unsigned int left_count,
39 struct diff_atom *right_start, unsigned int right_count)
40 {
41 struct diff_chunk *chunk;
42 diff_chunk_arraylist_t *result;
44 if (solved && !state->temp_result.len)
45 result = &state->result->chunks;
46 else
47 result = &state->temp_result;
49 ARRAYLIST_ADD(chunk, *result);
50 if (!chunk)
51 return NULL;
52 *chunk = (struct diff_chunk){
53 .solved = solved,
54 .left_start = left_start,
55 .left_count = left_count,
56 .right_start = right_start,
57 .right_count = right_count,
58 };
60 debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
61 debug("L\n");
62 debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
63 debug("R\n");
64 debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
65 return chunk;
66 }
68 void
69 diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len)
70 {
71 *d = (struct diff_data){
72 .data = data,
73 .len = len,
74 .root = d,
75 };
76 }
78 void
79 diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
80 struct diff_atom *from_atom, unsigned int atoms_count)
81 {
82 struct diff_atom *last_atom = from_atom + atoms_count - 1;
83 *d = (struct diff_data){
84 .data = from_atom->at,
85 .len = (last_atom->at + last_atom->len) - from_atom->at,
86 .root = parent->root,
87 .atoms.head = from_atom,
88 .atoms.len = atoms_count,
89 };
91 debug("subsection:\n");
92 debug_dump(d);
93 }
95 void
96 diff_data_free(struct diff_data *diff_data)
97 {
98 if (!diff_data)
99 return;
100 if (diff_data->atoms.allocated)
101 ARRAYLIST_FREE(diff_data->atoms);
104 enum diff_rc
105 diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state)
107 debug("\n** %s\n", __func__);
108 debug("left:\n");
109 debug_dump(&state->left);
110 debug("right:\n");
111 debug_dump(&state->right);
112 debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL, 0);
114 /* Add a chunk of equal lines, if any */
115 unsigned int equal_atoms = 0;
116 while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len
117 && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms]))
118 equal_atoms++;
119 if (equal_atoms) {
120 if (!diff_state_add_chunk(state, true,
121 &state->left.atoms.head[0], equal_atoms,
122 &state->right.atoms.head[0], equal_atoms))
123 return DIFF_RC_ENOMEM;
126 /* Add a "minus" chunk with all lines from the left. */
127 if (equal_atoms < state->left.atoms.len) {
128 if (!diff_state_add_chunk(state, true,
129 &state->left.atoms.head[equal_atoms],
130 state->left.atoms.len - equal_atoms,
131 NULL, 0))
132 return DIFF_RC_ENOMEM;
135 /* Add a "plus" chunk with all lines from the right. */
136 if (equal_atoms < state->right.atoms.len) {
137 if (!diff_state_add_chunk(state, true,
138 NULL, 0,
139 &state->right.atoms.head[equal_atoms],
140 state->right.atoms.len - equal_atoms))
141 return DIFF_RC_ENOMEM;
143 return DIFF_RC_OK;
146 enum diff_rc
147 diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state)
149 enum diff_rc rc;
150 ARRAYLIST_FREE(state->temp_result);
152 if (!algo_config || !algo_config->impl || !state->recursion_depth_left) {
153 debug("MAX RECURSION REACHED, just dumping diff chunks\n");
154 return diff_algo_none(algo_config, state);
157 ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
158 rc = algo_config->impl(algo_config, state);
159 switch (rc) {
160 case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
161 debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo);
162 rc = diff_run_algo(algo_config->fallback_algo, state);
163 goto return_rc;
165 case DIFF_RC_OK:
166 /* continue below */
167 break;
169 default:
170 /* some error happened */
171 goto return_rc;
174 /* Pick up any diff chunks that are still unsolved and feed to inner_algo.
175 * inner_algo will solve unsolved chunks and append to result,
176 * and subsequent solved chunks on this level are then appended to result afterwards. */
177 int i;
178 for (i = 0; i < state->temp_result.len; i++) {
179 struct diff_chunk *c = &state->temp_result.head[i];
180 if (c->solved) {
181 struct diff_chunk *final_c;
182 ARRAYLIST_ADD(final_c, state->result->chunks);
183 if (!final_c) {
184 rc = DIFF_RC_ENOMEM;
185 goto return_rc;
187 *final_c = *c;
188 continue;
191 /* c is an unsolved chunk, feed to inner_algo */
192 struct diff_state inner_state = {
193 .result = state->result,
194 .recursion_depth_left = state->recursion_depth_left - 1,
195 };
196 diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count);
197 diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count);
199 rc = diff_run_algo(algo_config->inner_algo, &inner_state);
201 if (rc != DIFF_RC_OK)
202 goto return_rc;
205 rc = DIFF_RC_OK;
206 return_rc:
207 ARRAYLIST_FREE(state->temp_result);
208 return rc;
211 struct diff_result *
212 diff_main(const struct diff_config *config,
213 const uint8_t *left_data, size_t left_len,
214 const uint8_t *right_data, size_t right_len)
216 struct diff_result *result = malloc(sizeof(struct diff_result));
217 if (!result)
218 return NULL;
220 *result = (struct diff_result){};
221 diff_data_init_root(&result->left, left_data, left_len);
222 diff_data_init_root(&result->right, right_data, right_len);
224 if (!config->atomize_func) {
225 result->rc = DIFF_RC_EINVAL;
226 return result;
229 result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right);
230 if (result->rc != DIFF_RC_OK)
231 return result;
233 struct diff_state state = {
234 .result = result,
235 .recursion_depth_left = config->max_recursion_depth ? : 1024,
236 };
237 diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len);
238 diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len);
240 result->rc = diff_run_algo(config->algo, &state);
242 return result;
245 void
246 diff_result_free(struct diff_result *result)
248 if (!result)
249 return;
250 diff_data_free(&result->left);
251 diff_data_free(&result->right);
252 ARRAYLIST_FREE(result->chunks);
253 free(result);