Blob


1 /* Generic infrastructure to implement various diff algorithms (implementation). */
2 /*
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
19 #include <sys/queue.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <stdbool.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <limits.h>
27 #include <assert.h>
29 #include <diff/diff_main.h>
31 #include "debug.h"
33 /* Even if a left or right side is empty, diff output may need to know the position in that file.
34 * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that
35 * position without consuming any lines. */
36 struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
37 struct diff_atom *left_start, unsigned int left_count,
38 struct diff_atom *right_start, unsigned int right_count)
39 {
40 struct diff_chunk *chunk;
41 diff_chunk_arraylist_t *result;
43 if (solved && !state->temp_result.len)
44 result = &state->result->chunks;
45 else
46 result = &state->temp_result;
48 ARRAYLIST_ADD(chunk, *result);
49 if (!chunk)
50 return NULL;
51 *chunk = (struct diff_chunk){
52 .solved = solved,
53 .left_start = left_start,
54 .left_count = left_count,
55 .right_start = right_start,
56 .right_count = right_count,
57 };
59 debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
60 debug("L\n");
61 debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
62 debug("R\n");
63 debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
64 return chunk;
65 }
67 void diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len)
68 {
69 *d = (struct diff_data){
70 .data = data,
71 .len = len,
72 .root = d,
73 };
74 }
76 void diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
77 struct diff_atom *from_atom, unsigned int atoms_count)
78 {
79 struct diff_atom *last_atom = from_atom + atoms_count - 1;
80 *d = (struct diff_data){
81 .data = from_atom->at,
82 .len = (last_atom->at + last_atom->len) - from_atom->at,
83 .root = parent->root,
84 .atoms.head = from_atom,
85 .atoms.len = atoms_count,
86 };
88 debug("subsection:\n");
89 debug_dump(d);
90 }
92 void diff_data_free(struct diff_data *diff_data)
93 {
94 if (!diff_data)
95 return;
96 if (diff_data->atoms.allocated)
97 ARRAYLIST_FREE(diff_data->atoms);
98 }
100 enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state)
102 debug("\n** %s\n", __func__);
103 debug("left:\n");
104 debug_dump(&state->left);
105 debug("right:\n");
106 debug_dump(&state->right);
107 debug_dump_myers_graph(&state->left, &state->right, NULL);
109 /* Add a chunk of equal lines, if any */
110 unsigned int equal_atoms = 0;
111 while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len
112 && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms]))
113 equal_atoms++;
114 if (equal_atoms) {
115 if (!diff_state_add_chunk(state, true,
116 &state->left.atoms.head[0], equal_atoms,
117 &state->right.atoms.head[0], equal_atoms))
118 return DIFF_RC_ENOMEM;
121 /* Add a "minus" chunk with all lines from the left. */
122 if (equal_atoms < state->left.atoms.len) {
123 if (!diff_state_add_chunk(state, true,
124 &state->left.atoms.head[equal_atoms],
125 state->left.atoms.len - equal_atoms,
126 NULL, 0))
127 return DIFF_RC_ENOMEM;
130 /* Add a "plus" chunk with all lines from the right. */
131 if (equal_atoms < state->right.atoms.len) {
132 if (!diff_state_add_chunk(state, true,
133 NULL, 0,
134 &state->right.atoms.head[equal_atoms],
135 state->right.atoms.len - equal_atoms))
136 return DIFF_RC_ENOMEM;
138 return DIFF_RC_OK;
141 enum diff_rc diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state)
143 enum diff_rc rc;
144 ARRAYLIST_FREE(state->temp_result);
146 if (!algo_config || !algo_config->impl || !state->recursion_depth_left) {
147 debug("MAX RECURSION REACHED, just dumping diff chunks\n");
148 return diff_algo_none(algo_config, state);
151 ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
152 rc = algo_config->impl(algo_config, state);
153 switch (rc) {
154 case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
155 debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo);
156 rc = diff_run_algo(algo_config->fallback_algo, state);
157 goto return_rc;
159 case DIFF_RC_OK:
160 /* continue below */
161 break;
163 default:
164 /* some error happened */
165 goto return_rc;
168 /* Pick up any diff chunks that are still unsolved and feed to inner_algo.
169 * inner_algo will solve unsolved chunks and append to result,
170 * and subsequent solved chunks on this level are then appended to result afterwards. */
171 int i;
172 for (i = 0; i < state->temp_result.len; i++) {
173 struct diff_chunk *c = &state->temp_result.head[i];
174 if (c->solved) {
175 struct diff_chunk *final_c;
176 ARRAYLIST_ADD(final_c, state->result->chunks);
177 if (!final_c) {
178 rc = DIFF_RC_ENOMEM;
179 goto return_rc;
181 *final_c = *c;
182 continue;
185 /* c is an unsolved chunk, feed to inner_algo */
186 struct diff_state inner_state = {
187 .result = state->result,
188 .recursion_depth_left = state->recursion_depth_left - 1,
189 };
190 diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count);
191 diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count);
193 rc = diff_run_algo(algo_config->inner_algo, &inner_state);
195 if (rc != DIFF_RC_OK)
196 goto return_rc;
199 rc = DIFF_RC_OK;
200 return_rc:
201 ARRAYLIST_FREE(state->temp_result);
202 return rc;
205 struct diff_result *diff_main(const struct diff_config *config,
206 const uint8_t *left_data, size_t left_len,
207 const uint8_t *right_data, size_t right_len)
209 struct diff_result *result = malloc(sizeof(struct diff_result));
210 if (!result)
211 return NULL;
213 *result = (struct diff_result){};
214 diff_data_init_root(&result->left, left_data, left_len);
215 diff_data_init_root(&result->right, right_data, right_len);
217 if (!config->atomize_func) {
218 result->rc = DIFF_RC_EINVAL;
219 return result;
222 result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right);
223 if (result->rc != DIFF_RC_OK)
224 return result;
226 struct diff_state state = {
227 .result = result,
228 .recursion_depth_left = config->max_recursion_depth ? : 1024,
229 };
230 diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len);
231 diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len);
233 result->rc = diff_run_algo(config->algo, &state);
235 return result;
238 void diff_result_free(struct diff_result *result)
240 if (!result)
241 return;
242 diff_data_free(&result->left);
243 diff_data_free(&result->right);
244 ARRAYLIST_FREE(result->chunks);
245 free(result);