1 3b0f3d61 2020-01-22 neels /* Generic infrastructure to implement various diff algorithms (implementation). */
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
5 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
6 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
7 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
9 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 3b0f3d61 2020-01-22 neels #include <sys/queue.h>
20 3b0f3d61 2020-01-22 neels #include <stdint.h>
21 3b0f3d61 2020-01-22 neels #include <stdlib.h>
22 3b0f3d61 2020-01-22 neels #include <stdbool.h>
23 3b0f3d61 2020-01-22 neels #include <stdio.h>
24 3b0f3d61 2020-01-22 neels #include <string.h>
25 3b0f3d61 2020-01-22 neels #include <limits.h>
27 3b0f3d61 2020-01-22 neels #include <assert.h>
29 3b0f3d61 2020-01-22 neels #include <diff/diff_main.h>
31 3b0f3d61 2020-01-22 neels #include "debug.h"
33 3b0f3d61 2020-01-22 neels /* Even if a left or right side is empty, diff output may need to know the position in that file.
34 3b0f3d61 2020-01-22 neels * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that
35 3b0f3d61 2020-01-22 neels * position without consuming any lines. */
36 3b0f3d61 2020-01-22 neels struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
37 3b0f3d61 2020-01-22 neels struct diff_atom *left_start, unsigned int left_count,
38 3b0f3d61 2020-01-22 neels struct diff_atom *right_start, unsigned int right_count)
40 3b0f3d61 2020-01-22 neels struct diff_chunk *chunk;
41 3b0f3d61 2020-01-22 neels diff_chunk_arraylist_t *result;
43 3b0f3d61 2020-01-22 neels if (solved && !state->temp_result.len)
44 3b0f3d61 2020-01-22 neels result = &state->result->chunks;
46 3b0f3d61 2020-01-22 neels result = &state->temp_result;
48 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(chunk, *result);
49 3b0f3d61 2020-01-22 neels if (!chunk)
50 3b0f3d61 2020-01-22 neels return NULL;
51 3b0f3d61 2020-01-22 neels *chunk = (struct diff_chunk){
52 3b0f3d61 2020-01-22 neels .solved = solved,
53 3b0f3d61 2020-01-22 neels .left_start = left_start,
54 3b0f3d61 2020-01-22 neels .left_count = left_count,
55 3b0f3d61 2020-01-22 neels .right_start = right_start,
56 3b0f3d61 2020-01-22 neels .right_count = right_count,
59 3b0f3d61 2020-01-22 neels debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
60 3b0f3d61 2020-01-22 neels debug("L\n");
61 3b0f3d61 2020-01-22 neels debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
62 3b0f3d61 2020-01-22 neels debug("R\n");
63 3b0f3d61 2020-01-22 neels debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
64 3b0f3d61 2020-01-22 neels return chunk;
67 3b0f3d61 2020-01-22 neels void diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len)
69 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
70 3b0f3d61 2020-01-22 neels .data = data,
71 3b0f3d61 2020-01-22 neels .len = len,
76 3b0f3d61 2020-01-22 neels void diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
77 3b0f3d61 2020-01-22 neels struct diff_atom *from_atom, unsigned int atoms_count)
79 3b0f3d61 2020-01-22 neels struct diff_atom *last_atom = from_atom + atoms_count - 1;
80 3b0f3d61 2020-01-22 neels *d = (struct diff_data){
81 3b0f3d61 2020-01-22 neels .data = from_atom->at,
82 3b0f3d61 2020-01-22 neels .len = (last_atom->at + last_atom->len) - from_atom->at,
83 3b0f3d61 2020-01-22 neels .root = parent->root,
84 3b0f3d61 2020-01-22 neels .atoms.head = from_atom,
85 3b0f3d61 2020-01-22 neels .atoms.len = atoms_count,
88 3b0f3d61 2020-01-22 neels debug("subsection:\n");
89 3b0f3d61 2020-01-22 neels debug_dump(d);
92 3b0f3d61 2020-01-22 neels void diff_data_free(struct diff_data *diff_data)
94 3b0f3d61 2020-01-22 neels if (!diff_data)
96 3b0f3d61 2020-01-22 neels if (diff_data->atoms.allocated)
97 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(diff_data->atoms);
100 3b0f3d61 2020-01-22 neels enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state)
102 3b0f3d61 2020-01-22 neels debug("\n** %s\n", __func__);
103 3b0f3d61 2020-01-22 neels debug("left:\n");
104 3b0f3d61 2020-01-22 neels debug_dump(&state->left);
105 3b0f3d61 2020-01-22 neels debug("right:\n");
106 3b0f3d61 2020-01-22 neels debug_dump(&state->right);
107 3b0f3d61 2020-01-22 neels debug_dump_myers_graph(&state->left, &state->right, NULL);
109 3b0f3d61 2020-01-22 neels /* Add a chunk of equal lines, if any */
110 3b0f3d61 2020-01-22 neels unsigned int equal_atoms = 0;
111 3b0f3d61 2020-01-22 neels while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len
112 3b0f3d61 2020-01-22 neels && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms]))
113 3b0f3d61 2020-01-22 neels equal_atoms++;
114 3b0f3d61 2020-01-22 neels if (equal_atoms) {
115 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
116 3b0f3d61 2020-01-22 neels &state->left.atoms.head[0], equal_atoms,
117 3b0f3d61 2020-01-22 neels &state->right.atoms.head[0], equal_atoms))
118 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
121 3b0f3d61 2020-01-22 neels /* Add a "minus" chunk with all lines from the left. */
122 3b0f3d61 2020-01-22 neels if (equal_atoms < state->left.atoms.len) {
123 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
124 3b0f3d61 2020-01-22 neels &state->left.atoms.head[equal_atoms],
125 3b0f3d61 2020-01-22 neels state->left.atoms.len - equal_atoms,
127 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
130 3b0f3d61 2020-01-22 neels /* Add a "plus" chunk with all lines from the right. */
131 3b0f3d61 2020-01-22 neels if (equal_atoms < state->right.atoms.len) {
132 3b0f3d61 2020-01-22 neels if (!diff_state_add_chunk(state, true,
134 3b0f3d61 2020-01-22 neels &state->right.atoms.head[equal_atoms],
135 3b0f3d61 2020-01-22 neels state->right.atoms.len - equal_atoms))
136 3b0f3d61 2020-01-22 neels return DIFF_RC_ENOMEM;
138 3b0f3d61 2020-01-22 neels return DIFF_RC_OK;
141 3b0f3d61 2020-01-22 neels enum diff_rc diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state)
143 3b0f3d61 2020-01-22 neels enum diff_rc rc;
144 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
146 3b0f3d61 2020-01-22 neels if (!algo_config || !algo_config->impl || !state->recursion_depth_left) {
147 3b0f3d61 2020-01-22 neels debug("MAX RECURSION REACHED, just dumping diff chunks\n");
148 3b0f3d61 2020-01-22 neels return diff_algo_none(algo_config, state);
151 3b0f3d61 2020-01-22 neels ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
152 3b0f3d61 2020-01-22 neels rc = algo_config->impl(algo_config, state);
153 3b0f3d61 2020-01-22 neels switch (rc) {
154 3b0f3d61 2020-01-22 neels case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
155 3b0f3d61 2020-01-22 neels debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo);
156 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->fallback_algo, state);
157 3b0f3d61 2020-01-22 neels goto return_rc;
159 3b0f3d61 2020-01-22 neels case DIFF_RC_OK:
160 3b0f3d61 2020-01-22 neels /* continue below */
164 3b0f3d61 2020-01-22 neels /* some error happened */
165 3b0f3d61 2020-01-22 neels goto return_rc;
168 3b0f3d61 2020-01-22 neels /* Pick up any diff chunks that are still unsolved and feed to inner_algo.
169 3b0f3d61 2020-01-22 neels * inner_algo will solve unsolved chunks and append to result,
170 3b0f3d61 2020-01-22 neels * and subsequent solved chunks on this level are then appended to result afterwards. */
172 3b0f3d61 2020-01-22 neels for (i = 0; i < state->temp_result.len; i++) {
173 3b0f3d61 2020-01-22 neels struct diff_chunk *c = &state->temp_result.head[i];
174 3b0f3d61 2020-01-22 neels if (c->solved) {
175 3b0f3d61 2020-01-22 neels struct diff_chunk *final_c;
176 3b0f3d61 2020-01-22 neels ARRAYLIST_ADD(final_c, state->result->chunks);
177 3b0f3d61 2020-01-22 neels if (!final_c) {
178 3b0f3d61 2020-01-22 neels rc = DIFF_RC_ENOMEM;
179 3b0f3d61 2020-01-22 neels goto return_rc;
181 3b0f3d61 2020-01-22 neels *final_c = *c;
185 3b0f3d61 2020-01-22 neels /* c is an unsolved chunk, feed to inner_algo */
186 3b0f3d61 2020-01-22 neels struct diff_state inner_state = {
187 3b0f3d61 2020-01-22 neels .result = state->result,
188 3b0f3d61 2020-01-22 neels .recursion_depth_left = state->recursion_depth_left - 1,
190 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count);
191 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count);
193 3b0f3d61 2020-01-22 neels rc = diff_run_algo(algo_config->inner_algo, &inner_state);
195 3b0f3d61 2020-01-22 neels if (rc != DIFF_RC_OK)
196 3b0f3d61 2020-01-22 neels goto return_rc;
199 3b0f3d61 2020-01-22 neels rc = DIFF_RC_OK;
200 3b0f3d61 2020-01-22 neels return_rc:
201 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(state->temp_result);
202 3b0f3d61 2020-01-22 neels return rc;
205 3b0f3d61 2020-01-22 neels struct diff_result *diff_main(const struct diff_config *config,
206 3b0f3d61 2020-01-22 neels const uint8_t *left_data, size_t left_len,
207 3b0f3d61 2020-01-22 neels const uint8_t *right_data, size_t right_len)
209 3b0f3d61 2020-01-22 neels struct diff_result *result = malloc(sizeof(struct diff_result));
210 3b0f3d61 2020-01-22 neels if (!result)
211 3b0f3d61 2020-01-22 neels return NULL;
213 3b0f3d61 2020-01-22 neels *result = (struct diff_result){};
214 3b0f3d61 2020-01-22 neels diff_data_init_root(&result->left, left_data, left_len);
215 3b0f3d61 2020-01-22 neels diff_data_init_root(&result->right, right_data, right_len);
217 3b0f3d61 2020-01-22 neels if (!config->atomize_func) {
218 3b0f3d61 2020-01-22 neels result->rc = DIFF_RC_EINVAL;
219 3b0f3d61 2020-01-22 neels return result;
222 3b0f3d61 2020-01-22 neels result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right);
223 3b0f3d61 2020-01-22 neels if (result->rc != DIFF_RC_OK)
224 3b0f3d61 2020-01-22 neels return result;
226 3b0f3d61 2020-01-22 neels struct diff_state state = {
227 3b0f3d61 2020-01-22 neels .result = result,
228 3b0f3d61 2020-01-22 neels .recursion_depth_left = config->max_recursion_depth ? : 1024,
230 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len);
231 3b0f3d61 2020-01-22 neels diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len);
233 3b0f3d61 2020-01-22 neels result->rc = diff_run_algo(config->algo, &state);
235 3b0f3d61 2020-01-22 neels return result;
238 3b0f3d61 2020-01-22 neels void diff_result_free(struct diff_result *result)
240 3b0f3d61 2020-01-22 neels if (!result)
242 3b0f3d61 2020-01-22 neels diff_data_free(&result->left);
243 3b0f3d61 2020-01-22 neels diff_data_free(&result->right);
244 3b0f3d61 2020-01-22 neels ARRAYLIST_FREE(result->chunks);
245 3b0f3d61 2020-01-22 neels free(result);