commit 1e4671c4bdaba2b7c21bd11f407278f6063b99eb from: Martin Pieuchot date: Wed Mar 18 14:17:35 2020 UTC KNF commit - 06b928e23f06a14d0c75544ee5814fde7d801fe0 commit + 1e4671c4bdaba2b7c21bd11f407278f6063b99eb blob - 029339c0fdcdc96bc1419c0ccf0c1ccaa507c5ee blob + 6b81da5097f413165d395437388a92004d74bb2a --- debug.h +++ debug.h @@ -28,8 +28,11 @@ #define debug_dump_atom dump_atom #define debug_dump_atoms dump_atoms -static inline void dump_atom(const struct diff_data *left, const struct diff_data *right, const struct diff_atom *atom) +static inline void +dump_atom(const struct diff_data *left, const struct diff_data *right, const struct diff_atom *atom) { + const char *s; + if (!atom) { print("NULL atom\n"); return; @@ -37,10 +40,11 @@ static inline void dump_atom(const struct diff_data *l if (left) print(" %3ld", diff_atom_root_idx(left, atom)); if (right && atom->patience.pos_in_other) - print(" %3ld", diff_atom_root_idx(right, atom->patience.pos_in_other)); + print(" %3ld", diff_atom_root_idx(right, + atom->patience.pos_in_other)); - print(" %s%s '", atom->patience.unique_here ? "u" : " ", atom->patience.unique_in_both ? "c" : " "); - const char *s; + print(" %s%s '", atom->patience.unique_here ? "u" : " ", + atom->patience.unique_in_both ? "c" : " "); for (s = atom->at; s < (const char*)(atom->at + atom->len); s++) { if (*s == '\r') print("\\r"); @@ -54,7 +58,8 @@ static inline void dump_atom(const struct diff_data *l print("'\n"); } -static inline void dump_atoms(const struct diff_data *d, struct diff_atom *atom, unsigned int count) +static inline void +dump_atoms(const struct diff_data *d, struct diff_atom *atom, unsigned int count) { if (count > 42) { dump_atoms(d, atom, 20); @@ -69,16 +74,17 @@ static inline void dump_atoms(const struct diff_data * } } -static inline void dump(struct diff_data *d) +static inline void +dump(struct diff_data *d) { dump_atoms(d, d->atoms.head, d->atoms.len); } -static inline void dump_myers_graph(const struct diff_data *l, const struct diff_data *r, - int *kd) +static inline void +dump_myers_graph(const struct diff_data *l, const struct diff_data *r, int *kd) { - int x; - int y; + int x, y; + print(" "); for (x = 0; x <= l->atoms.len; x++) print("%2d", x); @@ -132,8 +138,9 @@ static inline void dump_myers_graph(const struct diff_ } } -static inline void debug_dump_myers_graph(const struct diff_data *l, const struct diff_data *r, - int *kd) +static inline void +debug_dump_myers_graph(const struct diff_data *l, const struct diff_data *r, + int *kd) { if (l->atoms.len > 99 || r->atoms.len > 99) return; blob - 8128ea1989ba2ca2f9365f3c7489680e23224221 blob + dadb98385c1d5f515757d7513358fd847108f429 --- diff.c +++ diff.c @@ -76,22 +76,26 @@ main(int argc, char *argv[]) const struct diff_algo_config myers, patience, myers_divide; -const struct diff_algo_config myers = (struct diff_algo_config){ +const struct diff_algo_config myers = { .impl = diff_algo_myers, .permitted_state_size = 1024 * 1024 * sizeof(int), .fallback_algo = &patience, }; -const struct diff_algo_config patience = (struct diff_algo_config){ +const struct diff_algo_config patience = { .impl = diff_algo_patience, - .inner_algo = &patience, // After subdivision, do Patience again. - .fallback_algo = &myers_divide, // If subdivision failed, do Myers Divide et Impera. + /* After subdivision, do Patience again. */ + .inner_algo = &patience, + /* If subdivision failed, do Myers Divide et Impera. */ + .fallback_algo = &myers_divide, }; const struct diff_algo_config myers_divide = (struct diff_algo_config){ .impl = diff_algo_myers_divide, - .inner_algo = &myers, // When division succeeded, start from the top. - // (fallback_algo = NULL implies diff_algo_none). + /* When division succeeded, start from the top. */ + .inner_algo = &myers, + /* (fallback_algo = NULL implies diff_algo_none). */ + .fallback_algo = NULL, }; const struct diff_config diff_config = { @@ -112,7 +116,8 @@ diffreg(char *file1, char *file2, int flags) str1 = mmapfile(file1, &st1); str2 = mmapfile(file2, &st2); - diff_unidiff(stdout, &diff_config, &info, str1, st1.st_size, str2, st2.st_size, 3); + diff_unidiff(stdout, &diff_config, &info, str1, st1.st_size, str2, + st2.st_size, 3); munmap(str1, st1.st_size); munmap(str2, st2.st_size); blob - 390cfdd8fbe95702abda85e6d3570c726f457856 blob + 2c03554ea5338a9813e8d4bacde3d2288cb35df5 --- diff_atomize_text.c +++ diff_atomize_text.c @@ -25,10 +25,10 @@ diff_data_atomize_text_lines(struct diff_data *d) { const uint8_t *pos = d->data; const uint8_t *end = pos + d->len; - enum diff_rc rc; - unsigned int array_size_estimate = d->len / 50; unsigned int pow2 = 1; + enum diff_rc rc; + while (array_size_estimate >>= 1) pow2++; @@ -37,13 +37,18 @@ diff_data_atomize_text_lines(struct diff_data *d) while (pos < end) { const uint8_t *line_end = pos; unsigned int hash = 0; + struct diff_atom *atom; - while (line_end < end && *line_end != '\r' && *line_end != '\n') { + while (line_end < end && + *line_end != '\r' && *line_end != '\n') { hash = hash * 23 + *line_end; line_end++; } - /* When not at the end of data, the line ending char ('\r' or '\n') must follow */ + /* + * When not at the end of data, the line ending char ('\r' + * or '\n') must follow + */ if (line_end < end) line_end++; /* If that was an '\r', also pull in any following '\n' */ @@ -51,7 +56,6 @@ diff_data_atomize_text_lines(struct diff_data *d) line_end++; /* Record the found line as diff atom */ - struct diff_atom *atom; ARRAYLIST_ADD(atom, d->atoms); if (!atom) return DIFF_RC_ENOMEM; @@ -70,9 +74,11 @@ diff_data_atomize_text_lines(struct diff_data *d) } enum diff_rc -diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right) +diff_atomize_text_by_line(void *func_data, struct diff_data *left, + struct diff_data *right) { enum diff_rc rc; + rc = diff_data_atomize_text_lines(left); if (rc != DIFF_RC_OK) return rc; blob - 7c15435737542abfcf505d0f0e0909552752c0ee blob + 8d0a4e044720b05ed22b8d9f0db8e3add0a57e63 --- diff_main.c +++ diff_main.c @@ -16,7 +16,9 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* Generic infrastructure to implement various diff algorithms (implementation). */ +/* + * Generic infrastructure to implement various diff algorithms (implementation). + */ #include #include @@ -31,13 +33,17 @@ #include "diff_main.h" #include "debug.h" -/* Even if a left or right side is empty, diff output may need to know the position in that file. - * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that - * position without consuming any lines. */ +/* + * Even if a left or right side is empty, diff output may need to know the + * position in that file. So left_start or right_start must never be NULL + * -- pass left_count or right_count as zero to indicate staying at that + * position without consuming any lines. + */ struct diff_chunk * diff_state_add_chunk(struct diff_state *state, bool solved, struct diff_atom *left_start, unsigned int left_count, - struct diff_atom *right_start, unsigned int right_count) { + struct diff_atom *right_start, unsigned int right_count) +{ struct diff_chunk *chunk; struct diff_chunk_arraylist *result; @@ -49,7 +55,7 @@ diff_state_add_chunk(struct diff_state *state, bool so ARRAYLIST_ADD(chunk, *result); if (!chunk) return NULL; - *chunk = (struct diff_chunk){ + *chunk = (struct diff_chunk) { .solved = solved, .left_start = left_start, .left_count = left_count, @@ -68,7 +74,7 @@ diff_state_add_chunk(struct diff_state *state, bool so void diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len) { - *d = (struct diff_data){ + *d = (struct diff_data) { .data = data, .len = len, .root = d, @@ -77,9 +83,11 @@ diff_data_init_root(struct diff_data *d, const uint8_t void diff_data_init_subsection(struct diff_data *d, struct diff_data *parent, - struct diff_atom *from_atom, unsigned int atoms_count) { + struct diff_atom *from_atom, unsigned int atoms_count) +{ struct diff_atom *last_atom = from_atom + atoms_count - 1; - *d = (struct diff_data){ + + *d = (struct diff_data) { .data = from_atom->at, .len = (last_atom->at + last_atom->len) - from_atom->at, .root = parent->root, @@ -101,8 +109,11 @@ diff_data_free(struct diff_data *diff_data) } enum diff_rc -diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_algo_none(const struct diff_algo_config *algo_config, + struct diff_state *state) { + unsigned int equal_atoms = 0; + debug("\n** %s\n", __func__); debug("left:\n"); debug_dump(&state->left); @@ -111,10 +122,12 @@ diff_algo_none(const struct diff_algo_config *algo_con debug_dump_myers_graph(&state->left, &state->right, NULL); /* Add a chunk of equal lines, if any */ - unsigned int equal_atoms = 0; - while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len && - diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms])) + while (equal_atoms < state->left.atoms.len && + equal_atoms < state->right.atoms.len && + diff_atom_same(&state->left.atoms.head[equal_atoms], + &state->right.atoms.head[equal_atoms])) equal_atoms++; + if (equal_atoms) { if (!diff_state_add_chunk(state, true, &state->left.atoms.head[0], equal_atoms, @@ -143,12 +156,14 @@ diff_algo_none(const struct diff_algo_config *algo_con } enum diff_rc -diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_run_algo(const struct diff_algo_config *algo_config, + struct diff_state *state) { enum diff_rc rc; + ARRAYLIST_FREE(state->temp_result); - if (!algo_config || !algo_config->impl || !state->recursion_depth_left) { + if (!algo_config || !algo_config->impl || !state->recursion_depth_left){ debug("MAX RECURSION REACHED, just dumping diff chunks\n"); return diff_algo_none(algo_config, state); } @@ -157,7 +172,8 @@ diff_run_algo(const struct diff_algo_config *algo_conf rc = algo_config->impl(algo_config, state); switch (rc) { case DIFF_RC_USE_DIFF_ALGO_FALLBACK: - debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo); + debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", + algo_config->fallback_algo); rc = diff_run_algo(algo_config->fallback_algo, state); goto return_rc; @@ -170,14 +186,21 @@ diff_run_algo(const struct diff_algo_config *algo_conf goto return_rc; } - /* Pick up any diff chunks that are still unsolved and feed to inner_algo. + /* + * Pick up any diff chunks that are still unsolved and feed to + * inner_algo. * inner_algo will solve unsolved chunks and append to result, - * and subsequent solved chunks on this level are then appended to result afterwards. */ + * and subsequent solved chunks on this level are then appended + * to result afterwards. + */ int i; for (i = 0; i < state->temp_result.len; i++) { + struct diff_state inner_state; struct diff_chunk *c = &state->temp_result.head[i]; + if (c->solved) { struct diff_chunk *final_c; + ARRAYLIST_ADD(final_c, state->result->chunks); if (!final_c) { rc = DIFF_RC_ENOMEM; @@ -188,15 +211,16 @@ diff_run_algo(const struct diff_algo_config *algo_conf } /* c is an unsolved chunk, feed to inner_algo */ - struct diff_state inner_state = { + inner_state = (struct diff_state) { .result = state->result, .recursion_depth_left = state->recursion_depth_left - 1, }; - diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count); - diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count); + diff_data_init_subsection(&inner_state.left, &state->left, + c->left_start, c->left_count); + diff_data_init_subsection(&inner_state.right, &state->right, + c->right_start, c->right_count); rc = diff_run_algo(algo_config->inner_algo, &inner_state); - if (rc != DIFF_RC_OK) goto return_rc; } @@ -212,7 +236,10 @@ diff_main(const struct diff_config *config, const uint8_t *left_data, size_t left_len, const uint8_t *right_data, size_t right_len) { - struct diff_result *result = malloc(sizeof(struct diff_result)); + struct diff_result *result; + struct diff_state state; + + result = malloc(sizeof(struct diff_result)); if (!result) return NULL; @@ -225,16 +252,19 @@ diff_main(const struct diff_config *config, return result; } - result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right); + result->rc = config->atomize_func(config->atomize_func_data, + &result->left, &result->right); if (result->rc != DIFF_RC_OK) return result; - struct diff_state state = { + state = (struct diff_state) { .result = result, .recursion_depth_left = config->max_recursion_depth ? : 1024, }; - diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len); - diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len); + diff_data_init_subsection(&state.left, &result->left, + result->left.atoms.head, result->left.atoms.len); + diff_data_init_subsection(&state.right, &result->right, + result->right.atoms.head, result->right.atoms.len); result->rc = diff_run_algo(config->algo, &state); blob - f367d0243bf9c2dfbf553ef3a1c63a1e3bb114a2 blob + d018670201b56be5b1902a1265459412acd1e4ba --- diff_output.c +++ diff_output.c @@ -19,19 +19,21 @@ #include "diff_main.h" #include "debug.h" - /* * Common parts for printing diff output */ void -diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, unsigned int count) +diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, + unsigned int count) { struct diff_atom *atom; + foreach_diff_atom(atom, start_atom, count) { - fprintf(dest, "%s", prefix); - int i; unsigned int len = atom->len; + int i; + + fprintf(dest, "%s", prefix); if (len && atom->at[len - 1] == '\n') { len--; if (len && atom->at[len - 1] == '\r') @@ -65,7 +67,10 @@ diff_output_info(FILE *dest, const struct diff_input_i */ enum diff_rc diff_output_plain(FILE *dest, const struct diff_input_info *info, - const struct diff_result *result) { + const struct diff_result *result) +{ + int i; + if (!result) return DIFF_RC_EINVAL; if (result->rc != DIFF_RC_OK) @@ -73,15 +78,18 @@ diff_output_plain(FILE *dest, const struct diff_input_ diff_output_info(dest, info); - int i; for (i = 0; i < result->chunks.len; i++) { struct diff_chunk *c = &result->chunks.head[i]; + if (c->left_count && c->right_count) - diff_output_lines(dest, c->solved ? " " : "?", c->left_start, c->left_count); + diff_output_lines(dest, c->solved ? " " : "?", + c->left_start, c->left_count); else if (c->left_count && !c->right_count) - diff_output_lines(dest, c->solved ? "-" : "?", c->left_start, c->left_count); + diff_output_lines(dest, c->solved ? "-" : "?", + c->left_start, c->left_count); else if (c->right_count && !c->left_count) - diff_output_lines(dest, c->solved ? "+" : "?", c->right_start, c->right_count); + diff_output_lines(dest, c->solved ? "+" : "?", + c->right_start, c->right_count); } return DIFF_RC_OK; } @@ -91,10 +99,12 @@ diff_plain(FILE *dest, const struct diff_config *diff_ const struct diff_input_info *info, const char *left, int left_len, const char *right, int right_len) { + struct diff_result *result; enum diff_rc rc; + left_len = left_len < 0 ? strlen(left) : left_len; right_len = right_len < 0 ? strlen(right) : right_len; - struct diff_result *result = diff_main(diff_config, left, left_len, right, right_len); + result = diff_main(diff_config, left, left_len, right, right_len); rc = diff_output_plain(dest, info, result); diff_result_free(result); return rc; @@ -139,25 +149,29 @@ chunk_context_empty(const struct chunk_context *cc) } static void -chunk_context_get(struct chunk_context *cc, const struct diff_result *r, int chunk_idx, - int context_lines) +chunk_context_get(struct chunk_context *cc, const struct diff_result *r, + int chunk_idx, int context_lines) { const struct diff_chunk *c = &r->chunks.head[chunk_idx]; - int left_start = diff_atom_root_idx(&r->left, c->left_start); - int right_start = diff_atom_root_idx(&r->right, c->right_start); + int left_start, right_start; - *cc = (struct chunk_context){ + left_start = diff_atom_root_idx(&r->left, c->left_start); + right_start = diff_atom_root_idx(&r->right, c->right_start); + + *cc = (struct chunk_context) { .chunk = { .start = chunk_idx, .end = chunk_idx + 1, }, .left = { .start = MAX(0, left_start - context_lines), - .end = MIN(r->left.atoms.len, left_start + c->left_count + context_lines), + .end = MIN(r->left.atoms.len, + left_start + c->left_count + context_lines), }, .right = { .start = MAX(0, right_start - context_lines), - .end = MIN(r->right.atoms.len, right_start + c->right_count + context_lines), + .end = MIN(r->right.atoms.len, + right_start + c->right_count + context_lines), }, }; } @@ -179,8 +193,13 @@ chunk_contexts_merge(struct chunk_context *cc, const s } static void -diff_output_unidiff_chunk(FILE *dest, bool *info_printed, const struct diff_input_info *info, - const struct diff_result *result, const struct chunk_context *cc) { +diff_output_unidiff_chunk(FILE *dest, bool *info_printed, + const struct diff_input_info *info, const struct diff_result *result, + const struct chunk_context *cc) +{ + const struct diff_chunk *first_chunk, *last_chunk; + int chunk_start_line, chunk_end_line, c_idx; + if (range_empty(&cc->left) && range_empty(&cc->right)) return; @@ -193,83 +212,127 @@ diff_output_unidiff_chunk(FILE *dest, bool *info_print cc->left.start + 1, cc->left.end - cc->left.start, cc->right.start + 1, cc->right.end - cc->right.start); - /* Got the absolute line numbers where to start printing, and the index of the interesting (non-context) chunk. - * To print context lines above the interesting chunk, nipping on the previous chunk index may be necessary. - * It is guaranteed to be only context lines where left == right, so it suffices to look on the left. */ - const struct diff_chunk *first_chunk = &result->chunks.head[cc->chunk.start]; - int chunk_start_line = diff_atom_root_idx(&result->left, first_chunk->left_start); + /* + * Got the absolute line numbers where to start printing, and the + * index of the interesting (non-context) chunk. + * To print context lines above the interesting chunk, nipping on + * the previous chunk index may be necessary. + * It is guaranteed to be only context lines where left == right, + * so it suffices to look on the left. + */ + first_chunk = &result->chunks.head[cc->chunk.start]; + chunk_start_line = diff_atom_root_idx(&result->left, + first_chunk->left_start); + if (cc->left.start < chunk_start_line) - diff_output_lines(dest, " ", &result->left.atoms.head[cc->left.start], + diff_output_lines(dest, " ", + &result->left.atoms.head[cc->left.start], chunk_start_line - cc->left.start); /* Now write out all the joined chunks and contexts between them */ - int c_idx; for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) { const struct diff_chunk *c = &result->chunks.head[c_idx]; if (c->left_count && c->right_count) - diff_output_lines(dest, c->solved ? " " : "?", c->left_start, c->left_count); + diff_output_lines(dest, c->solved ? " " : "?", + c->left_start, c->left_count); else if (c->left_count && !c->right_count) - diff_output_lines(dest, c->solved ? "-" : "?", c->left_start, c->left_count); + diff_output_lines(dest, c->solved ? "-" : "?", + c->left_start, c->left_count); else if (c->right_count && !c->left_count) - diff_output_lines(dest, c->solved ? "+" : "?", c->right_start, c->right_count); + diff_output_lines(dest, c->solved ? "+" : "?", + c->right_start, c->right_count); } /* Trailing context? */ - const struct diff_chunk *last_chunk = &result->chunks.head[cc->chunk.end - 1]; - int chunk_end_line = diff_atom_root_idx(&result->left, last_chunk->left_start + last_chunk->left_count); + last_chunk = &result->chunks.head[cc->chunk.end - 1]; + chunk_end_line = diff_atom_root_idx(&result->left, + last_chunk->left_start + last_chunk->left_count); if (cc->left.end > chunk_end_line) - diff_output_lines(dest, " ", &result->left.atoms.head[chunk_end_line], + diff_output_lines(dest, " ", + &result->left.atoms.head[chunk_end_line], cc->left.end - chunk_end_line); } enum diff_rc diff_output_unidiff(FILE *dest, const struct diff_input_info *info, - const struct diff_result *result, unsigned int context_lines) { + const struct diff_result *result, unsigned int context_lines) +{ + struct chunk_context cc = {}; + bool info_printed = false; + int i; + if (!result) return DIFF_RC_EINVAL; if (result->rc != DIFF_RC_OK) return result->rc; - struct chunk_context cc = {}; - bool info_printed = false; - - int i; for (i = 0; i < result->chunks.len; i++) { struct diff_chunk *c = &result->chunks.head[i]; enum chunk_type t = chunk_type(c); if (t == CHUNK_MINUS || t == CHUNK_PLUS) { if (chunk_context_empty(&cc)) { - /* These are the first lines being printed. - * Note down the start point, any number of subsequent chunks may be joined up to this - * unidiff chunk by context lines or by being directly adjacent. */ - chunk_context_get(&cc, result, i, context_lines); - debug("new chunk to be printed: chunk %d-%d left %d-%d right %d-%d\n", + /* + * These are the first lines being printed. + * Note down the start point, any number of + * subsequent chunks may be joined up to this + * unidiff chunk by context lines or by being + * directly adjacent. + */ + chunk_context_get(&cc, result, i, + context_lines); + debug("new chunk to be printed:" + " chunk %d-%d left %d-%d right %d-%d\n", cc.chunk.start, cc.chunk.end, - cc.left.start, cc.left.end, cc.right.start, cc.right.end); + cc.left.start, cc.left.end, cc.right.start, + cc.right.end); } else { - /* There already is a previous chunk noted down for being printed. - * Does it join up with this one? */ struct chunk_context next; - chunk_context_get(&next, result, i, context_lines); - debug("new chunk to be printed: chunk %d-%d left %d-%d right %d-%d\n", + + /* + * There already is a previous chunk noted down + * for being printed. + * Does it join up with this one? + */ + chunk_context_get(&next, result, i, + context_lines); + debug("new chunk to be printed:" + " chunk %d-%d left %d-%d right %d-%d\n", next.chunk.start, next.chunk.end, - next.left.start, next.left.end, next.right.start, next.right.end); + next.left.start, next.left.end, + next.right.start, next.right.end); if (chunk_contexts_touch(&cc, &next)) { - /* This next context touches or overlaps the previous one, join. */ + /* + * This next context touches or + * overlaps the previous one, join. + */ chunk_contexts_merge(&cc, &next); - debug("new chunk to be printed touches previous chunk, now: left %d-%d right %d-%d\n", - cc.left.start, cc.left.end, cc.right.start, cc.right.end); + debug("new chunk to be printed touches" + " previous chunk, now: left %d-%d" + " right %d-%d\n", + cc.left.start, cc.left.end, + cc.right.start, cc.right.end); } else { - /* No touching, so the previous context is complete with a gap between it and - * this next one. Print the previous one and start fresh here. */ - debug("new chunk to be printed does not touch previous chunk; print left %d-%d right %d-%d\n", - cc.left.start, cc.left.end, cc.right.start, cc.right.end); - diff_output_unidiff_chunk(dest, &info_printed, info, result, &cc); + /* + * No touching, so the previous context + * is complete with a gap between it + * and this next one. + * Print the previous one and start + * fresh here. + */ + debug("new chunk to be printed does not" + "touch previous chunk;" + " print left %d-%d right %d-%d\n", + cc.left.start, cc.left.end, + cc.right.start, cc.right.end); + diff_output_unidiff_chunk(dest, + &info_printed, info, result, &cc); cc = next; - debug("new unprinted chunk is left %d-%d right %d-%d\n", - cc.left.start, cc.left.end, cc.right.start, cc.right.end); + debug("new unprinted chunk is left" + " %d-%d right %d-%d\n", + cc.left.start, cc.left.end, + cc.right.start, cc.right.end); } } } @@ -277,7 +340,8 @@ diff_output_unidiff(FILE *dest, const struct diff_inpu } if (!chunk_context_empty(&cc)) - diff_output_unidiff_chunk(dest, &info_printed, info, result, &cc); + diff_output_unidiff_chunk(dest, &info_printed, info, result, + &cc); return DIFF_RC_OK; } @@ -287,10 +351,12 @@ diff_unidiff(FILE *dest, const struct diff_config *dif const char *left, int left_len, const char *right, int right_len, unsigned int context_lines) { + struct diff_result *result; enum diff_rc rc; + left_len = left_len < 0 ? strlen(left) : left_len; right_len = right_len < 0 ? strlen(right) : right_len; - struct diff_result *result = diff_main(diff_config, left, left_len, right, right_len); + result = diff_main(diff_config, left, left_len, right, right_len); rc = diff_output_unidiff(dest, info, result, context_lines); diff_result_free(result); return rc; blob - f0221d93269d0ae66aec497b9729efe8687aac83 blob + 6a391da4b5ba6caee417e3e850adfbe39e0220dc --- diff_patience.c +++ diff_patience.c @@ -30,16 +30,15 @@ static void diff_atoms_mark_unique(struct diff_data *d, unsigned int *unique_count) { - struct diff_atom *i; + struct diff_atom *i, *j; unsigned int count = 0; + diff_data_foreach_atom(i, d) { i->patience.unique_here = true; i->patience.unique_in_both = true; count++; } diff_data_foreach_atom(i, d) { - struct diff_atom *j; - if (!i->patience.unique_here) continue; @@ -60,26 +59,33 @@ diff_atoms_mark_unique(struct diff_data *d, unsigned i *unique_count = count; } -/* Mark those lines as atom->patience.unique_in_both = true that appear exactly once in each side. */ +/* + * Mark those lines as atom->patience.unique_in_both = true that appear + * exactly once in each side. + */ static void diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_data *right, unsigned int *unique_in_both_count) { - /* Derive the final unique_in_both count without needing an explicit iteration. So this is just some - * optimiziation to save one iteration in the end. */ unsigned int unique_in_both; + struct diff_atom *i, *j; + int found_in_b; + bool found_in_a; + /* + * Derive the final unique_in_both count without needing an + * explicit iteration. So this is just some optimiziation to + * save one iteration in the end. + */ diff_atoms_mark_unique(left, &unique_in_both); diff_atoms_mark_unique(right, NULL); debug("unique_in_both %u\n", unique_in_both); - struct diff_atom *i; diff_data_foreach_atom(i, left) { if (!i->patience.unique_here) continue; - struct diff_atom *j; - int found_in_b = 0; + found_in_b = 0; diff_data_foreach_atom(j, right) { if (!diff_atom_same(i, j)) continue; @@ -96,18 +102,21 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, if (found_in_b == 0 || found_in_b > 1) { i->patience.unique_in_both = false; unique_in_both--; - debug("unique_in_both %u (%d) ", unique_in_both, found_in_b); + debug("unique_in_both %u (%d) ", unique_in_both, + found_in_b); debug_dump_atom(left, NULL, i); } } - /* Still need to unmark right[*]->patience.unique_in_both for atoms that don't exist in left */ + /* + * Still need to unmark right[*]->patience.unique_in_both for + * atoms that don't exist in left + */ diff_data_foreach_atom(i, right) { if (!i->patience.unique_here || !i->patience.unique_in_both) continue; - struct diff_atom *j; - bool found_in_a = false; + found_in_a = false; diff_data_foreach_atom(j, left) { if (!j->patience.unique_in_both) continue; @@ -126,51 +135,59 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, } static void -diff_atoms_swallow_identical_neighbors(struct diff_data *left, struct diff_data *right, - unsigned int *unique_in_both_count) +diff_atoms_swallow_identical_neighbors(struct diff_data *left, + struct diff_data *right, unsigned int *unique_in_both_count) { - debug("trivially combine identical lines arount unique_in_both lines\n"); - - unsigned int l_idx; + struct range identical_l; + struct range identical_r; + unsigned int l_idx, r_idx; unsigned int next_l_idx; unsigned int l_min = 0; unsigned int r_min = 0; + + debug("trivially combine identical lines around unique_in_both lines\n"); + for (l_idx = 0; l_idx < left->atoms.len; l_idx = next_l_idx) { - next_l_idx = l_idx + 1; struct diff_atom *l = &left->atoms.head[l_idx]; + next_l_idx = l_idx + 1; if (!l->patience.unique_in_both) continue; debug("check identical lines around "); debug_dump_atom(left, right, l); - unsigned int r_idx = diff_atom_idx(right, l->patience.pos_in_other); + r_idx = diff_atom_idx(right, l->patience.pos_in_other); - struct range identical_l; - struct range identical_r; /* Swallow upwards. - * Each common-unique line swallows identical lines upwards and downwards. - * All common-unique lines that were part of the identical lines following below were already swallowed - * in the previous iteration, so we will never hit another common-unique line above. */ + * + * Each common-unique line swallows identical lines upwards + * and downwards. + * All common-unique lines that were part of the identical + * lines following below were already swallowed + * in the previous iteration, so we will never hit another + * common-unique line above. + */ for (identical_l.start = l_idx, identical_r.start = r_idx; - identical_l.start > l_min && - identical_r.start > r_min && + (identical_l.start > l_min && identical_r.start > r_min && diff_atom_same(&left->atoms.head[identical_l.start - 1], - &right->atoms.head[identical_r.start - 1]); - identical_l.start--, identical_r.start--); + &right->atoms.head[identical_r.start - 1])); + identical_l.start--, identical_r.start--) + ; /* Swallow downwards */ for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1; - identical_l.end < left->atoms.len && + (identical_l.end < left->atoms.len && identical_r.end < right->atoms.len && diff_atom_same(&left->atoms.head[identical_l.end], - &right->atoms.head[identical_r.end]); - identical_l.end++, identical_r.end++, - next_l_idx++) { + &right->atoms.head[identical_r.end])); + identical_l.end++, identical_r.end++, next_l_idx++) { if (left->atoms.head[identical_l.end].patience.unique_in_both) { - /* Part of a chunk of identical lines, remove from listing of unique_in_both lines */ + /* + * Part of a chunk of identical lines, remove + * from listing of unique_in_both lines + */ left->atoms.head[identical_l.end].patience.unique_in_both = false; right->atoms.head[identical_r.end].patience.unique_in_both = false; (*unique_in_both_count)--; @@ -184,7 +201,8 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat r_min = identical_r.end; if (!range_empty(&l->patience.identical_lines)) { - debug("common-unique line at l=%u r=%u swallowed identical lines l=%u-%u r=%u-%u\n", + debug("common-unique line at l=%u r=%u" + " swallowed identical lines l=%u-%u r=%u-%u\n", l_idx, r_idx, identical_l.start, identical_l.end, identical_r.start, identical_r.end); @@ -193,23 +211,30 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat } } -/* Among the lines that appear exactly once in each side, find the longest streak that appear in both files in the same - * order (with other stuff allowed to interleave). Use patience sort for that, as in the Patience Diff algorithm. - * See https://bramcohen.livejournal.com/73318.html and, for a much more detailed explanation, - * https://blog.jcoglan.com/2017/09/19/the-patience-diff-algorithm/ */ +/* + * Among the lines that appear exactly once in each side, find the longest + * streak that appear in both files in the same order (with other stuff + * allowed to interleave). Use patience sort for that, as in the Patience + * Diff algorithm. + * See https://bramcohen.livejournal.com/73318.html and, for a much more + * detailed explanation, + * https://blog.jcoglan.com/2017/09/19/the-patience-diff-algorithm/ + */ enum diff_rc -diff_algo_patience(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_algo_patience(const struct diff_algo_config *algo_config, + struct diff_state *state) { - enum diff_rc rc = DIFF_RC_ENOMEM; - struct diff_data *left = &state->left; struct diff_data *right = &state->right; - unsigned int unique_in_both_count; + enum diff_rc rc = DIFF_RC_ENOMEM; debug("\n** %s\n", __func__); - /* Find those lines that appear exactly once in 'left' and exactly once in 'right'. */ + /* + * Find those lines that appear exactly once in 'left' and exactly + * once in 'right'. + */ diff_atoms_mark_unique_in_both(left, right, &unique_in_both_count); debug("unique_in_both_count %u\n", unique_in_both_count); @@ -219,36 +244,56 @@ diff_algo_patience(const struct diff_algo_config *algo debug_dump(right); if (!unique_in_both_count) { - /* Cannot apply Patience, tell the caller to use fallback_algo instead. */ + /* + * Cannot apply Patience, tell the caller to use fallback_algo + * instead. + */ return DIFF_RC_USE_DIFF_ALGO_FALLBACK; } - diff_atoms_swallow_identical_neighbors(left, right, &unique_in_both_count); + diff_atoms_swallow_identical_neighbors(left, right, + &unique_in_both_count); debug("After swallowing identical neighbors: unique_in_both = %u\n", unique_in_both_count); - /* An array of Longest Common Sequence is the result of the below subscope: */ + /* + * An array of Longest Common Sequence is the result of the below + * subscope: + */ unsigned int lcs_count = 0; struct diff_atom **lcs = NULL; struct diff_atom *lcs_tail = NULL; + /* + * This subscope marks the lifetime of the atom_pointers + * allocation + */ { - /* This subscope marks the lifetime of the atom_pointers allocation */ + struct diff_atom **atom_pointers, **uniques, **patience_stacks; + struct diff_atom **uniques_end; + struct diff_atom *atom; + unsigned int i, patience_stacks_count; + unsigned int target_stack; /* One chunk of storage for atom pointers */ - struct diff_atom **atom_pointers = recallocarray(NULL, 0, unique_in_both_count * 2, sizeof(struct diff_atom*)); + atom_pointers = recallocarray(NULL, 0, unique_in_both_count * 2, + sizeof(struct diff_atom*)); - /* Half for the list of atoms that still need to be put on stacks */ - struct diff_atom **uniques = atom_pointers; + /* + * Half for the list of atoms that still need to be put on + * stacks + */ + uniques = atom_pointers; - /* Half for the patience sort state's "card stacks" -- we remember only each stack's topmost "card" */ - struct diff_atom **patience_stacks = atom_pointers + unique_in_both_count; - unsigned int patience_stacks_count = 0; + /* + * Half for the patience sort state's "card stacks" -- + * we remember only each stack's topmost "card" + */ + patience_stacks = atom_pointers + unique_in_both_count; + patience_stacks_count = 0; /* Take all common, unique items from 'left' ... */ - - struct diff_atom *atom; - struct diff_atom **uniques_end = uniques; + uniques_end = uniques; diff_data_foreach_atom(atom, left) { if (!atom->patience.unique_in_both) continue; @@ -256,24 +301,33 @@ diff_algo_patience(const struct diff_algo_config *algo uniques_end++; } - /* ...and sort them to the order found in 'right'. - * The idea is to find the leftmost stack that has a higher line number and add it to the stack's top. - * If there is no such stack, open a new one on the right. The line number is derived from the atom*, - * which are array items and hence reflect the relative position in the source file. So we got the - * common-uniques from 'left' and sort them according to atom->patience.pos_in_other. */ - unsigned int i; + /* + * ...and sort them to the order found in 'right'. + * + * The idea is to find the leftmost stack that has a higher + * line number and add it to the stack's top. + * If there is no such stack, open a new one on the right. + * The line number is derived from the atom*, + * which are array items and hence reflect the relative + * position in the source file. So we got the common-uniques + * from 'left' and sort them according to + * `atom->patience.pos_in_other'. + */ for (i = 0; i < unique_in_both_count; i++) { atom = uniques[i]; - unsigned int target_stack; - if (!patience_stacks_count) target_stack = 0; else { - /* binary search to find the stack to put this atom "card" on. */ unsigned int lo = 0; unsigned int hi = patience_stacks_count; + + /* + * binary search to find the stack to put + * this atom "card" on. + */ while (lo < hi) { unsigned int mid = (lo + hi) >> 1; + if (patience_stacks[mid]->patience.pos_in_other < atom->patience.pos_in_other) lo = mid + 1; else @@ -288,29 +342,41 @@ diff_algo_patience(const struct diff_algo_config *algo if (target_stack == patience_stacks_count) patience_stacks_count++; - /* Record a back reference to the next stack on the left, which will form the final longest sequence - * later. */ - atom->patience.prev_stack = target_stack ? patience_stacks[target_stack - 1] : NULL; - + /* + * Record a back reference to the next stack on the + * left, which will form the final longest sequence + * later. + */ + atom->patience.prev_stack = target_stack ? + patience_stacks[target_stack - 1] : NULL; } - /* backtrace through prev_stack references to form the final longest common sequence */ + /* + * backtrace through prev_stack references to form the + * final longest common sequence + */ lcs_tail = patience_stacks[patience_stacks_count - 1]; lcs_count = patience_stacks_count; - /* uniques and patience_stacks are no longer needed. Backpointers are in atom->patience.prev_stack */ + /* + * uniques and patience_stacks are no longer needed. + * Backpointers are in atom->patience.prev_stack + */ free(atom_pointers); } lcs = recallocarray(NULL, 0, lcs_count, sizeof(struct diff_atom*)); + struct diff_atom **lcs_backtrace_pos = &lcs[lcs_count - 1]; struct diff_atom *atom; - for (atom = lcs_tail; atom; atom = atom->patience.prev_stack, lcs_backtrace_pos--) { + unsigned int i; + + for (atom = lcs_tail; atom; + atom = atom->patience.prev_stack, lcs_backtrace_pos--) { assert(lcs_backtrace_pos >= lcs); *lcs_backtrace_pos = atom; } - unsigned int i; if (DEBUG) { debug("\npatience LCS:\n"); for (i = 0; i < lcs_count; i++) {