commit 0d27172a828e5ff3c6457cbf0d36a88c9bd8e370 from: Neels Hofmeyr date: Wed May 06 22:04:06 2020 UTC KNF: use max 80 columns commit - 61a7b57805472a03ad39d7bf4ef6d705eb0ccac2 commit + 0d27172a828e5ff3c6457cbf0d36a88c9bd8e370 blob - 9946f9378ce360c8824bc29c354cf4d1f921648e blob + 8105d6365f0c62b2042433e8267d0359a019823c --- diff/diff.c +++ diff/diff.c @@ -80,7 +80,10 @@ main(int argc, char *argv[]) return diffreg(argv[0], argv[1], 0); } -const struct diff_algo_config myers_then_patience, myers_then_myers_divide, patience, myers_divide; +const struct diff_algo_config myers_then_patience; +const struct diff_algo_config myers_then_myers_divide; +const struct diff_algo_config patience; +const struct diff_algo_config myers_divide; const struct diff_algo_config myers_then_patience = (struct diff_algo_config){ .impl = diff_algo_myers, @@ -88,7 +91,8 @@ const struct diff_algo_config myers_then_patience = (s .fallback_algo = &patience, }; -const struct diff_algo_config myers_then_myers_divide = (struct diff_algo_config){ +const struct diff_algo_config myers_then_myers_divide = + (struct diff_algo_config){ .impl = diff_algo_myers, .permitted_state_size = 1024 * 1024 * sizeof(int), .fallback_algo = &myers_divide, @@ -96,14 +100,17 @@ const struct diff_algo_config myers_then_myers_divide const struct diff_algo_config patience = (struct diff_algo_config){ .impl = diff_algo_patience, - .inner_algo = &patience, // After subdivision, do Patience again. - .fallback_algo = &myers_then_myers_divide, // If subdivision failed, do Myers Divide et Impera. + /* After subdivision, do Patience again: */ + .inner_algo = &patience, + /* If subdivision failed, do Myers Divide et Impera: */ + .fallback_algo = &myers_then_myers_divide, }; const struct diff_algo_config myers_divide = (struct diff_algo_config){ .impl = diff_algo_myers_divide, - .inner_algo = &myers_then_myers_divide, // When division succeeded, start from the top. - // (fallback_algo = NULL implies diff_algo_none). + /* When division succeeded, start from the top: */ + .inner_algo = &myers_then_myers_divide, + /* (fallback_algo = NULL implies diff_algo_none). */ }; const struct diff_config diff_config = { @@ -127,7 +134,9 @@ diffreg(char *file1, char *file2, int flags) }; struct diff_result *result; enum diff_rc rc; - const struct diff_config *cfg = do_patience ? &diff_config_patience : &diff_config; + const struct diff_config *cfg; + + cfg = do_patience ? &diff_config_patience : &diff_config; str1 = mmapfile(file1, &st1); str2 = mmapfile(file2, &st2); blob - 395468a8c29ce7c6d5ca5283a1a1aa247acb3d87 blob + 4919ba0a7b679293b70df000738153eefb9d6b91 --- include/diff/arraylist.h +++ include/diff/arraylist.h @@ -24,7 +24,8 @@ static inline void *reallocarray(void *buf, size_t n, { return realloc(buf, n * member_size); } -static inline void *recallocarray(void *buf, size_t oldn, size_t n, size_t member_size) +static inline void *recallocarray(void *buf, size_t oldn, size_t n, + size_t member_size) { buf = reallocarray(buf, n, member_size); bzero(((char*)buf) + (oldn * member_size), (n - oldn) * member_size); @@ -40,10 +41,13 @@ static inline void *recallocarray(void *buf, size_t ol * typedef ARRAYLIST(any_type_t) any_type_list_t; * any_type_list_t list; * - * ARRAYLIST_INIT(list, 128); // < number of (at first unused) members to add on each realloc + * // pass the number of (at first unused) members to add on each realloc: + * ARRAYLIST_INIT(list, 128); * any_type_t *x; * while (bar) { - * ARRAYLIST_ADD(x, list); // < enlarges the allocated array as needed; list.head may change due to realloc + * // This enlarges the allocated array as needed; + * // list.head may change due to realloc: + * ARRAYLIST_ADD(x, list); * if (!x) * abort(); * *x = random_foo_value; @@ -72,12 +76,17 @@ static inline void *recallocarray(void *buf, size_t ol NEW_ITEM_P = NULL; \ break; \ } \ - if ((ARRAY_LIST).head == NULL || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \ - (ARRAY_LIST).allocated += (ARRAY_LIST).alloc_blocksize ? : 8; \ - (ARRAY_LIST).head = recallocarray((ARRAY_LIST).head, (ARRAY_LIST).len, \ - (ARRAY_LIST).allocated, sizeof(*(ARRAY_LIST).head)); \ + if ((ARRAY_LIST).head == NULL \ + || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \ + (ARRAY_LIST).allocated += \ + (ARRAY_LIST).alloc_blocksize ? : 8; \ + (ARRAY_LIST).head = recallocarray((ARRAY_LIST).head, \ + (ARRAY_LIST).len, \ + (ARRAY_LIST).allocated, \ + sizeof(*(ARRAY_LIST).head)); \ }; \ - if (!(ARRAY_LIST).head || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \ + if (!(ARRAY_LIST).head \ + || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \ NEW_ITEM_P = NULL; \ break; \ } \ blob - 570d5c58f2fda7b886d942ae92961bfddb6c1664 blob + 0f70fdefe19e056855268e0ccc659fb2b49aff20 --- include/diff/diff_main.h +++ include/diff/diff_main.h @@ -79,8 +79,9 @@ struct diff_atom { const uint8_t *at; size_t len; - /* This hash is just a very cheap speed up for finding *mismatching* atoms. When hashes match, we still need to - * compare entire atoms to find out whether they are indeed identical or not. */ + /* This hash is just a very cheap speed up for finding *mismatching* + * atoms. When hashes match, we still need to compare entire atoms to + * find out whether they are indeed identical or not. */ unsigned int hash; /* State for the Patience Diff algorithm */ @@ -102,11 +103,13 @@ diff_atom_same(const struct diff_atom *left, const str && memcmp(left->at, right->at, left->len) == 0; } -/* For each file, there is a "root" struct diff_data referencing the entire file, which the atoms are parsed from. In - * recursion of diff algorithm, there may be "child" struct diff_data only referencing a subsection of the file, - * re-using the atoms parsing. For "root" structs, atoms_allocated will be nonzero, indicating that the array of atoms - * is owned by that struct. For "child" structs, atoms_allocated == 0, to indicate that the struct is referencing a - * subset of atoms. */ +/* For each file, there is a "root" struct diff_data referencing the entire + * file, which the atoms are parsed from. In recursion of diff algorithm, there + * may be "child" struct diff_data only referencing a subsection of the file, + * re-using the atoms parsing. For "root" structs, atoms_allocated will be + * nonzero, indicating that the array of atoms is owned by that struct. For + * "child" structs, atoms_allocated == 0, to indicate that the struct is + * referencing a subset of atoms. */ struct diff_data { const uint8_t *data; size_t len; @@ -116,16 +119,17 @@ struct diff_data { void diff_data_free(struct diff_data *diff_data); -/* The atom's index in the entire file. For atoms divided by lines of text, this yields the line number (starting with - * 0). Also works for diff_data that reference only a subsection of a file, always reflecting the global position in - * the file (and not the relative position within the subsection). */ +/* The atom's index in the entire file. For atoms divided by lines of text, this + * yields the line number (starting with 0). Also works for diff_data that + * reference only a subsection of a file, always reflecting the global position + * in the file (and not the relative position within the subsection). */ #define diff_atom_root_idx(DIFF_DATA, ATOM) \ ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \ ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \ : (DIFF_DATA)->root->atoms.len) -/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this yields the line number (starting with - * 0). */ +/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this + * yields the line number (starting with 0). */ #define diff_atom_idx(DIFF_DATA, ATOM) \ ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \ ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \ @@ -133,7 +137,9 @@ void diff_data_free(struct diff_data *diff_data); #define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \ for ((ATOM) = (FIRST_ATOM); \ - (ATOM) && ((ATOM) >= (FIRST_ATOM)) && ((ATOM) - (FIRST_ATOM) < (COUNT)); \ + (ATOM) \ + && ((ATOM) >= (FIRST_ATOM)) \ + && ((ATOM) - (FIRST_ATOM) < (COUNT)); \ (ATOM)++) #define diff_data_foreach_atom(ATOM, DIFF_DATA) \ @@ -141,31 +147,38 @@ void diff_data_free(struct diff_data *diff_data); #define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \ for ((ATOM) = (FROM); \ - (ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \ + (ATOM) \ + && ((ATOM) >= (DIFF_DATA)->atoms.head) \ + && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \ (ATOM)++) -/* A diff chunk represents a set of atoms on the left and/or a set of atoms on the right. +/* A diff chunk represents a set of atoms on the left and/or a set of atoms on + * the right. * * If solved == false: - * The diff algorithm has divided the source file, and this is a chunk that the inner_algo should run on next. + * The diff algorithm has divided the source file, and this is a chunk that the + * inner_algo should run on next. * The lines on the left should be diffed against the lines on the right. - * (If there are no left lines or no right lines, it implies solved == true, because there is nothing to diff.) + * (If there are no left lines or no right lines, it implies solved == true, + * because there is nothing to diff.) * * If solved == true: - * If there are only left atoms, it is a chunk removing atoms from the left ("a minus chunk"). - * If there are only right atoms, it is a chunk adding atoms from the right ("a plus chunk"). - * If there are both left and right lines, it is a chunk of equal content on both sides, - * and left_count == right_count: + * If there are only left atoms, it is a chunk removing atoms from the left ("a + * minus chunk"). + * If there are only right atoms, it is a chunk adding atoms from the right ("a + * plus chunk"). + * If there are both left and right lines, it is a chunk of equal content on + * both sides, and left_count == right_count: * * - foo } * - bar }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3, * - baz } right_start = NULL, right_count = 0 } * moo } * goo }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3, - * zoo } right_start = &right.atoms.head[0], right_count = 3 } + * zoo } right_start = &right.atoms.head[0], right_count = 3 } * +loo } * +roo }-- diff_chunk{ left_start = NULL, left_count = 0, - * +too } right_start = &right.atoms.head[3], right_count = 3 } + * +too } right_start = &right.atoms.head[3], right_count = 3 } * */ struct diff_chunk { @@ -190,50 +203,70 @@ struct diff_state { /* The final result passed to the original diff caller. */ struct diff_result *result; - /* The root diff_data is in result->left,right, these are (possibly) subsections of the root data. */ + /* The root diff_data is in result->left,right, these are (possibly) + * subsections of the root data. */ struct diff_data left; struct diff_data right; unsigned int recursion_depth_left; - /* Remaining chunks from one diff algorithm pass, if any solved == false chunks came up. */ + /* Remaining chunks from one diff algorithm pass, if any solved == false + * chunks came up. */ diff_chunk_arraylist_t temp_result; }; struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved, - struct diff_atom *left_start, unsigned int left_count, - struct diff_atom *right_start, unsigned int right_count); + struct diff_atom *left_start, + unsigned int left_count, + struct diff_atom *right_start, + unsigned int right_count); /* Signature of a utility function to divide both source files into diff atoms. - * It is possible that a (future) algorithm requires both source files to decide on atom split points, hence this gets - * both left and right to atomize at the same time. + * It is possible that a (future) algorithm requires both source files to decide + * on atom split points, hence this gets both left and right to atomize at the + * same time. * An example is diff_atomize_text_by_line() in diff_atomize_text.c. * * func_data: context pointer (free to be used by implementation). - * left: struct diff_data with left->data and left->len already set up, and left->atoms to be created. - * right: struct diff_data with right->data and right->len already set up, and right->atoms to be created. + * left: struct diff_data with left->data and left->len already set up, and + * left->atoms to be created. + * right: struct diff_data with right->data and right->len already set up, and + * right->atoms to be created. */ -typedef enum diff_rc (*diff_atomize_func_t)(void *func_data, struct diff_data *left, struct diff_data *right); +typedef enum diff_rc (*diff_atomize_func_t)(void *func_data, + struct diff_data *left, + struct diff_data *right); -extern enum diff_rc diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right); +extern enum diff_rc diff_atomize_text_by_line(void *func_data, + struct diff_data *left, + struct diff_data *right); struct diff_algo_config; -typedef enum diff_rc (*diff_algo_impl_t)(const struct diff_algo_config *algo_config, struct diff_state *state); +typedef enum diff_rc (*diff_algo_impl_t)( + const struct diff_algo_config *algo_config, struct diff_state *state); -/* Form a result with all left-side removed and all right-side added, i.e. no actual diff algorithm involved. */ -enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state); +/* Form a result with all left-side removed and all right-side added, i.e. no + * actual diff algorithm involved. */ +enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, + struct diff_state *state); -/* Myers Diff tracing from the start all the way through to the end, requiring quadratic amounts of memory. This can - * fail if the required space surpasses algo_config->permitted_state_size. */ -extern enum diff_rc diff_algo_myers(const struct diff_algo_config *algo_config, struct diff_state *state); +/* Myers Diff tracing from the start all the way through to the end, requiring + * quadratic amounts of memory. This can fail if the required space surpasses + * algo_config->permitted_state_size. */ +extern enum diff_rc diff_algo_myers(const struct diff_algo_config *algo_config, + struct diff_state *state); -/* Myers "Divide et Impera": tracing forwards from the start and backwards from the end to find a midpoint that divides - * the problem into smaller chunks. Requires only linear amounts of memory. */ -extern enum diff_rc diff_algo_myers_divide(const struct diff_algo_config *algo_config, struct diff_state *state); +/* Myers "Divide et Impera": tracing forwards from the start and backwards from + * the end to find a midpoint that divides the problem into smaller chunks. + * Requires only linear amounts of memory. */ +extern enum diff_rc diff_algo_myers_divide( + const struct diff_algo_config *algo_config, struct diff_state *state); -/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For very specific scenarios, it may lead to - * a complete diff result by itself, but needs a fallback algo to solve chunks that don't have common-unique atoms. */ -extern enum diff_rc diff_algo_patience(const struct diff_algo_config *algo_config, struct diff_state *state); +/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For + * very specific scenarios, it may lead to a complete diff result by itself, but + * needs a fallback algo to solve chunks that don't have common-unique atoms. */ +extern enum diff_rc diff_algo_patience( + const struct diff_algo_config *algo_config, struct diff_state *state); /* Diff algorithms to use, possibly nested. For example: * @@ -242,21 +275,24 @@ extern enum diff_rc diff_algo_patience(const struct di * myers = (struct diff_algo_config){ * .impl = diff_algo_myers, * .permitted_state_size = 32 * 1024 * 1024, - * .fallback_algo = &patience, // when too large, do diff_algo_patience + * // When too large, do diff_algo_patience: + * .fallback_algo = &patience, * }; * - * patience = (struct diff_algo_config){ - * .impl = diff_algo_patience, - * .inner_algo = &patience, // After subdivision, do Patience again. - * .fallback_algo = &myers_divide, // If subdivision failed, do Myers Divide et Impera. + * const struct diff_algo_config patience = (struct diff_algo_config){ + * .impl = diff_algo_patience, + * // After subdivision, do Patience again: + * .inner_algo = &patience, + * // If subdivision failed, do Myers Divide et Impera: + * .fallback_algo = &myers_then_myers_divide, * }; - * - * myers_divide = (struct diff_algo_config){ - * .impl = diff_algo_myers_divide, - * .inner_algo = &myers, // When division succeeded, start from the top. - * // (fallback_algo = NULL implies diff_algo_none). + * + * const struct diff_algo_config myers_divide = (struct diff_algo_config){ + * .impl = diff_algo_myers_divide, + * // When division succeeded, start from the top: + * .inner_algo = &myers_then_myers_divide, + * // (fallback_algo = NULL implies diff_algo_none). * }; - * * struct diff_config config = { * .algo = &myers, * ... @@ -266,15 +302,18 @@ extern enum diff_rc diff_algo_patience(const struct di struct diff_algo_config { diff_algo_impl_t impl; - /* Fail this algo if it would use more than this amount of memory, and instead use fallback_algo - * (diff_algo_myers). permitted_state_size == 0 means no limitation. */ + /* Fail this algo if it would use more than this amount of memory, and + * instead use fallback_algo (diff_algo_myers). permitted_state_size == + * 0 means no limitation. */ size_t permitted_state_size; - /* For algorithms that divide into smaller chunks, use this algorithm to solve the divided chunks. */ + /* For algorithms that divide into smaller chunks, use this algorithm to + * solve the divided chunks. */ const struct diff_algo_config *inner_algo; - /* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large state, or diff_algo_patience can't find - * any common-unique atoms), then use this algorithm instead. */ + /* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large + * state, or diff_algo_patience can't find any common-unique atoms), + * then use this algorithm instead. */ const struct diff_algo_config *fallback_algo; }; @@ -284,9 +323,11 @@ struct diff_config { const struct diff_algo_config *algo; - /* How deep to step into subdivisions of a source file, a paranoia / safety measure to guard against infinite - * loops through diff algorithms. When the maximum recursion is reached, employ diff_algo_none (i.e. remove all - * left atoms and add all right atoms). */ + /* How deep to step into subdivisions of a source file, a paranoia / + * safety measure to guard against infinite loops through diff + * algorithms. When the maximum recursion is reached, employ + * diff_algo_none (i.e. remove all left atoms and add all right atoms). + */ unsigned int max_recursion_depth; }; blob - b229f4cceee81ddba11ecdfeacbfa29bd4ccf721 blob + b36e18bec3150b1185bd70c72a07067b0f7799bc --- include/diff/diff_output.h +++ include/diff/diff_output.h @@ -28,6 +28,8 @@ struct diff_input_info { enum diff_rc diff_output_plain(FILE *dest, const struct diff_input_info *info, const struct diff_result *result); enum diff_rc diff_output_unidiff(FILE *dest, const struct diff_input_info *info, - const struct diff_result *result, unsigned int context_lines); + const struct diff_result *result, + unsigned int context_lines); enum diff_rc diff_output_info(FILE *dest, const struct diff_input_info *info); -void diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, unsigned int count); +void diff_output_lines(FILE *dest, const char *prefix, + struct diff_atom *start_atom, unsigned int count); blob - 4e13a9434d0d167686e127df728bf257d02d1f0c blob + 6bdd58965fe55ca3ffcf79a6ba3e2ca4568ecfed --- lib/debug.h +++ lib/debug.h @@ -27,7 +27,8 @@ #define debug_dump_atoms dump_atoms static inline void -dump_atom(const struct diff_data *left, const struct diff_data *right, const struct diff_atom *atom) +dump_atom(const struct diff_data *left, const struct diff_data *right, + const struct diff_atom *atom) { if (!atom) { print("NULL atom\n"); @@ -36,9 +37,12 @@ dump_atom(const struct diff_data *left, const struct d if (left) print(" %3ld", diff_atom_root_idx(left, atom)); if (right && atom->patience.pos_in_other) - print(" %3ld", diff_atom_root_idx(right, atom->patience.pos_in_other)); + print(" %3ld", + diff_atom_root_idx(right, atom->patience.pos_in_other)); - print(" %s%s '", atom->patience.unique_here ? "u" : " ", atom->patience.unique_in_both ? "c" : " "); + print(" %s%s '", + atom->patience.unique_here ? "u" : " ", + atom->patience.unique_in_both ? "c" : " "); const char *s; for (s = atom->at; s < (const char*)(atom->at + atom->len); s++) { if (*s == '\r') @@ -54,7 +58,8 @@ dump_atom(const struct diff_data *left, const struct d } static inline void -dump_atoms(const struct diff_data *d, struct diff_atom *atom, unsigned int count) +dump_atoms(const struct diff_data *d, struct diff_atom *atom, + unsigned int count) { if (count > 42) { dump_atoms(d, atom, 20); @@ -76,11 +81,13 @@ dump(struct diff_data *d) } /* kd is a quadratic space myers matrix from the original Myers algorithm. - * kd_forward and kd_backward are linear slices of a myers matrix from the Myers Divide algorithm. + * kd_forward and kd_backward are linear slices of a myers matrix from the Myers + * Divide algorithm. */ static inline void dump_myers_graph(const struct diff_data *l, const struct diff_data *r, - int *kd, int *kd_forward, int kd_forward_d, int *kd_backward, int kd_backward_d) + int *kd, int *kd_forward, int kd_forward_d, + int *kd_backward, int kd_backward_d) { #define COLOR_YELLOW "\033[1;33m" #define COLOR_GREEN "\033[1;32m" @@ -127,9 +134,12 @@ dump_myers_graph(const struct diff_data *l, const stru size_t kd_len = max + 1 + max; int di; #define xc_to_y(X, C, DELTA) ((X) - (C) + (DELTA)) - int delta = (int)r->atoms.len - (int)l->atoms.len; + int delta = (int)r->atoms.len + - (int)l->atoms.len; int ki; - for (ki = kd_forward_d; ki >= -kd_forward_d; ki -= 2) { + for (ki = kd_forward_d; + ki >= -kd_forward_d; + ki -= 2) { if (x != kd_forward[ki]) continue; if (y != xk_to_y(x, ki)) @@ -143,9 +153,12 @@ dump_myers_graph(const struct diff_data *l, const stru int max = l->atoms.len + r->atoms.len; size_t kd_len = max + 1 + max; int di; - int delta = (int)r->atoms.len - (int)l->atoms.len; + int delta = (int)r->atoms.len + - (int)l->atoms.len; int ki; - for (ki = kd_backward_d; ki >= -kd_backward_d; ki -= 2) { + for (ki = kd_backward_d; + ki >= -kd_backward_d; + ki -= 2) { if (x != kd_backward[ki]) continue; if (y != xc_to_y(x, ki, delta)) @@ -174,7 +187,8 @@ dump_myers_graph(const struct diff_data *l, const stru print(" "); for (x = 0; x < l->atoms.len; x++) { - if (diff_atom_same(&l->atoms.head[x], &r->atoms.head[y])) + if (diff_atom_same(&l->atoms.head[x], + &r->atoms.head[y])) print("|\\"); else print("| "); @@ -185,11 +199,13 @@ dump_myers_graph(const struct diff_data *l, const stru static inline void debug_dump_myers_graph(const struct diff_data *l, const struct diff_data *r, - int *kd, int *kd_forward, int kd_forward_d, int *kd_backward, int kd_backward_d) + int *kd, int *kd_forward, int kd_forward_d, + int *kd_backward, int kd_backward_d) { if (l->atoms.len > 99 || r->atoms.len > 99) return; - dump_myers_graph(l, r, kd, kd_forward, kd_forward_d, kd_backward, kd_backward_d); + dump_myers_graph(l, r, kd, kd_forward, kd_forward_d, + kd_backward, kd_backward_d); } #else blob - fbeb6164469b907b7e572d24ec0f8c6310a47eeb blob + f6b9cbeca28fdc250d60823064f09fad0aba2fe4 --- lib/diff_atomize_text.c +++ lib/diff_atomize_text.c @@ -39,11 +39,13 @@ diff_data_atomize_text_lines(struct diff_data *d) line_end++; } - /* When not at the end of data, the line ending char ('\r' or '\n') must follow */ + /* When not at the end of data, the line ending char ('\r' or + * '\n') must follow */ if (line_end < end) line_end++; /* If that was an '\r', also pull in any following '\n' */ - if (line_end[0] == '\r' && line_end < end && line_end[1] == '\n') + if (line_end[0] == '\r' + && line_end < end && line_end[1] == '\n') line_end++; /* Record the found line as diff atom */ @@ -66,7 +68,8 @@ diff_data_atomize_text_lines(struct diff_data *d) } enum diff_rc -diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right) +diff_atomize_text_by_line(void *func_data, struct diff_data *left, + struct diff_data *right) { enum diff_rc rc; rc = diff_data_atomize_text_lines(left); blob - 8858013c493332f472d8fb36c12f51e62f1c19d4 blob + 541789d3a2848a9f50d3a6ffba47c91a61124717 --- lib/diff_main.c +++ lib/diff_main.c @@ -30,9 +30,11 @@ #include "debug.h" -/* Even if a left or right side is empty, diff output may need to know the position in that file. - * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that - * position without consuming any lines. */ +/* Even if a left or right side is empty, diff output may need to know the + * position in that file. + * So left_start or right_start must never be NULL -- pass left_count or + * right_count as zero to indicate staying at that position without consuming + * any lines. */ struct diff_chunk * diff_state_add_chunk(struct diff_state *state, bool solved, struct diff_atom *left_start, unsigned int left_count, @@ -102,24 +104,30 @@ diff_data_free(struct diff_data *diff_data) } enum diff_rc -diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_algo_none(const struct diff_algo_config *algo_config, + struct diff_state *state) { debug("\n** %s\n", __func__); debug("left:\n"); debug_dump(&state->left); debug("right:\n"); debug_dump(&state->right); - debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL, 0); + debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL, + 0); /* Add a chunk of equal lines, if any */ unsigned int equal_atoms = 0; - while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len - && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms])) + while (equal_atoms < state->left.atoms.len + && equal_atoms < state->right.atoms.len + && diff_atom_same(&state->left.atoms.head[equal_atoms], + &state->right.atoms.head[equal_atoms])) equal_atoms++; if (equal_atoms) { if (!diff_state_add_chunk(state, true, - &state->left.atoms.head[0], equal_atoms, - &state->right.atoms.head[0], equal_atoms)) + &state->left.atoms.head[0], + equal_atoms, + &state->right.atoms.head[0], + equal_atoms)) return DIFF_RC_ENOMEM; } @@ -144,12 +152,14 @@ diff_algo_none(const struct diff_algo_config *algo_con } enum diff_rc -diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_run_algo(const struct diff_algo_config *algo_config, + struct diff_state *state) { enum diff_rc rc; ARRAYLIST_FREE(state->temp_result); - if (!algo_config || !algo_config->impl || !state->recursion_depth_left) { + if (!algo_config || !algo_config->impl + || !state->recursion_depth_left) { debug("MAX RECURSION REACHED, just dumping diff chunks\n"); return diff_algo_none(algo_config, state); } @@ -158,7 +168,8 @@ diff_run_algo(const struct diff_algo_config *algo_conf rc = algo_config->impl(algo_config, state); switch (rc) { case DIFF_RC_USE_DIFF_ALGO_FALLBACK: - debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo); + debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", + algo_config->fallback_algo); rc = diff_run_algo(algo_config->fallback_algo, state); goto return_rc; @@ -171,9 +182,10 @@ diff_run_algo(const struct diff_algo_config *algo_conf goto return_rc; } - /* Pick up any diff chunks that are still unsolved and feed to inner_algo. - * inner_algo will solve unsolved chunks and append to result, - * and subsequent solved chunks on this level are then appended to result afterwards. */ + /* Pick up any diff chunks that are still unsolved and feed to + * inner_algo. inner_algo will solve unsolved chunks and append to + * result, and subsequent solved chunks on this level are then appended + * to result afterwards. */ int i; for (i = 0; i < state->temp_result.len; i++) { struct diff_chunk *c = &state->temp_result.head[i]; @@ -193,8 +205,10 @@ diff_run_algo(const struct diff_algo_config *algo_conf .result = state->result, .recursion_depth_left = state->recursion_depth_left - 1, }; - diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count); - diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count); + diff_data_init_subsection(&inner_state.left, &state->left, + c->left_start, c->left_count); + diff_data_init_subsection(&inner_state.right, &state->right, + c->right_start, c->right_count); rc = diff_run_algo(algo_config->inner_algo, &inner_state); @@ -226,7 +240,8 @@ diff_main(const struct diff_config *config, return result; } - result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right); + result->rc = config->atomize_func(config->atomize_func_data, + &result->left, &result->right); if (result->rc != DIFF_RC_OK) return result; @@ -234,8 +249,12 @@ diff_main(const struct diff_config *config, .result = result, .recursion_depth_left = config->max_recursion_depth ? : 1024, }; - diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len); - diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len); + diff_data_init_subsection(&state.left, &result->left, + result->left.atoms.head, + result->left.atoms.len); + diff_data_init_subsection(&state.right, &result->right, + result->right.atoms.head, + result->right.atoms.len); result->rc = diff_run_algo(config->algo, &state); blob - c14e542b7962f990a43c57c58bb31670d56d76f9 blob + 4ab3f306b594b78d50eb8182ebef5b69b1ad2870 --- lib/diff_myers.c +++ lib/diff_myers.c @@ -27,16 +27,14 @@ * * Myers approaches finding the smallest diff as a graph problem. * The crux is that the original algorithm requires quadratic amount of memory: - * both sides' lengths added, and that squared. So if we're diffing lines of text, two files with 1000 lines each would - * blow up to a matrix of about 2000 * 2000 ints of state, about 16 Mb of RAM to figure out 2 kb of text. - * The solution is using Myers' "divide and conquer" extension algorithm, which does the original traversal from both - * ends of the files to reach a middle where these "snakes" touch, hence does not need to backtrace the traversal, and - * so gets away with only keeping a single column of that huge state matrix in memory. - * - * Todo: the divide and conquer requires linear *space*, not necessarily linear *time*. It recurses, apparently doing - * multiple Myers passes, and also it apparently favors fragmented diffs in cases where chunks of text were moved to a - * different place. Up to a given count of diff atoms (text lines), it might be desirable to accept the quadratic memory - * usage, get nicer diffs and less re-iteration of the same data? + * both sides' lengths added, and that squared. So if we're diffing lines of + * text, two files with 1000 lines each would blow up to a matrix of about + * 2000 * 2000 ints of state, about 16 Mb of RAM to figure out 2 kb of text. + * The solution is using Myers' "divide and conquer" extension algorithm, which + * does the original traversal from both ends of the files to reach a middle + * where these "snakes" touch, hence does not need to backtrace the traversal, + * and so gets away with only keeping a single column of that huge state matrix + * in memory. */ struct diff_box { @@ -70,19 +68,25 @@ struct diff_box { * * Moving right means delete an atom from the left-hand-side, * Moving down means add an atom from the right-hand-side. - * Diagonals indicate identical atoms on both sides, the challenge is to use as many diagonals as possible. + * Diagonals indicate identical atoms on both sides, the challenge is to use as + * many diagonals as possible. * - * The original Myers algorithm walks all the way from the top left to the bottom right, remembers all steps, and then - * backtraces to find the shortest path. However, that requires keeping the entire graph in memory, which needs + * The original Myers algorithm walks all the way from the top left to the + * bottom right, remembers all steps, and then backtraces to find the shortest + * path. However, that requires keeping the entire graph in memory, which needs * quadratic space. * - * Myers adds a variant that uses linear space -- note, not linear time, only linear space: walk forward and backward, - * find a meeting point in the middle, and recurse on the two separate sections. This is called "divide and conquer". + * Myers adds a variant that uses linear space -- note, not linear time, only + * linear space: walk forward and backward, find a meeting point in the middle, + * and recurse on the two separate sections. This is called "divide and + * conquer". * - * d: the step number, starting with 0, a.k.a. the distance from the starting point. - * k: relative index in the state array for the forward scan, indicating on which diagonal through the diff graph we - * currently are. - * c: relative index in the state array for the backward scan, indicating the diagonal number from the bottom up. + * d: the step number, starting with 0, a.k.a. the distance from the starting + * point. + * k: relative index in the state array for the forward scan, indicating on + * which diagonal through the diff graph we currently are. + * c: relative index in the state array for the backward scan, indicating the + * diagonal number from the bottom up. * * The "divide and conquer" traversal through the Myers graph looks like this: * @@ -109,17 +113,22 @@ struct diff_box { * x,y pairs here are the coordinates in the Myers graph: * x = atom index in left-side source, y = atom index in the right-side source. * - * Only one forward column and one backward column are kept in mem, each need at most left.len + 1 + right.len items. - * Note that each d step occupies either the even or the odd items of a column: if e.g. the previous column is in the - * odd items, the next column is formed in the even items, without overwriting the previous column's results. + * Only one forward column and one backward column are kept in mem, each need at + * most left.len + 1 + right.len items. Note that each d step occupies either + * the even or the odd items of a column: if e.g. the previous column is in the + * odd items, the next column is formed in the even items, without overwriting + * the previous column's results. * - * Also note that from the diagonal index k and the x coordinate, the y coordinate can be derived: + * Also note that from the diagonal index k and the x coordinate, the y + * coordinate can be derived: * y = x - k - * Hence the state array only needs to keep the x coordinate, i.e. the position in the left-hand file, and the y - * coordinate, i.e. position in the right-hand file, is derived from the index in the state array. + * Hence the state array only needs to keep the x coordinate, i.e. the position + * in the left-hand file, and the y coordinate, i.e. position in the right-hand + * file, is derived from the index in the state array. * - * The two traces meet at 4,3, the first step (here found in the forward traversal) where a forward position is on or - * past a backward traced position on the same diagonal. + * The two traces meet at 4,3, the first step (here found in the forward + * traversal) where a forward position is on or past a backward traced position + * on the same diagonal. * * This divides the problem space into: * @@ -144,8 +153,8 @@ struct diff_box { * 1 o-b b: backward d=1 first reaches here (sliding up the snake) * B \ f: then forward d=2 reaches here (sliding down the snake) * 2 o As result, the box from b to f is found to be identical; - * C \ leaving a top box from 0,0 to 1,1 and a bottom trivial tail 3,3 to 4,3. - * 3 f-o + * C \ leaving a top box from 0,0 to 1,1 and a bottom trivial + * 3 f-o tail 3,3 to 4,3. * * 3 o-* * Y | @@ -175,15 +184,23 @@ struct diff_box { /* Do one forwards step in the "divide and conquer" graph traversal. * left: the left side to diff. * right: the right side to diff against. - * kd_forward: the traversal state for forwards traversal, modified by this function. + * kd_forward: the traversal state for forwards traversal, modified by this + * function. * This is carried over between invocations with increasing d. - * kd_forward points at the center of the state array, allowing negative indexes. - * kd_backward: the traversal state for backwards traversal, to find a meeting point. - * Since forwards is done first, kd_backward will be valid for d - 1, not d. - * kd_backward points at the center of the state array, allowing negative indexes. - * d: Step or distance counter, indicating for what value of d the kd_forward should be populated. - * For d == 0, kd_forward[0] is initialized, i.e. the first invocation should be for d == 0. + * kd_forward points at the center of the state array, allowing + * negative indexes. + * kd_backward: the traversal state for backwards traversal, to find a meeting + * point. + * Since forwards is done first, kd_backward will be valid for d - + * 1, not d. + * kd_backward points at the center of the state array, allowing + * negative indexes. + * d: Step or distance counter, indicating for what value of d the kd_forward + * should be populated. + * For d == 0, kd_forward[0] is initialized, i.e. the first invocation should + * be for d == 0. * meeting_snake: resulting meeting point, if any. + * Return true when a meeting point has been identified. */ static bool diff_divide_myers_forward(struct diff_data *left, struct diff_data *right, @@ -198,14 +215,18 @@ diff_divide_myers_forward(struct diff_data *left, stru for (k = d; k >= -d; k -= 2) { if (k < -(int)right->atoms.len || k > (int)left->atoms.len) { - /* This diagonal is completely outside of the Myers graph, don't calculate it. */ + /* This diagonal is completely outside of the Myers + * graph, don't calculate it. */ if (k < -(int)right->atoms.len) - debug(" %d k < -(int)right->atoms.len %d\n", k, -(int)right->atoms.len); + debug(" %d k < -(int)right->atoms.len %d\n", k, + -(int)right->atoms.len); else - debug(" %d k > left->atoms.len %d\n", k, left->atoms.len); + debug(" %d k > left->atoms.len %d\n", k, + left->atoms.len); if (k < 0) { - /* We are traversing negatively, and already below the entire graph, nothing will come - * of this. */ + /* We are traversing negatively, and already + * below the entire graph, nothing will come of + * this. */ debug(" break"); break; } @@ -214,12 +235,15 @@ diff_divide_myers_forward(struct diff_data *left, stru } debug("- k = %d\n", k); if (d == 0) { - /* This is the initializing step. There is no prev_k yet, get the initial x from the top left of - * the Myers graph. */ + /* This is the initializing step. There is no prev_k + * yet, get the initial x from the top left of the Myers + * graph. */ x = 0; } - /* Favoring "-" lines first means favoring moving rightwards in the Myers graph. - * For this, all k should derive from k - 1, only the bottom most k derive from k + 1: + /* Favoring "-" lines first means favoring moving rightwards in + * the Myers graph. + * For this, all k should derive from k - 1, only the bottom + * most k derive from k + 1: * * | d= 0 1 2 * ----+---------------- @@ -230,27 +254,34 @@ diff_divide_myers_forward(struct diff_data *left, stru * | / * 0 | -->0,0 3,3 * | \\ / - * -1 | 0,1 <-- bottom most for d=1 from prev_k = -1 + 1 = 0 - * | \\ - * -2 | 0,2 <-- bottom most for d=2 from prev_k = -2 + 1 = -1 + * -1 | 0,1 <-- bottom most for d=1 from + * | \\ prev_k = -1 + 1 = 0 + * -2 | 0,2 <-- bottom most for d=2 from + * prev_k = -2 + 1 = -1 * - * Except when a k + 1 from a previous run already means a further advancement in the graph. + * Except when a k + 1 from a previous run already means a + * further advancement in the graph. * If k == d, there is no k + 1 and k - 1 is the only option. - * If k < d, use k + 1 in case that yields a larger x. Also use k + 1 if k - 1 is outside the graph. + * If k < d, use k + 1 in case that yields a larger x. Also use + * k + 1 if k - 1 is outside the graph. */ - else if (k > -d && (k == d - || (k - 1 >= -(int)right->atoms.len - && kd_forward[k - 1] >= kd_forward[k + 1]))) { + else if (k > -d + && (k == d + || (k - 1 >= -(int)right->atoms.len + && kd_forward[k - 1] >= kd_forward[k + 1]))) { /* Advance from k - 1. - * From position prev_k, step to the right in the Myers graph: x += 1. + * From position prev_k, step to the right in the Myers + * graph: x += 1. */ int prev_k = k - 1; int prev_x = kd_forward[prev_k]; x = prev_x + 1; } else { /* The bottom most one. - * From position prev_k, step to the bottom in the Myers graph: y += 1. - * Incrementing y is achieved by decrementing k while keeping the same x. + * From position prev_k, step to the bottom in the Myers + * graph: y += 1. + * Incrementing y is achieved by decrementing k while + * keeping the same x. * (since we're deriving y from y = x - k). */ int prev_k = k + 1; @@ -261,7 +292,8 @@ diff_divide_myers_forward(struct diff_data *left, stru int x_before_slide = x; /* Slide down any snake that we might find here. */ while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len - && diff_atom_same(&left->atoms.head[x], &right->atoms.head[xk_to_y(x, k)])) + && diff_atom_same(&left->atoms.head[x], + &right->atoms.head[xk_to_y(x, k)])) x++; kd_forward[k] = x; if (x_before_slide != x) { @@ -271,17 +303,8 @@ diff_divide_myers_forward(struct diff_data *left, stru if (DEBUG) { int fi; for (fi = d; fi >= k; fi--) { - debug("kd_forward[%d] = (%d, %d)\n", fi, kd_forward[fi], kd_forward[fi] - fi); - /* - if (kd_forward[fi] >= 0 && kd_forward[fi] < left->atoms.len) - debug_dump_atom(left, right, &left->atoms.head[kd_forward[fi]]); - else - debug("\n"); - if (kd_forward[fi]-fi >= 0 && kd_forward[fi]-fi < right->atoms.len) - debug_dump_atom(right, left, &right->atoms.head[kd_forward[fi]-fi]); - else - debug("\n"); - */ + debug("kd_forward[%d] = (%d, %d)\n", fi, + kd_forward[fi], kd_forward[fi] - fi); } } @@ -289,10 +312,11 @@ diff_divide_myers_forward(struct diff_data *left, stru || xk_to_y(x, k) < 0 || xk_to_y(x, k) > right->atoms.len) continue; - /* Figured out a new forwards traversal, see if this has gone onto or even past a preceding backwards - * traversal. + /* Figured out a new forwards traversal, see if this has gone + * onto or even past a preceding backwards traversal. * - * If the delta in length is odd, then d and backwards_d hit the same state indexes: + * If the delta in length is odd, then d and backwards_d hit the + * same state indexes: * | d= 0 1 2 1 0 * ----+---------------- ---------------- * k= | c= @@ -311,7 +335,8 @@ diff_divide_myers_forward(struct diff_data *left, stru * -2 | 0,2 -3 * | * - * If the delta is even, they end up off-by-one, i.e. on different diagonals: + * If the delta is even, they end up off-by-one, i.e. on + * different diagonals: * * | d= 0 1 2 1 0 * ----+---------------- ---------------- @@ -329,35 +354,44 @@ diff_divide_myers_forward(struct diff_data *left, stru * -2 | 0,2 -2 * | * - * So in the forward path, we can only match up diagonals when the delta is odd. + * So in the forward path, we can only match up diagonals when + * the delta is odd. */ if ((delta & 1) == 0) continue; - /* Forwards is done first, so the backwards one was still at d - 1. Can't do this for d == 0. */ + /* Forwards is done first, so the backwards one was still at + * d - 1. Can't do this for d == 0. */ int backwards_d = d - 1; if (backwards_d < 0) continue; debug("backwards_d = %d\n", backwards_d); - /* If both sides have the same length, forward and backward start on the same diagonal, meaning the - * backwards state index c == k. - * As soon as the lengths are not the same, the backwards traversal starts on a different diagonal, and - * c = k shifted by the difference in length. + /* If both sides have the same length, forward and backward + * start on the same diagonal, meaning the backwards state index + * c == k. + * As soon as the lengths are not the same, the backwards + * traversal starts on a different diagonal, and c = k shifted + * by the difference in length. */ int c = k_to_c(k, delta); - /* When the file sizes are very different, the traversal trees start on far distant diagonals. - * They don't necessarily meet straight on. See whether this forward value is on a diagonal that - * is also valid in kd_backward[], and match them if so. */ + /* When the file sizes are very different, the traversal trees + * start on far distant diagonals. + * They don't necessarily meet straight on. See whether this + * forward value is on a diagonal that is also valid in + * kd_backward[], and match them if so. */ if (c >= -backwards_d && c <= backwards_d) { - /* Current k is on a diagonal that exists in kd_backward[]. If the two x positions have - * met or passed (forward walked onto or past backward), then we've found a midpoint / a - * mid-box. + /* Current k is on a diagonal that exists in + * kd_backward[]. If the two x positions have met or + * passed (forward walked onto or past backward), then + * we've found a midpoint / a mid-box. * - * When forwards and backwards traversals meet, the endpoints of the mid-snake are - * not the two points in kd_forward and kd_backward, but rather the section that - * was slid (if any) of the current forward/backward traversal only. + * When forwards and backwards traversals meet, the + * endpoints of the mid-snake are not the two points in + * kd_forward and kd_backward, but rather the section + * that was slid (if any) of the current + * forward/backward traversal only. * * For example: * @@ -381,22 +415,27 @@ diff_divide_myers_forward(struct diff_data *left, stru * | | * o-o-o * - * The forward traversal reached M from the top and slid downwards to A. - * The backward traversal already reached X, which is not a straight line from M - * anymore, so picking a mid-snake from M to X would yield a mistake. + * The forward traversal reached M from the top and slid + * downwards to A. The backward traversal already + * reached X, which is not a straight line from M + * anymore, so picking a mid-snake from M to X would + * yield a mistake. * - * The correct mid-snake is between M and A. M is where the forward traversal hit - * the diagonal that the backward traversal has already passed, and A is what it - * reaches when sliding down identical lines. + * The correct mid-snake is between M and A. M is where + * the forward traversal hit the diagonal that the + * backward traversal has already passed, and A is what + * it reaches when sliding down identical lines. */ int backward_x = kd_backward[c]; debug("Compare: k=%d c=%d is (%d,%d) >= (%d,%d)?\n", - k, c, x, xk_to_y(x, k), backward_x, xc_to_y(backward_x, c, delta)); + k, c, x, xk_to_y(x, k), backward_x, + xc_to_y(backward_x, c, delta)); if (x >= backward_x) { *meeting_snake = (struct diff_box){ .left_start = x_before_slide, .left_end = x, - .right_start = xc_to_y(x_before_slide, c, delta), + .right_start = xc_to_y(x_before_slide, + c, delta), .right_end = xk_to_y(x, k), }; debug("HIT x=(%u,%u) - y=(%u,%u)\n", @@ -404,30 +443,40 @@ diff_divide_myers_forward(struct diff_data *left, stru meeting_snake->right_start, meeting_snake->left_end, meeting_snake->right_end); - debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward, d-1); + debug_dump_myers_graph(left, right, NULL, + kd_forward, d, + kd_backward, d-1); return true; } } } - debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward, d-1); + debug_dump_myers_graph(left, right, NULL, kd_forward, d, + kd_backward, d-1); return false; } /* Do one backwards step in the "divide and conquer" graph traversal. * left: the left side to diff. * right: the right side to diff against. - * kd_forward: the traversal state for forwards traversal, to find a meeting point. - * Since forwards is done first, after this, both kd_forward and kd_backward will be valid for d. - * kd_forward points at the center of the state array, allowing negative indexes. - * kd_backward: the traversal state for backwards traversal, to find a meeting point. + * kd_forward: the traversal state for forwards traversal, to find a meeting + * point. + * Since forwards is done first, after this, both kd_forward and + * kd_backward will be valid for d. + * kd_forward points at the center of the state array, allowing + * negative indexes. + * kd_backward: the traversal state for backwards traversal, to find a meeting + * point. * This is carried over between invocations with increasing d. - * kd_backward points at the center of the state array, allowing negative indexes. - * d: Step or distance counter, indicating for what value of d the kd_backward should be populated. - * Before the first invocation, kd_backward[0] shall point at the bottom right of the Myers graph - * (left.len, right.len). + * kd_backward points at the center of the state array, allowing + * negative indexes. + * d: Step or distance counter, indicating for what value of d the kd_backward + * should be populated. + * Before the first invocation, kd_backward[0] shall point at the bottom + * right of the Myers graph (left.len, right.len). * The first invocation will be for d == 1. * meeting_snake: resulting meeting point, if any. + * Return true when a meeting point has been identified. */ static bool diff_divide_myers_backward(struct diff_data *left, struct diff_data *right, @@ -442,14 +491,18 @@ diff_divide_myers_backward(struct diff_data *left, str for (c = d; c >= -d; c -= 2) { if (c < -(int)left->atoms.len || c > (int)right->atoms.len) { - /* This diagonal is completely outside of the Myers graph, don't calculate it. */ + /* This diagonal is completely outside of the Myers + * graph, don't calculate it. */ if (c < -(int)left->atoms.len) - debug(" %d c < -(int)left->atoms.len %d\n", c, -(int)left->atoms.len); + debug(" %d c < -(int)left->atoms.len %d\n", c, + -(int)left->atoms.len); else - debug(" %d c > right->atoms.len %d\n", c, right->atoms.len); + debug(" %d c > right->atoms.len %d\n", c, + right->atoms.len); if (c < 0) { - /* We are traversing negatively, and already below the entire graph, nothing will come - * of this. */ + /* We are traversing negatively, and already + * below the entire graph, nothing will come of + * this. */ debug(" break"); break; } @@ -458,12 +511,15 @@ diff_divide_myers_backward(struct diff_data *left, str } debug("- c = %d\n", c); if (d == 0) { - /* This is the initializing step. There is no prev_c yet, get the initial x from the bottom - * right of the Myers graph. */ + /* This is the initializing step. There is no prev_c + * yet, get the initial x from the bottom right of the + * Myers graph. */ x = left->atoms.len; } - /* Favoring "-" lines first means favoring moving rightwards in the Myers graph. - * For this, all c should derive from c - 1, only the bottom most c derive from c + 1: + /* Favoring "-" lines first means favoring moving rightwards in + * the Myers graph. + * For this, all c should derive from c - 1, only the bottom + * most c derive from c + 1: * * 2 1 0 * --------------------------------------------------- @@ -480,41 +536,54 @@ diff_divide_myers_backward(struct diff_data *left, str * / * bottom most for d=2 --> 3,4 -2 * - * Except when a c + 1 from a previous run already means a further advancement in the graph. + * Except when a c + 1 from a previous run already means a + * further advancement in the graph. * If c == d, there is no c + 1 and c - 1 is the only option. - * If c < d, use c + 1 in case that yields a larger x. Also use c + 1 if c - 1 is outside the graph. + * If c < d, use c + 1 in case that yields a larger x. + * Also use c + 1 if c - 1 is outside the graph. */ else if (c > -d && (c == d || (c - 1 >= -(int)right->atoms.len && kd_backward[c - 1] <= kd_backward[c + 1]))) { /* A top one. - * From position prev_c, step upwards in the Myers graph: y -= 1. - * Decrementing y is achieved by incrementing c while keeping the same x. - * (since we're deriving y from y = x - c + delta). + * From position prev_c, step upwards in the Myers + * graph: y -= 1. + * Decrementing y is achieved by incrementing c while + * keeping the same x. (since we're deriving y from + * y = x - c + delta). */ int prev_c = c - 1; int prev_x = kd_backward[prev_c]; x = prev_x; } else { /* The bottom most one. - * From position prev_c, step to the left in the Myers graph: x -= 1. + * From position prev_c, step to the left in the Myers + * graph: x -= 1. */ int prev_c = c + 1; int prev_x = kd_backward[prev_c]; x = prev_x - 1; } - /* Slide up any snake that we might find here (sections of identical lines on both sides). */ - debug("c=%d x-1=%d Yb-1=%d-1=%d\n", c, x-1, xc_to_y(x, c, delta), xc_to_y(x, c, delta)-1); + /* Slide up any snake that we might find here (sections of + * identical lines on both sides). */ + debug("c=%d x-1=%d Yb-1=%d-1=%d\n", c, x-1, xc_to_y(x, c, + delta), + xc_to_y(x, c, delta)-1); if (x > 0) { - debug(" l="); debug_dump_atom(left, right, &left->atoms.head[x-1]); + debug(" l="); + debug_dump_atom(left, right, &left->atoms.head[x-1]); } if (xc_to_y(x, c, delta) > 0) { - debug(" r="); debug_dump_atom(right, left, &right->atoms.head[xc_to_y(x, c, delta)-1]); + debug(" r="); + debug_dump_atom(right, left, + &right->atoms.head[xc_to_y(x, c, delta)-1]); } int x_before_slide = x; while (x > 0 && xc_to_y(x, c, delta) > 0 - && diff_atom_same(&left->atoms.head[x-1], &right->atoms.head[xc_to_y(x, c, delta)-1])) + && diff_atom_same(&left->atoms.head[x-1], + &right->atoms.head[xc_to_y(x, c, + delta)-1])) x--; kd_backward[c] = x; if (x_before_slide != x) { @@ -524,30 +593,24 @@ diff_divide_myers_backward(struct diff_data *left, str if (DEBUG) { int fi; for (fi = d; fi >= c; fi--) { - debug("kd_backward[%d] = (%d, %d)\n", fi, kd_backward[fi], + debug("kd_backward[%d] = (%d, %d)\n", + fi, + kd_backward[fi], kd_backward[fi] - fi + delta); - /* - if (kd_backward[fi] >= 0 && kd_backward[fi] < left->atoms.len) - debug_dump_atom(left, right, &left->atoms.head[kd_backward[fi]]); - else - debug("\n"); - if (kd_backward[fi]-fi+delta >= 0 && kd_backward[fi]-fi+delta < right->atoms.len) - debug_dump_atom(right, left, &right->atoms.head[kd_backward[fi]-fi+delta]); - else - debug("\n"); - */ } } if (x < 0 || x > left->atoms.len - || xc_to_y(x, c, delta) < 0 || xc_to_y(x, c, delta) > right->atoms.len) + || xc_to_y(x, c, delta) < 0 + || xc_to_y(x, c, delta) > right->atoms.len) continue; - /* Figured out a new backwards traversal, see if this has gone onto or even past a preceding forwards - * traversal. + /* Figured out a new backwards traversal, see if this has gone + * onto or even past a preceding forwards traversal. * - * If the delta in length is even, then d and backwards_d hit the same state indexes -- note how this is - * different from in the forwards traversal, because now both d are the same: + * If the delta in length is even, then d and backwards_d hit + * the same state indexes -- note how this is different from in + * the forwards traversal, because now both d are the same: * * | d= 0 1 2 2 1 0 * ----+---------------- -------------------- @@ -567,88 +630,105 @@ diff_divide_myers_backward(struct diff_data *left, str * -2 | 0,2 -2 * | * -3 - * If the delta is odd, they end up off-by-one, i.e. on different diagonals. - * So in the backward path, we can only match up diagonals when the delta is even. + * If the delta is odd, they end up off-by-one, i.e. on + * different diagonals. + * So in the backward path, we can only match up diagonals when + * the delta is even. */ - if ((delta & 1) == 0) { - /* Forwards was done first, now both d are the same. */ - int forwards_d = d; + if ((delta & 1) != 0) + continue; + /* Forwards was done first, now both d are the same. */ + int forwards_d = d; - /* As soon as the lengths are not the same, the backwards traversal starts on a different diagonal, and - * c = k shifted by the difference in length. - */ - int k = c_to_k(c, delta); + /* As soon as the lengths are not the same, the + * backwards traversal starts on a different diagonal, + * and c = k shifted by the difference in length. + */ + int k = c_to_k(c, delta); - /* When the file sizes are very different, the traversal trees start on far distant diagonals. - * They don't necessarily meet straight on. See whether this backward value is also on a valid - * diagonal in kd_forward[], and match them if so. */ - if (k >= -forwards_d && k <= forwards_d) { - /* Current c is on a diagonal that exists in kd_forward[]. If the two x positions have - * met or passed (backward walked onto or past forward), then we've found a midpoint / a - * mid-box. - * - * When forwards and backwards traversals meet, the endpoints of the mid-snake are - * not the two points in kd_forward and kd_backward, but rather the section that - * was slid (if any) of the current forward/backward traversal only. - * - * For example: - * - * o-o-o - * | | - * o A - * | \ - * o o - * \ - * M - * |\ - * o o-o-o - * | | | - * o o X - * \ - * o - * \ - * o - * \ - * o - * - * The backward traversal reached M from the bottom and slid upwards. - * The forward traversal already reached X, which is not a straight line from M - * anymore, so picking a mid-snake from M to X would yield a mistake. - * - * The correct mid-snake is between M and A. M is where the backward traversal hit - * the diagonal that the forwards traversal has already passed, and A is what it - * reaches when sliding up identical lines. - */ + /* When the file sizes are very different, the traversal trees + * start on far distant diagonals. + * They don't necessarily meet straight on. See whether this + * backward value is also on a valid diagonal in kd_forward[], + * and match them if so. */ + if (k >= -forwards_d && k <= forwards_d) { + /* Current c is on a diagonal that exists in + * kd_forward[]. If the two x positions have met or + * passed (backward walked onto or past forward), then + * we've found a midpoint / a mid-box. + * + * When forwards and backwards traversals meet, the + * endpoints of the mid-snake are not the two points in + * kd_forward and kd_backward, but rather the section + * that was slid (if any) of the current + * forward/backward traversal only. + * + * For example: + * + * o-o-o + * | | + * o A + * | \ + * o o + * \ + * M + * |\ + * o o-o-o + * | | | + * o o X + * \ + * o + * \ + * o + * \ + * o + * + * The backward traversal reached M from the bottom and + * slid upwards. The forward traversal already reached + * X, which is not a straight line from M anymore, so + * picking a mid-snake from M to X would yield a + * mistake. + * + * The correct mid-snake is between M and A. M is where + * the backward traversal hit the diagonal that the + * forwards traversal has already passed, and A is what + * it reaches when sliding up identical lines. + */ - int forward_x = kd_forward[k]; - debug("Compare: k=%d c=%d is (%d,%d) >= (%d,%d)?\n", - k, c, forward_x, xk_to_y(forward_x, k), x, xc_to_y(x, c, delta)); - if (forward_x >= x) { - *meeting_snake = (struct diff_box){ - .left_start = x, - .left_end = x_before_slide, - .right_start = xc_to_y(x, c, delta), - .right_end = xk_to_y(x_before_slide, k), - }; - debug("HIT x=%u,%u - y=%u,%u\n", - meeting_snake->left_start, - meeting_snake->right_start, - meeting_snake->left_end, - meeting_snake->right_end); - debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward, d); - return true; - } + int forward_x = kd_forward[k]; + debug("Compare: k=%d c=%d is (%d,%d) >= (%d,%d)?\n", + k, c, forward_x, xk_to_y(forward_x, k), + x, xc_to_y(x, c, delta)); + if (forward_x >= x) { + *meeting_snake = (struct diff_box){ + .left_start = x, + .left_end = x_before_slide, + .right_start = xc_to_y(x, c, delta), + .right_end = xk_to_y(x_before_slide, k), + }; + debug("HIT x=%u,%u - y=%u,%u\n", + meeting_snake->left_start, + meeting_snake->right_start, + meeting_snake->left_end, + meeting_snake->right_end); + debug_dump_myers_graph(left, right, NULL, + kd_forward, d, + kd_backward, d); + return true; } } } - debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward, d); + debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward, + d); return false; } -/* Myers "Divide et Impera": tracing forwards from the start and backwards from the end to find a midpoint that divides - * the problem into smaller chunks. Requires only linear amounts of memory. */ +/* Myers "Divide et Impera": tracing forwards from the start and backwards from + * the end to find a midpoint that divides the problem into smaller chunks. + * Requires only linear amounts of memory. */ enum diff_rc -diff_algo_myers_divide(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_algo_myers_divide(const struct diff_algo_config *algo_config, + struct diff_state *state) { enum diff_rc rc = DIFF_RC_ENOMEM; struct diff_data *left = &state->left; @@ -661,7 +741,8 @@ diff_algo_myers_divide(const struct diff_algo_config * debug_dump(right); debug_dump_myers_graph(left, right, NULL, NULL, 0, NULL, 0); - /* Allocate two columns of a Myers graph, one for the forward and one for the backward traversal. */ + /* Allocate two columns of a Myers graph, one for the forward and one + * for the backward traversal. */ unsigned int max = left->atoms.len + right->atoms.len; size_t kd_len = max + 1; size_t kd_buf_size = kd_len << 1; @@ -674,7 +755,8 @@ diff_algo_myers_divide(const struct diff_algo_config * int *kd_forward = kd_buf; int *kd_backward = kd_buf + kd_len; - /* The 'k' axis in Myers spans positive and negative indexes, so point the kd to the middle. + /* The 'k' axis in Myers spans positive and negative indexes, so point + * the kd to the middle. * It is then possible to index from -max/2 .. max/2. */ kd_forward += max/2; kd_backward += max/2; @@ -693,8 +775,10 @@ diff_algo_myers_divide(const struct diff_algo_config * } if (!found_midpoint) { - /* Divide and conquer failed to find a meeting point. Use the fallback_algo defined in the algo_config - * (leave this to the caller). This is just paranoia/sanity, we normally should always find a midpoint. + /* Divide and conquer failed to find a meeting point. Use the + * fallback_algo defined in the algo_config (leave this to the + * caller). This is just paranoia/sanity, we normally should + * always find a midpoint. */ debug(" no midpoint \n"); rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK; @@ -702,7 +786,8 @@ diff_algo_myers_divide(const struct diff_algo_config * } else { debug(" mid snake L: %u to %u of %u R: %u to %u of %u\n", mid_snake.left_start, mid_snake.left_end, left->atoms.len, - mid_snake.right_start, mid_snake.right_end, right->atoms.len); + mid_snake.right_start, mid_snake.right_end, + right->atoms.len); /* Section before the mid-snake. */ debug("Section before the mid-snake\n"); @@ -713,67 +798,82 @@ diff_algo_myers_divide(const struct diff_algo_config * unsigned int right_section_len = mid_snake.right_start; if (left_section_len && right_section_len) { - /* Record an unsolved chunk, the caller will apply inner_algo() on this chunk. */ + /* Record an unsolved chunk, the caller will apply + * inner_algo() on this chunk. */ if (!diff_state_add_chunk(state, false, left_atom, left_section_len, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } else if (left_section_len && !right_section_len) { - /* Only left atoms and none on the right, they form a "minus" chunk, then. */ + /* Only left atoms and none on the right, they form a + * "minus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, left_section_len, right_atom, 0)) goto return_rc; } else if (!left_section_len && right_section_len) { - /* No left atoms, only atoms on the right, they form a "plus" chunk, then. */ + /* No left atoms, only atoms on the right, they form a + * "plus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, 0, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } - /* else: left_section_len == 0 and right_section_len == 0, i.e. nothing before the mid-snake. */ + /* else: left_section_len == 0 and right_section_len == 0, i.e. + * nothing before the mid-snake. */ if (!diff_box_empty(&mid_snake)) { - /* The midpoint is a "snake", i.e. on a section of identical data on both sides: that - * section immediately becomes a solved diff chunk. */ + /* The midpoint is a "snake", i.e. on a section of + * identical data on both sides: that section + * immediately becomes a solved diff chunk. */ debug("the mid-snake\n"); if (!diff_state_add_chunk(state, true, - &left->atoms.head[mid_snake.left_start], - mid_snake.left_end - mid_snake.left_start, - &right->atoms.head[mid_snake.right_start], - mid_snake.right_end - mid_snake.right_start)) + &left->atoms.head[mid_snake.left_start], + mid_snake.left_end - mid_snake.left_start, + &right->atoms.head[mid_snake.right_start], + mid_snake.right_end - mid_snake.right_start)) goto return_rc; } /* Section after the mid-snake. */ debug("Section after the mid-snake\n"); - debug(" left_end %u right_end %u\n", mid_snake.left_end, mid_snake.right_end); - debug(" left_count %u right_count %u\n", left->atoms.len, right->atoms.len); + debug(" left_end %u right_end %u\n", + mid_snake.left_end, mid_snake.right_end); + debug(" left_count %u right_count %u\n", + left->atoms.len, right->atoms.len); left_atom = &left->atoms.head[mid_snake.left_end]; left_section_len = left->atoms.len - mid_snake.left_end; right_atom = &right->atoms.head[mid_snake.right_end]; right_section_len = right->atoms.len - mid_snake.right_end; if (left_section_len && right_section_len) { - /* Record an unsolved chunk, the caller will apply inner_algo() on this chunk. */ + /* Record an unsolved chunk, the caller will apply + * inner_algo() on this chunk. */ if (!diff_state_add_chunk(state, false, left_atom, left_section_len, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } else if (left_section_len && !right_section_len) { - /* Only left atoms and none on the right, they form a "minus" chunk, then. */ + /* Only left atoms and none on the right, they form a + * "minus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, left_section_len, right_atom, 0)) goto return_rc; } else if (!left_section_len && right_section_len) { - /* No left atoms, only atoms on the right, they form a "plus" chunk, then. */ + /* No left atoms, only atoms on the right, they form a + * "plus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, 0, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } - /* else: left_section_len == 0 and right_section_len == 0, i.e. nothing after the mid-snake. */ + /* else: left_section_len == 0 and right_section_len == 0, i.e. + * nothing after the mid-snake. */ } rc = DIFF_RC_OK; @@ -784,13 +884,16 @@ return_rc: return rc; } -/* Myers Diff tracing from the start all the way through to the end, requiring quadratic amounts of memory. This can - * fail if the required space surpasses algo_config->permitted_state_size. */ +/* Myers Diff tracing from the start all the way through to the end, requiring + * quadratic amounts of memory. This can fail if the required space surpasses + * algo_config->permitted_state_size. */ enum diff_rc -diff_algo_myers(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_algo_myers(const struct diff_algo_config *algo_config, + struct diff_state *state) { - /* do a diff_divide_myers_forward() without a _backward(), so that it walks forward across the entire - * files to reach the end. Keep each run's state, and do a final backtrace. */ + /* do a diff_divide_myers_forward() without a _backward(), so that it + * walks forward across the entire files to reach the end. Keep each + * run's state, and do a final backtrace. */ enum diff_rc rc = DIFF_RC_ENOMEM; struct diff_data *left = &state->left; struct diff_data *right = &state->right; @@ -802,7 +905,8 @@ diff_algo_myers(const struct diff_algo_config *algo_co debug_dump(right); debug_dump_myers_graph(left, right, NULL, NULL, 0, NULL, 0); - /* Allocate two columns of a Myers graph, one for the forward and one for the backward traversal. */ + /* Allocate two columns of a Myers graph, one for the forward and one + * for the backward traversal. */ unsigned int max = left->atoms.len + right->atoms.len; size_t kd_len = max + 1 + max; size_t kd_buf_size = kd_len * kd_len; @@ -822,7 +926,8 @@ diff_algo_myers(const struct diff_algo_config *algo_co for (i = 0; i < kd_buf_size; i++) kd_buf[i] = -1; - /* The 'k' axis in Myers spans positive and negative indexes, so point the kd to the middle. + /* The 'k' axis in Myers spans positive and negative indexes, so point + * the kd to the middle. * It is then possible to index from -max .. max. */ int *kd_origin = kd_buf + max; int *kd_column = kd_origin; @@ -838,15 +943,21 @@ diff_algo_myers(const struct diff_algo_config *algo_co debug("-- %s d=%d\n", __func__, d); for (k = d; k >= -d; k -= 2) { - if (k < -(int)right->atoms.len || k > (int)left->atoms.len) { - /* This diagonal is completely outside of the Myers graph, don't calculate it. */ + if (k < -(int)right->atoms.len + || k > (int)left->atoms.len) { + /* This diagonal is completely outside of the + * Myers graph, don't calculate it. */ if (k < -(int)right->atoms.len) - debug(" %d k < -(int)right->atoms.len %d\n", k, -(int)right->atoms.len); + debug(" %d k <" + " -(int)right->atoms.len %d\n", + k, -(int)right->atoms.len); else - debug(" %d k > left->atoms.len %d\n", k, left->atoms.len); + debug(" %d k > left->atoms.len %d\n", k, + left->atoms.len); if (k < 0) { - /* We are traversing negatively, and already below the entire graph, nothing will come - * of this. */ + /* We are traversing negatively, and + * already below the entire graph, + * nothing will come of this. */ debug(" break"); break; } @@ -856,46 +967,62 @@ diff_algo_myers(const struct diff_algo_config *algo_co debug("- k = %d\n", k); if (d == 0) { - /* This is the initializing step. There is no prev_k yet, get the initial x from the top left of - * the Myers graph. */ + /* This is the initializing step. There is no + * prev_k yet, get the initial x from the top + * left of the Myers graph. */ x = 0; } else { int *kd_prev_column = kd_column - kd_len; - /* Favoring "-" lines first means favoring moving rightwards in the Myers graph. - * For this, all k should derive from k - 1, only the bottom most k derive from k + 1: + /* Favoring "-" lines first means favoring + * moving rightwards in the Myers graph. + * For this, all k should derive from k - 1, + * only the bottom most k derive from k + 1: * * | d= 0 1 2 * ----+---------------- * k= | - * 2 | 2,0 <-- from prev_k = 2 - 1 = 1 - * | / + * 2 | 2,0 <-- from + * | / prev_k = 2 - 1 = 1 * 1 | 1,0 * | / * 0 | -->0,0 3,3 * | \\ / - * -1 | 0,1 <-- bottom most for d=1 from prev_k = -1 + 1 = 0 - * | \\ - * -2 | 0,2 <-- bottom most for d=2 from prev_k = -2 + 1 = -1 + * -1 | 0,1 <-- bottom most for d=1 + * | \\ from prev_k = -1+1 = 0 + * -2 | 0,2 <-- bottom most for + * d=2 from + * prev_k = -2+1 = -1 * - * Except when a k + 1 from a previous run already means a further advancement in the graph. - * If k == d, there is no k + 1 and k - 1 is the only option. - * If k < d, use k + 1 in case that yields a larger x. Also use k + 1 if k - 1 is outside the graph. + * Except when a k + 1 from a previous run + * already means a further advancement in the + * graph. + * If k == d, there is no k + 1 and k - 1 is the + * only option. + * If k < d, use k + 1 in case that yields a + * larger x. Also use k + 1 if k - 1 is outside + * the graph. */ - if (k > -d && (k == d - || (k - 1 >= -(int)right->atoms.len - && kd_prev_column[k - 1] >= kd_prev_column[k + 1]))) { + if (k > -d + && (k == d + || (k - 1 >= -(int)right->atoms.len + && kd_prev_column[k - 1] + >= kd_prev_column[k + 1]))) { /* Advance from k - 1. - * From position prev_k, step to the right in the Myers graph: x += 1. + * From position prev_k, step to the + * right in the Myers graph: x += 1. */ int prev_k = k - 1; int prev_x = kd_prev_column[prev_k]; x = prev_x + 1; } else { /* The bottom most one. - * From position prev_k, step to the bottom in the Myers graph: y += 1. - * Incrementing y is achieved by decrementing k while keeping the same x. - * (since we're deriving y from y = x - k). + * From position prev_k, step to the + * bottom in the Myers graph: y += 1. + * Incrementing y is achieved by + * decrementing k while keeping the same + * x. (since we're deriving y from y = + * x - k). */ int prev_k = k + 1; int prev_x = kd_prev_column[prev_k]; @@ -904,29 +1031,24 @@ diff_algo_myers(const struct diff_algo_config *algo_co } /* Slide down any snake that we might find here. */ - while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len - && diff_atom_same(&left->atoms.head[x], &right->atoms.head[xk_to_y(x, k)])) + while (x < left->atoms.len + && xk_to_y(x, k) < right->atoms.len + && diff_atom_same(&left->atoms.head[x], + &right->atoms.head[xk_to_y(x, k)])) x++; kd_column[k] = x; if (DEBUG) { int fi; for (fi = d; fi >= k; fi-=2) { - debug("kd_column[%d] = (%d, %d)\n", fi, kd_column[fi], kd_column[fi] - fi); -#if 0 - if (kd_column[fi] >= 0 && kd_column[fi] < left->atoms.len) - debug_dump_atom(left, right, &left->atoms.head[kd_column[fi]]); - else - debug("\n"); - if (kd_column[fi]-fi >= 0 && kd_column[fi]-fi < right->atoms.len) - debug_dump_atom(right, left, &right->atoms.head[kd_column[fi]-fi]); - else - debug("\n"); -#endif + debug("kd_column[%d] = (%d, %d)\n", fi, + kd_column[fi], + kd_column[fi] - fi); } } - if (x == left->atoms.len && xk_to_y(x, k) == right->atoms.len) { + if (x == left->atoms.len + && xk_to_y(x, k) == right->atoms.len) { /* Found a path */ backtrack_d = d; backtrack_k = k; @@ -967,8 +1089,10 @@ diff_algo_myers(const struct diff_algo_config *algo_co x = kd_column[k]; y = xk_to_y(x, k); - /* When the best position is identified, remember it for that kd_column. - * That kd_column is no longer needed otherwise, so just re-purpose kd_column[0] = x and kd_column[1] = y, + /* When the best position is identified, remember it for that + * kd_column. + * That kd_column is no longer needed otherwise, so just + * re-purpose kd_column[0] = x and kd_column[1] = y, * so that there is no need to allocate more memory. */ kd_column[0] = x; @@ -993,8 +1117,8 @@ diff_algo_myers(const struct diff_algo_config *algo_co * | / * 1 | 1,0 4,3 * | / / \ - * 0 | -->0,0 3,3 4,4 --> backtrack_d = 4, backtrack_k = 0 - * | \ / \ + * 0 | -->0,0 3,3 4,4 --> backtrack_d = 4, + * | \ / \ backtrack_k = 0 * -1 | 0,1 3,4 * | \ * -2 | 0,2__ @@ -1004,20 +1128,24 @@ diff_algo_myers(const struct diff_algo_config *algo_co kd_prev_column[k-1], xk_to_y(kd_prev_column[k-1],k-1), kd_prev_column[k+1], xk_to_y(kd_prev_column[k+1],k+1)); if (y == 0 - || (x > 0 && kd_prev_column[k - 1] >= kd_prev_column[k + 1])) { + || (x > 0 + && kd_prev_column[k - 1] >= kd_prev_column[k + 1])) { k = k - 1; debug("prev k=k-1=%d x=%d y=%d\n", - k, kd_prev_column[k], xk_to_y(kd_prev_column[k], k)); + k, kd_prev_column[k], + xk_to_y(kd_prev_column[k], k)); } else { k = k + 1; debug("prev k=k+1=%d x=%d y=%d\n", - k, kd_prev_column[k], xk_to_y(kd_prev_column[k], k)); + k, kd_prev_column[k], + xk_to_y(kd_prev_column[k], k)); } kd_column = kd_prev_column; } /* Forwards again, this time recording the diff chunks. - * Definitely start from 0,0. kd_column[0] may actually point to the bottom of a snake starting at 0,0 */ + * Definitely start from 0,0. kd_column[0] may actually point to the + * bottom of a snake starting at 0,0 */ x = 0; y = 0; @@ -1036,8 +1164,9 @@ diff_algo_myers(const struct diff_algo_config *algo_co rc = DIFF_RC_ENOMEM; if (left_section_len && right_section_len) { /* This must be a snake slide. - * Snake slides have a straight line leading into them (except when starting at (0,0)). Find - * out whether the lead-in is horizontal or vertical: + * Snake slides have a straight line leading into them + * (except when starting at (0,0)). Find out whether the + * lead-in is horizontal or vertical: * * left * ----------> @@ -1049,10 +1178,12 @@ diff_algo_myers(const struct diff_algo_config *algo_co * t| o o * v * - * If left_section_len > right_section_len, the lead-in is horizontal, meaning first - * remove one atom from the left before sliding down the snake. - * If right_section_len > left_section_len, the lead-in is vetical, so add one atom from - * the right before sliding down the snake. */ + * If left_section_len > right_section_len, the lead-in + * is horizontal, meaning first remove one atom from the + * left before sliding down the snake. + * If right_section_len > left_section_len, the lead-in + * is vetical, so add one atom from the right before + * sliding down the snake. */ if (left_section_len == right_section_len + 1) { if (!diff_state_add_chunk(state, true, left_atom, 1, @@ -1068,26 +1199,31 @@ diff_algo_myers(const struct diff_algo_config *algo_co right_atom++; right_section_len--; } else if (left_section_len != right_section_len) { - /* The numbers are making no sense. Should never happen. */ + /* The numbers are making no sense. Should never + * happen. */ rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK; goto return_rc; } if (!diff_state_add_chunk(state, true, left_atom, left_section_len, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } else if (left_section_len && !right_section_len) { - /* Only left atoms and none on the right, they form a "minus" chunk, then. */ + /* Only left atoms and none on the right, they form a + * "minus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, left_section_len, right_atom, 0)) goto return_rc; } else if (!left_section_len && right_section_len) { - /* No left atoms, only atoms on the right, they form a "plus" chunk, then. */ + /* No left atoms, only atoms on the right, they form a + * "plus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, 0, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } blob - cdc0e9bd029917c5bf84ef03f748cd13965f386c blob + 8d2e931317d282485fa79c4c3fabaf475ea31f6e --- lib/diff_output.c +++ lib/diff_output.c @@ -18,7 +18,8 @@ #include void -diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, unsigned int count) +diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, + unsigned int count) { struct diff_atom *atom; foreach_diff_atom(atom, start_atom, count) { blob - 4195b51f8a5f76a61aff4d614ce64a923c8ecc71 blob + 67effa60cee65ccf37fdd85d42607d9456e75e06 --- lib/diff_output_plain.c +++ lib/diff_output_plain.c @@ -30,11 +30,14 @@ diff_output_plain(FILE *dest, const struct diff_input_ for (i = 0; i < result->chunks.len; i++) { struct diff_chunk *c = &result->chunks.head[i]; if (c->left_count && c->right_count) - diff_output_lines(dest, c->solved ? " " : "?", c->left_start, c->left_count); + diff_output_lines(dest, c->solved ? " " : "?", + c->left_start, c->left_count); else if (c->left_count && !c->right_count) - diff_output_lines(dest, c->solved ? "-" : "?", c->left_start, c->left_count); + diff_output_lines(dest, c->solved ? "-" : "?", + c->left_start, c->left_count); else if (c->right_count && !c->left_count) - diff_output_lines(dest, c->solved ? "+" : "?", c->right_start, c->right_count); + diff_output_lines(dest, c->solved ? "+" : "?", + c->right_start, c->right_count); } return DIFF_RC_OK; } blob - 8e537ca9fd7a8c27f7af66f3592eaac6043a3b2e blob + 869c38d01b3d1953319161eaad4cb107ca3332d8 --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -55,31 +55,39 @@ chunk_context_empty(const struct chunk_context *cc) } static void -chunk_context_get(struct chunk_context *cc, const struct diff_result *r, int chunk_idx, - int context_lines) +chunk_context_get(struct chunk_context *cc, const struct diff_result *r, + int chunk_idx, int context_lines) { const struct diff_chunk *c = &r->chunks.head[chunk_idx]; int left_start = diff_atom_root_idx(&r->left, c->left_start); + int left_end = MIN(r->left.atoms.len, + left_start + c->left_count + context_lines); int right_start = diff_atom_root_idx(&r->right, c->right_start); + int right_end = MIN(r->right.atoms.len, + right_start + c->right_count + context_lines); + left_start = MAX(0, left_start - context_lines); + right_start = MAX(0, right_start - context_lines); + *cc = (struct chunk_context){ .chunk = { .start = chunk_idx, .end = chunk_idx + 1, }, .left = { - .start = MAX(0, left_start - context_lines), - .end = MIN(r->left.atoms.len, left_start + c->left_count + context_lines), + .start = left_start, + .end = left_end, }, .right = { - .start = MAX(0, right_start - context_lines), - .end = MIN(r->right.atoms.len, right_start + c->right_count + context_lines), + .start = right_start, + .end = right_end, }, }; } static bool -chunk_contexts_touch(const struct chunk_context *cc, const struct chunk_context *other) +chunk_contexts_touch(const struct chunk_context *cc, + const struct chunk_context *other) { return ranges_touch(&cc->chunk, &other->chunk) || ranges_touch(&cc->left, &other->left) @@ -87,7 +95,8 @@ chunk_contexts_touch(const struct chunk_context *cc, c } static void -chunk_contexts_merge(struct chunk_context *cc, const struct chunk_context *other) +chunk_contexts_merge(struct chunk_context *cc, + const struct chunk_context *other) { ranges_merge(&cc->chunk, &other->chunk); ranges_merge(&cc->left, &other->left); @@ -95,8 +104,10 @@ chunk_contexts_merge(struct chunk_context *cc, const s } static void -diff_output_unidiff_chunk(FILE *dest, bool *header_printed, const struct diff_input_info *info, - const struct diff_result *result, const struct chunk_context *cc) +diff_output_unidiff_chunk(FILE *dest, bool *header_printed, + const struct diff_input_info *info, + const struct diff_result *result, + const struct chunk_context *cc) { if (range_empty(&cc->left) && range_empty(&cc->right)) return; @@ -112,13 +123,20 @@ diff_output_unidiff_chunk(FILE *dest, bool *header_pri cc->left.start + 1, cc->left.end - cc->left.start, cc->right.start + 1, cc->right.end - cc->right.start); - /* Got the absolute line numbers where to start printing, and the index of the interesting (non-context) chunk. - * To print context lines above the interesting chunk, nipping on the previous chunk index may be necessary. - * It is guaranteed to be only context lines where left == right, so it suffices to look on the left. */ - const struct diff_chunk *first_chunk = &result->chunks.head[cc->chunk.start]; - int chunk_start_line = diff_atom_root_idx(&result->left, first_chunk->left_start); + /* Got the absolute line numbers where to start printing, and the index + * of the interesting (non-context) chunk. + * To print context lines above the interesting chunk, nipping on the + * previous chunk index may be necessary. + * It is guaranteed to be only context lines where left == right, so it + * suffices to look on the left. */ + const struct diff_chunk *first_chunk; + int chunk_start_line; + first_chunk = &result->chunks.head[cc->chunk.start]; + chunk_start_line = diff_atom_root_idx(&result->left, + first_chunk->left_start); if (cc->left.start < chunk_start_line) - diff_output_lines(dest, " ", &result->left.atoms.head[cc->left.start], + diff_output_lines(dest, " ", + &result->left.atoms.head[cc->left.start], chunk_start_line - cc->left.start); /* Now write out all the joined chunks and contexts between them */ @@ -127,24 +145,36 @@ diff_output_unidiff_chunk(FILE *dest, bool *header_pri const struct diff_chunk *c = &result->chunks.head[c_idx]; if (c->left_count && c->right_count) - diff_output_lines(dest, c->solved ? " " : "?", c->left_start, c->left_count); + diff_output_lines(dest, + c->solved ? " " : "?", + c->left_start, c->left_count); else if (c->left_count && !c->right_count) - diff_output_lines(dest, c->solved ? "-" : "?", c->left_start, c->left_count); + diff_output_lines(dest, + c->solved ? "-" : "?", + c->left_start, c->left_count); else if (c->right_count && !c->left_count) - diff_output_lines(dest, c->solved ? "+" : "?", c->right_start, c->right_count); + diff_output_lines(dest, + c->solved ? "+" : "?", + c->right_start, c->right_count); } /* Trailing context? */ - const struct diff_chunk *last_chunk = &result->chunks.head[cc->chunk.end - 1]; - int chunk_end_line = diff_atom_root_idx(&result->left, last_chunk->left_start + last_chunk->left_count); + const struct diff_chunk *last_chunk; + int chunk_end_line; + last_chunk = &result->chunks.head[cc->chunk.end - 1]; + chunk_end_line = diff_atom_root_idx(&result->left, + last_chunk->left_start + + last_chunk->left_count); if (cc->left.end > chunk_end_line) - diff_output_lines(dest, " ", &result->left.atoms.head[chunk_end_line], + diff_output_lines(dest, " ", + &result->left.atoms.head[chunk_end_line], cc->left.end - chunk_end_line); } enum diff_rc diff_output_unidiff(FILE *dest, const struct diff_input_info *info, - const struct diff_result *result, unsigned int context_lines) + const struct diff_result *result, + unsigned int context_lines) { if (!result) return DIFF_RC_EINVAL; @@ -165,44 +195,53 @@ diff_output_unidiff(FILE *dest, const struct diff_inpu if (chunk_context_empty(&cc)) { /* These are the first lines being printed. - * Note down the start point, any number of subsequent chunks may be joined up to this - * unidiff chunk by context lines or by being directly adjacent. */ + * Note down the start point, any number of subsequent + * chunks may be joined up to this unidiff chunk by + * context lines or by being directly adjacent. */ chunk_context_get(&cc, result, i, context_lines); - debug("new chunk to be printed: chunk %d-%d left %d-%d right %d-%d\n", + debug("new chunk to be printed:" + " chunk %d-%d left %d-%d right %d-%d\n", cc.chunk.start, cc.chunk.end, cc.left.start, cc.left.end, cc.right.start, cc.right.end); continue; } - /* There already is a previous chunk noted down for being printed. - * Does it join up with this one? */ + /* There already is a previous chunk noted down for being + * printed. Does it join up with this one? */ chunk_context_get(&next, result, i, context_lines); - debug("new chunk to be printed: chunk %d-%d left %d-%d right %d-%d\n", + debug("new chunk to be printed:" + " chunk %d-%d left %d-%d right %d-%d\n", next.chunk.start, next.chunk.end, next.left.start, next.left.end, next.right.start, next.right.end); if (chunk_contexts_touch(&cc, &next)) { - /* This next context touches or overlaps the previous one, join. */ + /* This next context touches or overlaps the previous + * one, join. */ chunk_contexts_merge(&cc, &next); - debug("new chunk to be printed touches previous chunk, now: left %d-%d right %d-%d\n", + debug("new chunk to be printed touches previous chunk," + " now: left %d-%d right %d-%d\n", cc.left.start, cc.left.end, cc.right.start, cc.right.end); continue; } - /* No touching, so the previous context is complete with a gap between it and - * this next one. Print the previous one and start fresh here. */ - debug("new chunk to be printed does not touch previous chunk; print left %d-%d right %d-%d\n", + /* No touching, so the previous context is complete with a gap + * between it and this next one. Print the previous one and + * start fresh here. */ + debug("new chunk to be printed does not touch previous chunk;" + " print left %d-%d right %d-%d\n", cc.left.start, cc.left.end, cc.right.start, cc.right.end); - diff_output_unidiff_chunk(dest, &header_printed, info, result, &cc); + diff_output_unidiff_chunk(dest, &header_printed, info, result, + &cc); cc = next; debug("new unprinted chunk is left %d-%d right %d-%d\n", cc.left.start, cc.left.end, cc.right.start, cc.right.end); } if (!chunk_context_empty(&cc)) - diff_output_unidiff_chunk(dest, &header_printed, info, result, &cc); + diff_output_unidiff_chunk(dest, &header_printed, info, result, + &cc); return DIFF_RC_OK; } blob - 9688701396a6430dba17305759f33a0b93832d9a blob + 9d6b49e69f97d7d22f37fd297adf27243d920387 --- lib/diff_patience.c +++ lib/diff_patience.c @@ -55,13 +55,15 @@ diff_atoms_mark_unique(struct diff_data *d, unsigned i *unique_count = count; } -/* Mark those lines as atom->patience.unique_in_both = true that appear exactly once in each side. */ +/* Mark those lines as atom->patience.unique_in_both = true that appear exactly + * once in each side. */ static void diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_data *right, unsigned int *unique_in_both_count) { - /* Derive the final unique_in_both count without needing an explicit iteration. So this is just some - * optimiziation to save one iteration in the end. */ + /* Derive the final unique_in_both count without needing an explicit + * iteration. So this is just some optimiziation to save one iteration + * in the end. */ unsigned int unique_in_both; diff_atoms_mark_unique(left, &unique_in_both); @@ -91,12 +93,14 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, if (found_in_b == 0 || found_in_b > 1) { i->patience.unique_in_both = false; unique_in_both--; - debug("unique_in_both %u (%d) ", unique_in_both, found_in_b); + debug("unique_in_both %u (%d) ", unique_in_both, + found_in_b); debug_dump_atom(left, NULL, i); } } - /* Still need to unmark right[*]->patience.unique_in_both for atoms that don't exist in left */ + /* Still need to unmark right[*]->patience.unique_in_both for atoms that + * don't exist in left */ diff_data_foreach_atom(i, right) { if (!i->patience.unique_here || !i->patience.unique_in_both) @@ -121,10 +125,12 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, } static void -diff_atoms_swallow_identical_neighbors(struct diff_data *left, struct diff_data *right, +diff_atoms_swallow_identical_neighbors(struct diff_data *left, + struct diff_data *right, unsigned int *unique_in_both_count) { - debug("trivially combine identical lines arount unique_in_both lines\n"); + debug("trivially combine identical lines" + " around unique_in_both lines\n"); unsigned int l_idx; unsigned int next_l_idx; @@ -140,46 +146,58 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat debug("check identical lines around "); debug_dump_atom(left, right, l); - unsigned int r_idx = diff_atom_idx(right, l->patience.pos_in_other); + unsigned int r_idx = diff_atom_idx(right, + l->patience.pos_in_other); struct range identical_l; struct range identical_r; /* Swallow upwards. - * Each common-unique line swallows identical lines upwards and downwards. - * All common-unique lines that were part of the identical lines following below were already swallowed - * in the previous iteration, so we will never hit another common-unique line above. */ + * Each common-unique line swallows identical lines upwards and + * downwards. + * All common-unique lines that were part of the identical lines + * following below were already swallowed in the previous + * iteration, so we will never hit another common-unique line + * above. */ for (identical_l.start = l_idx, identical_r.start = r_idx; identical_l.start > l_min - && identical_r.start > r_min - && diff_atom_same(&left->atoms.head[identical_l.start - 1], - &right->atoms.head[identical_r.start - 1]); + && identical_r.start > r_min + && diff_atom_same( + &left->atoms.head[identical_l.start - 1], + &right->atoms.head[identical_r.start - 1]); identical_l.start--, identical_r.start--); /* Swallow downwards */ for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1; identical_l.end < left->atoms.len - && identical_r.end < right->atoms.len - && diff_atom_same(&left->atoms.head[identical_l.end], - &right->atoms.head[identical_r.end]); + && identical_r.end < right->atoms.len + && diff_atom_same(&left->atoms.head[identical_l.end], + &right->atoms.head[identical_r.end]); identical_l.end++, identical_r.end++, next_l_idx++) { - if (left->atoms.head[identical_l.end].patience.unique_in_both) { - /* Part of a chunk of identical lines, remove from listing of unique_in_both lines */ - left->atoms.head[identical_l.end].patience.unique_in_both = false; - right->atoms.head[identical_r.end].patience.unique_in_both = false; - (*unique_in_both_count)--; - } + struct diff_atom *l_end; + struct diff_atom *r_end; + l_end = &left->atoms.head[identical_l.end]; + l_end = &right->atoms.head[identical_r.end]; + if (!l_end->patience.unique_in_both) + continue; + /* Part of a chunk of identical lines, remove from + * listing of unique_in_both lines */ + l_end->patience.unique_in_both = false; + r_end->patience.unique_in_both = false; + (*unique_in_both_count)--; } l->patience.identical_lines = identical_l; - l->patience.pos_in_other->patience.identical_lines = identical_r; + l->patience.pos_in_other->patience.identical_lines = + identical_r; l_min = identical_l.end; r_min = identical_r.end; if (!range_empty(&l->patience.identical_lines)) { - debug("common-unique line at l=%u r=%u swallowed identical lines l=%u-%u r=%u-%u\n", + debug("common-unique line at l=%u r=%u swallowed" + " identical lines l=%u-%u r=%u-%u\n", l_idx, r_idx, identical_l.start, identical_l.end, identical_r.start, identical_r.end); @@ -188,12 +206,36 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat } } -/* Among the lines that appear exactly once in each side, find the longest streak that appear in both files in the same - * order (with other stuff allowed to interleave). Use patience sort for that, as in the Patience Diff algorithm. - * See https://bramcohen.livejournal.com/73318.html and, for a much more detailed explanation, +/* binary search to find the stack to put this atom "card" on. */ +static int +find_target_stack(struct diff_atom *atom, + struct diff_atom **patience_stacks, + unsigned int patience_stacks_count) +{ + unsigned int lo = 0; + unsigned int hi = patience_stacks_count; + while (lo < hi) { + unsigned int mid = (lo + hi) >> 1; + + if (patience_stacks[mid]->patience.pos_in_other + < atom->patience.pos_in_other) + lo = mid + 1; + else + hi = mid; + } + return lo; +} + +/* Among the lines that appear exactly once in each side, find the longest + * streak that appear in both files in the same order (with other stuff allowed + * to interleave). Use patience sort for that, as in the Patience Diff + * algorithm. + * See https://bramcohen.livejournal.com/73318.html and, for a much more + * detailed explanation, * https://blog.jcoglan.com/2017/09/19/the-patience-diff-algorithm/ */ enum diff_rc -diff_algo_patience(const struct diff_algo_config *algo_config, struct diff_state *state) +diff_algo_patience(const struct diff_algo_config *algo_config, + struct diff_state *state) { enum diff_rc rc = DIFF_RC_ENOMEM; @@ -204,7 +246,8 @@ diff_algo_patience(const struct diff_algo_config *algo debug("\n** %s\n", __func__); - /* Find those lines that appear exactly once in 'left' and exactly once in 'right'. */ + /* Find those lines that appear exactly once in 'left' and exactly once + * in 'right'. */ diff_atoms_mark_unique_in_both(left, right, &unique_in_both_count); debug("unique_in_both_count %u\n", unique_in_both_count); @@ -214,30 +257,39 @@ diff_algo_patience(const struct diff_algo_config *algo debug_dump(right); if (!unique_in_both_count) { - /* Cannot apply Patience, tell the caller to use fallback_algo instead. */ + /* Cannot apply Patience, tell the caller to use fallback_algo + * instead. */ return DIFF_RC_USE_DIFF_ALGO_FALLBACK; } - diff_atoms_swallow_identical_neighbors(left, right, &unique_in_both_count); + diff_atoms_swallow_identical_neighbors(left, right, + &unique_in_both_count); debug("After swallowing identical neighbors: unique_in_both = %u\n", unique_in_both_count); - /* An array of Longest Common Sequence is the result of the below subscope: */ + /* An array of Longest Common Sequence is the result of the below + * subscope: */ unsigned int lcs_count = 0; struct diff_atom **lcs = NULL; struct diff_atom *lcs_tail = NULL; { - /* This subscope marks the lifetime of the atom_pointers allocation */ + /* This subscope marks the lifetime of the atom_pointers + * allocation */ /* One chunk of storage for atom pointers */ - struct diff_atom **atom_pointers = recallocarray(NULL, 0, unique_in_both_count * 2, sizeof(struct diff_atom*)); + struct diff_atom **atom_pointers; + atom_pointers = recallocarray(NULL, 0, unique_in_both_count * 2, + sizeof(struct diff_atom*)); - /* Half for the list of atoms that still need to be put on stacks */ + /* Half for the list of atoms that still need to be put on + * stacks */ struct diff_atom **uniques = atom_pointers; - /* Half for the patience sort state's "card stacks" -- we remember only each stack's topmost "card" */ - struct diff_atom **patience_stacks = atom_pointers + unique_in_both_count; + /* Half for the patience sort state's "card stacks" -- we + * remember only each stack's topmost "card" */ + struct diff_atom **patience_stacks; + patience_stacks = atom_pointers + unique_in_both_count; unsigned int patience_stacks_count = 0; /* Take all common, unique items from 'left' ... */ @@ -252,48 +304,39 @@ diff_algo_patience(const struct diff_algo_config *algo } /* ...and sort them to the order found in 'right'. - * The idea is to find the leftmost stack that has a higher line number and add it to the stack's top. - * If there is no such stack, open a new one on the right. The line number is derived from the atom*, - * which are array items and hence reflect the relative position in the source file. So we got the - * common-uniques from 'left' and sort them according to atom->patience.pos_in_other. */ + * The idea is to find the leftmost stack that has a higher line + * number and add it to the stack's top. + * If there is no such stack, open a new one on the right. The + * line number is derived from the atom*, which are array items + * and hence reflect the relative position in the source file. + * So we got the common-uniques from 'left' and sort them + * according to atom->patience.pos_in_other. */ unsigned int i; for (i = 0; i < unique_in_both_count; i++) { atom = uniques[i]; unsigned int target_stack; - - if (!patience_stacks_count) - target_stack = 0; - else { - /* binary search to find the stack to put this atom "card" on. */ - unsigned int lo = 0; - unsigned int hi = patience_stacks_count; - while (lo < hi) { - unsigned int mid = (lo + hi) >> 1; - if (patience_stacks[mid]->patience.pos_in_other < atom->patience.pos_in_other) - lo = mid + 1; - else - hi = mid; - } - - target_stack = lo; - } - + target_stack = find_target_stack(atom, patience_stacks, + patience_stacks_count); assert(target_stack <= patience_stacks_count); patience_stacks[target_stack] = atom; if (target_stack == patience_stacks_count) patience_stacks_count++; - /* Record a back reference to the next stack on the left, which will form the final longest sequence + /* Record a back reference to the next stack on the + * left, which will form the final longest sequence * later. */ - atom->patience.prev_stack = target_stack ? patience_stacks[target_stack - 1] : NULL; + atom->patience.prev_stack = target_stack ? + patience_stacks[target_stack - 1] : NULL; } - /* backtrace through prev_stack references to form the final longest common sequence */ + /* backtrace through prev_stack references to form the final + * longest common sequence */ lcs_tail = patience_stacks[patience_stacks_count - 1]; lcs_count = patience_stacks_count; - /* uniques and patience_stacks are no longer needed. Backpointers are in atom->patience.prev_stack */ + /* uniques and patience_stacks are no longer needed. + * Backpointers are in atom->patience.prev_stack */ free(atom_pointers); } @@ -314,14 +357,17 @@ diff_algo_patience(const struct diff_algo_config *algo } - /* TODO: For each common-unique line found (now listed in lcs), swallow lines upwards and downwards that are - * identical on each side. Requires a way to represent atoms being glued to adjacent atoms. */ + /* TODO: For each common-unique line found (now listed in lcs), swallow + * lines upwards and downwards that are identical on each side. Requires + * a way to represent atoms being glued to adjacent atoms. */ debug("\ntraverse LCS, possibly recursing:\n"); - /* Now we have pinned positions in both files at which it makes sense to divide the diff problem into smaller - * chunks. Go into the next round: look at each section in turn, trying to again find common-unique lines in - * those smaller sections. As soon as no more are found, the remaining smaller sections are solved by Myers. */ + /* Now we have pinned positions in both files at which it makes sense to + * divide the diff problem into smaller chunks. Go into the next round: + * look at each section in turn, trying to again find common-unique + * lines in those smaller sections. As soon as no more are found, the + * remaining smaller sections are solved by Myers. */ unsigned int left_pos = 0; unsigned int right_pos = 0; for (i = 0; i <= lcs_count; i++) { @@ -347,17 +393,22 @@ diff_algo_patience(const struct diff_algo_config *algo right_idx = right->atoms.len; } - /* 'atom' now marks an atom that matches on both sides according to patience-diff - * (a common-unique identical atom in both files). - * Handle the section before and the atom itself; the section after will be handled by the next loop - * iteration -- note that i loops to last element + 1 ("i <= lcs_count"), so that there will be another - * final iteration to pick up the last remaining items after the last LCS atom. + /* 'atom' now marks an atom that matches on both sides according + * to patience-diff (a common-unique identical atom in both + * files). + * Handle the section before and the atom itself; the section + * after will be handled by the next loop iteration -- note that + * i loops to last element + 1 ("i <= lcs_count"), so that there + * will be another final iteration to pick up the last remaining + * items after the last LCS atom. * The sections before might also be empty on left and/or right. - * left_pos and right_pos mark the indexes of the first atoms that have not yet been handled in the - * previous loop iteration. - * left_idx and right_idx mark the indexes of the matching atom on left and right, respectively. */ + * left_pos and right_pos mark the indexes of the first atoms + * that have not yet been handled in the previous loop + * iteration. left_idx and right_idx mark the indexes of the + * matching atom on left and right, respectively. */ - debug("iteration %u left_pos %u left_idx %u right_pos %u right_idx %u\n", + debug("iteration %u left_pos %u left_idx %u" + " right_pos %u right_idx %u\n", i, left_pos, left_idx, right_pos, right_idx); /* Section before the matching atom */ @@ -368,34 +419,45 @@ diff_algo_patience(const struct diff_algo_config *algo unsigned int right_section_len = right_idx - right_pos; if (left_section_len && right_section_len) { - /* Record an unsolved chunk, the caller will apply inner_algo() on this chunk. */ + /* Record an unsolved chunk, the caller will apply + * inner_algo() on this chunk. */ if (!diff_state_add_chunk(state, false, left_atom, left_section_len, - right_atom, right_section_len)) + right_atom, + right_section_len)) goto return_rc; } else if (left_section_len && !right_section_len) { - /* Only left atoms and none on the right, they form a "minus" chunk, then. */ + /* Only left atoms and none on the right, they form a + * "minus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, left_section_len, right_atom, 0)) goto return_rc; } else if (!left_section_len && right_section_len) { - /* No left atoms, only atoms on the right, they form a "plus" chunk, then. */ + /* No left atoms, only atoms on the right, they form a + * "plus" chunk, then. */ if (!diff_state_add_chunk(state, true, left_atom, 0, right_atom, right_section_len)) goto return_rc; } - /* else: left_section_len == 0 and right_section_len == 0, i.e. nothing here. */ + /* else: left_section_len == 0 and right_section_len == 0, i.e. + * nothing here. */ - /* The atom found to match on both sides forms a chunk of equals on each side. In the very last - * iteration of this loop, there is no matching atom, we were just cleaning out the remaining lines. */ + /* The atom found to match on both sides forms a chunk of equals + * on each side. In the very last iteration of this loop, there + * is no matching atom, we were just cleaning out the remaining + * lines. */ if (atom) { - if (!diff_state_add_chunk(state, true, - left->atoms.head + atom->patience.identical_lines.start, - range_len(&atom->patience.identical_lines), - right->atoms.head + atom_r->patience.identical_lines.start, - range_len(&atom_r->patience.identical_lines))) + void *ok; + ok = diff_state_add_chunk(state, true, + left->atoms.head + + atom->patience.identical_lines.start, + range_len(&atom->patience.identical_lines), + right->atoms.head + + atom_r->patience.identical_lines.start, + range_len(&atom_r->patience.identical_lines)); + if (!ok) goto return_rc; left_pos = atom->patience.identical_lines.end; right_pos = atom_r->patience.identical_lines.end; @@ -403,7 +465,8 @@ diff_algo_patience(const struct diff_algo_config *algo left_pos = left_idx + 1; right_pos = right_idx + 1; } - debug("end of iteration %u left_pos %u left_idx %u right_pos %u right_idx %u\n", + debug("end of iteration %u left_pos %u left_idx %u" + " right_pos %u right_idx %u\n", i, left_pos, left_idx, right_pos, right_idx); } debug("** END %s\n", __func__);