commit a5de2633143c41b5d79b996d2b8f64df0808eaeb from: Neels Hofmeyr date: Sat Oct 24 01:38:21 2020 UTC patience comments commit - 9f9e0ab43b25a75c30b677d0642dddfe868f4f34 commit + a5de2633143c41b5d79b996d2b8f64df0808eaeb blob - c74bcf7a8413d667ee4804a41a38fffdbcf1f714 blob + 0c983c45b9f23e0e1e04094533004da12a66192a --- lib/diff_patience.c +++ lib/diff_patience.c @@ -1,5 +1,6 @@ /* Implementation of the Patience Diff algorithm invented by Bram Cohen: - * Divide a diff problem into smaller chunks by an LCS of common-unique lines. */ + * Divide a diff problem into smaller chunks by an LCS (Longest Common Sequence) + * of common-unique lines. */ /* * Copyright (c) 2020 Neels Hofmeyr * @@ -380,10 +381,10 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat /* Swallow upwards. * Each common-unique line swallows identical lines upwards and * downwards. - * All common-unique lines that were part of the identical lines - * following below were already swallowed in the previous - * iteration, so we will never hit another common-unique line - * above. */ + * Will never hit another identical common-unique line above on + * the left, because those would already have swallowed this + * common-unique line in a previous iteration. + */ for (identical_l.start = l_idx, identical_r.start = r_idx; identical_l.start > l_min && identical_r.start > r_min; identical_l.start--, identical_r.start--) { @@ -397,7 +398,8 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat break; } - /* Swallow downwards */ + /* Swallow downwards. Join any common-unique lines in a block of + * matching L/R lines with this one. */ for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1; identical_l.end < left->atoms.len && identical_r.end < right->atoms.len; @@ -417,8 +419,9 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat r_end = &right->atoms.head[identical_r.end]; if (!PATIENCE(l_end).unique_in_both) continue; - /* Part of a chunk of identical lines, remove from - * listing of unique_in_both lines */ + /* A unique_in_both atom is joined with a preceding + * unique_in_both atom, remove the joined atom from + * listing of unique_in_both atoms */ PATIENCE(l_end).unique_in_both = false; PATIENCE(r_end).unique_in_both = false; (*unique_in_both_count)--; @@ -620,11 +623,20 @@ diff_algo_patience(const struct diff_algo_config *algo * look at each section in turn, trying to again find common-unique * lines in those smaller sections. As soon as no more are found, the * remaining smaller sections are solved by Myers. */ + /* left_pos and right_pos are indexes in left/right->atoms.head until + * which the atoms are already handled (added to result chunks). */ unsigned int left_pos = 0; unsigned int right_pos = 0; for (i = 0; i <= lcs_count; i++) { struct diff_atom *atom; struct diff_atom *atom_r; + /* left_idx and right_idx are indexes of the start of this + * section of identical lines on both sides. + * left_pos marks the index of the first still unhandled line, + * left_idx is the start of an identical section some way + * further down, and this loop adds an unsolved chunk of + * [left_pos..left_idx[ and a solved chunk of + * [left_idx..identical_lines.end[. */ unsigned int left_idx; unsigned int right_idx; @@ -639,25 +651,23 @@ diff_algo_patience(const struct diff_algo_config *algo PATIENCE(atom).identical_lines.start, PATIENCE(atom).identical_lines.end, PATIENCE(atom_r).identical_lines.start, PATIENCE(atom_r).identical_lines.end); } else { + /* There are no more identical lines until the end of + * left and right. */ atom = NULL; atom_r = NULL; left_idx = left->atoms.len; right_idx = right->atoms.len; } - /* 'atom' now marks an atom that matches on both sides according - * to patience-diff (a common-unique identical atom in both - * files). + /* 'atom' (if not NULL) now marks an atom that matches on both + * sides according to patience-diff (a common-unique identical + * atom in both files). * Handle the section before and the atom itself; the section * after will be handled by the next loop iteration -- note that * i loops to last element + 1 ("i <= lcs_count"), so that there * will be another final iteration to pick up the last remaining * items after the last LCS atom. - * The sections before might also be empty on left and/or right. - * left_pos and right_pos mark the indexes of the first atoms - * that have not yet been handled in the previous loop - * iteration. left_idx and right_idx mark the indexes of the - * matching atom on left and right, respectively. */ + */ debug("iteration %u left_pos %u left_idx %u" " right_pos %u right_idx %u\n",