Commit Diff


commit - 9f9e0ab43b25a75c30b677d0642dddfe868f4f34
commit + a5de2633143c41b5d79b996d2b8f64df0808eaeb
blob - c74bcf7a8413d667ee4804a41a38fffdbcf1f714
blob + 0c983c45b9f23e0e1e04094533004da12a66192a
--- lib/diff_patience.c
+++ lib/diff_patience.c
@@ -1,5 +1,6 @@
 /* Implementation of the Patience Diff algorithm invented by Bram Cohen:
- * Divide a diff problem into smaller chunks by an LCS of common-unique lines. */
+ * Divide a diff problem into smaller chunks by an LCS (Longest Common Sequence)
+ * of common-unique lines. */
 /*
  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
  *
@@ -380,10 +381,10 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat
 		/* Swallow upwards.
 		 * Each common-unique line swallows identical lines upwards and
 		 * downwards.
-		 * All common-unique lines that were part of the identical lines
-		 * following below were already swallowed in the previous
-		 * iteration, so we will never hit another common-unique line
-		 * above. */
+		 * Will never hit another identical common-unique line above on
+		 * the left, because those would already have swallowed this
+		 * common-unique line in a previous iteration.
+		 */
 		for (identical_l.start = l_idx, identical_r.start = r_idx;
 		     identical_l.start > l_min && identical_r.start > r_min;
 		     identical_l.start--, identical_r.start--) {
@@ -397,7 +398,8 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat
 				break;
 		}
 
-		/* Swallow downwards */
+		/* Swallow downwards. Join any common-unique lines in a block of
+		 * matching L/R lines with this one. */
 		for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1;
 		     identical_l.end < left->atoms.len
 			 && identical_r.end < right->atoms.len;
@@ -417,8 +419,9 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat
 			r_end = &right->atoms.head[identical_r.end];
 			if (!PATIENCE(l_end).unique_in_both)
 				continue;
-			/* Part of a chunk of identical lines, remove from
-			 * listing of unique_in_both lines */
+			/* A unique_in_both atom is joined with a preceding
+			 * unique_in_both atom, remove the joined atom from
+			 * listing of unique_in_both atoms */
 			PATIENCE(l_end).unique_in_both = false;
 			PATIENCE(r_end).unique_in_both = false;
 			(*unique_in_both_count)--;
@@ -620,11 +623,20 @@ diff_algo_patience(const struct diff_algo_config *algo
 	 * look at each section in turn, trying to again find common-unique
 	 * lines in those smaller sections. As soon as no more are found, the
 	 * remaining smaller sections are solved by Myers. */
+	/* left_pos and right_pos are indexes in left/right->atoms.head until
+	 * which the atoms are already handled (added to result chunks). */
 	unsigned int left_pos = 0;
 	unsigned int right_pos = 0;
 	for (i = 0; i <= lcs_count; i++) {
 		struct diff_atom *atom;
 		struct diff_atom *atom_r;
+		/* left_idx and right_idx are indexes of the start of this
+		 * section of identical lines on both sides.
+		 * left_pos marks the index of the first still unhandled line,
+		 * left_idx is the start of an identical section some way
+		 * further down, and this loop adds an unsolved chunk of
+		 * [left_pos..left_idx[ and a solved chunk of
+		 * [left_idx..identical_lines.end[. */
 		unsigned int left_idx;
 		unsigned int right_idx;
 
@@ -639,25 +651,23 @@ diff_algo_patience(const struct diff_algo_config *algo
 			      PATIENCE(atom).identical_lines.start, PATIENCE(atom).identical_lines.end,
 			      PATIENCE(atom_r).identical_lines.start, PATIENCE(atom_r).identical_lines.end);
 		} else {
+			/* There are no more identical lines until the end of
+			 * left and right. */
 			atom = NULL;
 			atom_r = NULL;
 			left_idx = left->atoms.len;
 			right_idx = right->atoms.len;
 		}
 
-		/* 'atom' now marks an atom that matches on both sides according
-		 * to patience-diff (a common-unique identical atom in both
-		 * files).
+		/* 'atom' (if not NULL) now marks an atom that matches on both
+		 * sides according to patience-diff (a common-unique identical
+		 * atom in both files).
 		 * Handle the section before and the atom itself; the section
 		 * after will be handled by the next loop iteration -- note that
 		 * i loops to last element + 1 ("i <= lcs_count"), so that there
 		 * will be another final iteration to pick up the last remaining
 		 * items after the last LCS atom.
-		 * The sections before might also be empty on left and/or right.
-		 * left_pos and right_pos mark the indexes of the first atoms
-		 * that have not yet been handled in the previous loop
-		 * iteration.  left_idx and right_idx mark the indexes of the
-		 * matching atom on left and right, respectively. */
+		 */
 
 		debug("iteration %u  left_pos %u  left_idx %u"
 		      "  right_pos %u  right_idx %u\n",