Commit Diff


commit - 54fa822893d2fcc170a2d3051d1640bf5303894e
commit + bb7fb738462f1e54a85659098b77605727b9dee0
blob - 94cc6fad7ecce1ff97127e1643d5ed82cb5ea3f0
blob + f6dcef58bb67e1a01ce4a633f7928a184a56ebb1
--- include/diff/diff_main.h
+++ include/diff/diff_main.h
@@ -86,6 +86,7 @@ struct diff_atom {
 		bool unique_in_both;
 		struct diff_atom *pos_in_other;
 		struct diff_atom *prev_stack;
+		struct range identical_lines;
 	} patience;
 };
 
blob - 9fcf8196417c1df4fda8effb7cdbb57995974764
blob + 17a6bffe139d685b095825de47ef10ff51e508b5
--- lib/diff_patience.c
+++ lib/diff_patience.c
@@ -118,7 +118,73 @@ static void diff_atoms_mark_unique_in_both(struct diff
 		*unique_in_both_count = unique_in_both;
 }
 
+static void diff_atoms_swallow_identical_neighbors(struct diff_data *left, struct diff_data *right,
+						   unsigned int *unique_in_both_count)
+{
+	debug("trivially combine identical lines arount unique_in_both lines\n");
 
+	unsigned int l_idx;
+	unsigned int next_l_idx;
+	unsigned int l_min = 0;
+	unsigned int r_min = 0;
+	for (l_idx = 0; l_idx < left->atoms.len; l_idx = next_l_idx) {
+		next_l_idx = l_idx + 1;
+		struct diff_atom *l = &left->atoms.head[l_idx];
+
+		if (!l->patience.unique_in_both)
+			continue;
+
+		debug("check identical lines around ");
+		debug_dump_atom(left, right, l);
+
+		unsigned int r_idx = diff_atom_idx(right, l->patience.pos_in_other);
+
+		struct range identical_l;
+		struct range identical_r;
+
+		/* Swallow upwards.
+		 * Each common-unique line swallows identical lines upwards and downwards.
+		 * All common-unique lines that were part of the identical lines following below were already swallowed
+		 * in the previous iteration, so we will never hit another common-unique line above. */
+		for (identical_l.start = l_idx, identical_r.start = r_idx;
+		     identical_l.start > l_min
+		     && identical_r.start > r_min
+		     && diff_atom_same(&left->atoms.head[identical_l.start - 1],
+				       &right->atoms.head[identical_r.start - 1]);
+		     identical_l.start--, identical_r.start--);
+
+		/* Swallow downwards */
+		for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1;
+		     identical_l.end < left->atoms.len
+		     && identical_r.end < right->atoms.len
+		     && diff_atom_same(&left->atoms.head[identical_l.end],
+				       &right->atoms.head[identical_r.end]);
+		     identical_l.end++, identical_r.end++,
+		     next_l_idx++) {
+			if (left->atoms.head[identical_l.end].patience.unique_in_both) {
+				/* Part of a chunk of identical lines, remove from listing of unique_in_both lines */
+				left->atoms.head[identical_l.end].patience.unique_in_both = false;
+				right->atoms.head[identical_r.end].patience.unique_in_both = false;
+				(*unique_in_both_count)--;
+			}
+		}
+
+		l->patience.identical_lines = identical_l;
+		l->patience.pos_in_other->patience.identical_lines = identical_r;
+
+		l_min = identical_l.end;
+		r_min = identical_r.end;
+
+		if (!range_empty(&l->patience.identical_lines)) {
+			debug("common-unique line at l=%u r=%u swallowed identical lines l=%u-%u r=%u-%u\n",
+			      l_idx, r_idx,
+			      identical_l.start, identical_l.end,
+			      identical_r.start, identical_r.end);
+		}
+		debug("next_l_idx = %u\n", next_l_idx);
+	}
+}
+
 /* Among the lines that appear exactly once in each side, find the longest streak that appear in both files in the same
  * order (with other stuff allowed to interleave). Use patience sort for that, as in the Patience Diff algorithm.
  * See https://bramcohen.livejournal.com/73318.html and, for a much more detailed explanation,
@@ -148,6 +214,10 @@ enum diff_rc diff_algo_patience(const struct diff_algo
 		return DIFF_RC_USE_DIFF_ALGO_FALLBACK;
 	}
 
+	diff_atoms_swallow_identical_neighbors(left, right, &unique_in_both_count);
+	debug("After swallowing identical neighbors: unique_in_both = %u\n",
+	      unique_in_both_count);
+
 	/* An array of Longest Common Sequence is the result of the below subscope: */
 	unsigned int lcs_count = 0;
 	struct diff_atom **lcs = NULL;
@@ -252,15 +322,23 @@ enum diff_rc diff_algo_patience(const struct diff_algo
 	unsigned int right_pos = 0;
 	for (i = 0; i <= lcs_count; i++) {
 		struct diff_atom *atom;
+		struct diff_atom *atom_r;
 		unsigned int left_idx;
 		unsigned int right_idx;
 
 		if (i < lcs_count) {
 			atom = lcs[i];
-			left_idx = diff_atom_idx(left, atom);
-			right_idx = diff_atom_idx(right, atom->patience.pos_in_other);
+			atom_r = atom->patience.pos_in_other;
+			debug("lcs[%u] = left[%u] = right[%u]\n", i,
+			      diff_atom_idx(left, atom), diff_atom_idx(right, atom_r));
+			left_idx = atom->patience.identical_lines.start;
+			right_idx = atom_r->patience.identical_lines.start;
+			debug(" identical lines l %u-%u  r %u-%u\n",
+			      atom->patience.identical_lines.start, atom->patience.identical_lines.end,
+			      atom_r->patience.identical_lines.start, atom_r->patience.identical_lines.end);
 		} else {
 			atom = NULL;
+			atom_r = NULL;
 			left_idx = left->atoms.len;
 			right_idx = right->atoms.len;
 		}
@@ -312,13 +390,19 @@ enum diff_rc diff_algo_patience(const struct diff_algo
 		 * iteration of this loop, there is no matching atom, we were just cleaning out the remaining lines. */
 		if (atom) {
 			if (!diff_state_add_chunk(state, true,
-						  atom, 1,
-						  atom->patience.pos_in_other, 1))
+						  left->atoms.head + atom->patience.identical_lines.start,
+						  range_len(&atom->patience.identical_lines),
+						  right->atoms.head + atom_r->patience.identical_lines.start,
+						  range_len(&atom_r->patience.identical_lines)))
 				goto return_rc;
+			left_pos = atom->patience.identical_lines.end;
+			right_pos = atom_r->patience.identical_lines.end;
+		} else {
+			left_pos = left_idx + 1;
+			right_pos = right_idx + 1;
 		}
-
-		left_pos = left_idx + 1;
-		right_pos = right_idx + 1;
+		debug("end of iteration %u  left_pos %u  left_idx %u  right_pos %u  right_idx %u\n",
+		      i, left_pos, left_idx, right_pos, right_idx);
 	}
 	debug("** END %s\n", __func__);
 
@@ -328,4 +412,3 @@ return_rc:
 	free(lcs);
 	return rc;
 }
-