Commit Diff


commit - 50198b5f2f9c8dd7d1deefed0fa25cabbf2af92f
commit + f71e8098324d6de96d593537a80b348d7f032e4d
blob - 111366765d59948fc0ea91555be00ef59b425b15
blob + 1acfa698d7852914725eff21eebb0a9ebc77795b
--- lib/diff_myers.c
+++ lib/diff_myers.c
@@ -190,8 +190,6 @@ static void diff_divide_myers_forward(struct diff_data
 				      struct diff_box *meeting_snake)
 {
 	int delta = (int)right->atoms.len - (int)left->atoms.len;
-	int prev_x;
-	int prev_y;
 	int k;
 	int x;
 
@@ -246,8 +244,7 @@ static void diff_divide_myers_forward(struct diff_data
 			 * From position prev_k, step to the right in the Myers graph: x += 1.
 			 */
 			int prev_k = k - 1;
-			prev_x = kd_forward[prev_k];
-			prev_y = xk_to_y(prev_x, prev_k);
+			int prev_x = kd_forward[prev_k];
 			x = prev_x + 1;
 		} else {
 			/* The bottom most one.
@@ -256,11 +253,11 @@ static void diff_divide_myers_forward(struct diff_data
 			 * (since we're deriving y from y = x - k).
 			 */
 			int prev_k = k + 1;
-			prev_x = kd_forward[prev_k];
-			prev_y = xk_to_y(prev_x, prev_k);
+			int prev_x = kd_forward[prev_k];
 			x = prev_x;
 		}
 
+		int x_before_slide = x;
 		/* Slide down any snake that we might find here. */
 		while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len
 		       && diff_atom_same(&left->atoms.head[x], &right->atoms.head[xk_to_y(x, k)]))
@@ -354,35 +351,48 @@ static void diff_divide_myers_forward(struct diff_data
 			 * met or passed (forward walked onto or past backward), then we've found a midpoint / a
 			 * mid-box.
 			 *
-			 * But we need to avoid matching a situation like this:
-			 *       0  1
-			 *        x y
-			 *   0   o-o-o
-			 *     x |\| |
-			 *   1   o-o-o
-			 *     y | |\|
-			 *   2  (B)o-o  <--(B) backwards traversal reached here
-			 *     a | | |
-			 *   3   o-o-o<-- prev_x, prev_y
-			 *     b | | |
-			 *   4   o-o(F) <--(F) forwards traversal  reached here
-			 *     x |\| |     Now both are on the same diagonal and look like they passed,
-			 *   5   o-o-o     but actually they have sneaked past each other and have not met.
-			 *     y | |\|
-			 *   6   o-o-o
+			 * When forwards and backwards traversals meet, the endpoints of the mid-snake are
+			 * not the two points in kd_forward and kd_backward, but rather the section that
+			 * was slid (if any) of the current forward/backward traversal only.
 			 *
-			 * The solution is to notice that prev_x,prev_y were also already past (B).
+			 * For example:
+			 *
+			 *   o
+			 *    \
+			 *     o
+			 *      \
+			 *       o
+			 *        \
+			 *         o
+			 *          \
+			 *       X o o
+			 *       | | |
+			 *     o-o-o o
+			 *          \|
+			 *           M
+			 *            \
+			 *             o
+			 *              \
+			 *               A o
+			 *               | |
+			 *             o-o-o
+			 *
+			 * The forward traversal reached M from the top and slid downwards to A.
+			 * The backward traversal already reached X, which is not a straight line from M
+			 * anymore, so picking a mid-snake from M to X would yield a mistake.
+			 *
+			 * The correct mid-snake is between M and A. M is where the forward traversal hit
+			 * the diagonal that the backward traversal has already passed, and A is what it
+			 * reaches when sliding down identical lines.
 			 */
 			int backward_x = kd_backward[c];
-			int backward_y = xc_to_y(backward_x, c, delta);
 			debug("Compare:  k=%d c=%d  is (%d,%d) >= (%d,%d)?\n",
 			      k, c, x, xk_to_y(x, k), backward_x, xc_to_y(backward_x, c, delta));
-			if (prev_x <= backward_x && prev_y <= backward_y
-			    && x >= backward_x) {
+			if (x >= backward_x) {
 				*meeting_snake = (struct diff_box){
-					.left_start = backward_x,
+					.left_start = x_before_slide,
 					.left_end = x,
-					.right_start = xc_to_y(backward_x, c, delta),
+					.right_start = xc_to_y(x_before_slide, c, delta),
 					.right_end = xk_to_y(x, k),
 				};
 				debug("HIT x=(%u,%u) - y=(%u,%u)\n",
@@ -419,8 +429,6 @@ static void diff_divide_myers_backward(struct diff_dat
 				       struct diff_box *meeting_snake)
 {
 	int delta = (int)right->atoms.len - (int)left->atoms.len;
-	int prev_x;
-	int prev_y;
 	int c;
 	int x;
 
@@ -479,16 +487,14 @@ static void diff_divide_myers_backward(struct diff_dat
 			 * (since we're deriving y from y = x - c + delta).
 			 */
 			int prev_c = c - 1;
-			prev_x = kd_backward[prev_c];
-			prev_y = xc_to_y(prev_x, prev_c, delta);
+			int prev_x = kd_backward[prev_c];
 			x = prev_x;
 		} else {
 			/* The bottom most one.
 			 * From position prev_c, step to the left in the Myers graph: x -= 1.
 			 */
 			int prev_c = c + 1;
-			prev_x = kd_backward[prev_c];
-			prev_y = xc_to_y(prev_x, prev_c, delta);
+			int prev_x = kd_backward[prev_c];
 			x = prev_x - 1;
 		}
 
@@ -500,6 +506,7 @@ static void diff_divide_myers_backward(struct diff_dat
 		if (xc_to_y(x, c, delta) > 0) {
 			debug("  r="); debug_dump_atom(right, left, &right->atoms.head[xc_to_y(x, c, delta)-1]);
 		}
+		int x_before_slide = x;
 		while (x > 0 && xc_to_y(x, c, delta) > 0
 		       && diff_atom_same(&left->atoms.head[x-1], &right->atoms.head[xc_to_y(x, c, delta)-1]))
 			x--;
@@ -569,18 +576,50 @@ static void diff_divide_myers_backward(struct diff_dat
 			if (k >= -forwards_d && k <= forwards_d) {
 				/* Current c is on a diagonal that exists in kd_forward[]. If the two x positions have
 				 * met or passed (backward walked onto or past forward), then we've found a midpoint / a
-				 * mid-box. */
+				 * mid-box.
+				 *
+				 * When forwards and backwards traversals meet, the endpoints of the mid-snake are
+				 * not the two points in kd_forward and kd_backward, but rather the section that
+				 * was slid (if any) of the current forward/backward traversal only.
+				 *
+				 * For example:
+				 *
+				 *   o-o-o
+				 *   | |
+				 *   o A
+				 *   |  \
+				 *   o   o
+				 *        \
+				 *         M
+				 *         |\
+				 *         o o-o-o
+				 *         | | |
+				 *         o o X
+				 *          \
+				 *           o
+				 *            \
+				 *             o
+				 *              \
+				 *               o
+				 *
+				 * The backward traversal reached M from the bottom and slid upwards.
+				 * The forward traversal already reached X, which is not a straight line from M
+				 * anymore, so picking a mid-snake from M to X would yield a mistake.
+				 *
+				 * The correct mid-snake is between M and A. M is where the backward traversal hit
+				 * the diagonal that the forwards traversal has already passed, and A is what it
+				 * reaches when sliding up identical lines.
+				 */
+
 				int forward_x = kd_forward[k];
-				int forward_y = xk_to_y(forward_x, k);
 				debug("Compare:  k=%d c=%d  is (%d,%d) >= (%d,%d)?\n",
 				      k, c, forward_x, xk_to_y(forward_x, k), x, xc_to_y(x, c, delta));
-				if (forward_x <= prev_x && forward_y <= prev_y
-				    && forward_x >= x) {
+				if (forward_x >= x) {
 					*meeting_snake = (struct diff_box){
 						.left_start = x,
-						.left_end = forward_x,
+						.left_end = x_before_slide,
 						.right_start = xc_to_y(x, c, delta),
-						.right_end = xk_to_y(forward_x, k),
+						.right_end = xk_to_y(x_before_slide, k),
 					};
 					debug("HIT x=%u,%u - y=%u,%u\n",
 					      meeting_snake->left_start,