Commit Diff


commit - 8546b0450f31c11e9433f5d5c6dc1d79c86107ed
commit + cd25827e24b8fad0a3f4a89b72fbdba86bc2c5d1
blob - f56135e983c75dc1d4d176da40de71623e9d95b9
blob + 0e5a1532ffd05811db5e9dc363a73df4406cf063
--- lib/diff_myers.c
+++ lib/diff_myers.c
@@ -51,9 +51,6 @@ struct diff_box {
 	unsigned int right_end;
 };
 
-#define diff_box_empty(DIFF_SNAKE) ((DIFF_SNAKE)->left_end == 0)
-
-
 /* If the two contents of a file are A B C D E and X B C Y,
  * the Myers diff graph looks like:
  *
@@ -750,6 +747,16 @@ diff_divide_myers_backward(bool *found_midpoint,
 	debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward,
 			       d);
 	return 0;
+}
+
+/* Integer square root approximation */
+static int
+shift_sqrt(int val)
+{
+	int i;
+        for (i = 1; val > 0; val >>= 2)
+		i <<= 1;
+        return i;
 }
 
 /* Myers "Divide et Impera": tracing forwards from the start and backwards from
@@ -783,6 +790,7 @@ diff_algo_myers_divide(const struct diff_algo_config *
 		kd_buf[i] = -1;
 	int *kd_forward = kd_buf;
 	int *kd_backward = kd_buf + kd_len;
+	int max_effort = shift_sqrt(max/2);
 
 	/* The 'k' axis in Myers spans positive and negative indexes, so point
 	 * the kd to the middle.
@@ -809,7 +817,107 @@ diff_algo_myers_divide(const struct diff_algo_config *
 		if (r)
 			return r;
 		if (found_midpoint)
+			break;
+
+		/* Limit the effort spent looking for a mid snake. If files have
+		 * very few lines in common, the effort spent to find nice mid
+		 * snakes is just not worth it, the diff result will still be
+		 * essentially minus everything on the left, plus everything on
+		 * the right, with a few useless matches here and there. */
+		if (d > max_effort) {
+			/* pick the furthest reaching point from
+			 * kd_forward and kd_backward, and use that as a
+			 * midpoint, to not step into another diff algo
+			 * recursion with unchanged box. */
+			int delta = (int)right->atoms.len - (int)left->atoms.len;
+			int x;
+			int y;
+			int i;
+			int best_forward_i = 0;
+			int best_forward_distance = 0;
+			int best_backward_i = 0;
+			int best_backward_distance = 0;
+			int distance;
+			int best_forward_x;
+			int best_forward_y;
+			int best_backward_x;
+			int best_backward_y;
+
+			debug("~~~ d = %d > max_effort = %d\n", d, max_effort);
+
+			for (i = d; i >= -d; i -= 2) {
+				if (i >= -(int)right->atoms.len && i <= (int)left->atoms.len) {
+					x = kd_forward[i];
+					y = xk_to_y(x, i);
+					distance = x + y;
+					if (distance > best_forward_distance) {
+						best_forward_distance = distance;
+						best_forward_i = i;
+					}
+				}
+
+				if (i >= -(int)left->atoms.len && i <= (int)right->atoms.len) {
+					x = kd_backward[i];
+					y = xc_to_y(x, i, delta);
+					distance = (right->atoms.len - x)
+						+ (left->atoms.len - y);
+					if (distance >= best_backward_distance) {
+						best_backward_distance = distance;
+						best_backward_i = i;
+					}
+				}
+			}
+
+			/* The myers-divide didn't meet in the middle. We just
+			 * figured out the places where the forward path
+			 * advanced the most, and the backward path advanced the
+			 * most. Just divide at whichever one of those two is better.
+			 *
+			 *   o-o
+			 *     |
+			 *     o
+			 *      \
+			 *       o
+			 *        \
+			 *         F <-- cut here
+			 *
+			 *
+			 *
+			 *           or here --> B
+			 *                        \
+			 *                         o
+			 *                          \
+			 *                           o
+			 *                           |
+			 *                           o-o
+			 */
+			best_forward_x = kd_forward[best_forward_i];
+			best_forward_y = xk_to_y(best_forward_x, best_forward_i);
+			best_backward_x = kd_backward[best_backward_i];
+			best_backward_y = xc_to_y(x, best_backward_i, delta);
+
+			if (best_forward_distance >= best_backward_distance) {
+				x = best_forward_x;
+				y = best_forward_y;
+			} else {
+				x = best_backward_x;
+				y = best_backward_y;
+			}
+
+			debug("max_effort cut at x=%d y=%d\n", x, y);
+			if (x < 0 || y < 0
+			    || x > left->atoms.len || y > right->atoms.len)
+				break;
+
+			found_midpoint = true;
+			mid_snake = (struct diff_box){
+				.left_start = x,
+				.left_end = x,
+				.right_start = y,
+				.right_end = y,
+			};
 			break;
+		}
 	}
 
 	if (!found_midpoint) {
@@ -862,7 +970,7 @@ diff_algo_myers_divide(const struct diff_algo_config *
 		/* else: left_section_len == 0 and right_section_len == 0, i.e.
 		 * nothing before the mid-snake. */
 
-		if (!diff_box_empty(&mid_snake)) {
+		if (mid_snake.left_end > mid_snake.left_start) {
 			/* The midpoint is a "snake", i.e. on a section of
 			 * identical data on both sides: that section
 			 * immediately becomes a solved diff chunk. */