commit - 8546b0450f31c11e9433f5d5c6dc1d79c86107ed
commit + cd25827e24b8fad0a3f4a89b72fbdba86bc2c5d1
blob - f56135e983c75dc1d4d176da40de71623e9d95b9
blob + 0e5a1532ffd05811db5e9dc363a73df4406cf063
--- lib/diff_myers.c
+++ lib/diff_myers.c
unsigned int right_end;
};
-#define diff_box_empty(DIFF_SNAKE) ((DIFF_SNAKE)->left_end == 0)
-
-
/* If the two contents of a file are A B C D E and X B C Y,
* the Myers diff graph looks like:
*
debug_dump_myers_graph(left, right, NULL, kd_forward, d, kd_backward,
d);
return 0;
+}
+
+/* Integer square root approximation */
+static int
+shift_sqrt(int val)
+{
+ int i;
+ for (i = 1; val > 0; val >>= 2)
+ i <<= 1;
+ return i;
}
/* Myers "Divide et Impera": tracing forwards from the start and backwards from
kd_buf[i] = -1;
int *kd_forward = kd_buf;
int *kd_backward = kd_buf + kd_len;
+ int max_effort = shift_sqrt(max/2);
/* The 'k' axis in Myers spans positive and negative indexes, so point
* the kd to the middle.
if (r)
return r;
if (found_midpoint)
+ break;
+
+ /* Limit the effort spent looking for a mid snake. If files have
+ * very few lines in common, the effort spent to find nice mid
+ * snakes is just not worth it, the diff result will still be
+ * essentially minus everything on the left, plus everything on
+ * the right, with a few useless matches here and there. */
+ if (d > max_effort) {
+ /* pick the furthest reaching point from
+ * kd_forward and kd_backward, and use that as a
+ * midpoint, to not step into another diff algo
+ * recursion with unchanged box. */
+ int delta = (int)right->atoms.len - (int)left->atoms.len;
+ int x;
+ int y;
+ int i;
+ int best_forward_i = 0;
+ int best_forward_distance = 0;
+ int best_backward_i = 0;
+ int best_backward_distance = 0;
+ int distance;
+ int best_forward_x;
+ int best_forward_y;
+ int best_backward_x;
+ int best_backward_y;
+
+ debug("~~~ d = %d > max_effort = %d\n", d, max_effort);
+
+ for (i = d; i >= -d; i -= 2) {
+ if (i >= -(int)right->atoms.len && i <= (int)left->atoms.len) {
+ x = kd_forward[i];
+ y = xk_to_y(x, i);
+ distance = x + y;
+ if (distance > best_forward_distance) {
+ best_forward_distance = distance;
+ best_forward_i = i;
+ }
+ }
+
+ if (i >= -(int)left->atoms.len && i <= (int)right->atoms.len) {
+ x = kd_backward[i];
+ y = xc_to_y(x, i, delta);
+ distance = (right->atoms.len - x)
+ + (left->atoms.len - y);
+ if (distance >= best_backward_distance) {
+ best_backward_distance = distance;
+ best_backward_i = i;
+ }
+ }
+ }
+
+ /* The myers-divide didn't meet in the middle. We just
+ * figured out the places where the forward path
+ * advanced the most, and the backward path advanced the
+ * most. Just divide at whichever one of those two is better.
+ *
+ * o-o
+ * |
+ * o
+ * \
+ * o
+ * \
+ * F <-- cut here
+ *
+ *
+ *
+ * or here --> B
+ * \
+ * o
+ * \
+ * o
+ * |
+ * o-o
+ */
+ best_forward_x = kd_forward[best_forward_i];
+ best_forward_y = xk_to_y(best_forward_x, best_forward_i);
+ best_backward_x = kd_backward[best_backward_i];
+ best_backward_y = xc_to_y(x, best_backward_i, delta);
+
+ if (best_forward_distance >= best_backward_distance) {
+ x = best_forward_x;
+ y = best_forward_y;
+ } else {
+ x = best_backward_x;
+ y = best_backward_y;
+ }
+
+ debug("max_effort cut at x=%d y=%d\n", x, y);
+ if (x < 0 || y < 0
+ || x > left->atoms.len || y > right->atoms.len)
+ break;
+
+ found_midpoint = true;
+ mid_snake = (struct diff_box){
+ .left_start = x,
+ .left_end = x,
+ .right_start = y,
+ .right_end = y,
+ };
break;
+ }
}
if (!found_midpoint) {
/* else: left_section_len == 0 and right_section_len == 0, i.e.
* nothing before the mid-snake. */
- if (!diff_box_empty(&mid_snake)) {
+ if (mid_snake.left_end > mid_snake.left_start) {
/* The midpoint is a "snake", i.e. on a section of
* identical data on both sides: that section
* immediately becomes a solved diff chunk. */