commit 4c42e835223f6d22891beb0abc2d2663ae3d9b7a from: Neels Hofmeyr date: Tue May 05 04:51:32 2020 UTC fix diff_algo_myers_divide When forwards and backwards traversals meet, the endpoints of the mid-snake are not the two points in kd_forward and kd_backward, but rather the section that was slid (if any) of the current forward/backward traversal only. For example: o-o-o | | o A | \ o o \ M |\ o o-o-o | | | o o X \ o \ o \ o The backward traversal reached M from the bottom and slid upwards. The forward traversal already reached X, which is not a straight line from M anymore, so picking a mid-snake from M to X would yield a mistake. The correct mid-snake is between M and A. M is where the bottom traversal hit the diagonal that the forwards traversal has already passed, and A is what it reaches when sliding up identical lines. commit - 223979a920a4be072343306ca1c3a34efb036f31 commit + 4c42e835223f6d22891beb0abc2d2663ae3d9b7a blob - 111366765d59948fc0ea91555be00ef59b425b15 blob + 1acfa698d7852914725eff21eebb0a9ebc77795b --- lib/diff_myers.c +++ lib/diff_myers.c @@ -190,8 +190,6 @@ static void diff_divide_myers_forward(struct diff_data struct diff_box *meeting_snake) { int delta = (int)right->atoms.len - (int)left->atoms.len; - int prev_x; - int prev_y; int k; int x; @@ -246,8 +244,7 @@ static void diff_divide_myers_forward(struct diff_data * From position prev_k, step to the right in the Myers graph: x += 1. */ int prev_k = k - 1; - prev_x = kd_forward[prev_k]; - prev_y = xk_to_y(prev_x, prev_k); + int prev_x = kd_forward[prev_k]; x = prev_x + 1; } else { /* The bottom most one. @@ -256,11 +253,11 @@ static void diff_divide_myers_forward(struct diff_data * (since we're deriving y from y = x - k). */ int prev_k = k + 1; - prev_x = kd_forward[prev_k]; - prev_y = xk_to_y(prev_x, prev_k); + int prev_x = kd_forward[prev_k]; x = prev_x; } + int x_before_slide = x; /* Slide down any snake that we might find here. */ while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len && diff_atom_same(&left->atoms.head[x], &right->atoms.head[xk_to_y(x, k)])) @@ -354,35 +351,48 @@ static void diff_divide_myers_forward(struct diff_data * met or passed (forward walked onto or past backward), then we've found a midpoint / a * mid-box. * - * But we need to avoid matching a situation like this: - * 0 1 - * x y - * 0 o-o-o - * x |\| | - * 1 o-o-o - * y | |\| - * 2 (B)o-o <--(B) backwards traversal reached here - * a | | | - * 3 o-o-o<-- prev_x, prev_y - * b | | | - * 4 o-o(F) <--(F) forwards traversal reached here - * x |\| | Now both are on the same diagonal and look like they passed, - * 5 o-o-o but actually they have sneaked past each other and have not met. - * y | |\| - * 6 o-o-o + * When forwards and backwards traversals meet, the endpoints of the mid-snake are + * not the two points in kd_forward and kd_backward, but rather the section that + * was slid (if any) of the current forward/backward traversal only. * - * The solution is to notice that prev_x,prev_y were also already past (B). + * For example: + * + * o + * \ + * o + * \ + * o + * \ + * o + * \ + * X o o + * | | | + * o-o-o o + * \| + * M + * \ + * o + * \ + * A o + * | | + * o-o-o + * + * The forward traversal reached M from the top and slid downwards to A. + * The backward traversal already reached X, which is not a straight line from M + * anymore, so picking a mid-snake from M to X would yield a mistake. + * + * The correct mid-snake is between M and A. M is where the forward traversal hit + * the diagonal that the backward traversal has already passed, and A is what it + * reaches when sliding down identical lines. */ int backward_x = kd_backward[c]; - int backward_y = xc_to_y(backward_x, c, delta); debug("Compare: k=%d c=%d is (%d,%d) >= (%d,%d)?\n", k, c, x, xk_to_y(x, k), backward_x, xc_to_y(backward_x, c, delta)); - if (prev_x <= backward_x && prev_y <= backward_y - && x >= backward_x) { + if (x >= backward_x) { *meeting_snake = (struct diff_box){ - .left_start = backward_x, + .left_start = x_before_slide, .left_end = x, - .right_start = xc_to_y(backward_x, c, delta), + .right_start = xc_to_y(x_before_slide, c, delta), .right_end = xk_to_y(x, k), }; debug("HIT x=(%u,%u) - y=(%u,%u)\n", @@ -419,8 +429,6 @@ static void diff_divide_myers_backward(struct diff_dat struct diff_box *meeting_snake) { int delta = (int)right->atoms.len - (int)left->atoms.len; - int prev_x; - int prev_y; int c; int x; @@ -479,16 +487,14 @@ static void diff_divide_myers_backward(struct diff_dat * (since we're deriving y from y = x - c + delta). */ int prev_c = c - 1; - prev_x = kd_backward[prev_c]; - prev_y = xc_to_y(prev_x, prev_c, delta); + int prev_x = kd_backward[prev_c]; x = prev_x; } else { /* The bottom most one. * From position prev_c, step to the left in the Myers graph: x -= 1. */ int prev_c = c + 1; - prev_x = kd_backward[prev_c]; - prev_y = xc_to_y(prev_x, prev_c, delta); + int prev_x = kd_backward[prev_c]; x = prev_x - 1; } @@ -500,6 +506,7 @@ static void diff_divide_myers_backward(struct diff_dat if (xc_to_y(x, c, delta) > 0) { debug(" r="); debug_dump_atom(right, left, &right->atoms.head[xc_to_y(x, c, delta)-1]); } + int x_before_slide = x; while (x > 0 && xc_to_y(x, c, delta) > 0 && diff_atom_same(&left->atoms.head[x-1], &right->atoms.head[xc_to_y(x, c, delta)-1])) x--; @@ -569,18 +576,50 @@ static void diff_divide_myers_backward(struct diff_dat if (k >= -forwards_d && k <= forwards_d) { /* Current c is on a diagonal that exists in kd_forward[]. If the two x positions have * met or passed (backward walked onto or past forward), then we've found a midpoint / a - * mid-box. */ + * mid-box. + * + * When forwards and backwards traversals meet, the endpoints of the mid-snake are + * not the two points in kd_forward and kd_backward, but rather the section that + * was slid (if any) of the current forward/backward traversal only. + * + * For example: + * + * o-o-o + * | | + * o A + * | \ + * o o + * \ + * M + * |\ + * o o-o-o + * | | | + * o o X + * \ + * o + * \ + * o + * \ + * o + * + * The backward traversal reached M from the bottom and slid upwards. + * The forward traversal already reached X, which is not a straight line from M + * anymore, so picking a mid-snake from M to X would yield a mistake. + * + * The correct mid-snake is between M and A. M is where the backward traversal hit + * the diagonal that the forwards traversal has already passed, and A is what it + * reaches when sliding up identical lines. + */ + int forward_x = kd_forward[k]; - int forward_y = xk_to_y(forward_x, k); debug("Compare: k=%d c=%d is (%d,%d) >= (%d,%d)?\n", k, c, forward_x, xk_to_y(forward_x, k), x, xc_to_y(x, c, delta)); - if (forward_x <= prev_x && forward_y <= prev_y - && forward_x >= x) { + if (forward_x >= x) { *meeting_snake = (struct diff_box){ .left_start = x, - .left_end = forward_x, + .left_end = x_before_slide, .right_start = xc_to_y(x, c, delta), - .right_end = xk_to_y(forward_x, k), + .right_end = xk_to_y(x_before_slide, k), }; debug("HIT x=%u,%u - y=%u,%u\n", meeting_snake->left_start,