commit - 54fa822893d2fcc170a2d3051d1640bf5303894e
commit + bb7fb738462f1e54a85659098b77605727b9dee0
blob - 94cc6fad7ecce1ff97127e1643d5ed82cb5ea3f0
blob + f6dcef58bb67e1a01ce4a633f7928a184a56ebb1
--- include/diff/diff_main.h
+++ include/diff/diff_main.h
bool unique_in_both;
struct diff_atom *pos_in_other;
struct diff_atom *prev_stack;
+ struct range identical_lines;
} patience;
};
blob - 9fcf8196417c1df4fda8effb7cdbb57995974764
blob + 17a6bffe139d685b095825de47ef10ff51e508b5
--- lib/diff_patience.c
+++ lib/diff_patience.c
*unique_in_both_count = unique_in_both;
}
+static void diff_atoms_swallow_identical_neighbors(struct diff_data *left, struct diff_data *right,
+ unsigned int *unique_in_both_count)
+{
+ debug("trivially combine identical lines arount unique_in_both lines\n");
+ unsigned int l_idx;
+ unsigned int next_l_idx;
+ unsigned int l_min = 0;
+ unsigned int r_min = 0;
+ for (l_idx = 0; l_idx < left->atoms.len; l_idx = next_l_idx) {
+ next_l_idx = l_idx + 1;
+ struct diff_atom *l = &left->atoms.head[l_idx];
+
+ if (!l->patience.unique_in_both)
+ continue;
+
+ debug("check identical lines around ");
+ debug_dump_atom(left, right, l);
+
+ unsigned int r_idx = diff_atom_idx(right, l->patience.pos_in_other);
+
+ struct range identical_l;
+ struct range identical_r;
+
+ /* Swallow upwards.
+ * Each common-unique line swallows identical lines upwards and downwards.
+ * All common-unique lines that were part of the identical lines following below were already swallowed
+ * in the previous iteration, so we will never hit another common-unique line above. */
+ for (identical_l.start = l_idx, identical_r.start = r_idx;
+ identical_l.start > l_min
+ && identical_r.start > r_min
+ && diff_atom_same(&left->atoms.head[identical_l.start - 1],
+ &right->atoms.head[identical_r.start - 1]);
+ identical_l.start--, identical_r.start--);
+
+ /* Swallow downwards */
+ for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1;
+ identical_l.end < left->atoms.len
+ && identical_r.end < right->atoms.len
+ && diff_atom_same(&left->atoms.head[identical_l.end],
+ &right->atoms.head[identical_r.end]);
+ identical_l.end++, identical_r.end++,
+ next_l_idx++) {
+ if (left->atoms.head[identical_l.end].patience.unique_in_both) {
+ /* Part of a chunk of identical lines, remove from listing of unique_in_both lines */
+ left->atoms.head[identical_l.end].patience.unique_in_both = false;
+ right->atoms.head[identical_r.end].patience.unique_in_both = false;
+ (*unique_in_both_count)--;
+ }
+ }
+
+ l->patience.identical_lines = identical_l;
+ l->patience.pos_in_other->patience.identical_lines = identical_r;
+
+ l_min = identical_l.end;
+ r_min = identical_r.end;
+
+ if (!range_empty(&l->patience.identical_lines)) {
+ debug("common-unique line at l=%u r=%u swallowed identical lines l=%u-%u r=%u-%u\n",
+ l_idx, r_idx,
+ identical_l.start, identical_l.end,
+ identical_r.start, identical_r.end);
+ }
+ debug("next_l_idx = %u\n", next_l_idx);
+ }
+}
+
/* Among the lines that appear exactly once in each side, find the longest streak that appear in both files in the same
* order (with other stuff allowed to interleave). Use patience sort for that, as in the Patience Diff algorithm.
* See https://bramcohen.livejournal.com/73318.html and, for a much more detailed explanation,
return DIFF_RC_USE_DIFF_ALGO_FALLBACK;
}
+ diff_atoms_swallow_identical_neighbors(left, right, &unique_in_both_count);
+ debug("After swallowing identical neighbors: unique_in_both = %u\n",
+ unique_in_both_count);
+
/* An array of Longest Common Sequence is the result of the below subscope: */
unsigned int lcs_count = 0;
struct diff_atom **lcs = NULL;
unsigned int right_pos = 0;
for (i = 0; i <= lcs_count; i++) {
struct diff_atom *atom;
+ struct diff_atom *atom_r;
unsigned int left_idx;
unsigned int right_idx;
if (i < lcs_count) {
atom = lcs[i];
- left_idx = diff_atom_idx(left, atom);
- right_idx = diff_atom_idx(right, atom->patience.pos_in_other);
+ atom_r = atom->patience.pos_in_other;
+ debug("lcs[%u] = left[%u] = right[%u]\n", i,
+ diff_atom_idx(left, atom), diff_atom_idx(right, atom_r));
+ left_idx = atom->patience.identical_lines.start;
+ right_idx = atom_r->patience.identical_lines.start;
+ debug(" identical lines l %u-%u r %u-%u\n",
+ atom->patience.identical_lines.start, atom->patience.identical_lines.end,
+ atom_r->patience.identical_lines.start, atom_r->patience.identical_lines.end);
} else {
atom = NULL;
+ atom_r = NULL;
left_idx = left->atoms.len;
right_idx = right->atoms.len;
}
* iteration of this loop, there is no matching atom, we were just cleaning out the remaining lines. */
if (atom) {
if (!diff_state_add_chunk(state, true,
- atom, 1,
- atom->patience.pos_in_other, 1))
+ left->atoms.head + atom->patience.identical_lines.start,
+ range_len(&atom->patience.identical_lines),
+ right->atoms.head + atom_r->patience.identical_lines.start,
+ range_len(&atom_r->patience.identical_lines)))
goto return_rc;
+ left_pos = atom->patience.identical_lines.end;
+ right_pos = atom_r->patience.identical_lines.end;
+ } else {
+ left_pos = left_idx + 1;
+ right_pos = right_idx + 1;
}
-
- left_pos = left_idx + 1;
- right_pos = right_idx + 1;
+ debug("end of iteration %u left_pos %u left_idx %u right_pos %u right_idx %u\n",
+ i, left_pos, left_idx, right_pos, right_idx);
}
debug("** END %s\n", __func__);
free(lcs);
return rc;
}
-