Commit Diff


commit - 926387ed74169349bdfa40445192f2b23d815244
commit + ca6fcbdce0110845f1195d2d1f5a14ed52a45baf
blob - ab51ce7a36d7cfb1b9e50d047239e9b760dba83b
blob + 7dcfd74d9e824db631a9d3bc775f8e4ff365720c
--- lib/diff_patience.c
+++ lib/diff_patience.c
@@ -29,8 +29,18 @@
 #include "diff_internal.h"
 #include "diff_debug.h"
 
+/* Algorithm to find unique lines:
+ * 0: stupidly iterate atoms
+ * 1: qsort
+ * 2: mergesort
+ */
+#define UNIQUE_STRATEGY 2
+
 /* Per-atom state for the Patience Diff algorithm */
 struct atom_patience {
+#if UNIQUE_STRATEGY == 0
+	bool unique_here;
+#endif
 	bool unique_in_both;
 	struct diff_atom *pos_in_other;
 	struct diff_atom *prev_stack;
@@ -46,7 +56,136 @@ struct atom_patience {
 	(((struct atom_patience*)((ATOM)->root->current->algo_data))\
 	 [diff_atom_idx((ATOM)->root->current, ATOM)])
 
-int diff_atoms_mergesort_compar(const void *_a, const void *_b)
+#if UNIQUE_STRATEGY == 0
+
+/* Stupid iteration and comparison of all atoms */
+static int
+diff_atoms_mark_unique(struct diff_data *d, unsigned int *unique_count)
+{
+	struct diff_atom *i;
+	unsigned int count = 0;
+	diff_data_foreach_atom(i, d) {
+		PATIENCE(i).unique_here = true;
+		PATIENCE(i).unique_in_both = true;
+		count++;
+	}
+	diff_data_foreach_atom(i, d) {
+		struct diff_atom *j;
+
+		if (!PATIENCE(i).unique_here)
+			continue;
+
+		diff_data_foreach_atom_from(i + 1, j, d) {
+			bool same;
+			int r = diff_atom_same(&same, i, j);
+			if (r)
+				return r;
+			if (!same)
+				continue;
+			if (PATIENCE(i).unique_here) {
+				PATIENCE(i).unique_here = false;
+				PATIENCE(i).unique_in_both = false;
+				count--;
+			}
+			PATIENCE(j).unique_here = false;
+			PATIENCE(j).unique_in_both = false;
+			count--;
+		}
+	}
+	if (unique_count)
+		*unique_count = count;
+	return 0;
+}
+
+/* Mark those lines as PATIENCE(atom).unique_in_both = true that appear exactly
+ * once in each side. */
+static int
+diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_data *right,
+			       unsigned int *unique_in_both_count)
+{
+	/* Derive the final unique_in_both count without needing an explicit
+	 * iteration. So this is just some optimiziation to save one iteration
+	 * in the end. */
+	unsigned int unique_in_both;
+	int r;
+
+	r = diff_atoms_mark_unique(left, &unique_in_both);
+	if (r)
+		return r;
+	r = diff_atoms_mark_unique(right, NULL);
+	if (r)
+		return r;
+
+	debug("unique_in_both %u\n", unique_in_both);
+
+	struct diff_atom *i;
+	diff_data_foreach_atom(i, left) {
+		if (!PATIENCE(i).unique_here)
+			continue;
+		struct diff_atom *j;
+		int found_in_b = 0;
+		diff_data_foreach_atom(j, right) {
+			bool same;
+			int r = diff_atom_same(&same, i, j);
+			if (r)
+				return r;
+			if (!same)
+				continue;
+			if (!PATIENCE(j).unique_here) {
+				found_in_b = 2; /* or more */
+				break;
+			} else {
+				found_in_b = 1;
+				PATIENCE(j).pos_in_other = i;
+				PATIENCE(i).pos_in_other = j;
+			}
+		}
+
+		if (found_in_b == 0 || found_in_b > 1) {
+			PATIENCE(i).unique_in_both = false;
+			unique_in_both--;
+			debug("unique_in_both %u  (%d) ", unique_in_both,
+			      found_in_b);
+			debug_dump_atom(left, NULL, i);
+		}
+	}
+
+	/* Still need to unmark right[*]->patience.unique_in_both for atoms that
+	 * don't exist in left */
+	diff_data_foreach_atom(i, right) {
+		if (!PATIENCE(i).unique_here
+		    || !PATIENCE(i).unique_in_both)
+			continue;
+		struct diff_atom *j;
+		bool found_in_a = false;
+		diff_data_foreach_atom(j, left) {
+			bool same;
+			int r;
+			if (!PATIENCE(j).unique_in_both)
+				continue;
+			r = diff_atom_same(&same, i, j);
+			if (r)
+				return r;
+			if (!same)
+				continue;
+			found_in_a = true;
+			break;
+		}
+
+		if (!found_in_a)
+			PATIENCE(i).unique_in_both = false;
+	}
+
+	if (unique_in_both_count)
+		*unique_in_both_count = unique_in_both;
+	return 0;
+}
+
+#else /* UNIQUE_STRATEGY != 0 */
+
+/* Use an optimized sorting algorithm (qsort, mergesort) to find unique lines */
+
+int diff_atoms_compar(const void *_a, const void *_b)
 {
 	const struct diff_atom *a = *(struct diff_atom**)_a;
 	const struct diff_atom *b = *(struct diff_atom**)_b;
@@ -54,7 +193,7 @@ int diff_atoms_mergesort_compar(const void *_a, const 
 	int rc = 0;
 
 	/* If there's been an error (e.g. I/O error) in a previous compar, we
-	 * have no way to abort the mergesort but just report the rc and stop
+	 * have no way to abort the sort but just report the rc and stop
 	 * comparing. Make sure to catch errors on either side. If atoms are
 	 * from more than one diff_data, make sure the error, if any, spreads
 	 * to all of them, so we can cut short all future comparisons. */
@@ -93,14 +232,14 @@ int diff_atoms_mergesort_compar(const void *_a, const 
 
 /* Sort an array of struct diff_atom* in-place. */
 static int diff_atoms_sort(struct diff_atom *atoms[],
-			    size_t atoms_count)
+			   size_t atoms_count)
 {
-	/*
-	 * Use mergesort(3) bcause it provides stable ordering
-	 * even if two or more elements are equal.
-	 */
+#if UNIQUE_STRATEGY == 1
+	qsort(atoms, atoms_count, sizeof(struct diff_atom*), diff_atoms_compar);
+#else
 	mergesort(atoms, atoms_count, sizeof(struct diff_atom*),
-	      diff_atoms_mergesort_compar);
+		  diff_atoms_compar);
+#endif
 	return atoms[0]->root->err;
 }
 
@@ -179,6 +318,7 @@ free_and_exit:
 	free(all_atoms);
 	return rc;
 }
+#endif /* UNIQUE_STRATEGY != 0 */
 
 static int
 diff_atoms_swallow_identical_neighbors(struct diff_data *left,