commit 72e4a018d51ae1a3a7c2870aa8dc33cbad2c3143 from: Neels Hofmeyr date: Thu Oct 22 01:17:21 2020 UTC patience: optimize: less diff_atom_cmp() via hash commit - 123481a5f49c26d4316bfac1b8f5599384e17d11 commit + 72e4a018d51ae1a3a7c2870aa8dc33cbad2c3143 blob - 6184577200859fe1405bbb080a18331a64692ec7 blob + 48f13cb8937b1ceada2e72b1df9a5bab9abf059d --- lib/diff_patience.c +++ lib/diff_patience.c @@ -280,6 +280,8 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, * each side, but exist once in both sources. */ for (i = 0; i < len; i++) { bool same; + unsigned int next_differing_i; + unsigned int last_identical_i; unsigned int j; unsigned int count_first_side = 1; unsigned int count_other_side = 0; @@ -287,13 +289,29 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, debug("a: "); debug_dump_atom(a->root, NULL, a); - for (j = i+1; j < len; j++) { - b = all_atoms[j]; + /* Do as few diff_atom_cmp() as possible: first walk forward + * only using the cheap hash as indicator for differing atoms; + * then walk backwards until hitting an identical atom. */ + for (next_differing_i = i + 1; next_differing_i < len; + next_differing_i++) { + b = all_atoms[next_differing_i]; + if (a->hash != b->hash) + break; + } + for (last_identical_i = next_differing_i - 1; + last_identical_i > i; + last_identical_i--) { + b = all_atoms[last_identical_i]; rc = diff_atom_same(&same, a, b); if (rc) goto free_and_exit; - if (!same) + if (same) break; + } + next_differing_i = last_identical_i + 1; + + for (j = i+1; j < next_differing_i; j++) { + b = all_atoms[j]; /* A following atom is the same. See on which side the * repetition counts. */ if (a->root == b->root)