commit b3fb46867c2efc085c584bd336b9dca9cf26a161 from: Neels Hofmeyr date: Sun Sep 20 14:13:07 2020 UTC add diff_atom_cmp() and error handling to diff_atom_same() commit - e10a628aa1d46df600ffc7194d35f67416eaefdd commit + b3fb46867c2efc085c584bd336b9dca9cf26a161 blob - 0dbd866be6ebf41975cc4e1b917e8f2d8b0be150 blob + c70915a7fff65bfdfe065d3836d6114ffa8a3421 --- include/diff/diff_main.h +++ include/diff/diff_main.h @@ -105,9 +105,16 @@ struct diff_data { void diff_data_free(struct diff_data *diff_data); +int +diff_atom_cmp(int *cmp, + const struct diff_atom *left, + const struct diff_atom *right); + /* Indicate whether two given diff atoms match. */ -bool diff_atom_same(const struct diff_atom *left, - const struct diff_atom *right); +int +diff_atom_same(bool *same, + const struct diff_atom *left, + const struct diff_atom *right); /* The atom's index in the entire file. For atoms divided by lines of text, this * yields the line number (starting with 0). Also works for diff_data that blob - 39136bde16a7fb4b736b65a49793d7ae7a196d8e blob + 67e31955bdbcf8b9120e49f0711927a1c52ebcfe --- lib/diff_main.c +++ lib/diff_main.c @@ -33,17 +33,56 @@ #include "debug.h" -bool -diff_atom_same(const struct diff_atom *left, const struct diff_atom *right) +static int +read_at(int fd, int at_pos, unsigned char *buf, size_t len) { + int r; + if (lseek(fd, at_pos, SEEK_SET) == -1) + return errno; + r = read(fd, buf, len); + if (r == -1) + return errno; + if (r != len) + return EIO; + return 0; +} + +static int +buf_cmp(const unsigned char *left, size_t left_len, + const unsigned char *right, size_t right_len) +{ + int cmp = memcmp(left, right, MIN(left_len, right_len)); + if (cmp) + return cmp; + if (left_len == right_len) + return 0; + return (left_len > right_len) ? 1 : -1; +} + +int +diff_atom_cmp(int *cmp, + const struct diff_atom *left, + const struct diff_atom *right) +{ off_t remain_left, remain_right; - bool same = true; - if (left->hash != right->hash || left->len != right->len) - return false; + if (!left->len && !right->len) { + *cmp = 0; + return 0; + } + if (!right->len) { + *cmp = 1; + return 0; + } + if (!left->len) { + *cmp = -1; + return 0; + } - if (left->at != NULL && right->at != NULL) - return (memcmp(left->at, right->at, left->len) == 0); + if (left->at != NULL && right->at != NULL) { + *cmp = buf_cmp(left->at, left->len, right->at, right->len); + return 0; + } remain_left = left->len; remain_right = right->len; @@ -54,63 +93,71 @@ diff_atom_same(const struct diff_atom *left, const str off_t n_left, n_right; ssize_t r; + if (!remain_right) { + *cmp = 1; + return 0; + } + if (!remain_left) { + *cmp = -1; + return 0; + } + n_left = MIN(chunksz, remain_left); n_right = MIN(chunksz, remain_right); if (left->at == NULL) { - if (lseek(left->d->root->fd, - left->pos + (left->len - remain_left), - SEEK_SET) == -1) - abort(); /* XXX cannot return error */ - r = read(left->d->root->fd, buf_left, n_left); - if (r == -1) - abort(); /* XXX cannot return error */ - if (r != n_left) - abort(); /* XXX cannot return error */ + r = read_at(left->d->root->fd, + left->pos + (left->len - remain_left), + buf_left, n_left); + if (r) { + *cmp = 0; + return r; + } p_left = buf_left; } else { p_left = left->at + (left->len - remain_left); } if (right->at == NULL) { - if (lseek(right->d->root->fd, - right->pos + (right->len - remain_right), - SEEK_SET) == -1) - abort(); /* XXX cannot return error */ - r = read(right->d->root->fd, buf_right, n_right); - if (r == -1) - abort(); /* XXX cannot return error */ - if (r != n_right) - abort(); /* XXX cannot return error */ + r = read_at(right->d->root->fd, + right->pos + (right->len - remain_right), + buf_right, n_right); + if (r) { + *cmp = 0; + return r; + } p_right = buf_right; } else { p_right = right->at + (right->len - remain_right); - n_right = MIN(chunksz, remain_right); } - - if (n_left == 0) { - if (n_right != 0) - same = false; - break; - } else if (n_right == 0) { - if (n_left != 0) - same = false; - break; - } else if (n_left == n_right) { - if (memcmp(p_left, p_right, n_left) != 0) { - same = false; - break; - } - } else { - same = false; - break; + + r = buf_cmp(p_left, n_left, p_right, n_right); + if (r) { + *cmp = r; + return 0; } remain_left -= n_left; remain_right -= n_right; } - return same; + *cmp = 0; + return 0; +} + +int +diff_atom_same(bool *same, + const struct diff_atom *left, + const struct diff_atom *right) +{ + int cmp; + int r = diff_atom_cmp(&cmp, left, right); + if (r) { + *same = true; + return r; + } + *same = (cmp == 0); + return 0; } /* Even if a left or right side is empty, diff output may need to know the @@ -206,10 +253,17 @@ diff_algo_none(const struct diff_algo_config *algo_con /* Add a chunk of equal lines, if any */ unsigned int equal_atoms = 0; while (equal_atoms < state->left.atoms.len - && equal_atoms < state->right.atoms.len - && diff_atom_same(&state->left.atoms.head[equal_atoms], - &state->right.atoms.head[equal_atoms])) + && equal_atoms < state->right.atoms.len) { + int r; + bool same; + r = diff_atom_same(&same, &state->left.atoms.head[equal_atoms], + &state->right.atoms.head[equal_atoms]); + if (r) + return r; + if (!same) + break; equal_atoms++; + } if (equal_atoms) { if (!diff_state_add_chunk(state, true, &state->left.atoms.head[0], blob - 5a91ceba82cb3d83ec084178d1e08b56cb150395 blob + c164f2277b98e6aadeed4c94b4479e596473d531 --- lib/diff_myers.c +++ lib/diff_myers.c @@ -298,10 +298,19 @@ diff_divide_myers_forward(struct diff_data *left, stru int x_before_slide = x; /* Slide down any snake that we might find here. */ - while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len - && diff_atom_same(&left->atoms.head[x], - &right->atoms.head[xk_to_y(x, k)])) - x++; + while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len) { + bool same; + int r = diff_atom_same(&same, + &left->atoms.head[x], + &right->atoms.head[ + xk_to_y(x, k)]); + if (r) { + abort(); // TODO: error handling + } + if (!same) + break; + x++; + } kd_forward[k] = x; if (x_before_slide != x) { debug(" down %d similar lines\n", x - x_before_slide); @@ -587,11 +596,19 @@ diff_divide_myers_backward(struct diff_data *left, str &right->atoms.head[xc_to_y(x, c, delta)-1]); } int x_before_slide = x; - while (x > 0 && xc_to_y(x, c, delta) > 0 - && diff_atom_same(&left->atoms.head[x-1], - &right->atoms.head[xc_to_y(x, c, - delta)-1])) + while (x > 0 && xc_to_y(x, c, delta) > 0) { + bool same; + int r = diff_atom_same(&same, + &left->atoms.head[x-1], + &right->atoms.head[ + xc_to_y(x, c, delta)-1]); + if (r) { + abort(); // TODO: error handling + } + if (!same) + break; x--; + } kd_backward[c] = x; if (x_before_slide != x) { debug(" up %d similar lines\n", x_before_slide - x); @@ -1039,10 +1056,18 @@ diff_algo_myers(const struct diff_algo_config *algo_co /* Slide down any snake that we might find here. */ while (x < left->atoms.len - && xk_to_y(x, k) < right->atoms.len - && diff_atom_same(&left->atoms.head[x], - &right->atoms.head[xk_to_y(x, k)])) - x++; + && xk_to_y(x, k) < right->atoms.len) { + bool same; + int r = diff_atom_same(&same, + &left->atoms.head[x], + &right->atoms.head[ + xk_to_y(x, k)]); + if (r) + return r; + if (!same) + break; + x++; + } kd_column[k] = x; if (DEBUG) { blob - 280be3f6230fb2de2e3b77a26030d2c313f5f696 blob + 79fe3c4b8682eaf5665b62bb3c1e6d8ebb7bc745 --- lib/diff_patience.c +++ lib/diff_patience.c @@ -46,16 +46,20 @@ diff_atoms_mark_unique(struct diff_data *d, unsigned i continue; diff_data_foreach_atom_from(i + 1, j, d) { - if (diff_atom_same(i, j)) { - if (i->patience.unique_here) { - i->patience.unique_here = false; - i->patience.unique_in_both = false; - count--; - } - j->patience.unique_here = false; - j->patience.unique_in_both = false; + bool same; + int r = diff_atom_same(&same, i, j); + if (r) + abort(); // TODO: error handling + if (!same) + continue; + if (i->patience.unique_here) { + i->patience.unique_here = false; + i->patience.unique_in_both = false; count--; } + j->patience.unique_here = false; + j->patience.unique_in_both = false; + count--; } } if (unique_count) @@ -85,7 +89,11 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_atom *j; int found_in_b = 0; diff_data_foreach_atom(j, right) { - if (!diff_atom_same(i, j)) + bool same; + int r = diff_atom_same(&same, i, j); + if (r) + abort(); // TODO: error handling + if (!same) continue; if (!j->patience.unique_here) { found_in_b = 2; /* or more */ @@ -115,9 +123,14 @@ diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_atom *j; bool found_in_a = false; diff_data_foreach_atom(j, left) { + bool same; + int r; if (!j->patience.unique_in_both) continue; - if (!diff_atom_same(i, j)) + r = diff_atom_same(&same, i, j); + if (r) + abort(); // TODO: error handling + if (!same) continue; found_in_a = true; break; @@ -167,23 +180,34 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat * iteration, so we will never hit another common-unique line * above. */ for (identical_l.start = l_idx, identical_r.start = r_idx; - identical_l.start > l_min - && identical_r.start > r_min - && diff_atom_same( + identical_l.start > l_min && identical_r.start > r_min; + identical_l.start--, identical_r.start--) { + bool same; + int r = diff_atom_same(&same, &left->atoms.head[identical_l.start - 1], &right->atoms.head[identical_r.start - 1]); - identical_l.start--, identical_r.start--); + if (r) + abort(); // TODO: error handling + if (!same) + break; + } /* Swallow downwards */ for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1; identical_l.end < left->atoms.len - && identical_r.end < right->atoms.len - && diff_atom_same(&left->atoms.head[identical_l.end], - &right->atoms.head[identical_r.end]); + && identical_r.end < right->atoms.len; identical_l.end++, identical_r.end++, next_l_idx++) { struct diff_atom *l_end; struct diff_atom *r_end; + bool same; + int r = diff_atom_same(&same, + &left->atoms.head[identical_l.end], + &right->atoms.head[identical_r.end]); + if (r) + abort(); // TODO: error handling + if (!same) + break; l_end = &left->atoms.head[identical_l.end]; r_end = &right->atoms.head[identical_r.end]; if (!l_end->patience.unique_in_both)