commit c16dde50bb5e92533dcbbc513875d726f0f9dd48 from: Stefan Sperling date: Thu Oct 22 14:08:19 2020 UTC allow diff API users to atomize files separately This is a breaking API change (not that we care about that at this point). This can avoid redundant work spent on atomizing a file multiple times. There are use cases where one particular file must be compared to other files over and over again, such as when blaming file history. The old API gave access to both versions of the file to the atomizer just in case a future atomizer implementation needs this. This can still be achieved by passing a second file via the atomizer's private data pointer. commit - b756ffd26fea20daeeb98e205cc5655350cad80f commit + c16dde50bb5e92533dcbbc513875d726f0f9dd48 blob - 6bd48428c4f92c5ed7c7d1695af2f866b953cefb blob + 26e0568cfba8a9d74e2f9e1ee65261383fe2e9b5 --- diff/diff.c +++ diff/diff.c @@ -188,7 +188,8 @@ diffreg(char *file1, char *file2, enum diffreg_algo al .left_path = file1, .right_path = file2, }; - struct diff_result *result; + struct diff_data left = {}, right = {}; + struct diff_result *result = NULL; int rc; const struct diff_config *cfg; int diff_flags = 0; @@ -217,8 +218,14 @@ diffreg(char *file1, char *file2, enum diffreg_algo al if (show_function_prototypes) diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES; - result = diff_main(cfg, f1, str1, st1.st_size, f2, str2, st2.st_size, - diff_flags); + rc = diff_atomize_file(&left, cfg, f1, str1, st1.st_size, diff_flags); + if (rc) + goto done; + rc = diff_atomize_file(&right, cfg, f2, str2, st2.st_size, diff_flags); + if (rc) + goto done; + + result = diff_main(cfg, &left, &right); #if 0 rc = diff_output_plain(stdout, &info, result); #else @@ -229,8 +236,10 @@ diffreg(char *file1, char *file2, enum diffreg_algo al context_lines); } #endif +done: diff_result_free(result); - + diff_data_free(&left); + diff_data_free(&right); if (str1) munmap(str1, st1.st_size); if (str2) blob - 99669105d130b9e49ecf3ba09743e0e2c8b1e031 blob + 40142164742fb5bb3d4316a5bca80a90d132836c --- include/diff_main.h +++ include/diff_main.h @@ -60,32 +60,29 @@ typedef ARRAYLIST(struct diff_chunk) diff_chunk_arrayl struct diff_result { int rc; - struct diff_data left; - struct diff_data right; + + /* + * Pointers to diff data passed in via diff_main. + * Do not free these diff_data before freeing the diff_result struct. + */ + struct diff_data *left; + struct diff_data *right; + diff_chunk_arraylist_t chunks; }; struct diff_state; -/* Signature of a utility function to divide both source files into diff atoms. - * It is possible that a (future) algorithm requires both source files to decide - * on atom split points, hence this gets both left and right to atomize at the - * same time. +/* Signature of a utility function to divide a file into diff atoms. * An example is diff_atomize_text_by_line() in diff_atomize_text.c. * * func_data: context pointer (free to be used by implementation). - * left: struct diff_data with left->data and left->len already set up, and - * left->atoms to be created. - * right: struct diff_data with right->data and right->len already set up, and - * right->atoms to be created. + * d: struct diff_data with d->data and d->len already set up, and + * d->atoms to be created. */ -typedef int (*diff_atomize_func_t)(void *func_data, - struct diff_data *left, - struct diff_data *right); +typedef int (*diff_atomize_func_t)(void *func_data, struct diff_data *d); -extern int diff_atomize_text_by_line(void *func_data, - struct diff_data *left, - struct diff_data *right); +extern int diff_atomize_text_by_line(void *func_data, struct diff_data *d); struct diff_algo_config; typedef int (*diff_algo_impl_t)( @@ -177,9 +174,9 @@ struct diff_config { unsigned int max_recursion_depth; }; +int diff_atomize_file(struct diff_data *d, const struct diff_config *config, + FILE *f, const uint8_t *data, off_t len, int diff_flags); struct diff_result *diff_main(const struct diff_config *config, - FILE *left_f, const uint8_t *left_data, - off_t left_len, - FILE *right_f, const uint8_t *right_data, - off_t right_len, int diff_flags); + struct diff_data *left, + struct diff_data *right); void diff_result_free(struct diff_result *result); blob - 03081043814bb567b617b3838d6eef93db8052c1 blob + a3d25db09a7170f984d78db4fa9d4de2a904607a --- lib/diff_atomize_text.c +++ lib/diff_atomize_text.c @@ -173,12 +173,7 @@ diff_data_atomize_text_lines(struct diff_data *d) } int -diff_atomize_text_by_line(void *func_data, struct diff_data *left, - struct diff_data *right) +diff_atomize_text_by_line(void *func_data, struct diff_data *d) { - int rc; - rc = diff_data_atomize_text_lines(left); - if (rc != DIFF_RC_OK) - return rc; - return diff_data_atomize_text_lines(right); + return diff_data_atomize_text_lines(d); } blob - 7c1b831a3433f7d51209934dad51ee7c36826bae blob + 9b2364aff9620be21302c924d9bbcd01b709c773 --- lib/diff_main.c +++ lib/diff_main.c @@ -542,45 +542,45 @@ return_rc: ARRAYLIST_FREE(state->temp_result); return rc; } + +int +diff_atomize_file(struct diff_data *d, + const struct diff_config *config, + FILE *f, const uint8_t *data, off_t len, int diff_flags) +{ + if (!config->atomize_func) + return EINVAL; + diff_data_init_root(d, f, data, len, diff_flags); + + return config->atomize_func(config->atomize_func_data, d); + +} + struct diff_result * -diff_main(const struct diff_config *config, - FILE *left_f, const uint8_t *left_data, off_t left_len, - FILE *right_f, const uint8_t *right_data, off_t right_len, - int diff_flags) +diff_main(const struct diff_config *config, struct diff_data *left, + struct diff_data *right) { struct diff_result *result = malloc(sizeof(struct diff_result)); if (!result) return NULL; *result = (struct diff_result){}; - diff_data_init_root(&result->left, left_f, left_data, left_len, - diff_flags); - diff_data_init_root(&result->right, right_f, right_data, right_len, - diff_flags); - - if (!config->atomize_func) { - result->rc = EINVAL; - return result; - } + result->left = left; + result->right = right; - result->rc = config->atomize_func(config->atomize_func_data, - &result->left, &result->right); - if (result->rc != DIFF_RC_OK) - return result; - struct diff_state state = { .result = result, .recursion_depth_left = config->max_recursion_depth ? : 32, .kd_buf = NULL, .kd_buf_size = 0, }; - diff_data_init_subsection(&state.left, &result->left, - result->left.atoms.head, - result->left.atoms.len); - diff_data_init_subsection(&state.right, &result->right, - result->right.atoms.head, - result->right.atoms.len); + diff_data_init_subsection(&state.left, left, + left->atoms.head, + left->atoms.len); + diff_data_init_subsection(&state.right, right, + right->atoms.head, + right->atoms.len); result->rc = diff_run_algo(config->algo, &state); free(state.kd_buf); @@ -593,8 +593,6 @@ diff_result_free(struct diff_result *result) { if (!result) return; - diff_data_free(&result->left); - diff_data_free(&result->right); ARRAYLIST_FREE(result->chunks); free(result); } blob - c3b32d7b258109c65dd18dbed7f2502d2481fc14 blob + 92c42e1de1fdc900fa3a3e8cb4cbc92cacb295c7 --- lib/diff_output.c +++ lib/diff_output.c @@ -257,11 +257,11 @@ diff_output_match_function_prototype(char **prototype, *prototype = NULL; - if (result->left.atoms.len > 0 && cc->left.start > 0) { - data = &result->left; + if (result->left->atoms.len > 0 && cc->left.start > 0) { + data = result->left; start_atom = &data->atoms.head[cc->left.start - 1]; - } else if (result->right.atoms.len > 0 && cc->right.start > 0) { - data = &result->right; + } else if (result->right->atoms.len > 0 && cc->right.start > 0) { + data = result->right; start_atom = &data->atoms.head[cc->right.start - 1]; } else return DIFF_RC_OK; blob - 234534311b03e23374b96bf69a64d6f670196b87 blob + b6e9089caf74f10a59526427cb0c2b8d1fa47a03 --- lib/diff_output_edscript.c +++ lib/diff_output_edscript.c @@ -40,7 +40,7 @@ output_edscript_chunk(struct diff_output_info *outinfo left_len = cc->left.end - cc->left.start; if (left_len < 0) return EINVAL; - else if (result->left.atoms.len == 0) + else if (result->left->atoms.len == 0) left_start = 0; else if (left_len == 0 && cc->left.start > 0) left_start = cc->left.start; @@ -50,7 +50,7 @@ output_edscript_chunk(struct diff_output_info *outinfo right_len = cc->right.end - cc->right.start; if (right_len < 0) return EINVAL; - else if (result->right.atoms.len == 0) + else if (result->right->atoms.len == 0) right_start = 0; else if (right_len == 0 && cc->right.start > 0) right_start = cc->right.start; blob - 2e90b6827fa608951bc5d15d8157aa5fbe1f8ba1 blob + 730fe4c41f119d893552ec8b09bc0bac067ac6cf --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -38,7 +38,7 @@ int diff_chunk_get_left_start(const struct diff_chunk *c, const struct diff_result *r, int context_lines) { - int left_start = diff_atom_root_idx(&r->left, c->left_start); + int left_start = diff_atom_root_idx(r->left, c->left_start); return MAX(0, left_start - context_lines); } @@ -47,7 +47,7 @@ diff_chunk_get_left_end(const struct diff_chunk *c, const struct diff_result *r, int context_lines) { int left_start = diff_chunk_get_left_start(c, r, 0); - return MIN(r->left.atoms.len, + return MIN(r->left->atoms.len, left_start + c->left_count + context_lines); } @@ -55,7 +55,7 @@ int diff_chunk_get_right_start(const struct diff_chunk *c, const struct diff_result *r, int context_lines) { - int right_start = diff_atom_root_idx(&r->right, c->right_start); + int right_start = diff_atom_root_idx(r->right, c->right_start); return MAX(0, right_start - context_lines); } @@ -64,7 +64,7 @@ diff_chunk_get_right_end(const struct diff_chunk *c, const struct diff_result *r, int context_lines) { int right_start = diff_chunk_get_right_start(c, r, 0); - return MIN(r->right.atoms.len, + return MIN(r->right->atoms.len, right_start + c->right_count + context_lines); } @@ -242,7 +242,7 @@ output_unidiff_chunk(struct diff_output_info *outinfo, } left_len = cc->left.end - cc->left.start; - if (result->left.atoms.len == 0) + if (result->left->atoms.len == 0) left_start = 0; else if (left_len == 0 && cc->left.start > 0) left_start = cc->left.start; @@ -250,7 +250,7 @@ output_unidiff_chunk(struct diff_output_info *outinfo, left_start = cc->left.start + 1; right_len = cc->right.end - cc->right.start; - if (result->right.atoms.len == 0) + if (result->right->atoms.len == 0) right_start = 0; else if (right_len == 0 && cc->right.start > 0) right_start = cc->right.start; @@ -306,11 +306,11 @@ output_unidiff_chunk(struct diff_output_info *outinfo, const struct diff_chunk *first_chunk; int chunk_start_line; first_chunk = &result->chunks.head[cc->chunk.start]; - chunk_start_line = diff_atom_root_idx(&result->left, + chunk_start_line = diff_atom_root_idx(result->left, first_chunk->left_start); if (cc->left.start < chunk_start_line) { rc = diff_output_lines(outinfo, dest, " ", - &result->left.atoms.head[cc->left.start], + &result->left->atoms.head[cc->left.start], chunk_start_line - cc->left.start); if (rc) return rc; @@ -347,12 +347,12 @@ output_unidiff_chunk(struct diff_output_info *outinfo, const struct diff_chunk *last_chunk; int chunk_end_line; last_chunk = &result->chunks.head[cc->chunk.end - 1]; - chunk_end_line = diff_atom_root_idx(&result->left, + chunk_end_line = diff_atom_root_idx(result->left, last_chunk->left_start + last_chunk->left_count); if (cc->left.end > chunk_end_line) { rc = diff_output_lines(outinfo, dest, " ", - &result->left.atoms.head[chunk_end_line], + &result->left->atoms.head[chunk_end_line], cc->left.end - chunk_end_line); if (rc) return rc; @@ -369,8 +369,8 @@ diff_output_unidiff_chunk(struct diff_output_info **ou const struct diff_chunk_context *cc) { struct diff_output_info *outinfo = NULL; - int flags = (result->left.root->diff_flags | - result->right.root->diff_flags); + int flags = (result->left->root->diff_flags | + result->right->root->diff_flags); bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES); if (output_info) { @@ -393,8 +393,8 @@ diff_output_unidiff(struct diff_output_info **output_i struct diff_output_unidiff_state *state; struct diff_chunk_context cc = {}; struct diff_output_info *outinfo = NULL; - int flags = (result->left.root->diff_flags | - result->right.root->diff_flags); + int flags = (result->left->root->diff_flags | + result->right->root->diff_flags); bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES); int i; blob - ef2746f5db885fe20cd242b5b7cb6764e9e49dcb blob + 880049e35da7795925a98b663edc4f56435ce549 --- test/results_test.c +++ test/results_test.c @@ -13,37 +13,41 @@ void test_minus_after_plus(void) { struct diff_result *result = malloc(sizeof(struct diff_result)); + struct diff_data d_left, d_right; char *left_data = "a\nb\nc\nd\ne\nm\nn\n"; char *right_data = "a\nb\nj\nk\nl\nm\nn\n"; int i; printf("\n--- %s()\n", __func__); + d_left = (struct diff_data){ + .data = left_data, + .len = strlen(left_data), + .root = result->left, + }; + d_right = (struct diff_data){ + .data = right_data, + .len = strlen(right_data), + .root = result->right, + }; *result = (struct diff_result) { - .left = (struct diff_data){ - .data = left_data, - .len = strlen(left_data), - .root = &result->left, - }, - .right = (struct diff_data){ - .data = right_data, - .len = strlen(right_data), - .root = &result->right, - }, + .left = &d_left, + .right = &d_right, }; - diff_atomize_text_by_line(0, &result->left, &result->right); + diff_atomize_text_by_line(NULL, result->left); + diff_atomize_text_by_line(NULL, result->right); struct diff_state state = { .result = result, .recursion_depth_left = 32, }; - diff_data_init_subsection(&state.left, &result->left, - result->left.atoms.head, - result->left.atoms.len); - diff_data_init_subsection(&state.right, &result->right, - result->right.atoms.head, - result->right.atoms.len); + diff_data_init_subsection(&state.left, result->left, + result->left->atoms.head, + result->left->atoms.len); + diff_data_init_subsection(&state.right, result->right, + result->right->atoms.head, + result->right->atoms.len); /* "same" section */ diff_state_add_chunk(&state, true, @@ -80,42 +84,48 @@ void test_minus_after_plus(void) } diff_result_free(result); + diff_data_free(&d_left); + diff_data_free(&d_right); } void test_plus_after_plus(void) { struct diff_result *result = malloc(sizeof(struct diff_result)); + struct diff_data d_left, d_right; char *left_data = "a\nb\nc\nd\ne\nm\nn\n"; char *right_data = "a\nb\nj\nk\nl\nm\nn\n"; struct diff_chunk *c; printf("\n--- %s()\n", __func__); + d_left = (struct diff_data){ + .data = left_data, + .len = strlen(left_data), + .root = result->left, + }; + d_right = (struct diff_data){ + .data = right_data, + .len = strlen(right_data), + .root = result->right, + }; *result = (struct diff_result) { - .left = (struct diff_data){ - .data = left_data, - .len = strlen(left_data), - .root = &result->left, - }, - .right = (struct diff_data){ - .data = right_data, - .len = strlen(right_data), - .root = &result->right, - }, + .left = &d_left, + .right = &d_right, }; - diff_atomize_text_by_line(0, &result->left, &result->right); + diff_atomize_text_by_line(NULL, result->left); + diff_atomize_text_by_line(NULL, result->right); struct diff_state state = { .result = result, .recursion_depth_left = 32, }; - diff_data_init_subsection(&state.left, &result->left, - result->left.atoms.head, - result->left.atoms.len); - diff_data_init_subsection(&state.right, &result->right, - result->right.atoms.head, - result->right.atoms.len); + diff_data_init_subsection(&state.left, result->left, + result->left->atoms.head, + result->left->atoms.len); + diff_data_init_subsection(&state.right, result->right, + result->right->atoms.head, + result->right->atoms.len); /* "same" section */ diff_state_add_chunk(&state, true, @@ -156,6 +166,8 @@ void test_plus_after_plus(void) } diff_result_free(result); + diff_data_free(&d_left); + diff_data_free(&d_right); } int main(void)