commit - fe8af0d6c0a2ba7f1c50f0b88cd7e13d784d2e23
commit + 1dfba0555efd6b616811906d011f96945be90dcc
blob - 598d7b74a05865a500826f9b346fd3933781415f
blob + 195e5bc946b99bc4518bb657bb1a31a1a213e725
--- diff/GNUmakefile
+++ diff/GNUmakefile
diff: $(SRCS) $(LIB)
gcc $(CFLAGS) -I../include -o $@ $^
-../lib/libdiff.a: ../lib/*.[hc] ../include/diff/*.h
+../lib/libdiff.a: ../lib/*.[hc] ../include/*.h
$(MAKE) -C ../lib
.PHONY: clean
blob - 9e288bba939bf3e9c8595855f3933fd37a309fbb
blob + ff6ba3cd5842b83bb5c0d79e6b539956588d6cab
--- diff/diff.c
+++ diff/diff.c
#include <string.h>
#include <unistd.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
__dead void usage(void);
int diffreg(char *, char *, bool, bool, int);
blob - 7aaab19f0a58317289881be832a03ec59f67a0ff (mode 644)
blob + /dev/null
--- include/diff/arraylist.h
+++ /dev/null
-/* Auto-reallocating array for arbitrary member types. */
-/*
- * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/* Usage:
- *
- * ARRAYLIST(any_type_t) list;
- * // OR
- * typedef ARRAYLIST(any_type_t) any_type_list_t;
- * any_type_list_t list;
- *
- * // pass the number of (at first unused) members to add on each realloc:
- * ARRAYLIST_INIT(list, 128);
- * any_type_t *x;
- * while (bar) {
- * // This enlarges the allocated array as needed;
- * // list.head may change due to realloc:
- * ARRAYLIST_ADD(x, list);
- * if (!x)
- * return ENOMEM;
- * *x = random_foo_value;
- * }
- * for (i = 0; i < list.len; i++)
- * printf("%s", foo_to_str(list.head[i]));
- * ARRAYLIST_FREE(list);
- */
-#define ARRAYLIST(MEMBER_TYPE) \
- struct { \
- MEMBER_TYPE *head; \
- MEMBER_TYPE *p; \
- unsigned int len; \
- unsigned int allocated; \
- unsigned int alloc_blocksize; \
- }
-
-#define ARRAYLIST_INIT(ARRAY_LIST, ALLOC_BLOCKSIZE) do { \
- (ARRAY_LIST).head = NULL; \
- (ARRAY_LIST).len = 0; \
- (ARRAY_LIST).allocated = 0; \
- (ARRAY_LIST).alloc_blocksize = ALLOC_BLOCKSIZE; \
- } while(0)
-
-#define ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST) do { \
- if ((ARRAY_LIST).len && !(ARRAY_LIST).allocated) { \
- NEW_ITEM_P = NULL; \
- break; \
- } \
- if ((ARRAY_LIST).head == NULL \
- || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
- (ARRAY_LIST).p = recallocarray((ARRAY_LIST).head, \
- (ARRAY_LIST).len, \
- (ARRAY_LIST).allocated + \
- ((ARRAY_LIST).alloc_blocksize ? : 8), \
- sizeof(*(ARRAY_LIST).head)); \
- if ((ARRAY_LIST).p == NULL) { \
- NEW_ITEM_P = NULL; \
- break; \
- } \
- (ARRAY_LIST).allocated += \
- (ARRAY_LIST).alloc_blocksize ? : 8; \
- (ARRAY_LIST).head = (ARRAY_LIST).p; \
- (ARRAY_LIST).p = NULL; \
- }; \
- if ((ARRAY_LIST).head == NULL \
- || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
- NEW_ITEM_P = NULL; \
- break; \
- } \
- (NEW_ITEM_P) = &(ARRAY_LIST).head[(ARRAY_LIST).len]; \
- (ARRAY_LIST).len++; \
- } while (0)
-
-#define ARRAYLIST_INSERT(NEW_ITEM_P, ARRAY_LIST, AT_IDX) do { \
- ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST); \
- if ((NEW_ITEM_P) && (AT_IDX) < (ARRAY_LIST).len) \
- memmove(&(ARRAY_LIST).head[(AT_IDX) + 1], \
- &(ARRAY_LIST).head[AT_IDX], \
- ((ARRAY_LIST).len - (AT_IDX)) \
- * sizeof(*(ARRAY_LIST).head)); \
- } while (0)
-
-#define ARRAYLIST_CLEAR(ARRAY_LIST) \
- (ARRAY_LIST).len = 0
-
-#define ARRAYLIST_FREE(ARRAY_LIST) \
- do { \
- if ((ARRAY_LIST).head && (ARRAY_LIST).allocated) \
- free((ARRAY_LIST).head); \
- ARRAYLIST_INIT(ARRAY_LIST, (ARRAY_LIST).alloc_blocksize); \
- } while(0)
blob - 9479d3112b7ca78b893d54f7948605a4e532da35 (mode 644)
blob + /dev/null
--- include/diff/diff_main.h
+++ /dev/null
-/* Generic infrastructure to implement various diff algorithms. */
-/*
- * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-struct diff_range {
- int start;
- int end;
-};
-
-/* List of all possible return codes of a diff invocation. */
-#define DIFF_RC_USE_DIFF_ALGO_FALLBACK -1
-#define DIFF_RC_OK 0
-/* Any positive return values are errno values from sys/errno.h */
-
-struct diff_atom;
-
-/* For each file, there is a "root" struct diff_data referencing the entire
- * file, which the atoms are parsed from. In recursion of diff algorithm, there
- * may be "child" struct diff_data only referencing a subsection of the file,
- * re-using the atoms parsing. For "root" structs, atoms_allocated will be
- * nonzero, indicating that the array of atoms is owned by that struct. For
- * "child" structs, atoms_allocated == 0, to indicate that the struct is
- * referencing a subset of atoms. */
-struct diff_data {
- FILE *f; /* if root diff_data and not memory-mapped */
- off_t pos; /* if not memory-mapped */
- const uint8_t *data; /* if memory-mapped */
- off_t len;
-
- ARRAYLIST(struct diff_atom) atoms;
- struct diff_data *root;
-
- int diff_flags;
-};
-
-#define DIFF_FLAG_IGNORE_WHITESPACE 0x00000001
-
-void diff_data_free(struct diff_data *diff_data);
-
-struct diff_chunk;
-typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
-
-struct diff_result {
- int rc;
- struct diff_data left;
- struct diff_data right;
- diff_chunk_arraylist_t chunks;
-};
-
-struct diff_state;
-
-/* Signature of a utility function to divide both source files into diff atoms.
- * It is possible that a (future) algorithm requires both source files to decide
- * on atom split points, hence this gets both left and right to atomize at the
- * same time.
- * An example is diff_atomize_text_by_line() in diff_atomize_text.c.
- *
- * func_data: context pointer (free to be used by implementation).
- * left: struct diff_data with left->data and left->len already set up, and
- * left->atoms to be created.
- * right: struct diff_data with right->data and right->len already set up, and
- * right->atoms to be created.
- */
-typedef int (*diff_atomize_func_t)(void *func_data,
- struct diff_data *left,
- struct diff_data *right);
-
-extern int diff_atomize_text_by_line(void *func_data,
- struct diff_data *left,
- struct diff_data *right);
-
-struct diff_algo_config;
-typedef int (*diff_algo_impl_t)(
- const struct diff_algo_config *algo_config, struct diff_state *state);
-
-/* Form a result with all left-side removed and all right-side added, i.e. no
- * actual diff algorithm involved. */
-int diff_algo_none(const struct diff_algo_config *algo_config,
- struct diff_state *state);
-
-/* Myers Diff tracing from the start all the way through to the end, requiring
- * quadratic amounts of memory. This can fail if the required space surpasses
- * algo_config->permitted_state_size. */
-extern int diff_algo_myers(const struct diff_algo_config *algo_config,
- struct diff_state *state);
-
-/* Myers "Divide et Impera": tracing forwards from the start and backwards from
- * the end to find a midpoint that divides the problem into smaller chunks.
- * Requires only linear amounts of memory. */
-extern int diff_algo_myers_divide(
- const struct diff_algo_config *algo_config, struct diff_state *state);
-
-/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For
- * very specific scenarios, it may lead to a complete diff result by itself, but
- * needs a fallback algo to solve chunks that don't have common-unique atoms. */
-extern int diff_algo_patience(
- const struct diff_algo_config *algo_config, struct diff_state *state);
-
-/* Diff algorithms to use, possibly nested. For example:
- *
- * struct diff_algo_config myers, patience, myers_divide;
- *
- * myers = (struct diff_algo_config){
- * .impl = diff_algo_myers,
- * .permitted_state_size = 32 * 1024 * 1024,
- * // When too large, do diff_algo_patience:
- * .fallback_algo = &patience,
- * };
- *
- * const struct diff_algo_config patience = (struct diff_algo_config){
- * .impl = diff_algo_patience,
- * // After subdivision, do Patience again:
- * .inner_algo = &patience,
- * // If subdivision failed, do Myers Divide et Impera:
- * .fallback_algo = &myers_then_myers_divide,
- * };
- *
- * const struct diff_algo_config myers_divide = (struct diff_algo_config){
- * .impl = diff_algo_myers_divide,
- * // When division succeeded, start from the top:
- * .inner_algo = &myers_then_myers_divide,
- * // (fallback_algo = NULL implies diff_algo_none).
- * };
- * struct diff_config config = {
- * .algo = &myers,
- * ...
- * };
- * diff_main(&config, ...);
- */
-struct diff_algo_config {
- diff_algo_impl_t impl;
-
- /* Fail this algo if it would use more than this amount of memory, and
- * instead use fallback_algo (diff_algo_myers). permitted_state_size ==
- * 0 means no limitation. */
- size_t permitted_state_size;
-
- /* For algorithms that divide into smaller chunks, use this algorithm to
- * solve the divided chunks. */
- const struct diff_algo_config *inner_algo;
-
- /* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large
- * state, or diff_algo_patience can't find any common-unique atoms),
- * then use this algorithm instead. */
- const struct diff_algo_config *fallback_algo;
-};
-
-struct diff_config {
- diff_atomize_func_t atomize_func;
- void *atomize_func_data;
-
- const struct diff_algo_config *algo;
-
- /* How deep to step into subdivisions of a source file, a paranoia /
- * safety measure to guard against infinite loops through diff
- * algorithms. When the maximum recursion is reached, employ
- * diff_algo_none (i.e. remove all left atoms and add all right atoms).
- */
- unsigned int max_recursion_depth;
-};
-
-struct diff_result *diff_main(const struct diff_config *config,
- FILE *left_f, const uint8_t *left_data,
- off_t left_len,
- FILE *right_f, const uint8_t *right_data,
- off_t right_len, int diff_flags);
-void diff_result_free(struct diff_result *result);
blob - 20767052a7fd8ba51f93706796155ffdbfb04a82 (mode 644)
blob + /dev/null
--- include/diff/diff_output.h
+++ /dev/null
-/* Diff output generators and invocation shims. */
-/*
- * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-struct diff_input_info {
- const char *left_path;
- const char *right_path;
-};
-
-struct diff_output_info {
- /*
- * Byte offset to each line in the generated output file.
- * The total number of lines in the file is line_offsets.len - 1.
- * The last offset in this array corresponds to end-of-file.
- */
- ARRAYLIST(off_t) line_offsets;
-};
-
-void diff_output_info_free(struct diff_output_info *output_info);
-
-struct diff_chunk_context {
- struct diff_range chunk;
- struct diff_range left, right;
-};
-
-int diff_output_plain(struct diff_output_info **output_info, FILE *dest,
- const struct diff_input_info *info,
- const struct diff_result *result);
-int diff_output_unidiff(struct diff_output_info **output_info,
- FILE *dest, const struct diff_input_info *info,
- const struct diff_result *result,
- unsigned int context_lines);
-int diff_chunk_get_left_start(const struct diff_chunk *c,
- const struct diff_result *r,
- int context_lines);
-int diff_chunk_get_left_end(const struct diff_chunk *c,
- const struct diff_result *r,
- int context_lines);
-int diff_chunk_get_right_start(const struct diff_chunk *c,
- const struct diff_result *r,
- int context_lines);
-int diff_chunk_get_right_end(const struct diff_chunk *c,
- const struct diff_result *r,
- int context_lines);
-void diff_chunk_context_get(struct diff_chunk_context *cc,
- const struct diff_result *r,
- int chunk_idx, int context_lines);
-struct diff_output_unidiff_state;
-struct diff_output_unidiff_state *diff_output_unidiff_state_alloc(void);
-void diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state);
-void diff_output_unidiff_state_free(struct diff_output_unidiff_state *state);
-int diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
- struct diff_output_unidiff_state *state,
- const struct diff_input_info *info,
- const struct diff_result *result,
- const struct diff_chunk_context *cc);
-int diff_output_chunk_left_version(struct diff_output_info **output_info,
- FILE *dest,
- const struct diff_input_info *info,
- const struct diff_result *result,
- const struct diff_chunk_context *cc);
-int diff_output_chunk_right_version(struct diff_output_info **output_info,
- FILE *dest,
- const struct diff_input_info *info,
- const struct diff_result *result,
- const struct diff_chunk_context *cc);
blob - /dev/null
blob + 7aaab19f0a58317289881be832a03ec59f67a0ff (mode 644)
--- /dev/null
+++ include/arraylist.h
+/* Auto-reallocating array for arbitrary member types. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Usage:
+ *
+ * ARRAYLIST(any_type_t) list;
+ * // OR
+ * typedef ARRAYLIST(any_type_t) any_type_list_t;
+ * any_type_list_t list;
+ *
+ * // pass the number of (at first unused) members to add on each realloc:
+ * ARRAYLIST_INIT(list, 128);
+ * any_type_t *x;
+ * while (bar) {
+ * // This enlarges the allocated array as needed;
+ * // list.head may change due to realloc:
+ * ARRAYLIST_ADD(x, list);
+ * if (!x)
+ * return ENOMEM;
+ * *x = random_foo_value;
+ * }
+ * for (i = 0; i < list.len; i++)
+ * printf("%s", foo_to_str(list.head[i]));
+ * ARRAYLIST_FREE(list);
+ */
+#define ARRAYLIST(MEMBER_TYPE) \
+ struct { \
+ MEMBER_TYPE *head; \
+ MEMBER_TYPE *p; \
+ unsigned int len; \
+ unsigned int allocated; \
+ unsigned int alloc_blocksize; \
+ }
+
+#define ARRAYLIST_INIT(ARRAY_LIST, ALLOC_BLOCKSIZE) do { \
+ (ARRAY_LIST).head = NULL; \
+ (ARRAY_LIST).len = 0; \
+ (ARRAY_LIST).allocated = 0; \
+ (ARRAY_LIST).alloc_blocksize = ALLOC_BLOCKSIZE; \
+ } while(0)
+
+#define ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST) do { \
+ if ((ARRAY_LIST).len && !(ARRAY_LIST).allocated) { \
+ NEW_ITEM_P = NULL; \
+ break; \
+ } \
+ if ((ARRAY_LIST).head == NULL \
+ || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
+ (ARRAY_LIST).p = recallocarray((ARRAY_LIST).head, \
+ (ARRAY_LIST).len, \
+ (ARRAY_LIST).allocated + \
+ ((ARRAY_LIST).alloc_blocksize ? : 8), \
+ sizeof(*(ARRAY_LIST).head)); \
+ if ((ARRAY_LIST).p == NULL) { \
+ NEW_ITEM_P = NULL; \
+ break; \
+ } \
+ (ARRAY_LIST).allocated += \
+ (ARRAY_LIST).alloc_blocksize ? : 8; \
+ (ARRAY_LIST).head = (ARRAY_LIST).p; \
+ (ARRAY_LIST).p = NULL; \
+ }; \
+ if ((ARRAY_LIST).head == NULL \
+ || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
+ NEW_ITEM_P = NULL; \
+ break; \
+ } \
+ (NEW_ITEM_P) = &(ARRAY_LIST).head[(ARRAY_LIST).len]; \
+ (ARRAY_LIST).len++; \
+ } while (0)
+
+#define ARRAYLIST_INSERT(NEW_ITEM_P, ARRAY_LIST, AT_IDX) do { \
+ ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST); \
+ if ((NEW_ITEM_P) && (AT_IDX) < (ARRAY_LIST).len) \
+ memmove(&(ARRAY_LIST).head[(AT_IDX) + 1], \
+ &(ARRAY_LIST).head[AT_IDX], \
+ ((ARRAY_LIST).len - (AT_IDX)) \
+ * sizeof(*(ARRAY_LIST).head)); \
+ } while (0)
+
+#define ARRAYLIST_CLEAR(ARRAY_LIST) \
+ (ARRAY_LIST).len = 0
+
+#define ARRAYLIST_FREE(ARRAY_LIST) \
+ do { \
+ if ((ARRAY_LIST).head && (ARRAY_LIST).allocated) \
+ free((ARRAY_LIST).head); \
+ ARRAYLIST_INIT(ARRAY_LIST, (ARRAY_LIST).alloc_blocksize); \
+ } while(0)
blob - /dev/null
blob + 9479d3112b7ca78b893d54f7948605a4e532da35 (mode 644)
--- /dev/null
+++ include/diff_main.h
+/* Generic infrastructure to implement various diff algorithms. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+struct diff_range {
+ int start;
+ int end;
+};
+
+/* List of all possible return codes of a diff invocation. */
+#define DIFF_RC_USE_DIFF_ALGO_FALLBACK -1
+#define DIFF_RC_OK 0
+/* Any positive return values are errno values from sys/errno.h */
+
+struct diff_atom;
+
+/* For each file, there is a "root" struct diff_data referencing the entire
+ * file, which the atoms are parsed from. In recursion of diff algorithm, there
+ * may be "child" struct diff_data only referencing a subsection of the file,
+ * re-using the atoms parsing. For "root" structs, atoms_allocated will be
+ * nonzero, indicating that the array of atoms is owned by that struct. For
+ * "child" structs, atoms_allocated == 0, to indicate that the struct is
+ * referencing a subset of atoms. */
+struct diff_data {
+ FILE *f; /* if root diff_data and not memory-mapped */
+ off_t pos; /* if not memory-mapped */
+ const uint8_t *data; /* if memory-mapped */
+ off_t len;
+
+ ARRAYLIST(struct diff_atom) atoms;
+ struct diff_data *root;
+
+ int diff_flags;
+};
+
+#define DIFF_FLAG_IGNORE_WHITESPACE 0x00000001
+
+void diff_data_free(struct diff_data *diff_data);
+
+struct diff_chunk;
+typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
+
+struct diff_result {
+ int rc;
+ struct diff_data left;
+ struct diff_data right;
+ diff_chunk_arraylist_t chunks;
+};
+
+struct diff_state;
+
+/* Signature of a utility function to divide both source files into diff atoms.
+ * It is possible that a (future) algorithm requires both source files to decide
+ * on atom split points, hence this gets both left and right to atomize at the
+ * same time.
+ * An example is diff_atomize_text_by_line() in diff_atomize_text.c.
+ *
+ * func_data: context pointer (free to be used by implementation).
+ * left: struct diff_data with left->data and left->len already set up, and
+ * left->atoms to be created.
+ * right: struct diff_data with right->data and right->len already set up, and
+ * right->atoms to be created.
+ */
+typedef int (*diff_atomize_func_t)(void *func_data,
+ struct diff_data *left,
+ struct diff_data *right);
+
+extern int diff_atomize_text_by_line(void *func_data,
+ struct diff_data *left,
+ struct diff_data *right);
+
+struct diff_algo_config;
+typedef int (*diff_algo_impl_t)(
+ const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Form a result with all left-side removed and all right-side added, i.e. no
+ * actual diff algorithm involved. */
+int diff_algo_none(const struct diff_algo_config *algo_config,
+ struct diff_state *state);
+
+/* Myers Diff tracing from the start all the way through to the end, requiring
+ * quadratic amounts of memory. This can fail if the required space surpasses
+ * algo_config->permitted_state_size. */
+extern int diff_algo_myers(const struct diff_algo_config *algo_config,
+ struct diff_state *state);
+
+/* Myers "Divide et Impera": tracing forwards from the start and backwards from
+ * the end to find a midpoint that divides the problem into smaller chunks.
+ * Requires only linear amounts of memory. */
+extern int diff_algo_myers_divide(
+ const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For
+ * very specific scenarios, it may lead to a complete diff result by itself, but
+ * needs a fallback algo to solve chunks that don't have common-unique atoms. */
+extern int diff_algo_patience(
+ const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Diff algorithms to use, possibly nested. For example:
+ *
+ * struct diff_algo_config myers, patience, myers_divide;
+ *
+ * myers = (struct diff_algo_config){
+ * .impl = diff_algo_myers,
+ * .permitted_state_size = 32 * 1024 * 1024,
+ * // When too large, do diff_algo_patience:
+ * .fallback_algo = &patience,
+ * };
+ *
+ * const struct diff_algo_config patience = (struct diff_algo_config){
+ * .impl = diff_algo_patience,
+ * // After subdivision, do Patience again:
+ * .inner_algo = &patience,
+ * // If subdivision failed, do Myers Divide et Impera:
+ * .fallback_algo = &myers_then_myers_divide,
+ * };
+ *
+ * const struct diff_algo_config myers_divide = (struct diff_algo_config){
+ * .impl = diff_algo_myers_divide,
+ * // When division succeeded, start from the top:
+ * .inner_algo = &myers_then_myers_divide,
+ * // (fallback_algo = NULL implies diff_algo_none).
+ * };
+ * struct diff_config config = {
+ * .algo = &myers,
+ * ...
+ * };
+ * diff_main(&config, ...);
+ */
+struct diff_algo_config {
+ diff_algo_impl_t impl;
+
+ /* Fail this algo if it would use more than this amount of memory, and
+ * instead use fallback_algo (diff_algo_myers). permitted_state_size ==
+ * 0 means no limitation. */
+ size_t permitted_state_size;
+
+ /* For algorithms that divide into smaller chunks, use this algorithm to
+ * solve the divided chunks. */
+ const struct diff_algo_config *inner_algo;
+
+ /* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large
+ * state, or diff_algo_patience can't find any common-unique atoms),
+ * then use this algorithm instead. */
+ const struct diff_algo_config *fallback_algo;
+};
+
+struct diff_config {
+ diff_atomize_func_t atomize_func;
+ void *atomize_func_data;
+
+ const struct diff_algo_config *algo;
+
+ /* How deep to step into subdivisions of a source file, a paranoia /
+ * safety measure to guard against infinite loops through diff
+ * algorithms. When the maximum recursion is reached, employ
+ * diff_algo_none (i.e. remove all left atoms and add all right atoms).
+ */
+ unsigned int max_recursion_depth;
+};
+
+struct diff_result *diff_main(const struct diff_config *config,
+ FILE *left_f, const uint8_t *left_data,
+ off_t left_len,
+ FILE *right_f, const uint8_t *right_data,
+ off_t right_len, int diff_flags);
+void diff_result_free(struct diff_result *result);
blob - /dev/null
blob + 20767052a7fd8ba51f93706796155ffdbfb04a82 (mode 644)
--- /dev/null
+++ include/diff_output.h
+/* Diff output generators and invocation shims. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+struct diff_input_info {
+ const char *left_path;
+ const char *right_path;
+};
+
+struct diff_output_info {
+ /*
+ * Byte offset to each line in the generated output file.
+ * The total number of lines in the file is line_offsets.len - 1.
+ * The last offset in this array corresponds to end-of-file.
+ */
+ ARRAYLIST(off_t) line_offsets;
+};
+
+void diff_output_info_free(struct diff_output_info *output_info);
+
+struct diff_chunk_context {
+ struct diff_range chunk;
+ struct diff_range left, right;
+};
+
+int diff_output_plain(struct diff_output_info **output_info, FILE *dest,
+ const struct diff_input_info *info,
+ const struct diff_result *result);
+int diff_output_unidiff(struct diff_output_info **output_info,
+ FILE *dest, const struct diff_input_info *info,
+ const struct diff_result *result,
+ unsigned int context_lines);
+int diff_chunk_get_left_start(const struct diff_chunk *c,
+ const struct diff_result *r,
+ int context_lines);
+int diff_chunk_get_left_end(const struct diff_chunk *c,
+ const struct diff_result *r,
+ int context_lines);
+int diff_chunk_get_right_start(const struct diff_chunk *c,
+ const struct diff_result *r,
+ int context_lines);
+int diff_chunk_get_right_end(const struct diff_chunk *c,
+ const struct diff_result *r,
+ int context_lines);
+void diff_chunk_context_get(struct diff_chunk_context *cc,
+ const struct diff_result *r,
+ int chunk_idx, int context_lines);
+struct diff_output_unidiff_state;
+struct diff_output_unidiff_state *diff_output_unidiff_state_alloc(void);
+void diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state);
+void diff_output_unidiff_state_free(struct diff_output_unidiff_state *state);
+int diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
+ struct diff_output_unidiff_state *state,
+ const struct diff_input_info *info,
+ const struct diff_result *result,
+ const struct diff_chunk_context *cc);
+int diff_output_chunk_left_version(struct diff_output_info **output_info,
+ FILE *dest,
+ const struct diff_input_info *info,
+ const struct diff_result *result,
+ const struct diff_chunk_context *cc);
+int diff_output_chunk_right_version(struct diff_output_info **output_info,
+ FILE *dest,
+ const struct diff_input_info *info,
+ const struct diff_result *result,
+ const struct diff_chunk_context *cc);
blob - 1b5c256b8e80cab7a4ed7e563349ee1ee5e2dd79
blob + 8bc527e0f06aeeca1e930f349552b5503cdae9c8
--- lib/GNUmakefile
+++ lib/GNUmakefile
CFLAGS += -fsanitize=address -fsanitize=undefined -g -O3
CFLAGS += -Wstrict-prototypes -Wunused-variable
-%.o: %.c ./*.h ../include/diff/*.h
+%.o: %.c ./*.h ../include/*.h
gcc $(CFLAGS) -I../include -o $@ -c $<
.PHONY: clean
blob - 438e9c4e73f56cd2f0eb9d2cfcf6766893b10883
blob + 3eac2d385875ae4eb6ed7bbf3a373422e4ff27eb
--- lib/diff_atomize_text.c
+++ lib/diff_atomize_text.c
#include <stdlib.h>
#include <unistd.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
+
#include "diff_internal.h"
#include "diff_debug.h"
blob - 6b5c37f50b58796347ffae9293e99c30adc72549
blob + 65e95cd2003475de86231ea9a9e0e28b7251e3e0
--- lib/diff_main.c
+++ lib/diff_main.c
#include <assert.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
#include "diff_internal.h"
#include "diff_debug.h"
blob - a21aeee883c98dd77c1a1b5a8c58d46654e0e2f6
blob + 1fbd6c606a5b3ece2a185f342bba36de076fd8eb
--- lib/diff_myers.c
+++ lib/diff_myers.c
#include <stdio.h>
#include <errno.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
#include "diff_internal.h"
#include "diff_debug.h"
blob - 90ed1983d9b6b7ad3005b3a3b77230d52c12ba2d
blob + 7934dc74e738ad06ae99c293b07fecd3feb51dd3
--- lib/diff_output.c
+++ lib/diff_output.c
#include <stdlib.h>
#include <unistd.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
#include "diff_internal.h"
blob - 76f3831e2f6844dc51a9dc5e98f69b52ac40b18e
blob + cc478ba4dd635825a2cf8235364f9ddcebb2f6aa
--- lib/diff_output_plain.c
+++ lib/diff_output_plain.c
#include <stdbool.h>
#include <stdlib.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
#include "diff_internal.h"
blob - fc5b69b7af9741fe8308060d890d82c1eeb4c8c4
blob + da3c98842c7e864025e83544cc9206ac232c8289
--- lib/diff_output_unidiff.c
+++ lib/diff_output_unidiff.c
#include <stdio.h>
#include <stdlib.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
#include "diff_internal.h"
#include "diff_debug.h"
blob - 6922a7308a35a266bfc5e71aea83f2abd2bd70aa
blob + cccfafd597fa145d26f94990a65acb0a25287205
--- lib/diff_patience.c
+++ lib/diff_patience.c
#include <stdio.h>
#include <stdlib.h>
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
#include "diff_internal.h"
#include "diff_debug.h"