Commit Diff


commit - fe8af0d6c0a2ba7f1c50f0b88cd7e13d784d2e23
commit + 1dfba0555efd6b616811906d011f96945be90dcc
blob - 598d7b74a05865a500826f9b346fd3933781415f
blob + 195e5bc946b99bc4518bb657bb1a31a1a213e725
--- diff/GNUmakefile
+++ diff/GNUmakefile
@@ -10,7 +10,7 @@ CFLAGS+=       -I$(CURDIR)/../compat/include
 diff: $(SRCS) $(LIB)
 	gcc $(CFLAGS) -I../include -o $@ $^
 
-../lib/libdiff.a: ../lib/*.[hc] ../include/diff/*.h
+../lib/libdiff.a: ../lib/*.[hc] ../include/*.h
 	$(MAKE) -C ../lib
 
 .PHONY: clean
blob - 9e288bba939bf3e9c8595855f3933fd37a309fbb
blob + ff6ba3cd5842b83bb5c0d79e6b539956588d6cab
--- diff/diff.c
+++ diff/diff.c
@@ -29,9 +29,9 @@
 #include <string.h>
 #include <unistd.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
 
 __dead void	 usage(void);
 int		 diffreg(char *, char *, bool, bool, int);
blob - 7aaab19f0a58317289881be832a03ec59f67a0ff (mode 644)
blob + /dev/null
--- include/diff/arraylist.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* Auto-reallocating array for arbitrary member types. */
-/*
- * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/* Usage:
- *
- * ARRAYLIST(any_type_t) list;
- * // OR
- * typedef ARRAYLIST(any_type_t) any_type_list_t;
- * any_type_list_t list;
- *
- * // pass the number of (at first unused) members to add on each realloc:
- * ARRAYLIST_INIT(list, 128);
- * any_type_t *x;
- * while (bar) {
- *         // This enlarges the allocated array as needed;
- *         // list.head may change due to realloc:
- *         ARRAYLIST_ADD(x, list);
- *         if (!x)
- *                 return ENOMEM; 
- *         *x = random_foo_value;
- * }
- * for (i = 0; i < list.len; i++)
- *         printf("%s", foo_to_str(list.head[i]));
- * ARRAYLIST_FREE(list);
- */
-#define ARRAYLIST(MEMBER_TYPE) \
-	struct { \
-		MEMBER_TYPE *head; \
-		MEMBER_TYPE *p; \
-		unsigned int len; \
-		unsigned int allocated; \
-		unsigned int alloc_blocksize; \
-	}
-
-#define ARRAYLIST_INIT(ARRAY_LIST, ALLOC_BLOCKSIZE) do { \
-		(ARRAY_LIST).head = NULL; \
-		(ARRAY_LIST).len = 0; \
-		(ARRAY_LIST).allocated = 0; \
-		(ARRAY_LIST).alloc_blocksize = ALLOC_BLOCKSIZE; \
-	} while(0)
-
-#define ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST) do { \
-		if ((ARRAY_LIST).len && !(ARRAY_LIST).allocated) { \
-			NEW_ITEM_P = NULL; \
-			break; \
-		} \
-		if ((ARRAY_LIST).head == NULL \
-		    || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
-			(ARRAY_LIST).p = recallocarray((ARRAY_LIST).head, \
-				(ARRAY_LIST).len, \
-				(ARRAY_LIST).allocated + \
-				((ARRAY_LIST).alloc_blocksize ? : 8), \
-				sizeof(*(ARRAY_LIST).head)); \
-			if ((ARRAY_LIST).p == NULL) { \
-				NEW_ITEM_P = NULL; \
-				break; \
-			} \
-			(ARRAY_LIST).allocated += \
-				(ARRAY_LIST).alloc_blocksize ? : 8; \
-			(ARRAY_LIST).head = (ARRAY_LIST).p; \
-			(ARRAY_LIST).p = NULL; \
-		}; \
-		if ((ARRAY_LIST).head == NULL \
-		    || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
-			NEW_ITEM_P = NULL; \
-			break; \
-		} \
-		(NEW_ITEM_P) = &(ARRAY_LIST).head[(ARRAY_LIST).len]; \
-		(ARRAY_LIST).len++; \
-	} while (0)
-
-#define ARRAYLIST_INSERT(NEW_ITEM_P, ARRAY_LIST, AT_IDX) do { \
-		ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST); \
-		if ((NEW_ITEM_P) && (AT_IDX) < (ARRAY_LIST).len) \
-			memmove(&(ARRAY_LIST).head[(AT_IDX) + 1], \
-				&(ARRAY_LIST).head[AT_IDX], \
-				((ARRAY_LIST).len - (AT_IDX)) \
-					* sizeof(*(ARRAY_LIST).head)); \
-	} while (0)
-
-#define ARRAYLIST_CLEAR(ARRAY_LIST) \
-	(ARRAY_LIST).len = 0
-
-#define ARRAYLIST_FREE(ARRAY_LIST) \
-	do { \
-		if ((ARRAY_LIST).head && (ARRAY_LIST).allocated) \
-			free((ARRAY_LIST).head); \
-		ARRAYLIST_INIT(ARRAY_LIST, (ARRAY_LIST).alloc_blocksize); \
-	} while(0)
blob - 9479d3112b7ca78b893d54f7948605a4e532da35 (mode 644)
blob + /dev/null
--- include/diff/diff_main.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* Generic infrastructure to implement various diff algorithms. */
-/*
- * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-struct diff_range {
-	int start;
-	int end;
-};
-
-/* List of all possible return codes of a diff invocation. */
-#define DIFF_RC_USE_DIFF_ALGO_FALLBACK	-1
-#define DIFF_RC_OK			0
-/* Any positive return values are errno values from sys/errno.h */
-
-struct diff_atom;
-
-/* For each file, there is a "root" struct diff_data referencing the entire
- * file, which the atoms are parsed from. In recursion of diff algorithm, there
- * may be "child" struct diff_data only referencing a subsection of the file,
- * re-using the atoms parsing. For "root" structs, atoms_allocated will be
- * nonzero, indicating that the array of atoms is owned by that struct. For
- * "child" structs, atoms_allocated == 0, to indicate that the struct is
- * referencing a subset of atoms. */
-struct diff_data {
-	FILE *f;		/* if root diff_data and not memory-mapped */
-	off_t pos;		/* if not memory-mapped */
-	const uint8_t *data;	/* if memory-mapped */
-	off_t len;
-
-	ARRAYLIST(struct diff_atom) atoms;
-	struct diff_data *root;
-
-	int diff_flags;
-};
-
-#define DIFF_FLAG_IGNORE_WHITESPACE	0x00000001
-
-void diff_data_free(struct diff_data *diff_data);
-
-struct diff_chunk;
-typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
-
-struct diff_result {
-	int rc;
-	struct diff_data left;
-	struct diff_data right;
-	diff_chunk_arraylist_t chunks;
-};
-
-struct diff_state;
-
-/* Signature of a utility function to divide both source files into diff atoms.
- * It is possible that a (future) algorithm requires both source files to decide
- * on atom split points, hence this gets both left and right to atomize at the
- * same time.
- * An example is diff_atomize_text_by_line() in diff_atomize_text.c.
- *
- * func_data: context pointer (free to be used by implementation).
- * left: struct diff_data with left->data and left->len already set up, and
- * left->atoms to be created.
- * right: struct diff_data with right->data and right->len already set up, and
- * right->atoms to be created.
- */
-typedef int (*diff_atomize_func_t)(void *func_data,
-					    struct diff_data *left,
-					    struct diff_data *right);
-
-extern int diff_atomize_text_by_line(void *func_data,
-					      struct diff_data *left,
-					      struct diff_data *right);
-
-struct diff_algo_config;
-typedef int (*diff_algo_impl_t)(
-	const struct diff_algo_config *algo_config, struct diff_state *state);
-
-/* Form a result with all left-side removed and all right-side added, i.e. no
- * actual diff algorithm involved. */
-int diff_algo_none(const struct diff_algo_config *algo_config,
-			    struct diff_state *state);
-
-/* Myers Diff tracing from the start all the way through to the end, requiring
- * quadratic amounts of memory. This can fail if the required space surpasses
- * algo_config->permitted_state_size. */
-extern int diff_algo_myers(const struct diff_algo_config *algo_config,
-				    struct diff_state *state);
-
-/* Myers "Divide et Impera": tracing forwards from the start and backwards from
- * the end to find a midpoint that divides the problem into smaller chunks.
- * Requires only linear amounts of memory. */
-extern int diff_algo_myers_divide(
-	const struct diff_algo_config *algo_config, struct diff_state *state);
-
-/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For
- * very specific scenarios, it may lead to a complete diff result by itself, but
- * needs a fallback algo to solve chunks that don't have common-unique atoms. */
-extern int diff_algo_patience(
-	const struct diff_algo_config *algo_config, struct diff_state *state);
-
-/* Diff algorithms to use, possibly nested. For example:
- *
- * struct diff_algo_config myers, patience, myers_divide;
- *
- * myers = (struct diff_algo_config){
- *         .impl = diff_algo_myers,
- *         .permitted_state_size = 32 * 1024 * 1024,
- *         // When too large, do diff_algo_patience:
- *         .fallback_algo = &patience,
- * };
- *
- * const struct diff_algo_config patience = (struct diff_algo_config){
- * 	.impl = diff_algo_patience,
- * 	// After subdivision, do Patience again:
- * 	.inner_algo = &patience,
- * 	// If subdivision failed, do Myers Divide et Impera:
- * 	.fallback_algo = &myers_then_myers_divide,
- * };
- * 
- * const struct diff_algo_config myers_divide = (struct diff_algo_config){
- * 	.impl = diff_algo_myers_divide,
- * 	// When division succeeded, start from the top:
- * 	.inner_algo = &myers_then_myers_divide,
- * 	// (fallback_algo = NULL implies diff_algo_none).
- * };
- * struct diff_config config = {
- *         .algo = &myers,
- *         ...
- * };
- * diff_main(&config, ...);
- */
-struct diff_algo_config {
-	diff_algo_impl_t impl;
-
-	/* Fail this algo if it would use more than this amount of memory, and
-	 * instead use fallback_algo (diff_algo_myers). permitted_state_size ==
-	 * 0 means no limitation. */
-	size_t permitted_state_size;
-
-	/* For algorithms that divide into smaller chunks, use this algorithm to
-	 * solve the divided chunks. */
-	const struct diff_algo_config *inner_algo;
-
-	/* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large
-	 * state, or diff_algo_patience can't find any common-unique atoms),
-	 * then use this algorithm instead. */
-	const struct diff_algo_config *fallback_algo;
-};
-
-struct diff_config {
-	diff_atomize_func_t atomize_func;
-	void *atomize_func_data;
-
-	const struct diff_algo_config *algo;
-
-	/* How deep to step into subdivisions of a source file, a paranoia /
-	 * safety measure to guard against infinite loops through diff
-	 * algorithms. When the maximum recursion is reached, employ
-	 * diff_algo_none (i.e. remove all left atoms and add all right atoms).
-	 */
-	unsigned int max_recursion_depth;
-};
-
-struct diff_result *diff_main(const struct diff_config *config,
-			      FILE *left_f, const uint8_t *left_data,
-			      off_t left_len,
-			      FILE *right_f, const uint8_t *right_data,
-			      off_t right_len, int diff_flags);
-void diff_result_free(struct diff_result *result);
blob - 20767052a7fd8ba51f93706796155ffdbfb04a82 (mode 644)
blob + /dev/null
--- include/diff/diff_output.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Diff output generators and invocation shims. */
-/*
- * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-struct diff_input_info {
-	const char *left_path;
-	const char *right_path;
-};
-
-struct diff_output_info {
-	/*
-	 * Byte offset to each line in the generated output file.
-	 * The total number of lines in the file is line_offsets.len - 1.
-	 * The last offset in this array corresponds to end-of-file.
-	 */
-	ARRAYLIST(off_t) line_offsets;
-};
-
-void diff_output_info_free(struct diff_output_info *output_info);
-
-struct diff_chunk_context {
-	struct diff_range chunk;
-	struct diff_range left, right;
-};
-
-int diff_output_plain(struct diff_output_info **output_info, FILE *dest,
-			const struct diff_input_info *info,
-			const struct diff_result *result);
-int diff_output_unidiff(struct diff_output_info **output_info,
-			FILE *dest, const struct diff_input_info *info,
-			const struct diff_result *result,
-			unsigned int context_lines);
-int diff_chunk_get_left_start(const struct diff_chunk *c,
-			      const struct diff_result *r,
-			      int context_lines);
-int diff_chunk_get_left_end(const struct diff_chunk *c,
-			    const struct diff_result *r,
-			    int context_lines);
-int diff_chunk_get_right_start(const struct diff_chunk *c,
-			       const struct diff_result *r,
-			       int context_lines);
-int diff_chunk_get_right_end(const struct diff_chunk *c,
-			     const struct diff_result *r,
-			     int context_lines);
-void diff_chunk_context_get(struct diff_chunk_context *cc,
-				 const struct diff_result *r,
-				 int chunk_idx, int context_lines);
-struct diff_output_unidiff_state;
-struct diff_output_unidiff_state *diff_output_unidiff_state_alloc(void);
-void diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state);
-void diff_output_unidiff_state_free(struct diff_output_unidiff_state *state);
-int diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
-			  struct diff_output_unidiff_state *state,
-			  const struct diff_input_info *info,
-			  const struct diff_result *result,
-			  const struct diff_chunk_context *cc);
-int diff_output_chunk_left_version(struct diff_output_info **output_info,
-			       FILE *dest,
-			       const struct diff_input_info *info,
-			       const struct diff_result *result,
-			       const struct diff_chunk_context *cc);
-int diff_output_chunk_right_version(struct diff_output_info **output_info,
-				FILE *dest,
-				const struct diff_input_info *info,
-				const struct diff_result *result,
-				const struct diff_chunk_context *cc);
blob - /dev/null
blob + 7aaab19f0a58317289881be832a03ec59f67a0ff (mode 644)
--- /dev/null
+++ include/arraylist.h
@@ -0,0 +1,103 @@
+/* Auto-reallocating array for arbitrary member types. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Usage:
+ *
+ * ARRAYLIST(any_type_t) list;
+ * // OR
+ * typedef ARRAYLIST(any_type_t) any_type_list_t;
+ * any_type_list_t list;
+ *
+ * // pass the number of (at first unused) members to add on each realloc:
+ * ARRAYLIST_INIT(list, 128);
+ * any_type_t *x;
+ * while (bar) {
+ *         // This enlarges the allocated array as needed;
+ *         // list.head may change due to realloc:
+ *         ARRAYLIST_ADD(x, list);
+ *         if (!x)
+ *                 return ENOMEM; 
+ *         *x = random_foo_value;
+ * }
+ * for (i = 0; i < list.len; i++)
+ *         printf("%s", foo_to_str(list.head[i]));
+ * ARRAYLIST_FREE(list);
+ */
+#define ARRAYLIST(MEMBER_TYPE) \
+	struct { \
+		MEMBER_TYPE *head; \
+		MEMBER_TYPE *p; \
+		unsigned int len; \
+		unsigned int allocated; \
+		unsigned int alloc_blocksize; \
+	}
+
+#define ARRAYLIST_INIT(ARRAY_LIST, ALLOC_BLOCKSIZE) do { \
+		(ARRAY_LIST).head = NULL; \
+		(ARRAY_LIST).len = 0; \
+		(ARRAY_LIST).allocated = 0; \
+		(ARRAY_LIST).alloc_blocksize = ALLOC_BLOCKSIZE; \
+	} while(0)
+
+#define ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST) do { \
+		if ((ARRAY_LIST).len && !(ARRAY_LIST).allocated) { \
+			NEW_ITEM_P = NULL; \
+			break; \
+		} \
+		if ((ARRAY_LIST).head == NULL \
+		    || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
+			(ARRAY_LIST).p = recallocarray((ARRAY_LIST).head, \
+				(ARRAY_LIST).len, \
+				(ARRAY_LIST).allocated + \
+				((ARRAY_LIST).alloc_blocksize ? : 8), \
+				sizeof(*(ARRAY_LIST).head)); \
+			if ((ARRAY_LIST).p == NULL) { \
+				NEW_ITEM_P = NULL; \
+				break; \
+			} \
+			(ARRAY_LIST).allocated += \
+				(ARRAY_LIST).alloc_blocksize ? : 8; \
+			(ARRAY_LIST).head = (ARRAY_LIST).p; \
+			(ARRAY_LIST).p = NULL; \
+		}; \
+		if ((ARRAY_LIST).head == NULL \
+		    || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
+			NEW_ITEM_P = NULL; \
+			break; \
+		} \
+		(NEW_ITEM_P) = &(ARRAY_LIST).head[(ARRAY_LIST).len]; \
+		(ARRAY_LIST).len++; \
+	} while (0)
+
+#define ARRAYLIST_INSERT(NEW_ITEM_P, ARRAY_LIST, AT_IDX) do { \
+		ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST); \
+		if ((NEW_ITEM_P) && (AT_IDX) < (ARRAY_LIST).len) \
+			memmove(&(ARRAY_LIST).head[(AT_IDX) + 1], \
+				&(ARRAY_LIST).head[AT_IDX], \
+				((ARRAY_LIST).len - (AT_IDX)) \
+					* sizeof(*(ARRAY_LIST).head)); \
+	} while (0)
+
+#define ARRAYLIST_CLEAR(ARRAY_LIST) \
+	(ARRAY_LIST).len = 0
+
+#define ARRAYLIST_FREE(ARRAY_LIST) \
+	do { \
+		if ((ARRAY_LIST).head && (ARRAY_LIST).allocated) \
+			free((ARRAY_LIST).head); \
+		ARRAYLIST_INIT(ARRAY_LIST, (ARRAY_LIST).alloc_blocksize); \
+	} while(0)
blob - /dev/null
blob + 9479d3112b7ca78b893d54f7948605a4e532da35 (mode 644)
--- /dev/null
+++ include/diff_main.h
@@ -0,0 +1,180 @@
+/* Generic infrastructure to implement various diff algorithms. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+struct diff_range {
+	int start;
+	int end;
+};
+
+/* List of all possible return codes of a diff invocation. */
+#define DIFF_RC_USE_DIFF_ALGO_FALLBACK	-1
+#define DIFF_RC_OK			0
+/* Any positive return values are errno values from sys/errno.h */
+
+struct diff_atom;
+
+/* For each file, there is a "root" struct diff_data referencing the entire
+ * file, which the atoms are parsed from. In recursion of diff algorithm, there
+ * may be "child" struct diff_data only referencing a subsection of the file,
+ * re-using the atoms parsing. For "root" structs, atoms_allocated will be
+ * nonzero, indicating that the array of atoms is owned by that struct. For
+ * "child" structs, atoms_allocated == 0, to indicate that the struct is
+ * referencing a subset of atoms. */
+struct diff_data {
+	FILE *f;		/* if root diff_data and not memory-mapped */
+	off_t pos;		/* if not memory-mapped */
+	const uint8_t *data;	/* if memory-mapped */
+	off_t len;
+
+	ARRAYLIST(struct diff_atom) atoms;
+	struct diff_data *root;
+
+	int diff_flags;
+};
+
+#define DIFF_FLAG_IGNORE_WHITESPACE	0x00000001
+
+void diff_data_free(struct diff_data *diff_data);
+
+struct diff_chunk;
+typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
+
+struct diff_result {
+	int rc;
+	struct diff_data left;
+	struct diff_data right;
+	diff_chunk_arraylist_t chunks;
+};
+
+struct diff_state;
+
+/* Signature of a utility function to divide both source files into diff atoms.
+ * It is possible that a (future) algorithm requires both source files to decide
+ * on atom split points, hence this gets both left and right to atomize at the
+ * same time.
+ * An example is diff_atomize_text_by_line() in diff_atomize_text.c.
+ *
+ * func_data: context pointer (free to be used by implementation).
+ * left: struct diff_data with left->data and left->len already set up, and
+ * left->atoms to be created.
+ * right: struct diff_data with right->data and right->len already set up, and
+ * right->atoms to be created.
+ */
+typedef int (*diff_atomize_func_t)(void *func_data,
+					    struct diff_data *left,
+					    struct diff_data *right);
+
+extern int diff_atomize_text_by_line(void *func_data,
+					      struct diff_data *left,
+					      struct diff_data *right);
+
+struct diff_algo_config;
+typedef int (*diff_algo_impl_t)(
+	const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Form a result with all left-side removed and all right-side added, i.e. no
+ * actual diff algorithm involved. */
+int diff_algo_none(const struct diff_algo_config *algo_config,
+			    struct diff_state *state);
+
+/* Myers Diff tracing from the start all the way through to the end, requiring
+ * quadratic amounts of memory. This can fail if the required space surpasses
+ * algo_config->permitted_state_size. */
+extern int diff_algo_myers(const struct diff_algo_config *algo_config,
+				    struct diff_state *state);
+
+/* Myers "Divide et Impera": tracing forwards from the start and backwards from
+ * the end to find a midpoint that divides the problem into smaller chunks.
+ * Requires only linear amounts of memory. */
+extern int diff_algo_myers_divide(
+	const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For
+ * very specific scenarios, it may lead to a complete diff result by itself, but
+ * needs a fallback algo to solve chunks that don't have common-unique atoms. */
+extern int diff_algo_patience(
+	const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Diff algorithms to use, possibly nested. For example:
+ *
+ * struct diff_algo_config myers, patience, myers_divide;
+ *
+ * myers = (struct diff_algo_config){
+ *         .impl = diff_algo_myers,
+ *         .permitted_state_size = 32 * 1024 * 1024,
+ *         // When too large, do diff_algo_patience:
+ *         .fallback_algo = &patience,
+ * };
+ *
+ * const struct diff_algo_config patience = (struct diff_algo_config){
+ * 	.impl = diff_algo_patience,
+ * 	// After subdivision, do Patience again:
+ * 	.inner_algo = &patience,
+ * 	// If subdivision failed, do Myers Divide et Impera:
+ * 	.fallback_algo = &myers_then_myers_divide,
+ * };
+ * 
+ * const struct diff_algo_config myers_divide = (struct diff_algo_config){
+ * 	.impl = diff_algo_myers_divide,
+ * 	// When division succeeded, start from the top:
+ * 	.inner_algo = &myers_then_myers_divide,
+ * 	// (fallback_algo = NULL implies diff_algo_none).
+ * };
+ * struct diff_config config = {
+ *         .algo = &myers,
+ *         ...
+ * };
+ * diff_main(&config, ...);
+ */
+struct diff_algo_config {
+	diff_algo_impl_t impl;
+
+	/* Fail this algo if it would use more than this amount of memory, and
+	 * instead use fallback_algo (diff_algo_myers). permitted_state_size ==
+	 * 0 means no limitation. */
+	size_t permitted_state_size;
+
+	/* For algorithms that divide into smaller chunks, use this algorithm to
+	 * solve the divided chunks. */
+	const struct diff_algo_config *inner_algo;
+
+	/* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large
+	 * state, or diff_algo_patience can't find any common-unique atoms),
+	 * then use this algorithm instead. */
+	const struct diff_algo_config *fallback_algo;
+};
+
+struct diff_config {
+	diff_atomize_func_t atomize_func;
+	void *atomize_func_data;
+
+	const struct diff_algo_config *algo;
+
+	/* How deep to step into subdivisions of a source file, a paranoia /
+	 * safety measure to guard against infinite loops through diff
+	 * algorithms. When the maximum recursion is reached, employ
+	 * diff_algo_none (i.e. remove all left atoms and add all right atoms).
+	 */
+	unsigned int max_recursion_depth;
+};
+
+struct diff_result *diff_main(const struct diff_config *config,
+			      FILE *left_f, const uint8_t *left_data,
+			      off_t left_len,
+			      FILE *right_f, const uint8_t *right_data,
+			      off_t right_len, int diff_flags);
+void diff_result_free(struct diff_result *result);
blob - /dev/null
blob + 20767052a7fd8ba51f93706796155ffdbfb04a82 (mode 644)
--- /dev/null
+++ include/diff_output.h
@@ -0,0 +1,79 @@
+/* Diff output generators and invocation shims. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+struct diff_input_info {
+	const char *left_path;
+	const char *right_path;
+};
+
+struct diff_output_info {
+	/*
+	 * Byte offset to each line in the generated output file.
+	 * The total number of lines in the file is line_offsets.len - 1.
+	 * The last offset in this array corresponds to end-of-file.
+	 */
+	ARRAYLIST(off_t) line_offsets;
+};
+
+void diff_output_info_free(struct diff_output_info *output_info);
+
+struct diff_chunk_context {
+	struct diff_range chunk;
+	struct diff_range left, right;
+};
+
+int diff_output_plain(struct diff_output_info **output_info, FILE *dest,
+			const struct diff_input_info *info,
+			const struct diff_result *result);
+int diff_output_unidiff(struct diff_output_info **output_info,
+			FILE *dest, const struct diff_input_info *info,
+			const struct diff_result *result,
+			unsigned int context_lines);
+int diff_chunk_get_left_start(const struct diff_chunk *c,
+			      const struct diff_result *r,
+			      int context_lines);
+int diff_chunk_get_left_end(const struct diff_chunk *c,
+			    const struct diff_result *r,
+			    int context_lines);
+int diff_chunk_get_right_start(const struct diff_chunk *c,
+			       const struct diff_result *r,
+			       int context_lines);
+int diff_chunk_get_right_end(const struct diff_chunk *c,
+			     const struct diff_result *r,
+			     int context_lines);
+void diff_chunk_context_get(struct diff_chunk_context *cc,
+				 const struct diff_result *r,
+				 int chunk_idx, int context_lines);
+struct diff_output_unidiff_state;
+struct diff_output_unidiff_state *diff_output_unidiff_state_alloc(void);
+void diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state);
+void diff_output_unidiff_state_free(struct diff_output_unidiff_state *state);
+int diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
+			  struct diff_output_unidiff_state *state,
+			  const struct diff_input_info *info,
+			  const struct diff_result *result,
+			  const struct diff_chunk_context *cc);
+int diff_output_chunk_left_version(struct diff_output_info **output_info,
+			       FILE *dest,
+			       const struct diff_input_info *info,
+			       const struct diff_result *result,
+			       const struct diff_chunk_context *cc);
+int diff_output_chunk_right_version(struct diff_output_info **output_info,
+				FILE *dest,
+				const struct diff_input_info *info,
+				const struct diff_result *result,
+				const struct diff_chunk_context *cc);
blob - 1b5c256b8e80cab7a4ed7e563349ee1ee5e2dd79
blob + 8bc527e0f06aeeca1e930f349552b5503cdae9c8
--- lib/GNUmakefile
+++ lib/GNUmakefile
@@ -21,7 +21,7 @@ libdiff.a: $(OBJS)
 CFLAGS += -fsanitize=address -fsanitize=undefined -g -O3
 CFLAGS += -Wstrict-prototypes -Wunused-variable
 
-%.o: %.c ./*.h ../include/diff/*.h
+%.o: %.c ./*.h ../include/*.h
 	gcc $(CFLAGS) -I../include -o $@ -c $<
 
 .PHONY: clean
blob - 438e9c4e73f56cd2f0eb9d2cfcf6766893b10883
blob + 3eac2d385875ae4eb6ed7bbf3a373422e4ff27eb
--- lib/diff_atomize_text.c
+++ lib/diff_atomize_text.c
@@ -22,8 +22,9 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
+
 #include "diff_internal.h"
 #include "diff_debug.h"
 
blob - 6b5c37f50b58796347ffae9293e99c30adc72549
blob + 65e95cd2003475de86231ea9a9e0e28b7251e3e0
--- lib/diff_main.c
+++ lib/diff_main.c
@@ -29,8 +29,8 @@
 
 #include <assert.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
 
 #include "diff_internal.h"
 #include "diff_debug.h"
blob - a21aeee883c98dd77c1a1b5a8c58d46654e0e2f6
blob + 1fbd6c606a5b3ece2a185f342bba36de076fd8eb
--- lib/diff_myers.c
+++ lib/diff_myers.c
@@ -24,8 +24,8 @@
 #include <stdio.h>
 #include <errno.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
 
 #include "diff_internal.h"
 #include "diff_debug.h"
blob - 90ed1983d9b6b7ad3005b3a3b77230d52c12ba2d
blob + 7934dc74e738ad06ae99c293b07fecd3feb51dd3
--- lib/diff_output.c
+++ lib/diff_output.c
@@ -22,9 +22,9 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
 
 #include "diff_internal.h"
 
blob - 76f3831e2f6844dc51a9dc5e98f69b52ac40b18e
blob + cc478ba4dd635825a2cf8235364f9ddcebb2f6aa
--- lib/diff_output_plain.c
+++ lib/diff_output_plain.c
@@ -21,9 +21,9 @@
 #include <stdbool.h>
 #include <stdlib.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
 
 #include "diff_internal.h"
 
blob - fc5b69b7af9741fe8308060d890d82c1eeb4c8c4
blob + da3c98842c7e864025e83544cc9206ac232c8289
--- lib/diff_output_unidiff.c
+++ lib/diff_output_unidiff.c
@@ -21,9 +21,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
-#include <diff/diff_output.h>
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
 
 #include "diff_internal.h"
 #include "diff_debug.h"
blob - 6922a7308a35a266bfc5e71aea83f2abd2bd70aa
blob + cccfafd597fa145d26f94990a65acb0a25287205
--- lib/diff_patience.c
+++ lib/diff_patience.c
@@ -23,8 +23,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include <diff/arraylist.h>
-#include <diff/diff_main.h>
+#include <arraylist.h>
+#include <diff_main.h>
 
 #include "diff_internal.h"
 #include "diff_debug.h"