Gotweb

Commit Diff

Commit:: 29916bb6c0c248ca6fa5486cb9e081d92112e86c
From:: Stefan Sperling <stsp@stsp.name>
Date:: Wed Nov 18 14:14:06 2020 UTC
Message:: expose struct diff_atom in public API for external atomizer implementations
Actions:: Patch | Tree
commit - fe6d58fb52ea8d1041a8eb65e28a04816df67c08
commit + 29916bb6c0c248ca6fa5486cb9e081d92112e86c
blob - 40142164742fb5bb3d4316a5bca80a90d132836c
blob + bd94a9109c7f29432f6af3217b4a5775f3f0493c
--- include/diff_main.h
+++ include/diff_main.h
@@ -25,8 +25,73 @@ struct diff_range {
 #define DIFF_RC_OK			0
 /* Any positive return values are errno values from sys/errno.h */
 
-struct diff_atom;
+struct diff_atom {
+	struct diff_data *root; /* back pointer to root diff data */
 
+	off_t pos;		/* if not memory-mapped */
+	const uint8_t *at;	/* if memory-mapped */
+	off_t len;
+
+	/* This hash is just a very cheap speed up for finding *mismatching*
+	 * atoms. When hashes match, we still need to compare entire atoms to
+	 * find out whether they are indeed identical or not.
+	 * Calculated over all atom bytes with diff_atom_hash_update(). */
+	unsigned int hash;
+};
+
+/* Mix another atom_byte into the provided hash value and return the result.
+ * The hash value passed in for the first byte of the atom must be zero. */
+unsigned int
+diff_atom_hash_update(unsigned int hash, unsigned char atom_byte);
+
+/* Compare two atoms. Return DIFF_RC_OK (0) on success, or an errno value on
+ * failure. Sets *cmp to -1, 0, or 1, ordered in the same sense as strcmp(). */
+int
+diff_atom_cmp(int *cmp,
+	      const struct diff_atom *left,
+	      const struct diff_atom *right);
+
+
+/* The atom's index in the entire file. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). Also works for diff_data that
+ * reference only a subsection of a file, always reflecting the global position
+ * in the file (and not the relative position within the subsection). */
+#define diff_atom_root_idx(DIFF_DATA, ATOM) \
+	((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
+	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
+	 : (DIFF_DATA)->root->atoms.len)
+
+/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). */
+#define diff_atom_idx(DIFF_DATA, ATOM) \
+	((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
+	 : (DIFF_DATA)->atoms.len)
+
+#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
+	for ((ATOM) = (FIRST_ATOM); \
+	     (ATOM) \
+	     && ((ATOM) >= (FIRST_ATOM)) \
+	     && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
+	     (ATOM)++)
+
+#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
+	foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
+
+#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
+	for ((ATOM) = (FROM); \
+	     (ATOM) \
+	     && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+	     && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
+	     (ATOM)++)
+
+#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \
+	for ((ATOM) = (FROM); \
+	     (ATOM) \
+	     && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+	     && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \
+	     (ATOM)--)
+
 /* For each file, there is a "root" struct diff_data referencing the entire
  * file, which the atoms are parsed from. In recursion of diff algorithm, there
  * may be "child" struct diff_data only referencing a subsection of the file,
blob - 1bdb99777475de80a76425f1e2c3243dbbe9d9a6
blob + 1da34c64d7271fbd198a3a0cfc8d0414641d22d8
--- lib/diff_atomize_text.c
+++ lib/diff_atomize_text.c
@@ -29,6 +29,12 @@
 #include "diff_internal.h"
 #include "diff_debug.h"
 
+unsigned int
+diff_atom_hash_update(unsigned int hash, unsigned char atom_byte)
+{
+	return hash * 23 + atom_byte;
+}
+
 static int
 diff_data_atomize_text_lines_fd(struct diff_data *d)
 {
@@ -63,7 +69,8 @@ diff_data_atomize_text_lines_fd(struct diff_data *d)
 				if (buf[i] != '\r' && buf[i] != '\n') {
 					if (!ignore_whitespace
 					    || !isspace(buf[i]))
-						hash = hash * 23 + buf[i];
+						hash = diff_atom_hash_update(
+						    hash, buf[i]);
 					line_end++;
 				} else
 					eol = buf[i];
blob - 94ef28c472ae1b07dee34bf8618414ded3037d74
blob + 699cdbdee8d7c7fa45ac1a2cf93547d0a2c9fdc8
--- lib/diff_internal.h
+++ lib/diff_internal.h
@@ -56,72 +56,12 @@ diff_range_len(const struct diff_range *r)
 #define DIFF_RC_OK			0
 /* Any positive return values are errno values from sys/errno.h */
 
-struct diff_data;
-
-struct diff_atom {
-	struct diff_data *root; /* back pointer to root diff data */
-
-	off_t pos;		/* if not memory-mapped */
-	const uint8_t *at;	/* if memory-mapped */
-	off_t len;
-
-	/* This hash is just a very cheap speed up for finding *mismatching*
-	 * atoms. When hashes match, we still need to compare entire atoms to
-	 * find out whether they are indeed identical or not. */
-	unsigned int hash;
-};
-
-int
-diff_atom_cmp(int *cmp,
-	      const struct diff_atom *left,
-	      const struct diff_atom *right);
-
 /* Indicate whether two given diff atoms match. */
 int
 diff_atom_same(bool *same,
 	       const struct diff_atom *left,
 	       const struct diff_atom *right);
 
-/* The atom's index in the entire file. For atoms divided by lines of text, this
- * yields the line number (starting with 0). Also works for diff_data that
- * reference only a subsection of a file, always reflecting the global position
- * in the file (and not the relative position within the subsection). */
-#define diff_atom_root_idx(DIFF_DATA, ATOM) \
-	((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
-	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
-	 : (DIFF_DATA)->root->atoms.len)
-
-/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
- * yields the line number (starting with 0). */
-#define diff_atom_idx(DIFF_DATA, ATOM) \
-	((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
-	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
-	 : (DIFF_DATA)->atoms.len)
-
-#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
-	for ((ATOM) = (FIRST_ATOM); \
-	     (ATOM) \
-	     && ((ATOM) >= (FIRST_ATOM)) \
-	     && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
-	     (ATOM)++)
-
-#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
-	foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
-
-#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
-	for ((ATOM) = (FROM); \
-	     (ATOM) \
-	     && ((ATOM) >= (DIFF_DATA)->atoms.head) \
-	     && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
-	     (ATOM)++)
-
-#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \
-	for ((ATOM) = (FROM); \
-	     (ATOM) \
-	     && ((ATOM) >= (DIFF_DATA)->atoms.head) \
-	     && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \
-	     (ATOM)--)
-
 /* A diff chunk represents a set of atoms on the left and/or a set of atoms on
  * the right.
  *