commit - fe6d58fb52ea8d1041a8eb65e28a04816df67c08
commit + 29916bb6c0c248ca6fa5486cb9e081d92112e86c
blob - 40142164742fb5bb3d4316a5bca80a90d132836c
blob + bd94a9109c7f29432f6af3217b4a5775f3f0493c
--- include/diff_main.h
+++ include/diff_main.h
#define DIFF_RC_OK 0
/* Any positive return values are errno values from sys/errno.h */
-struct diff_atom;
+struct diff_atom { /* one atom of the diffed data, e.g. one line of text */
+ struct diff_data *root; /* back pointer to root diff data */
+ off_t pos; /* where the atom's data is, if not memory-mapped (presumably a file offset -- confirm) */
+ const uint8_t *at; /* pointer to the atom's data, if memory-mapped */
+ off_t len; /* length of the atom (presumably in bytes -- confirm) */
+
+ /* This hash is just a very cheap speed up for finding *mismatching*
+ * atoms. When hashes match, we still need to compare entire atoms to
+ * find out whether they are indeed identical or not.
+ * Calculated over all atom bytes with diff_atom_hash_update(). */
+ unsigned int hash;
+};
+
+/* Mix another atom_byte into the provided hash value and return the result.
+ * The hash value passed in for the first byte of the atom must be zero.
+ * To build up an atom's hash, call this once for each byte to be hashed, in
+ * order, passing each call's return value back in as the hash argument of
+ * the next call. Equal hashes do not prove that two atoms are identical;
+ * only differing hashes are conclusive. */
+unsigned int
+diff_atom_hash_update(unsigned int hash, unsigned char atom_byte);
+
+/* Compare two atoms. The comparison result is stored in *cmp as -1, 0, or 1,
+ * using the same sign convention as strcmp(); 0 means the atoms are equal.
+ * The return value only reports status: DIFF_RC_OK (0) on success, or a
+ * positive errno value on failure. */
+int
+diff_atom_cmp(int *cmp,
+ const struct diff_atom *left,
+ const struct diff_atom *right);
+
+
+/* The atom's index in the entire file. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). Also works for diff_data that
+ * reference only a subsection of a file, always reflecting the global position
+ * in the file (and not the relative position within the subsection).
+ * If ATOM is NULL or lies before the root's first atom, this evaluates to the
+ * root's atom count (root->atoms.len) instead. No upper bound is checked.
+ * Both arguments are expanded more than once, so they must be free of side
+ * effects. */
+#define diff_atom_root_idx(DIFF_DATA, ATOM) \
+ ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
+ ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
+ : (DIFF_DATA)->root->atoms.len)
+
+/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
+ * yields the line number (starting with 0).
+ * If ATOM is NULL or lies before DIFF_DATA's first atom, this evaluates to
+ * DIFF_DATA's atom count (atoms.len) instead. No upper bound is checked.
+ * Both arguments are expanded more than once, so they must be free of side
+ * effects. */
+#define diff_atom_idx(DIFF_DATA, ATOM) \
+ ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+ ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
+ : (DIFF_DATA)->atoms.len)
+
+#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
+ for ((ATOM) = (FIRST_ATOM); \
+ (ATOM) \
+ && ((ATOM) >= (FIRST_ATOM)) \
+ && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
+ (ATOM)++) /* Loop header only; the statement that follows the macro is
+ * the loop body. Steps ATOM forward from FIRST_ATOM for as long as it
+ * is non-NULL, not before FIRST_ATOM, and fewer than COUNT atoms past
+ * FIRST_ATOM. All arguments are expanded repeatedly, so they must be
+ * free of side effects. */
+
+#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
+ foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len) /*
+ * Loop header that steps ATOM through DIFF_DATA's atoms, starting at
+ * atoms.head and covering atoms.len atoms; the statement that follows
+ * the macro is the loop body. */
+
+#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
+ for ((ATOM) = (FROM); \
+ (ATOM) \
+ && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+ && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
+ (ATOM)++) /* Loop header that steps ATOM forward starting at FROM rather
+ * than at the first atom. The loop runs while ATOM is non-NULL, not
+ * before atoms.head, and less than atoms.len atoms past atoms.head;
+ * a FROM outside that range yields zero iterations. Arguments are
+ * expanded repeatedly, so they must be free of side effects. */
+
+#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \
+ for ((ATOM) = (FROM); \
+ (ATOM) \
+ && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+ && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \
+ (ATOM)--) /* Loop header that steps ATOM backwards from FROM towards
+ * atoms.head; the loop ends once ATOM is NULL or compares below
+ * atoms.head. No upper bound is checked here.
+ * NOTE(review): the "- atoms.head >= 0" test repeats the preceding
+ * ">= atoms.head" test, and the last decrement leaves ATOM one
+ * element before atoms.head -- confirm that this is intended. */
+
/* For each file, there is a "root" struct diff_data referencing the entire
* file, which the atoms are parsed from. In recursion of diff algorithm, there
* may be "child" struct diff_data only referencing a subsection of the file,
blob - 1bdb99777475de80a76425f1e2c3243dbbe9d9a6
blob + 1da34c64d7271fbd198a3a0cfc8d0414641d22d8
--- lib/diff_atomize_text.c
+++ lib/diff_atomize_text.c
#include "diff_internal.h"
#include "diff_debug.h"
+unsigned int
+diff_atom_hash_update(unsigned int hash, unsigned char atom_byte)
+{
+ return hash * 23 + atom_byte; /* one multiply-and-add step: scale the
+ * running hash by 23, then add the new byte. The arithmetic is done
+ * in unsigned int, so overflow wraps around instead of being
+ * undefined. */
+}
+
static int
diff_data_atomize_text_lines_fd(struct diff_data *d)
{
if (buf[i] != '\r' && buf[i] != '\n') {
if (!ignore_whitespace
|| !isspace(buf[i]))
- hash = hash * 23 + buf[i];
+ hash = diff_atom_hash_update(
+ hash, buf[i]);
line_end++;
} else
eol = buf[i];
blob - 94ef28c472ae1b07dee34bf8618414ded3037d74
blob + 699cdbdee8d7c7fa45ac1a2cf93547d0a2c9fdc8
--- lib/diff_internal.h
+++ lib/diff_internal.h
#define DIFF_RC_OK 0
/* Any positive return values are errno values from sys/errno.h */
-struct diff_data;
-
-struct diff_atom {
- struct diff_data *root; /* back pointer to root diff data */
-
- off_t pos; /* if not memory-mapped */
- const uint8_t *at; /* if memory-mapped */
- off_t len;
-
- /* This hash is just a very cheap speed up for finding *mismatching*
- * atoms. When hashes match, we still need to compare entire atoms to
- * find out whether they are indeed identical or not. */
- unsigned int hash;
-};
-
-int
-diff_atom_cmp(int *cmp,
- const struct diff_atom *left,
- const struct diff_atom *right);
-
/* Indicate whether two given diff atoms match. */
int
diff_atom_same(bool *same,
const struct diff_atom *left,
const struct diff_atom *right);
-/* The atom's index in the entire file. For atoms divided by lines of text, this
- * yields the line number (starting with 0). Also works for diff_data that
- * reference only a subsection of a file, always reflecting the global position
- * in the file (and not the relative position within the subsection). */
-#define diff_atom_root_idx(DIFF_DATA, ATOM) \
- ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
- ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
- : (DIFF_DATA)->root->atoms.len)
-
-/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
- * yields the line number (starting with 0). */
-#define diff_atom_idx(DIFF_DATA, ATOM) \
- ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
- ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
- : (DIFF_DATA)->atoms.len)
-
-#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
- for ((ATOM) = (FIRST_ATOM); \
- (ATOM) \
- && ((ATOM) >= (FIRST_ATOM)) \
- && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
- (ATOM)++)
-
-#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
- foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
-
-#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
- for ((ATOM) = (FROM); \
- (ATOM) \
- && ((ATOM) >= (DIFF_DATA)->atoms.head) \
- && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
- (ATOM)++)
-
-#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \
- for ((ATOM) = (FROM); \
- (ATOM) \
- && ((ATOM) >= (DIFF_DATA)->atoms.head) \
- && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \
- (ATOM)--)
-
/* A diff chunk represents a set of atoms on the left and/or a set of atoms on
* the right.
*