1 /* Split source by line breaks, and calculate a simplistic checksum. */
/*
 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
26 #include <arraylist.h>
27 #include <diff_main.h>
29 #include "diff_internal.h"
30 #include "diff_debug.h"
33 diff_data_atomize_text_lines_fd(struct diff_data *d)
36 const off_t end = pos + d->len;
37 unsigned int array_size_estimate = d->len / 50;
38 unsigned int pow2 = 1;
39 bool ignore_whitespace = (d->diff_flags & DIFF_FLAG_IGNORE_WHITESPACE);
41 while (array_size_estimate >>= 1)
44 ARRAYLIST_INIT(d->atoms, 1 << pow2);
46 if (fseek(d->root->f, 0L, SEEK_SET) == -1)
51 unsigned int hash = 0;
52 unsigned char buf[512];
54 struct diff_atom *atom;
57 while (eol == 0 && line_end < end) {
58 r = fread(buf, sizeof(char), sizeof(buf), d->root->f);
59 if (r == 0 && ferror(d->root->f))
62 while (eol == 0 && i < r) {
63 if (buf[i] != '\r' && buf[i] != '\n') {
64 if (!ignore_whitespace
66 hash = hash * 23 + buf[i];
74 /* When not at the end of data, the line ending char ('\r' or
75 * '\n') must follow */
78 /* If that was an '\r', also pull in any following '\n' */
79 if (line_end < end && eol == '\r') {
80 if (fseeko(d->root->f, line_end, SEEK_SET) == -1)
82 r = fread(buf, sizeof(char), sizeof(buf), d->root->f);
83 if (r == 0 && ferror(d->root->f))
85 if (r == 1 && buf[0] == '\n' )
89 /* Record the found line as diff atom */
90 ARRAYLIST_ADD(atom, d->atoms);
94 *atom = (struct diff_atom){
97 .at = NULL, /* atom data is not memory-mapped */
98 .len = line_end - pos,
102 /* Starting point for next line: */
104 if (fseeko(d->root->f, pos, SEEK_SET) == -1)
112 diff_data_atomize_text_lines_mmap(struct diff_data *d)
114 const uint8_t *pos = d->data;
115 const uint8_t *end = pos + d->len;
116 bool ignore_whitespace = (d->diff_flags & DIFF_FLAG_IGNORE_WHITESPACE);
118 unsigned int array_size_estimate = d->len / 50;
119 unsigned int pow2 = 1;
120 while (array_size_estimate >>= 1)
123 ARRAYLIST_INIT(d->atoms, 1 << pow2);
126 const uint8_t *line_end = pos;
127 unsigned int hash = 0;
129 while (line_end < end && *line_end != '\r' && *line_end != '\n') {
130 if (!ignore_whitespace
131 || !isspace(*line_end))
132 hash = hash * 23 + *line_end;
136 /* When not at the end of data, the line ending char ('\r' or
137 * '\n') must follow */
140 /* If that was an '\r', also pull in any following '\n' */
141 if (line_end < end - 1 && line_end[0] == '\r' &&
145 /* Record the found line as diff atom */
146 struct diff_atom *atom;
147 ARRAYLIST_ADD(atom, d->atoms);
151 *atom = (struct diff_atom){
153 .pos = (off_t)(pos - d->data),
155 .len = line_end - pos,
159 /* Starting point for next line: */
167 diff_data_atomize_text_lines(struct diff_data *d)
170 return diff_data_atomize_text_lines_fd(d);
172 return diff_data_atomize_text_lines_mmap(d);
/*
 * Atomizer entry point that splits d into one atom per text line.
 *
 * func_data is accepted but not used.  Returns whatever
 * diff_data_atomize_text_lines() returns for d.
 */
int
diff_atomize_text_by_line(void *func_data, struct diff_data *d)
{
	int rc;

	(void)func_data;	/* not needed for line-based splitting */

	rc = diff_data_atomize_text_lines(d);
	return rc;
}