1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 diff_output(FILE *outfile, const char *fmt, ...)
266 vfprintf(outfile, fmt, ap);
270 const struct got_error *
271 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
272 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
273 struct got_diff_changes *changes)
275 const struct got_error *err = NULL;
282 ds->lastmatchline = 0;
283 ds->context_vec_ptr = ds->context_vec_start - 1;
284 ds->max_context = GOT_DIFF_MAX_CONTEXT;
285 if (flags & D_IGNORECASE)
286 ds->chrtran = cup2low;
288 ds->chrtran = clow2low;
289 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
290 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
293 if (flags & D_EMPTY1) {
294 f1 = fopen(_PATH_DEVNULL, "r");
296 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
300 else if (f1 == NULL) {
305 if (flags & D_EMPTY2) {
306 f2 = fopen(_PATH_DEVNULL, "r");
308 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
311 } else if (f2 == NULL) {
316 switch (files_differ(ds, f1, f2, flags)) {
327 if ((flags & D_FORCEASCII) == 0 &&
328 (!asciifile(f1) || !asciifile(f2))) {
333 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 err = got_error_from_errno("prepare");
337 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 err = got_error_from_errno("prepare");
343 sort(ds->sfile[0], ds->slen[0]);
344 sort(ds->sfile[1], ds->slen[1]);
346 ds->member = (int *)ds->file[1];
347 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
350 err = got_error_from_errno("reallocarray");
355 ds->class = (int *)ds->file[0];
356 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
357 err = got_error_from_errno("unsort");
360 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
362 err = got_error_from_errno("reallocarray");
367 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
368 if (ds->klist == NULL) {
369 err = got_error_from_errno("calloc");
374 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
375 if (ds->clist == NULL) {
376 err = got_error_from_errno("calloc");
379 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
381 err = got_error_from_errno("stone");
385 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
387 err = got_error_from_errno("reallocarray");
391 unravel(ds, ds->klist[i]);
393 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
395 err = got_error_from_errno("reallocarray");
399 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
401 err = got_error_from_errno("reallocarray");
405 check(ds, f1, f2, flags);
406 if (output(outfile, changes, ds, args, args->label[0], f1,
407 args->label[1], f2, flags))
408 err = got_error_from_errno("output");
422 if ((flags & D_EMPTY1) && f1) {
423 if (fclose(f1) != 0 && err == NULL)
424 err = got_error_from_errno("fclose");
426 if ((flags & D_EMPTY2) && f2) {
427 if (fclose(f2) != 0 && err == NULL)
428 err = got_error_from_errno("fclose");
434 * Check to see if the given files differ.
435 * Returns 0 if they are the same, 1 if different, and -1 on error.
436 * XXX - could use code from cmp(1) [faster]
439 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
441 char buf1[BUFSIZ], buf2[BUFSIZ];
444 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
445 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
448 i = fread(buf1, 1, sizeof(buf1), f1);
449 j = fread(buf2, 1, sizeof(buf2), f2);
450 if ((!i && ferror(f1)) || (!j && ferror(f2)))
456 if (memcmp(buf1, buf2, i) != 0)
462 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
470 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
474 p = calloc(sz + 3, sizeof(*p));
477 for (j = 0; (h = readhash(ds, fd, flags));) {
480 q = reallocarray(p, sz + 3, sizeof(*p));
496 prune(struct got_diff_state *ds)
500 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
501 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
504 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
505 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
508 for (j = 0; j < 2; j++) {
509 ds->sfile[j] = ds->file[j] + ds->pref;
510 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
511 for (i = 0; i <= ds->slen[j]; i++)
512 ds->sfile[j][i].serial = i;
517 equiv(struct line *a, int n, struct line *b, int m, int *c)
522 while (i <= n && j <= m) {
523 if (a[i].value < b[j].value)
525 else if (a[i].value == b[j].value)
536 while (b[j + 1].value == b[j].value) {
544 /* Code taken from ping.c */
553 do { /* newton was a stinker */
558 } while ((x - y) > 1 || (x - y) < -1);
564 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
567 int oldc, tc, oldl, sq;
568 u_int numtries, bound;
571 if (flags & D_MINIMAL)
575 bound = MAXIMUM(256, sq);
579 c[0] = newcand(ds, 0, 0, 0, &error);
582 for (i = 1; i <= n; i++) {
591 if (y <= ds->clist[oldc].y)
593 l = search(ds, c, k, y);
597 if (ds->clist[c[l]].y <= y)
600 c[l] = newcand(ds, i, y, oldc, &error);
607 c[l] = newcand(ds, i, y, oldc, &error);
613 } while ((y = b[++j]) > 0 && numtries < bound);
619 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
623 if (ds->clen == ds->clistlen) {
624 ds->clistlen = ds->clistlen * 11 / 10;
625 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
634 q = ds->clist + ds->clen;
643 search(struct got_diff_state *ds, int *c, int k, int y)
647 if (ds->clist[c[k]].y < y) /* quick look for typical case */
655 t = ds->clist[c[l]].y;
667 unravel(struct got_diff_state *ds, int p)
672 for (i = 0; i <= ds->len[0]; i++)
673 ds->J[i] = i <= ds->pref ? i :
674 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
675 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
676 ds->J[q->x + ds->pref] = q->y + ds->pref;
680 * Check does double duty:
681 * 1. ferret out any fortuitous correspondences due
682 * to confounding by hashing (which result in "jackpot")
683 * 2. collect random access indexes to the two files
686 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
688 int i, j, jackpot, c, d;
694 ds->ixold[0] = ds->ixnew[0] = 0;
697 for (i = 1; i <= ds->len[0]; i++) {
699 ds->ixold[i] = ctold += skipline(f1);
702 while (j < ds->J[i]) {
703 ds->ixnew[j] = ctnew += skipline(f2);
706 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
711 * GNU diff ignores a missing newline
712 * in one file for -b or -w.
714 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
715 if (c == EOF && d == '\n') {
718 } else if (c == '\n' && d == EOF) {
725 if ((flags & D_FOLDBLANKS) && isspace(c) &&
731 } while (isspace(c = getc(f1)));
736 } while (isspace(d = getc(f2)));
737 } else if ((flags & D_IGNOREBLANKS)) {
738 while (isspace(c) && c != '\n') {
742 while (isspace(d) && d != '\n') {
747 if (ds->chrtran[c] != ds->chrtran[d]) {
750 if (c != '\n' && c != EOF)
751 ctold += skipline(f1);
752 if (d != '\n' && c != EOF)
753 ctnew += skipline(f2);
756 if (c == '\n' || c == EOF)
763 if ((c = getc(f1)) != (d = getc(f2))) {
766 if (c != '\n' && c != EOF)
767 ctold += skipline(f1);
768 if (d != '\n' && c != EOF)
769 ctnew += skipline(f2);
772 if (c == '\n' || c == EOF)
776 ds->ixold[i] = ctold;
777 ds->ixnew[j] = ctnew;
780 for (; j <= ds->len[1]; j++)
781 ds->ixnew[j] = ctnew += skipline(f2);
784 * fprintf(stderr, "jackpot\n");
788 /* shellsort CACM #201 */
790 sort(struct line *a, int n)
792 struct line *ai, *aim, w;
797 for (j = 1; j <= n; j *= 2)
799 for (m /= 2; m != 0; m /= 2) {
801 for (j = 1; j <= k; j++) {
802 for (ai = &a[j]; ai > a; ai -= m) {
805 break; /* wraparound */
806 if (aim->value > ai[0].value ||
807 (aim->value == ai[0].value &&
808 aim->serial > ai[0].serial))
810 w.value = ai[0].value;
811 ai[0].value = aim->value;
812 aim->value = w.value;
813 w.serial = ai[0].serial;
814 ai[0].serial = aim->serial;
815 aim->serial = w.serial;
822 unsort(struct line *f, int l, int *b)
826 a = calloc(l + 1, sizeof(*a));
829 for (i = 1; i <= l; i++)
830 a[f[i].serial] = f[i].value;
831 for (i = 1; i <= l; i++)
843 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849 output(FILE *outfile, struct got_diff_changes *changes,
850 struct got_diff_state *ds, struct got_diff_args *args,
851 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
853 int m, i0, i1, j0, j1;
860 ds->J[m + 1] = ds->len[1] + 1;
861 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
862 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
864 j0 = ds->J[i0 - 1] + 1;
866 while (i1 < m && ds->J[i1 + 1] == 0)
868 j1 = ds->J[i1 + 1] - 1;
870 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
871 i0, i1, j0, j1, &flags);
876 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
877 1, 0, 1, ds->len[1], &flags);
881 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
882 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
888 range(FILE *outfile, int a, int b, char *separator)
890 diff_output(outfile, "%d", a > b ? b : a);
892 diff_output(outfile, "%s%d", separator, b);
896 uni_range(FILE *outfile, int a, int b)
899 diff_output(outfile, "%d,%d", a, b - a + 1);
901 diff_output(outfile, "%d", b);
903 diff_output(outfile, "%d,0", b);
907 * Indicate that there is a difference between lines a and b of the from file
908 * to get to lines c to d of the to file. If a is greater then b then there
909 * are no lines in the from file involved and this means that there were
910 * lines appended (beginning at b). If c is greater than d then there are
911 * lines missing from the to file.
914 change(FILE *outfile, struct got_diff_changes *changes,
915 struct got_diff_state *ds, struct got_diff_args *args,
916 const char *file1, FILE *f1, const char *file2, FILE *f2,
917 int a, int b, int c, int d, int *pflags)
924 if (*pflags & D_HEADER) {
925 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
926 *pflags &= ~D_HEADER;
928 if (args->diff_format == D_UNIFIED) {
930 * Allocate change records as needed.
932 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
933 struct context_vec *cvp;
935 offset = ds->context_vec_ptr - ds->context_vec_start;
936 ds->max_context <<= 1;
937 ds->context_vec_start =
938 cvp = reallocarray(ds->context_vec_start,
939 ds->max_context, sizeof(*ds->context_vec_start));
941 free(ds->context_vec_start);
944 ds->context_vec_start = cvp;
945 ds->context_vec_end = ds->context_vec_start +
947 ds->context_vec_ptr = ds->context_vec_start + offset;
949 if (ds->anychange == 0) {
951 * Print the context/unidiff header first time through.
953 print_header(outfile, ds, args, file1, file2);
955 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
956 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
958 * If this change is more than 'diff_context' lines from the
959 * previous change, dump the record and reset it.
961 dump_unified_vec(outfile, changes, ds, args, f1, f2,
964 ds->context_vec_ptr++;
965 ds->context_vec_ptr->a = a;
966 ds->context_vec_ptr->b = b;
967 ds->context_vec_ptr->c = c;
968 ds->context_vec_ptr->d = d;
971 if (ds->anychange == 0)
973 if (args->diff_format == D_BRIEF)
975 if (args->diff_format == D_NORMAL) {
976 range(outfile, a, b, ",");
977 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
978 range(outfile, c, d, ",");
979 diff_output(outfile, "\n");
980 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
981 if (a <= b && c <= d)
982 diff_output(outfile, "---\n");
984 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
985 args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
990 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
991 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
993 int i, j, c, lastc, col, nc;
997 for (i = a; i <= b; i++) {
998 fseek(lb, f[i - 1], SEEK_SET);
999 nc = f[i] - f[i - 1];
1001 diff_output(outfile, "%c", ch);
1002 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1003 args->diff_format == D_NORMAL))
1004 diff_output(outfile, "\t");
1005 else if (args->diff_format != D_UNIFIED)
1006 diff_output(outfile, " ");
1009 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1010 if ((c = getc(lb)) == EOF) {
1011 diff_output(outfile, "\n\\ No newline at end of "
1015 if (c == '\t' && (flags & D_EXPANDTABS)) {
1017 diff_output(outfile, " ");
1018 } while (++col & 7);
1020 diff_output(outfile, "%c", c);
1029 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1032 readhash(struct got_diff_state *ds, FILE *f, int flags)
1039 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1040 if (flags & D_IGNORECASE)
1041 for (i = 0; (t = getc(f)) != '\n'; i++) {
1047 sum = sum * 127 + ds->chrtran[t];
1050 for (i = 0; (t = getc(f)) != '\n'; i++) {
1056 sum = sum * 127 + t;
1060 switch (t = getc(f)) {
1069 if (space && (flags & D_IGNOREBLANKS) == 0) {
1073 sum = sum * 127 + ds->chrtran[t];
1087 * There is a remote possibility that we end up with a zero sum.
1088 * Zero is used as an EOF marker, so return 1 instead.
1090 return (sum == 0 ? 1 : sum);
1096 unsigned char buf[BUFSIZ];
1103 cnt = fread(buf, 1, sizeof(buf), f);
1104 return (memchr(buf, '\0', cnt) == NULL);
1107 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1110 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1112 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1114 int last = ds->lastline;
1118 while (pos > last) {
1119 fseek(fp, f[pos - 1], SEEK_SET);
1120 nc = f[pos] - f[pos - 1];
1121 if (nc >= sizeof(buf))
1122 nc = sizeof(buf) - 1;
1123 nc = fread(buf, 1, nc, fp);
1126 buf[strcspn(buf, "\n")] = '\0';
1127 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1128 if (begins_with(buf, "private:")) {
1130 state = " (private)";
1131 } else if (begins_with(buf, "protected:")) {
1133 state = " (protected)";
1134 } else if (begins_with(buf, "public:")) {
1136 state = " (public)";
1138 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1140 strlcat(ds->lastbuf, state,
1141 sizeof ds->lastbuf);
1142 ds->lastmatchline = pos;
1149 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1152 /* dump accumulated "unified" diff changes */
1154 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1155 struct got_diff_state *ds, struct got_diff_args *args,
1156 FILE *f1, FILE *f2, int flags)
1158 struct context_vec *cvp = ds->context_vec_start;
1159 int lowa, upb, lowc, upd;
1163 if (ds->context_vec_start > ds->context_vec_ptr)
1166 b = d = 0; /* gcc */
1167 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1168 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1169 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1170 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1172 diff_output(outfile, "@@ -");
1173 uni_range(outfile, lowa, upb);
1174 diff_output(outfile, " +");
1175 uni_range(outfile, lowc, upd);
1176 diff_output(outfile, " @@");
1177 if ((flags & D_PROTOTYPE)) {
1178 f = match_function(ds, ds->ixold, lowa-1, f1);
1180 diff_output(outfile, " %s", f);
1182 diff_output(outfile, "\n");
1185 * Output changes in "unified" diff format--the old and new lines
1186 * are printed together.
1188 for (; cvp <= ds->context_vec_ptr; cvp++) {
1190 struct got_diff_change *change;
1191 change = calloc(1, sizeof(*change));
1193 memcpy(&change->cv, cvp, sizeof(change->cv));
1194 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1196 changes->nchanges++;
1206 * c: both new and old changes
1207 * d: only changes in the old file
1208 * a: only changes in the new file
1210 if (a <= b && c <= d)
1213 ch = (a <= b) ? 'd' : 'a';
1217 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1218 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1219 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1222 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1223 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1226 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1227 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1233 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1235 ds->context_vec_ptr = ds->context_vec_start - 1;
1239 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1240 const char *file1, const char *file2)
1242 if (args->label[0] != NULL)
1243 diff_output(outfile, "--- %s\n", args->label[0]);
1245 diff_output(outfile, "--- %s\t%s", file1,
1246 ctime(&ds->stb1.st_mtime));
1247 if (args->label[1] != NULL)
1248 diff_output(outfile, "+++ %s\n", args->label[1]);
1250 diff_output(outfile, "+++ %s\t%s", file2,
1251 ctime(&ds->stb2.st_mtime));