1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 diff_output(FILE *outfile, const char *fmt, ...)
266 vfprintf(outfile, fmt, ap);
270 const struct got_error *
271 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
272 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
273 struct got_diff_changes *changes)
275 const struct got_error *err = NULL;
282 ds->lastmatchline = 0;
283 ds->context_vec_ptr = ds->context_vec_start - 1;
284 ds->max_context = GOT_DIFF_MAX_CONTEXT;
285 if (flags & D_IGNORECASE)
286 ds->chrtran = cup2low;
288 ds->chrtran = clow2low;
289 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
290 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
293 if (flags & D_EMPTY1) {
294 f1 = fopen(_PATH_DEVNULL, "r");
296 err = got_error_from_errno();
300 else if (f1 == NULL) {
305 if (flags & D_EMPTY2) {
306 f2 = fopen(_PATH_DEVNULL, "r");
308 err = got_error_from_errno();
311 } else if (f2 == NULL) {
316 switch (files_differ(ds, f1, f2, flags)) {
327 if ((flags & D_FORCEASCII) == 0 &&
328 (!asciifile(f1) || !asciifile(f2))) {
333 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 err = got_error_from_errno();
337 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 err = got_error_from_errno();
343 sort(ds->sfile[0], ds->slen[0]);
344 sort(ds->sfile[1], ds->slen[1]);
346 ds->member = (int *)ds->file[1];
347 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
350 err = got_error_from_errno();
355 ds->class = (int *)ds->file[0];
356 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
357 err = got_error_from_errno();
360 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
362 err = got_error_from_errno();
367 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
368 if (ds->klist == NULL) {
369 err = got_error_from_errno();
374 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
375 if (ds->clist == NULL) {
376 err = got_error_from_errno();
379 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
381 err = got_error_from_errno();
385 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
387 err = got_error_from_errno();
391 unravel(ds, ds->klist[i]);
393 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
395 err = got_error_from_errno();
399 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
401 err = got_error_from_errno();
405 check(ds, f1, f2, flags);
406 if (output(outfile, changes, ds, args, args->label[0], f1,
407 args->label[1], f2, flags))
408 err = got_error_from_errno();
427 * Check to see if the given files differ.
428 * Returns 0 if they are the same, 1 if different, and -1 on error.
429 * XXX - could use code from cmp(1) [faster]
432 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
434 char buf1[BUFSIZ], buf2[BUFSIZ];
437 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
438 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
441 i = fread(buf1, 1, sizeof(buf1), f1);
442 j = fread(buf2, 1, sizeof(buf2), f2);
443 if ((!i && ferror(f1)) || (!j && ferror(f2)))
449 if (memcmp(buf1, buf2, i) != 0)
455 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
463 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
467 p = calloc(sz + 3, sizeof(*p));
470 for (j = 0; (h = readhash(ds, fd, flags));) {
473 q = reallocarray(p, sz + 3, sizeof(*p));
489 prune(struct got_diff_state *ds)
493 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
494 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
497 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
498 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
501 for (j = 0; j < 2; j++) {
502 ds->sfile[j] = ds->file[j] + ds->pref;
503 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
504 for (i = 0; i <= ds->slen[j]; i++)
505 ds->sfile[j][i].serial = i;
510 equiv(struct line *a, int n, struct line *b, int m, int *c)
515 while (i <= n && j <= m) {
516 if (a[i].value < b[j].value)
518 else if (a[i].value == b[j].value)
529 while (b[j + 1].value == b[j].value) {
537 /* Code taken from ping.c */
546 do { /* newton was a stinker */
551 } while ((x - y) > 1 || (x - y) < -1);
557 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
560 int oldc, tc, oldl, sq;
561 u_int numtries, bound;
564 if (flags & D_MINIMAL)
568 bound = MAXIMUM(256, sq);
572 c[0] = newcand(ds, 0, 0, 0, &error);
575 for (i = 1; i <= n; i++) {
584 if (y <= ds->clist[oldc].y)
586 l = search(ds, c, k, y);
590 if (ds->clist[c[l]].y <= y)
593 c[l] = newcand(ds, i, y, oldc, &error);
600 c[l] = newcand(ds, i, y, oldc, &error);
606 } while ((y = b[++j]) > 0 && numtries < bound);
612 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
616 if (ds->clen == ds->clistlen) {
617 ds->clistlen = ds->clistlen * 11 / 10;
618 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
627 q = ds->clist + ds->clen;
636 search(struct got_diff_state *ds, int *c, int k, int y)
640 if (ds->clist[c[k]].y < y) /* quick look for typical case */
648 t = ds->clist[c[l]].y;
660 unravel(struct got_diff_state *ds, int p)
665 for (i = 0; i <= ds->len[0]; i++)
666 ds->J[i] = i <= ds->pref ? i :
667 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
668 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
669 ds->J[q->x + ds->pref] = q->y + ds->pref;
673 * Check does double duty:
674 * 1. ferret out any fortuitous correspondences due
675 * to confounding by hashing (which result in "jackpot")
676 * 2. collect random access indexes to the two files
679 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
681 int i, j, jackpot, c, d;
687 ds->ixold[0] = ds->ixnew[0] = 0;
690 for (i = 1; i <= ds->len[0]; i++) {
692 ds->ixold[i] = ctold += skipline(f1);
695 while (j < ds->J[i]) {
696 ds->ixnew[j] = ctnew += skipline(f2);
699 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
704 * GNU diff ignores a missing newline
705 * in one file for -b or -w.
707 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
708 if (c == EOF && d == '\n') {
711 } else if (c == '\n' && d == EOF) {
718 if ((flags & D_FOLDBLANKS) && isspace(c) &&
724 } while (isspace(c = getc(f1)));
729 } while (isspace(d = getc(f2)));
730 } else if ((flags & D_IGNOREBLANKS)) {
731 while (isspace(c) && c != '\n') {
735 while (isspace(d) && d != '\n') {
740 if (ds->chrtran[c] != ds->chrtran[d]) {
743 if (c != '\n' && c != EOF)
744 ctold += skipline(f1);
745 if (d != '\n' && c != EOF)
746 ctnew += skipline(f2);
749 if (c == '\n' || c == EOF)
756 if ((c = getc(f1)) != (d = getc(f2))) {
759 if (c != '\n' && c != EOF)
760 ctold += skipline(f1);
761 if (d != '\n' && c != EOF)
762 ctnew += skipline(f2);
765 if (c == '\n' || c == EOF)
769 ds->ixold[i] = ctold;
770 ds->ixnew[j] = ctnew;
773 for (; j <= ds->len[1]; j++)
774 ds->ixnew[j] = ctnew += skipline(f2);
777 * fprintf(stderr, "jackpot\n");
781 /* shellsort CACM #201 */
783 sort(struct line *a, int n)
785 struct line *ai, *aim, w;
790 for (j = 1; j <= n; j *= 2)
792 for (m /= 2; m != 0; m /= 2) {
794 for (j = 1; j <= k; j++) {
795 for (ai = &a[j]; ai > a; ai -= m) {
798 break; /* wraparound */
799 if (aim->value > ai[0].value ||
800 (aim->value == ai[0].value &&
801 aim->serial > ai[0].serial))
803 w.value = ai[0].value;
804 ai[0].value = aim->value;
805 aim->value = w.value;
806 w.serial = ai[0].serial;
807 ai[0].serial = aim->serial;
808 aim->serial = w.serial;
815 unsort(struct line *f, int l, int *b)
819 a = calloc(l + 1, sizeof(*a));
822 for (i = 1; i <= l; i++)
823 a[f[i].serial] = f[i].value;
824 for (i = 1; i <= l; i++)
836 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
842 output(FILE *outfile, struct got_diff_changes *changes,
843 struct got_diff_state *ds, struct got_diff_args *args,
844 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
846 int m, i0, i1, j0, j1;
853 ds->J[m + 1] = ds->len[1] + 1;
854 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
855 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
857 j0 = ds->J[i0 - 1] + 1;
859 while (i1 < m && ds->J[i1 + 1] == 0)
861 j1 = ds->J[i1 + 1] - 1;
863 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
864 i0, i1, j0, j1, &flags);
869 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
870 1, 0, 1, ds->len[1], &flags);
874 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
875 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
881 range(FILE *outfile, int a, int b, char *separator)
883 diff_output(outfile, "%d", a > b ? b : a);
885 diff_output(outfile, "%s%d", separator, b);
889 uni_range(FILE *outfile, int a, int b)
892 diff_output(outfile, "%d,%d", a, b - a + 1);
894 diff_output(outfile, "%d", b);
896 diff_output(outfile, "%d,0", b);
900 * Indicate that there is a difference between lines a and b of the from file
901 * to get to lines c to d of the to file. If a is greater then b then there
902 * are no lines in the from file involved and this means that there were
903 * lines appended (beginning at b). If c is greater than d then there are
904 * lines missing from the to file.
907 change(FILE *outfile, struct got_diff_changes *changes,
908 struct got_diff_state *ds, struct got_diff_args *args,
909 const char *file1, FILE *f1, const char *file2, FILE *f2,
910 int a, int b, int c, int d, int *pflags)
917 if (*pflags & D_HEADER) {
918 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
919 *pflags &= ~D_HEADER;
921 if (args->diff_format == D_UNIFIED) {
923 * Allocate change records as needed.
925 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
926 struct context_vec *cvp;
928 offset = ds->context_vec_ptr - ds->context_vec_start;
929 ds->max_context <<= 1;
930 ds->context_vec_start =
931 cvp = reallocarray(ds->context_vec_start,
932 ds->max_context, sizeof(*ds->context_vec_start));
934 free(ds->context_vec_start);
937 ds->context_vec_start = cvp;
938 ds->context_vec_end = ds->context_vec_start +
940 ds->context_vec_ptr = ds->context_vec_start + offset;
942 if (ds->anychange == 0) {
944 * Print the context/unidiff header first time through.
946 print_header(outfile, ds, args, file1, file2);
948 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
949 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
951 * If this change is more than 'diff_context' lines from the
952 * previous change, dump the record and reset it.
954 dump_unified_vec(outfile, changes, ds, args, f1, f2,
957 ds->context_vec_ptr++;
958 ds->context_vec_ptr->a = a;
959 ds->context_vec_ptr->b = b;
960 ds->context_vec_ptr->c = c;
961 ds->context_vec_ptr->d = d;
964 if (ds->anychange == 0)
966 if (args->diff_format == D_BRIEF)
968 if (args->diff_format == D_NORMAL) {
969 range(outfile, a, b, ",");
970 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
971 range(outfile, c, d, ",");
972 diff_output(outfile, "\n");
973 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
974 if (a <= b && c <= d)
975 diff_output(outfile, "---\n");
977 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
978 args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
983 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
984 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
986 int i, j, c, lastc, col, nc;
990 for (i = a; i <= b; i++) {
991 fseek(lb, f[i - 1], SEEK_SET);
992 nc = f[i] - f[i - 1];
994 diff_output(outfile, "%c", ch);
995 if (args->Tflag && (args->diff_format == D_UNIFIED ||
996 args->diff_format == D_NORMAL))
997 diff_output(outfile, "\t");
998 else if (args->diff_format != D_UNIFIED)
999 diff_output(outfile, " ");
1002 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1003 if ((c = getc(lb)) == EOF) {
1004 diff_output(outfile, "\n\\ No newline at end of "
1008 if (c == '\t' && (flags & D_EXPANDTABS)) {
1010 diff_output(outfile, " ");
1011 } while (++col & 7);
1013 diff_output(outfile, "%c", c);
1022 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1025 readhash(struct got_diff_state *ds, FILE *f, int flags)
1032 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1033 if (flags & D_IGNORECASE)
1034 for (i = 0; (t = getc(f)) != '\n'; i++) {
1040 sum = sum * 127 + ds->chrtran[t];
1043 for (i = 0; (t = getc(f)) != '\n'; i++) {
1049 sum = sum * 127 + t;
1053 switch (t = getc(f)) {
1062 if (space && (flags & D_IGNOREBLANKS) == 0) {
1066 sum = sum * 127 + ds->chrtran[t];
1080 * There is a remote possibility that we end up with a zero sum.
1081 * Zero is used as an EOF marker, so return 1 instead.
1083 return (sum == 0 ? 1 : sum);
1089 unsigned char buf[BUFSIZ];
1096 cnt = fread(buf, 1, sizeof(buf), f);
1097 return (memchr(buf, '\0', cnt) == NULL);
1100 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1103 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1105 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1107 int last = ds->lastline;
1111 while (pos > last) {
1112 fseek(fp, f[pos - 1], SEEK_SET);
1113 nc = f[pos] - f[pos - 1];
1114 if (nc >= sizeof(buf))
1115 nc = sizeof(buf) - 1;
1116 nc = fread(buf, 1, nc, fp);
1119 buf[strcspn(buf, "\n")] = '\0';
1120 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1121 if (begins_with(buf, "private:")) {
1123 state = " (private)";
1124 } else if (begins_with(buf, "protected:")) {
1126 state = " (protected)";
1127 } else if (begins_with(buf, "public:")) {
1129 state = " (public)";
1131 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1133 strlcat(ds->lastbuf, state,
1134 sizeof ds->lastbuf);
1135 ds->lastmatchline = pos;
1142 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1145 /* dump accumulated "unified" diff changes */
1147 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1148 struct got_diff_state *ds, struct got_diff_args *args,
1149 FILE *f1, FILE *f2, int flags)
1151 struct context_vec *cvp = ds->context_vec_start;
1152 int lowa, upb, lowc, upd;
1156 if (ds->context_vec_start > ds->context_vec_ptr)
1159 b = d = 0; /* gcc */
1160 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1161 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1162 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1163 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1165 diff_output(outfile, "@@ -");
1166 uni_range(outfile, lowa, upb);
1167 diff_output(outfile, " +");
1168 uni_range(outfile, lowc, upd);
1169 diff_output(outfile, " @@");
1170 if ((flags & D_PROTOTYPE)) {
1171 f = match_function(ds, ds->ixold, lowa-1, f1);
1173 diff_output(outfile, " %s", f);
1175 diff_output(outfile, "\n");
1178 * Output changes in "unified" diff format--the old and new lines
1179 * are printed together.
1181 for (; cvp <= ds->context_vec_ptr; cvp++) {
1183 struct got_diff_change *change;
1184 change = calloc(1, sizeof(*change));
1186 memcpy(&change->cv, cvp, sizeof(change->cv));
1187 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1189 changes->nchanges++;
1199 * c: both new and old changes
1200 * d: only changes in the old file
1201 * a: only changes in the new file
1203 if (a <= b && c <= d)
1206 ch = (a <= b) ? 'd' : 'a';
1210 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1211 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1212 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1215 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1216 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1219 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1220 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1226 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1228 ds->context_vec_ptr = ds->context_vec_start - 1;
1232 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1233 const char *file1, const char *file2)
1235 if (args->label[0] != NULL)
1236 diff_output(outfile, "--- %s\n", args->label[0]);
1238 diff_output(outfile, "--- %s\t%s", file1,
1239 ctime(&ds->stb1.st_mtime));
1240 if (args->label[1] != NULL)
1241 diff_output(outfile, "+++ %s\n", args->label[1]);
1243 diff_output(outfile, "+++ %s\t%s", file2,
1244 ctime(&ds->stb2.st_mtime));