1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 diff_output(FILE *outfile, const char *fmt, ...)
266 vfprintf(outfile, fmt, ap);
270 const struct got_error *
271 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
272 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
273 struct got_diff_changes *changes)
275 const struct got_error *err = NULL;
282 ds->lastmatchline = 0;
283 ds->context_vec_ptr = ds->context_vec_start - 1;
284 ds->max_context = GOT_DIFF_MAX_CONTEXT;
285 if (flags & D_IGNORECASE)
286 ds->chrtran = cup2low;
288 ds->chrtran = clow2low;
289 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
290 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
293 if (flags & D_EMPTY1) {
294 f1 = fopen(_PATH_DEVNULL, "r");
296 err = got_error_from_errno();
300 else if (f1 == NULL) {
305 if (flags & D_EMPTY2) {
306 f2 = fopen(_PATH_DEVNULL, "r");
308 err = got_error_from_errno();
311 } else if (f2 == NULL) {
316 switch (files_differ(ds, f1, f2, flags)) {
327 if ((flags & D_FORCEASCII) == 0 &&
328 (!asciifile(f1) || !asciifile(f2))) {
333 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 err = got_error_from_errno();
337 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 err = got_error_from_errno();
343 sort(ds->sfile[0], ds->slen[0]);
344 sort(ds->sfile[1], ds->slen[1]);
346 ds->member = (int *)ds->file[1];
347 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
350 err = got_error_from_errno();
355 ds->class = (int *)ds->file[0];
356 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
357 err = got_error_from_errno();
360 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
362 err = got_error_from_errno();
367 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
368 if (ds->klist == NULL) {
369 err = got_error_from_errno();
374 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
375 if (ds->clist == NULL) {
376 err = got_error_from_errno();
379 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
381 err = got_error_from_errno();
385 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
387 err = got_error_from_errno();
391 unravel(ds, ds->klist[i]);
393 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
395 err = got_error_from_errno();
399 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
401 err = got_error_from_errno();
405 check(ds, f1, f2, flags);
406 if (output(outfile, changes, ds, args, args->label[0], f1,
407 args->label[1], f2, flags))
408 err = got_error_from_errno();
431 * Check to see if the given files differ.
432 * Returns 0 if they are the same, 1 if different, and -1 on error.
433 * XXX - could use code from cmp(1) [faster]
436 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
438 char buf1[BUFSIZ], buf2[BUFSIZ];
441 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
442 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
445 i = fread(buf1, 1, sizeof(buf1), f1);
446 j = fread(buf2, 1, sizeof(buf2), f2);
447 if ((!i && ferror(f1)) || (!j && ferror(f2)))
453 if (memcmp(buf1, buf2, i) != 0)
459 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
467 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
471 p = calloc(sz + 3, sizeof(*p));
474 for (j = 0; (h = readhash(ds, fd, flags));) {
477 q = reallocarray(p, sz + 3, sizeof(*p));
493 prune(struct got_diff_state *ds)
497 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
498 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
501 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
502 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
505 for (j = 0; j < 2; j++) {
506 ds->sfile[j] = ds->file[j] + ds->pref;
507 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
508 for (i = 0; i <= ds->slen[j]; i++)
509 ds->sfile[j][i].serial = i;
514 equiv(struct line *a, int n, struct line *b, int m, int *c)
519 while (i <= n && j <= m) {
520 if (a[i].value < b[j].value)
522 else if (a[i].value == b[j].value)
533 while (b[j + 1].value == b[j].value) {
541 /* Code taken from ping.c */
550 do { /* newton was a stinker */
555 } while ((x - y) > 1 || (x - y) < -1);
561 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
564 int oldc, tc, oldl, sq;
565 u_int numtries, bound;
568 if (flags & D_MINIMAL)
572 bound = MAXIMUM(256, sq);
576 c[0] = newcand(ds, 0, 0, 0, &error);
579 for (i = 1; i <= n; i++) {
588 if (y <= ds->clist[oldc].y)
590 l = search(ds, c, k, y);
594 if (ds->clist[c[l]].y <= y)
597 c[l] = newcand(ds, i, y, oldc, &error);
604 c[l] = newcand(ds, i, y, oldc, &error);
610 } while ((y = b[++j]) > 0 && numtries < bound);
616 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
620 if (ds->clen == ds->clistlen) {
621 ds->clistlen = ds->clistlen * 11 / 10;
622 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
631 q = ds->clist + ds->clen;
640 search(struct got_diff_state *ds, int *c, int k, int y)
644 if (ds->clist[c[k]].y < y) /* quick look for typical case */
652 t = ds->clist[c[l]].y;
664 unravel(struct got_diff_state *ds, int p)
669 for (i = 0; i <= ds->len[0]; i++)
670 ds->J[i] = i <= ds->pref ? i :
671 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
672 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
673 ds->J[q->x + ds->pref] = q->y + ds->pref;
677 * Check does double duty:
678 * 1. ferret out any fortuitous correspondences due
679 * to confounding by hashing (which result in "jackpot")
680 * 2. collect random access indexes to the two files
683 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
685 int i, j, jackpot, c, d;
691 ds->ixold[0] = ds->ixnew[0] = 0;
694 for (i = 1; i <= ds->len[0]; i++) {
696 ds->ixold[i] = ctold += skipline(f1);
699 while (j < ds->J[i]) {
700 ds->ixnew[j] = ctnew += skipline(f2);
703 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
708 * GNU diff ignores a missing newline
709 * in one file for -b or -w.
711 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
712 if (c == EOF && d == '\n') {
715 } else if (c == '\n' && d == EOF) {
722 if ((flags & D_FOLDBLANKS) && isspace(c) &&
728 } while (isspace(c = getc(f1)));
733 } while (isspace(d = getc(f2)));
734 } else if ((flags & D_IGNOREBLANKS)) {
735 while (isspace(c) && c != '\n') {
739 while (isspace(d) && d != '\n') {
744 if (ds->chrtran[c] != ds->chrtran[d]) {
747 if (c != '\n' && c != EOF)
748 ctold += skipline(f1);
749 if (d != '\n' && c != EOF)
750 ctnew += skipline(f2);
753 if (c == '\n' || c == EOF)
760 if ((c = getc(f1)) != (d = getc(f2))) {
763 if (c != '\n' && c != EOF)
764 ctold += skipline(f1);
765 if (d != '\n' && c != EOF)
766 ctnew += skipline(f2);
769 if (c == '\n' || c == EOF)
773 ds->ixold[i] = ctold;
774 ds->ixnew[j] = ctnew;
777 for (; j <= ds->len[1]; j++)
778 ds->ixnew[j] = ctnew += skipline(f2);
781 * fprintf(stderr, "jackpot\n");
785 /* shellsort CACM #201 */
787 sort(struct line *a, int n)
789 struct line *ai, *aim, w;
794 for (j = 1; j <= n; j *= 2)
796 for (m /= 2; m != 0; m /= 2) {
798 for (j = 1; j <= k; j++) {
799 for (ai = &a[j]; ai > a; ai -= m) {
802 break; /* wraparound */
803 if (aim->value > ai[0].value ||
804 (aim->value == ai[0].value &&
805 aim->serial > ai[0].serial))
807 w.value = ai[0].value;
808 ai[0].value = aim->value;
809 aim->value = w.value;
810 w.serial = ai[0].serial;
811 ai[0].serial = aim->serial;
812 aim->serial = w.serial;
819 unsort(struct line *f, int l, int *b)
823 a = calloc(l + 1, sizeof(*a));
826 for (i = 1; i <= l; i++)
827 a[f[i].serial] = f[i].value;
828 for (i = 1; i <= l; i++)
840 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
846 output(FILE *outfile, struct got_diff_changes *changes,
847 struct got_diff_state *ds, struct got_diff_args *args,
848 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
850 int m, i0, i1, j0, j1;
857 ds->J[m + 1] = ds->len[1] + 1;
858 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
859 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
861 j0 = ds->J[i0 - 1] + 1;
863 while (i1 < m && ds->J[i1 + 1] == 0)
865 j1 = ds->J[i1 + 1] - 1;
867 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
868 i0, i1, j0, j1, &flags);
873 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
874 1, 0, 1, ds->len[1], &flags);
878 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
879 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
885 range(FILE *outfile, int a, int b, char *separator)
887 diff_output(outfile, "%d", a > b ? b : a);
889 diff_output(outfile, "%s%d", separator, b);
893 uni_range(FILE *outfile, int a, int b)
896 diff_output(outfile, "%d,%d", a, b - a + 1);
898 diff_output(outfile, "%d", b);
900 diff_output(outfile, "%d,0", b);
904 * Indicate that there is a difference between lines a and b of the from file
905 * to get to lines c to d of the to file. If a is greater then b then there
906 * are no lines in the from file involved and this means that there were
907 * lines appended (beginning at b). If c is greater than d then there are
908 * lines missing from the to file.
911 change(FILE *outfile, struct got_diff_changes *changes,
912 struct got_diff_state *ds, struct got_diff_args *args,
913 const char *file1, FILE *f1, const char *file2, FILE *f2,
914 int a, int b, int c, int d, int *pflags)
921 if (*pflags & D_HEADER) {
922 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
923 *pflags &= ~D_HEADER;
925 if (args->diff_format == D_UNIFIED) {
927 * Allocate change records as needed.
929 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
930 struct context_vec *cvp;
932 offset = ds->context_vec_ptr - ds->context_vec_start;
933 ds->max_context <<= 1;
934 ds->context_vec_start =
935 cvp = reallocarray(ds->context_vec_start,
936 ds->max_context, sizeof(*ds->context_vec_start));
938 free(ds->context_vec_start);
941 ds->context_vec_start = cvp;
942 ds->context_vec_end = ds->context_vec_start +
944 ds->context_vec_ptr = ds->context_vec_start + offset;
946 if (ds->anychange == 0) {
948 * Print the context/unidiff header first time through.
950 print_header(outfile, ds, args, file1, file2);
952 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
953 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
955 * If this change is more than 'diff_context' lines from the
956 * previous change, dump the record and reset it.
958 dump_unified_vec(outfile, changes, ds, args, f1, f2,
961 ds->context_vec_ptr++;
962 ds->context_vec_ptr->a = a;
963 ds->context_vec_ptr->b = b;
964 ds->context_vec_ptr->c = c;
965 ds->context_vec_ptr->d = d;
968 if (ds->anychange == 0)
970 if (args->diff_format == D_BRIEF)
972 if (args->diff_format == D_NORMAL) {
973 range(outfile, a, b, ",");
974 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
975 range(outfile, c, d, ",");
976 diff_output(outfile, "\n");
977 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
978 if (a <= b && c <= d)
979 diff_output(outfile, "---\n");
981 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
982 args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
987 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
988 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
990 int i, j, c, lastc, col, nc;
994 for (i = a; i <= b; i++) {
995 fseek(lb, f[i - 1], SEEK_SET);
996 nc = f[i] - f[i - 1];
998 diff_output(outfile, "%c", ch);
999 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1000 args->diff_format == D_NORMAL))
1001 diff_output(outfile, "\t");
1002 else if (args->diff_format != D_UNIFIED)
1003 diff_output(outfile, " ");
1006 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1007 if ((c = getc(lb)) == EOF) {
1008 diff_output(outfile, "\n\\ No newline at end of "
1012 if (c == '\t' && (flags & D_EXPANDTABS)) {
1014 diff_output(outfile, " ");
1015 } while (++col & 7);
1017 diff_output(outfile, "%c", c);
1026 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1029 readhash(struct got_diff_state *ds, FILE *f, int flags)
1036 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1037 if (flags & D_IGNORECASE)
1038 for (i = 0; (t = getc(f)) != '\n'; i++) {
1044 sum = sum * 127 + ds->chrtran[t];
1047 for (i = 0; (t = getc(f)) != '\n'; i++) {
1053 sum = sum * 127 + t;
1057 switch (t = getc(f)) {
1066 if (space && (flags & D_IGNOREBLANKS) == 0) {
1070 sum = sum * 127 + ds->chrtran[t];
1084 * There is a remote possibility that we end up with a zero sum.
1085 * Zero is used as an EOF marker, so return 1 instead.
1087 return (sum == 0 ? 1 : sum);
1093 unsigned char buf[BUFSIZ];
1100 cnt = fread(buf, 1, sizeof(buf), f);
1101 return (memchr(buf, '\0', cnt) == NULL);
1104 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1107 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1109 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1111 int last = ds->lastline;
1115 while (pos > last) {
1116 fseek(fp, f[pos - 1], SEEK_SET);
1117 nc = f[pos] - f[pos - 1];
1118 if (nc >= sizeof(buf))
1119 nc = sizeof(buf) - 1;
1120 nc = fread(buf, 1, nc, fp);
1123 buf[strcspn(buf, "\n")] = '\0';
1124 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1125 if (begins_with(buf, "private:")) {
1127 state = " (private)";
1128 } else if (begins_with(buf, "protected:")) {
1130 state = " (protected)";
1131 } else if (begins_with(buf, "public:")) {
1133 state = " (public)";
1135 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1137 strlcat(ds->lastbuf, state,
1138 sizeof ds->lastbuf);
1139 ds->lastmatchline = pos;
1146 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1149 /* dump accumulated "unified" diff changes */
1151 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1152 struct got_diff_state *ds, struct got_diff_args *args,
1153 FILE *f1, FILE *f2, int flags)
1155 struct context_vec *cvp = ds->context_vec_start;
1156 int lowa, upb, lowc, upd;
1160 if (ds->context_vec_start > ds->context_vec_ptr)
1163 b = d = 0; /* gcc */
1164 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1165 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1166 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1167 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1169 diff_output(outfile, "@@ -");
1170 uni_range(outfile, lowa, upb);
1171 diff_output(outfile, " +");
1172 uni_range(outfile, lowc, upd);
1173 diff_output(outfile, " @@");
1174 if ((flags & D_PROTOTYPE)) {
1175 f = match_function(ds, ds->ixold, lowa-1, f1);
1177 diff_output(outfile, " %s", f);
1179 diff_output(outfile, "\n");
1182 * Output changes in "unified" diff format--the old and new lines
1183 * are printed together.
1185 for (; cvp <= ds->context_vec_ptr; cvp++) {
1187 struct got_diff_change *change;
1188 change = calloc(1, sizeof(*change));
1190 memcpy(&change->cv, cvp, sizeof(change->cv));
1191 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1193 changes->nchanges++;
1203 * c: both new and old changes
1204 * d: only changes in the old file
1205 * a: only changes in the new file
1207 if (a <= b && c <= d)
1210 ch = (a <= b) ? 'd' : 'a';
1214 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1215 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1216 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1219 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1220 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1223 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1224 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1230 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1232 ds->context_vec_ptr = ds->context_vec_start - 1;
1236 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1237 const char *file1, const char *file2)
1239 if (args->label[0] != NULL)
1240 diff_output(outfile, "--- %s\n", args->label[0]);
1242 diff_output(outfile, "--- %s\t%s", file1,
1243 ctime(&ds->stb1.st_mtime));
1244 if (args->label[1] != NULL)
1245 diff_output(outfile, "+++ %s\n", args->label[1]);
1247 diff_output(outfile, "+++ %s\t%s", file2,
1248 ctime(&ds->stb2.st_mtime));