1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void range(FILE *, int, int, char *);
190 static void uni_range(FILE *, int, int);
191 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
193 static void prune(struct got_diff_state *);
194 static void equiv(struct line *, int, struct line *, int, int *);
195 static void unravel(struct got_diff_state *, int);
196 static int unsort(struct line *, int, int *);
197 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
198 static void sort(struct line *, int);
199 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
200 static int asciifile(FILE *);
201 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
202 static int newcand(struct got_diff_state *, int, int, int, int *);
203 static int search(struct got_diff_state *, int *, int, int);
204 static int skipline(FILE *);
205 static int isqrt(int);
206 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
207 static int readhash(struct got_diff_state *, FILE *, int);
208 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
209 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
212 * chrtran points to one of 2 translation tables: cup2low if folding upper to
213 * lower case clow2low if not folding case
215 u_char clow2low[256] = {
216 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
217 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
218 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
219 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
220 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
221 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
222 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
223 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
224 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
225 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
226 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
227 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
228 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
229 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
230 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
231 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
232 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
233 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
234 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
235 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
236 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
237 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
238 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 u_char cup2low[256] = {
243 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
244 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
245 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
246 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
247 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
248 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
249 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
250 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
251 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
252 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
253 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
254 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
255 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
256 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
257 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
258 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
259 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
260 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
261 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
262 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
263 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
264 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
265 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
270 diff_output(FILE *outfile, const char *fmt, ...)
275 vfprintf(outfile, fmt, ap);
279 const struct got_error *
280 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
281 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
283 const struct got_error *err = NULL;
290 ds->lastmatchline = 0;
291 ds->context_vec_ptr = ds->context_vec_start - 1;
292 ds->max_context = 64;
293 if (flags & D_IGNORECASE)
294 ds->chrtran = cup2low;
296 ds->chrtran = clow2low;
297 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
298 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
301 if (flags & D_EMPTY1) {
302 f1 = fopen(_PATH_DEVNULL, "r");
304 err = got_error_from_errno();
308 else if (f1 == NULL) {
313 if (flags & D_EMPTY2) {
314 f2 = fopen(_PATH_DEVNULL, "r");
316 err = got_error_from_errno();
319 } else if (f2 == NULL) {
324 switch (files_differ(ds, f1, f2, flags)) {
335 if ((flags & D_FORCEASCII) == 0 &&
336 (!asciifile(f1) || !asciifile(f2))) {
341 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
342 err = got_error_from_errno();
345 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
346 err = got_error_from_errno();
351 sort(ds->sfile[0], ds->slen[0]);
352 sort(ds->sfile[1], ds->slen[1]);
354 ds->member = (int *)ds->file[1];
355 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
356 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
358 err = got_error_from_errno();
363 ds->class = (int *)ds->file[0];
364 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
365 err = got_error_from_errno();
368 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
370 err = got_error_from_errno();
375 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
376 if (ds->klist == NULL) {
377 err = got_error_from_errno();
382 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
383 if (ds->clist == NULL) {
384 err = got_error_from_errno();
387 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
389 err = got_error_from_errno();
393 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
395 err = got_error_from_errno();
399 unravel(ds, ds->klist[i]);
401 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
403 err = got_error_from_errno();
407 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
409 err = got_error_from_errno();
413 check(ds, f1, f2, flags);
414 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
416 err = got_error_from_errno();
439 * Check to see if the given files differ.
440 * Returns 0 if they are the same, 1 if different, and -1 on error.
441 * XXX - could use code from cmp(1) [faster]
444 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
446 char buf1[BUFSIZ], buf2[BUFSIZ];
449 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
450 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
453 i = fread(buf1, 1, sizeof(buf1), f1);
454 j = fread(buf2, 1, sizeof(buf2), f2);
455 if ((!i && ferror(f1)) || (!j && ferror(f2)))
461 if (memcmp(buf1, buf2, i) != 0)
467 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
475 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
479 p = calloc(sz + 3, sizeof(*p));
482 for (j = 0; (h = readhash(ds, fd, flags));) {
485 q = reallocarray(p, sz + 3, sizeof(*p));
501 prune(struct got_diff_state *ds)
505 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
506 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
509 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
510 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
513 for (j = 0; j < 2; j++) {
514 ds->sfile[j] = ds->file[j] + ds->pref;
515 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
516 for (i = 0; i <= ds->slen[j]; i++)
517 ds->sfile[j][i].serial = i;
522 equiv(struct line *a, int n, struct line *b, int m, int *c)
527 while (i <= n && j <= m) {
528 if (a[i].value < b[j].value)
530 else if (a[i].value == b[j].value)
541 while (b[j + 1].value == b[j].value) {
549 /* Code taken from ping.c */
558 do { /* newton was a stinker */
563 } while ((x - y) > 1 || (x - y) < -1);
569 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
572 int oldc, tc, oldl, sq;
573 u_int numtries, bound;
576 if (flags & D_MINIMAL)
580 bound = MAXIMUM(256, sq);
584 c[0] = newcand(ds, 0, 0, 0, &error);
587 for (i = 1; i <= n; i++) {
596 if (y <= ds->clist[oldc].y)
598 l = search(ds, c, k, y);
602 if (ds->clist[c[l]].y <= y)
605 c[l] = newcand(ds, i, y, oldc, &error);
612 c[l] = newcand(ds, i, y, oldc, &error);
618 } while ((y = b[++j]) > 0 && numtries < bound);
624 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
628 if (ds->clen == ds->clistlen) {
629 ds->clistlen = ds->clistlen * 11 / 10;
630 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
639 q = ds->clist + ds->clen;
648 search(struct got_diff_state *ds, int *c, int k, int y)
652 if (ds->clist[c[k]].y < y) /* quick look for typical case */
660 t = ds->clist[c[l]].y;
672 unravel(struct got_diff_state *ds, int p)
677 for (i = 0; i <= ds->len[0]; i++)
678 ds->J[i] = i <= ds->pref ? i :
679 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
680 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
681 ds->J[q->x + ds->pref] = q->y + ds->pref;
685 * Check does double duty:
686 * 1. ferret out any fortuitous correspondences due
687 * to confounding by hashing (which result in "jackpot")
688 * 2. collect random access indexes to the two files
691 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
693 int i, j, jackpot, c, d;
699 ds->ixold[0] = ds->ixnew[0] = 0;
702 for (i = 1; i <= ds->len[0]; i++) {
704 ds->ixold[i] = ctold += skipline(f1);
707 while (j < ds->J[i]) {
708 ds->ixnew[j] = ctnew += skipline(f2);
711 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
716 * GNU diff ignores a missing newline
717 * in one file for -b or -w.
719 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
720 if (c == EOF && d == '\n') {
723 } else if (c == '\n' && d == EOF) {
730 if ((flags & D_FOLDBLANKS) && isspace(c) &&
736 } while (isspace(c = getc(f1)));
741 } while (isspace(d = getc(f2)));
742 } else if ((flags & D_IGNOREBLANKS)) {
743 while (isspace(c) && c != '\n') {
747 while (isspace(d) && d != '\n') {
752 if (ds->chrtran[c] != ds->chrtran[d]) {
755 if (c != '\n' && c != EOF)
756 ctold += skipline(f1);
757 if (d != '\n' && c != EOF)
758 ctnew += skipline(f2);
761 if (c == '\n' || c == EOF)
768 if ((c = getc(f1)) != (d = getc(f2))) {
771 if (c != '\n' && c != EOF)
772 ctold += skipline(f1);
773 if (d != '\n' && c != EOF)
774 ctnew += skipline(f2);
777 if (c == '\n' || c == EOF)
781 ds->ixold[i] = ctold;
782 ds->ixnew[j] = ctnew;
785 for (; j <= ds->len[1]; j++)
786 ds->ixnew[j] = ctnew += skipline(f2);
789 * fprintf(stderr, "jackpot\n");
793 /* shellsort CACM #201 */
795 sort(struct line *a, int n)
797 struct line *ai, *aim, w;
802 for (j = 1; j <= n; j *= 2)
804 for (m /= 2; m != 0; m /= 2) {
806 for (j = 1; j <= k; j++) {
807 for (ai = &a[j]; ai > a; ai -= m) {
810 break; /* wraparound */
811 if (aim->value > ai[0].value ||
812 (aim->value == ai[0].value &&
813 aim->serial > ai[0].serial))
815 w.value = ai[0].value;
816 ai[0].value = aim->value;
817 aim->value = w.value;
818 w.serial = ai[0].serial;
819 ai[0].serial = aim->serial;
820 aim->serial = w.serial;
827 unsort(struct line *f, int l, int *b)
831 a = calloc(l + 1, sizeof(*a));
834 for (i = 1; i <= l; i++)
835 a[f[i].serial] = f[i].value;
836 for (i = 1; i <= l; i++)
848 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
854 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
855 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
857 int m, i0, i1, j0, j1;
864 ds->J[m + 1] = ds->len[1] + 1;
865 if (args->diff_format != D_EDIT) {
866 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
867 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
869 j0 = ds->J[i0 - 1] + 1;
871 while (i1 < m && ds->J[i1 + 1] == 0)
873 j1 = ds->J[i1 + 1] - 1;
875 error = change(outfile, ds, args, file1, f1, file2, f2,
876 i0, i1, j0, j1, &flags);
881 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
882 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
884 j0 = ds->J[i0 + 1] - 1;
886 while (i1 > 1 && ds->J[i1 - 1] == 0)
888 j1 = ds->J[i1 - 1] + 1;
890 change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
897 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
898 1, ds->len[1], &flags);
902 if (args->diff_format == D_IFDEF) {
905 if ((c = getc(f1)) == EOF)
907 diff_output(outfile, "%c", c);
911 if (ds->anychange != 0)
912 dump_unified_vec(outfile, ds, args, f1, f2, flags);
918 range(FILE *outfile, int a, int b, char *separator)
920 diff_output(outfile, "%d", a > b ? b : a);
922 diff_output(outfile, "%s%d", separator, b);
926 uni_range(FILE *outfile, int a, int b)
929 diff_output(outfile, "%d,%d", a, b - a + 1);
931 diff_output(outfile, "%d", b);
933 diff_output(outfile, "%d,0", b);
937 * Indicate that there is a difference between lines a and b of the from file
938 * to get to lines c to d of the to file. If a is greater then b then there
939 * are no lines in the from file involved and this means that there were
940 * lines appended (beginning at b). If c is greater than d then there are
941 * lines missing from the to file.
944 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
945 const char *file1, FILE *f1, const char *file2, FILE *f2,
946 int a, int b, int c, int d, int *pflags)
951 if (args->diff_format != D_IFDEF && a > b && c > d)
954 if (*pflags & D_HEADER) {
955 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
956 *pflags &= ~D_HEADER;
958 if (args->diff_format == D_UNIFIED) {
960 * Allocate change records as needed.
962 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
963 struct context_vec *cvp;
965 offset = ds->context_vec_ptr - ds->context_vec_start;
966 ds->max_context <<= 1;
967 ds->context_vec_start =
968 cvp = reallocarray(ds->context_vec_start,
969 ds->max_context, sizeof(*ds->context_vec_start));
971 free(ds->context_vec_start);
974 ds->context_vec_start = cvp;
975 ds->context_vec_end = ds->context_vec_start +
977 ds->context_vec_ptr = ds->context_vec_start + offset;
979 if (ds->anychange == 0) {
981 * Print the context/unidiff header first time through.
983 print_header(outfile, ds, args, file1, file2);
985 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
986 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
988 * If this change is more than 'diff_context' lines from the
989 * previous change, dump the record and reset it.
991 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
993 ds->context_vec_ptr++;
994 ds->context_vec_ptr->a = a;
995 ds->context_vec_ptr->b = b;
996 ds->context_vec_ptr->c = c;
997 ds->context_vec_ptr->d = d;
1000 if (ds->anychange == 0)
1002 switch (args->diff_format) {
1006 range(outfile, a, b, ",");
1007 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1008 diff_output(outfile, "\n");
1011 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1012 range(outfile, a, b, " ");
1013 diff_output(outfile, "\n");
1017 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1019 diff_output(outfile, "d%d %d\n", a, b - a + 1);
1021 /* add changed lines */
1022 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1026 if (args->diff_format == D_IFDEF)
1027 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1028 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
1029 if (i != 0 && args->diff_format == D_EDIT) {
1031 * A non-zero return value for D_EDIT indicates that the
1032 * last line printed was a bare dot (".") that has been
1033 * escaped as ".." to prevent ed(1) from misinterpreting
1034 * it. We have to add a substitute command to change this
1035 * back and restart where we left off.
1037 diff_output(outfile, ".\n");
1038 diff_output(outfile, "%ds/.//\n", a + i - 1);
1044 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1045 diff_output(outfile, ".\n");
1047 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1055 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1056 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1058 int i, j, c, lastc, col, nc;
1061 * When doing #ifdef's, copy down to current line
1062 * if this is the first file, so that stuff makes it to output.
1064 if (args->diff_format == D_IFDEF && oldfile) {
1065 long curpos = ftell(lb);
1066 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1067 nc = f[a > b ? b : a - 1] - curpos;
1068 for (i = 0; i < nc; i++)
1069 diff_output(outfile, "%c", getc(lb));
1073 if (args->diff_format == D_IFDEF) {
1075 diff_output(outfile, "#else /* %s%s */\n",
1076 oldfile == 1 ? "!" : "", args->ifdefname);
1079 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1081 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1083 ds->inifdef = 1 + oldfile;
1085 for (i = a; i <= b; i++) {
1086 fseek(lb, f[i - 1], SEEK_SET);
1087 nc = f[i] - f[i - 1];
1088 if (args->diff_format != D_IFDEF && ch != '\0') {
1089 diff_output(outfile, "%c", ch);
1090 if (args->Tflag && args->diff_format == D_UNIFIED)
1091 diff_output(outfile, "\t");
1092 else if (args->diff_format != D_UNIFIED)
1093 diff_output(outfile, " ");
1096 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1097 if ((c = getc(lb)) == EOF) {
1098 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1099 args->diff_format == D_NREVERSE)
1100 warnx("No newline at end of file");
1102 diff_output(outfile, "\n\\ No newline at end of "
1106 if (c == '\t' && (flags & D_EXPANDTABS)) {
1108 diff_output(outfile, " ");
1109 } while (++col & 7);
1111 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1114 * Don't print a bare "." line
1115 * since that will confuse ed(1).
1116 * Print ".." instead and return,
1117 * giving the caller an offset
1118 * from which to restart.
1120 diff_output(outfile, ".\n");
1123 diff_output(outfile, "%c", c);
1132 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1135 readhash(struct got_diff_state *ds, FILE *f, int flags)
1142 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1143 if (flags & D_IGNORECASE)
1144 for (i = 0; (t = getc(f)) != '\n'; i++) {
1150 sum = sum * 127 + ds->chrtran[t];
1153 for (i = 0; (t = getc(f)) != '\n'; i++) {
1159 sum = sum * 127 + t;
1163 switch (t = getc(f)) {
1172 if (space && (flags & D_IGNOREBLANKS) == 0) {
1176 sum = sum * 127 + ds->chrtran[t];
1190 * There is a remote possibility that we end up with a zero sum.
1191 * Zero is used as an EOF marker, so return 1 instead.
1193 return (sum == 0 ? 1 : sum);
1199 unsigned char buf[BUFSIZ];
1206 cnt = fread(buf, 1, sizeof(buf), f);
1207 return (memchr(buf, '\0', cnt) == NULL);
1210 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1213 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1215 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1217 int last = ds->lastline;
1221 while (pos > last) {
1222 fseek(fp, f[pos - 1], SEEK_SET);
1223 nc = f[pos] - f[pos - 1];
1224 if (nc >= sizeof(buf))
1225 nc = sizeof(buf) - 1;
1226 nc = fread(buf, 1, nc, fp);
1229 buf[strcspn(buf, "\n")] = '\0';
1230 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1231 if (begins_with(buf, "private:")) {
1233 state = " (private)";
1234 } else if (begins_with(buf, "protected:")) {
1236 state = " (protected)";
1237 } else if (begins_with(buf, "public:")) {
1239 state = " (public)";
1241 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1243 strlcat(ds->lastbuf, state,
1244 sizeof ds->lastbuf);
1245 ds->lastmatchline = pos;
1252 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1255 /* dump accumulated "unified" diff changes */
1257 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1258 FILE *f1, FILE *f2, int flags)
1260 struct context_vec *cvp = ds->context_vec_start;
1261 int lowa, upb, lowc, upd;
1265 if (ds->context_vec_start > ds->context_vec_ptr)
1268 b = d = 0; /* gcc */
1269 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1270 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1271 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1272 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1274 diff_output(outfile, "@@ -");
1275 uni_range(outfile, lowa, upb);
1276 diff_output(outfile, " +");
1277 uni_range(outfile, lowc, upd);
1278 diff_output(outfile, " @@");
1279 if ((flags & D_PROTOTYPE)) {
1280 f = match_function(ds, ds->ixold, lowa-1, f1);
1282 diff_output(outfile, " %s", f);
1284 diff_output(outfile, "\n");
1287 * Output changes in "unified" diff format--the old and new lines
1288 * are printed together.
1290 for (; cvp <= ds->context_vec_ptr; cvp++) {
1297 * c: both new and old changes
1298 * d: only changes in the old file
1299 * a: only changes in the new file
1301 if (a <= b && c <= d)
1304 ch = (a <= b) ? 'd' : 'a';
1308 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1309 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1310 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1313 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1314 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1317 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1318 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1324 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1326 ds->context_vec_ptr = ds->context_vec_start - 1;
1330 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1331 const char *file1, const char *file2)
1333 if (args->label[0] != NULL)
1334 diff_output(outfile, "--- %s\n", args->label[0]);
1336 diff_output(outfile, "--- %s\t%s", file1,
1337 ctime(&ds->stb1.st_mtime));
1338 if (args->label[1] != NULL)
1339 diff_output(outfile, "+++ %s\n", args->label[1]);
1341 diff_output(outfile, "+++ %s\t%s", file2,
1342 ctime(&ds->stb2.st_mtime));