1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void range(FILE *, int, int, char *);
190 static void uni_range(FILE *, int, int);
191 static void dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void prune(struct got_diff_state *);
195 static void equiv(struct line *, int, struct line *, int, int *);
196 static void unravel(struct got_diff_state *, int);
197 static int unsort(struct line *, int, int *);
198 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void sort(struct line *, int);
200 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int ignoreline(char *);
202 static int asciifile(FILE *);
203 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int newcand(struct got_diff_state *, int, int, int, int *);
205 static int search(struct got_diff_state *, int *, int, int);
206 static int skipline(FILE *);
207 static int isqrt(int);
208 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int readhash(struct got_diff_state *, FILE *, int);
210 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char *preadline(int, size_t, off_t);
215 * chrtran points to one of 2 translation tables: cup2low if folding upper to
216 * lower case clow2low if not folding case
218 u_char clow2low[256] = {
219 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
245 u_char cup2low[256] = {
246 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
273 diff_output(FILE *outfile, const char *fmt, ...)
278 vfprintf(outfile, fmt, ap);
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
286 const struct got_error *err = NULL;
293 ds->lastmatchline = 0;
294 ds->context_vec_ptr = ds->context_vec_start - 1;
295 ds->max_context = 64;
296 if (flags & D_IGNORECASE)
297 ds->chrtran = cup2low;
299 ds->chrtran = clow2low;
300 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
301 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
304 if (flags & D_EMPTY1) {
305 f1 = fopen(_PATH_DEVNULL, "r");
307 err = got_error_from_errno();
311 else if (f1 == NULL) {
316 if (flags & D_EMPTY2) {
317 f2 = fopen(_PATH_DEVNULL, "r");
319 err = got_error_from_errno();
322 } else if (f2 == NULL) {
327 switch (files_differ(ds, f1, f2, flags)) {
338 if ((flags & D_FORCEASCII) == 0 &&
339 (!asciifile(f1) || !asciifile(f2))) {
344 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
345 err = got_error_from_errno();
348 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
349 err = got_error_from_errno();
354 sort(ds->sfile[0], ds->slen[0]);
355 sort(ds->sfile[1], ds->slen[1]);
357 ds->member = (int *)ds->file[1];
358 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
359 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
361 err = got_error_from_errno();
366 ds->class = (int *)ds->file[0];
367 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
368 err = got_error_from_errno();
371 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
373 err = got_error_from_errno();
378 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
379 if (ds->klist == NULL) {
380 err = got_error_from_errno();
385 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
386 if (ds->clist == NULL) {
387 err = got_error_from_errno();
390 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
392 err = got_error_from_errno();
396 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
398 err = got_error_from_errno();
402 unravel(ds, ds->klist[i]);
404 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
406 err = got_error_from_errno();
410 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
412 err = got_error_from_errno();
416 check(ds, f1, f2, flags);
417 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
419 err = got_error_from_errno();
442 * Check to see if the given files differ.
443 * Returns 0 if they are the same, 1 if different, and -1 on error.
444 * XXX - could use code from cmp(1) [faster]
447 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
449 char buf1[BUFSIZ], buf2[BUFSIZ];
452 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
453 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
456 i = fread(buf1, 1, sizeof(buf1), f1);
457 j = fread(buf2, 1, sizeof(buf2), f2);
458 if ((!i && ferror(f1)) || (!j && ferror(f2)))
464 if (memcmp(buf1, buf2, i) != 0)
470 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
478 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
482 p = calloc(sz + 3, sizeof(*p));
485 for (j = 0; (h = readhash(ds, fd, flags));) {
488 q = reallocarray(p, sz + 3, sizeof(*p));
504 prune(struct got_diff_state *ds)
508 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
509 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
512 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
513 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
516 for (j = 0; j < 2; j++) {
517 ds->sfile[j] = ds->file[j] + ds->pref;
518 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
519 for (i = 0; i <= ds->slen[j]; i++)
520 ds->sfile[j][i].serial = i;
525 equiv(struct line *a, int n, struct line *b, int m, int *c)
530 while (i <= n && j <= m) {
531 if (a[i].value < b[j].value)
533 else if (a[i].value == b[j].value)
544 while (b[j + 1].value == b[j].value) {
552 /* Code taken from ping.c */
561 do { /* newton was a stinker */
566 } while ((x - y) > 1 || (x - y) < -1);
572 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
575 int oldc, tc, oldl, sq;
576 u_int numtries, bound;
579 if (flags & D_MINIMAL)
583 bound = MAXIMUM(256, sq);
587 c[0] = newcand(ds, 0, 0, 0, &error);
590 for (i = 1; i <= n; i++) {
599 if (y <= ds->clist[oldc].y)
601 l = search(ds, c, k, y);
605 if (ds->clist[c[l]].y <= y)
608 c[l] = newcand(ds, i, y, oldc, &error);
615 c[l] = newcand(ds, i, y, oldc, &error);
621 } while ((y = b[++j]) > 0 && numtries < bound);
627 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
631 if (ds->clen == ds->clistlen) {
632 ds->clistlen = ds->clistlen * 11 / 10;
633 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
642 q = ds->clist + ds->clen;
651 search(struct got_diff_state *ds, int *c, int k, int y)
655 if (ds->clist[c[k]].y < y) /* quick look for typical case */
663 t = ds->clist[c[l]].y;
675 unravel(struct got_diff_state *ds, int p)
680 for (i = 0; i <= ds->len[0]; i++)
681 ds->J[i] = i <= ds->pref ? i :
682 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
683 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
684 ds->J[q->x + ds->pref] = q->y + ds->pref;
688 * Check does double duty:
689 * 1. ferret out any fortuitous correspondences due
690 * to confounding by hashing (which result in "jackpot")
691 * 2. collect random access indexes to the two files
694 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
696 int i, j, jackpot, c, d;
702 ds->ixold[0] = ds->ixnew[0] = 0;
705 for (i = 1; i <= ds->len[0]; i++) {
707 ds->ixold[i] = ctold += skipline(f1);
710 while (j < ds->J[i]) {
711 ds->ixnew[j] = ctnew += skipline(f2);
714 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
719 * GNU diff ignores a missing newline
720 * in one file for -b or -w.
722 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
723 if (c == EOF && d == '\n') {
726 } else if (c == '\n' && d == EOF) {
733 if ((flags & D_FOLDBLANKS) && isspace(c) &&
739 } while (isspace(c = getc(f1)));
744 } while (isspace(d = getc(f2)));
745 } else if ((flags & D_IGNOREBLANKS)) {
746 while (isspace(c) && c != '\n') {
750 while (isspace(d) && d != '\n') {
755 if (ds->chrtran[c] != ds->chrtran[d]) {
758 if (c != '\n' && c != EOF)
759 ctold += skipline(f1);
760 if (d != '\n' && c != EOF)
761 ctnew += skipline(f2);
764 if (c == '\n' || c == EOF)
771 if ((c = getc(f1)) != (d = getc(f2))) {
774 if (c != '\n' && c != EOF)
775 ctold += skipline(f1);
776 if (d != '\n' && c != EOF)
777 ctnew += skipline(f2);
780 if (c == '\n' || c == EOF)
784 ds->ixold[i] = ctold;
785 ds->ixnew[j] = ctnew;
788 for (; j <= ds->len[1]; j++)
789 ds->ixnew[j] = ctnew += skipline(f2);
792 * fprintf(stderr, "jackpot\n");
796 /* shellsort CACM #201 */
798 sort(struct line *a, int n)
800 struct line *ai, *aim, w;
805 for (j = 1; j <= n; j *= 2)
807 for (m /= 2; m != 0; m /= 2) {
809 for (j = 1; j <= k; j++) {
810 for (ai = &a[j]; ai > a; ai -= m) {
813 break; /* wraparound */
814 if (aim->value > ai[0].value ||
815 (aim->value == ai[0].value &&
816 aim->serial > ai[0].serial))
818 w.value = ai[0].value;
819 ai[0].value = aim->value;
820 aim->value = w.value;
821 w.serial = ai[0].serial;
822 ai[0].serial = aim->serial;
823 aim->serial = w.serial;
830 unsort(struct line *f, int l, int *b)
834 a = calloc(l + 1, sizeof(*a));
837 for (i = 1; i <= l; i++)
838 a[f[i].serial] = f[i].value;
839 for (i = 1; i <= l; i++)
851 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
857 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
858 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
860 int m, i0, i1, j0, j1;
867 ds->J[m + 1] = ds->len[1] + 1;
868 if (args->diff_format != D_EDIT) {
869 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
870 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
872 j0 = ds->J[i0 - 1] + 1;
874 while (i1 < m && ds->J[i1 + 1] == 0)
876 j1 = ds->J[i1 + 1] - 1;
878 error = change(outfile, ds, args, file1, f1, file2, f2,
879 i0, i1, j0, j1, &flags);
884 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
885 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
887 j0 = ds->J[i0 + 1] - 1;
889 while (i1 > 1 && ds->J[i1 - 1] == 0)
891 j1 = ds->J[i1 - 1] + 1;
893 change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
900 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
901 1, ds->len[1], &flags);
905 if (args->diff_format == D_IFDEF) {
908 if ((c = getc(f1)) == EOF)
910 diff_output(outfile, "%c", c);
914 if (ds->anychange != 0) {
915 if (args->diff_format == D_CONTEXT)
916 dump_context_vec(outfile, ds, args, f1, f2, flags);
917 else if (args->diff_format == D_UNIFIED)
918 dump_unified_vec(outfile, ds, args, f1, f2, flags);
925 range(FILE *outfile, int a, int b, char *separator)
927 diff_output(outfile, "%d", a > b ? b : a);
929 diff_output(outfile, "%s%d", separator, b);
933 uni_range(FILE *outfile, int a, int b)
936 diff_output(outfile, "%d,%d", a, b - a + 1);
938 diff_output(outfile, "%d", b);
940 diff_output(outfile, "%d,0", b);
944 preadline(int fd, size_t rlen, off_t off)
949 line = malloc(rlen + 1);
952 if ((nr = pread(fd, line, rlen, off)) < 0) {
956 if (nr > 0 && line[nr-1] == '\n')
963 ignoreline(char *line)
966 return 0; /* do not ignore any lines */
970 * Indicate that there is a difference between lines a and b of the from file
971 * to get to lines c to d of the to file. If a is greater then b then there
972 * are no lines in the from file involved and this means that there were
973 * lines appended (beginning at b). If c is greater than d then there are
974 * lines missing from the to file.
977 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
978 const char *file1, FILE *f1, const char *file2, FILE *f2,
979 int a, int b, int c, int d, int *pflags)
984 if (args->diff_format != D_IFDEF && a > b && c > d)
986 if (args->ignore_pats != NULL) {
989 * All lines in the change, insert, or delete must
990 * match an ignore pattern for the change to be
993 if (a <= b) { /* Changes and deletes. */
994 for (i = a; i <= b; i++) {
995 line = preadline(fileno(f1),
996 ds->ixold[i] - ds->ixold[i - 1],
1000 if (!ignoreline(line))
1004 if (a > b || c <= d) { /* Changes and inserts. */
1005 for (i = c; i <= d; i++) {
1006 line = preadline(fileno(f2),
1007 ds->ixnew[i] - ds->ixnew[i - 1],
1011 if (!ignoreline(line))
1018 if (*pflags & D_HEADER) {
1019 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1020 *pflags &= ~D_HEADER;
1022 if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1024 * Allocate change records as needed.
1026 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1027 struct context_vec *cvp;
1029 offset = ds->context_vec_ptr - ds->context_vec_start;
1030 ds->max_context <<= 1;
1031 ds->context_vec_start =
1032 cvp = reallocarray(ds->context_vec_start,
1033 ds->max_context, sizeof(*ds->context_vec_start));
1035 free(ds->context_vec_start);
1038 ds->context_vec_start = cvp;
1039 ds->context_vec_end = ds->context_vec_start +
1041 ds->context_vec_ptr = ds->context_vec_start + offset;
1043 if (ds->anychange == 0) {
1045 * Print the context/unidiff header first time through.
1047 print_header(outfile, ds, args, file1, file2);
1049 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1050 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1052 * If this change is more than 'diff_context' lines from the
1053 * previous change, dump the record and reset it.
1055 if (args->diff_format == D_CONTEXT)
1056 dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1058 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1060 ds->context_vec_ptr++;
1061 ds->context_vec_ptr->a = a;
1062 ds->context_vec_ptr->b = b;
1063 ds->context_vec_ptr->c = c;
1064 ds->context_vec_ptr->d = d;
1067 if (ds->anychange == 0)
1069 switch (args->diff_format) {
1074 range(outfile, a, b, ",");
1075 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1076 if (args->diff_format == D_NORMAL)
1077 range(outfile, c, d, ",");
1078 diff_output(outfile, "\n");
1081 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1082 range(outfile, a, b, " ");
1083 diff_output(outfile, "\n");
1087 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1089 diff_output(outfile, "d%d %d\n", a, b - a + 1);
1091 /* add changed lines */
1092 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1096 if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1097 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1098 if (a <= b && c <= d && args->diff_format == D_NORMAL)
1099 diff_output(outfile, "---\n");
1101 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1102 if (i != 0 && args->diff_format == D_EDIT) {
1104 * A non-zero return value for D_EDIT indicates that the
1105 * last line printed was a bare dot (".") that has been
1106 * escaped as ".." to prevent ed(1) from misinterpreting
1107 * it. We have to add a substitute command to change this
1108 * back and restart where we left off.
1110 diff_output(outfile, ".\n");
1111 diff_output(outfile, "%ds/.//\n", a + i - 1);
1117 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1118 diff_output(outfile, ".\n");
1120 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1128 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1129 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1131 int i, j, c, lastc, col, nc;
1134 * When doing #ifdef's, copy down to current line
1135 * if this is the first file, so that stuff makes it to output.
1137 if (args->diff_format == D_IFDEF && oldfile) {
1138 long curpos = ftell(lb);
1139 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1140 nc = f[a > b ? b : a - 1] - curpos;
1141 for (i = 0; i < nc; i++)
1142 diff_output(outfile, "%c", getc(lb));
1146 if (args->diff_format == D_IFDEF) {
1148 diff_output(outfile, "#else /* %s%s */\n",
1149 oldfile == 1 ? "!" : "", args->ifdefname);
1152 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1154 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1156 ds->inifdef = 1 + oldfile;
1158 for (i = a; i <= b; i++) {
1159 fseek(lb, f[i - 1], SEEK_SET);
1160 nc = f[i] - f[i - 1];
1161 if (args->diff_format != D_IFDEF && ch != '\0') {
1162 diff_output(outfile, "%c", ch);
1163 if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1164 || args->diff_format == D_UNIFIED))
1165 diff_output(outfile, "\t");
1166 else if (args->diff_format != D_UNIFIED)
1167 diff_output(outfile, " ");
1170 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1171 if ((c = getc(lb)) == EOF) {
1172 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1173 args->diff_format == D_NREVERSE)
1174 warnx("No newline at end of file");
1176 diff_output(outfile, "\n\\ No newline at end of "
1180 if (c == '\t' && (flags & D_EXPANDTABS)) {
1182 diff_output(outfile, " ");
1183 } while (++col & 7);
1185 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1188 * Don't print a bare "." line
1189 * since that will confuse ed(1).
1190 * Print ".." instead and return,
1191 * giving the caller an offset
1192 * from which to restart.
1194 diff_output(outfile, ".\n");
1197 diff_output(outfile, "%c", c);
1206 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1209 readhash(struct got_diff_state *ds, FILE *f, int flags)
1216 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1217 if (flags & D_IGNORECASE)
1218 for (i = 0; (t = getc(f)) != '\n'; i++) {
1224 sum = sum * 127 + ds->chrtran[t];
1227 for (i = 0; (t = getc(f)) != '\n'; i++) {
1233 sum = sum * 127 + t;
1237 switch (t = getc(f)) {
1246 if (space && (flags & D_IGNOREBLANKS) == 0) {
1250 sum = sum * 127 + ds->chrtran[t];
1264 * There is a remote possibility that we end up with a zero sum.
1265 * Zero is used as an EOF marker, so return 1 instead.
1267 return (sum == 0 ? 1 : sum);
1273 unsigned char buf[BUFSIZ];
1280 cnt = fread(buf, 1, sizeof(buf), f);
1281 return (memchr(buf, '\0', cnt) == NULL);
1284 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1287 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1289 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1291 int last = ds->lastline;
1295 while (pos > last) {
1296 fseek(fp, f[pos - 1], SEEK_SET);
1297 nc = f[pos] - f[pos - 1];
1298 if (nc >= sizeof(buf))
1299 nc = sizeof(buf) - 1;
1300 nc = fread(buf, 1, nc, fp);
1303 buf[strcspn(buf, "\n")] = '\0';
1304 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1305 if (begins_with(buf, "private:")) {
1307 state = " (private)";
1308 } else if (begins_with(buf, "protected:")) {
1310 state = " (protected)";
1311 } else if (begins_with(buf, "public:")) {
1313 state = " (public)";
1315 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1317 strlcat(ds->lastbuf, state,
1318 sizeof ds->lastbuf);
1319 ds->lastmatchline = pos;
1326 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1329 /* dump accumulated "context" diff changes */
1331 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1332 FILE *f1, FILE *f2, int flags)
1334 struct context_vec *cvp = ds->context_vec_start;
1335 int lowa, upb, lowc, upd, do_output;
1339 if (ds->context_vec_start > ds->context_vec_ptr)
1342 b = d = 0; /* gcc */
1343 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1344 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1345 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1346 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1348 diff_output(outfile, "***************");
1349 if ((flags & D_PROTOTYPE)) {
1350 f = match_function(ds, ds->ixold, lowa-1, f1);
1352 diff_output(outfile, " %s", f);
1354 diff_output(outfile, "\n*** ");
1355 range(outfile, lowa, upb, ",");
1356 diff_output(outfile, " ****\n");
1359 * Output changes to the "old" file. The first loop suppresses
1360 * output if there were no changes to the "old" file (we'll see
1361 * the "old" lines as context in the "new" list).
1364 for (; cvp <= ds->context_vec_ptr; cvp++)
1365 if (cvp->a <= cvp->b) {
1366 cvp = ds->context_vec_start;
1371 while (cvp <= ds->context_vec_ptr) {
1377 if (a <= b && c <= d)
1380 ch = (a <= b) ? 'd' : 'a';
1383 fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1385 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1386 fetch(outfile, ds, args, ds->ixold, a, b, f1,
1387 ch == 'c' ? '!' : '-', 0, flags);
1392 fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1394 /* output changes to the "new" file */
1395 diff_output(outfile, "--- ");
1396 range(outfile, lowc, upd, ",");
1397 diff_output(outfile, " ----\n");
1400 for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1401 if (cvp->c <= cvp->d) {
1402 cvp = ds->context_vec_start;
1407 while (cvp <= ds->context_vec_ptr) {
1413 if (a <= b && c <= d)
1416 ch = (a <= b) ? 'd' : 'a';
1419 fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1421 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1422 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1423 ch == 'c' ? '!' : '+', 0, flags);
1428 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1430 ds->context_vec_ptr = ds->context_vec_start - 1;
1433 /* dump accumulated "unified" diff changes */
1435 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1436 FILE *f1, FILE *f2, int flags)
1438 struct context_vec *cvp = ds->context_vec_start;
1439 int lowa, upb, lowc, upd;
1443 if (ds->context_vec_start > ds->context_vec_ptr)
1446 b = d = 0; /* gcc */
1447 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1448 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1449 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1450 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1452 diff_output(outfile, "@@ -");
1453 uni_range(outfile, lowa, upb);
1454 diff_output(outfile, " +");
1455 uni_range(outfile, lowc, upd);
1456 diff_output(outfile, " @@");
1457 if ((flags & D_PROTOTYPE)) {
1458 f = match_function(ds, ds->ixold, lowa-1, f1);
1460 diff_output(outfile, " %s", f);
1462 diff_output(outfile, "\n");
1465 * Output changes in "unified" diff format--the old and new lines
1466 * are printed together.
1468 for (; cvp <= ds->context_vec_ptr; cvp++) {
1475 * c: both new and old changes
1476 * d: only changes in the old file
1477 * a: only changes in the new file
1479 if (a <= b && c <= d)
1482 ch = (a <= b) ? 'd' : 'a';
1486 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1487 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1488 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1491 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1492 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1495 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1496 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1502 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1504 ds->context_vec_ptr = ds->context_vec_start - 1;
1508 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1509 const char *file1, const char *file2)
1511 if (args->label[0] != NULL)
1512 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1515 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1516 file1, ctime(&ds->stb1.st_mtime));
1517 if (args->label[1] != NULL)
1518 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1521 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1522 file2, ctime(&ds->stb2.st_mtime));