1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void range(FILE *, int, int, char *);
190 static void uni_range(FILE *, int, int);
191 static void dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void prune(struct got_diff_state *);
195 static void equiv(struct line *, int, struct line *, int, int *);
196 static void unravel(struct got_diff_state *, int);
197 static int unsort(struct line *, int, int *);
198 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void sort(struct line *, int);
200 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int ignoreline(char *);
202 static int asciifile(FILE *);
203 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int newcand(struct got_diff_state *, int, int, int, int *);
205 static int search(struct got_diff_state *, int *, int, int);
206 static int skipline(FILE *);
207 static int isqrt(int);
208 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int readhash(struct got_diff_state *, FILE *, int);
210 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char *preadline(int, size_t, off_t);
215 * chrtran points to one of 2 translation tables: cup2low if folding upper to
216 * lower case clow2low if not folding case
218 u_char clow2low[256] = {
219 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
245 u_char cup2low[256] = {
246 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
273 diff_output(FILE *outfile, const char *fmt, ...)
278 vfprintf(outfile, fmt, ap);
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
286 const struct got_error *err = NULL;
292 ds->lastmatchline = 0;
293 ds->context_vec_ptr = ds->context_vec_start - 1;
294 ds->max_context = 64;
295 if (flags & D_IGNORECASE)
296 ds->chrtran = cup2low;
298 ds->chrtran = clow2low;
299 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
300 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
303 if (flags & D_EMPTY1) {
304 f1 = fopen(_PATH_DEVNULL, "r");
306 err = got_error_from_errno();
310 else if (f1 == NULL) {
315 if (flags & D_EMPTY2) {
316 f2 = fopen(_PATH_DEVNULL, "r");
318 err = got_error_from_errno();
321 } else if (f2 == NULL) {
326 switch (files_differ(ds, f1, f2, flags)) {
337 if ((flags & D_FORCEASCII) == 0 &&
338 (!asciifile(f1) || !asciifile(f2))) {
343 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
344 err = got_error_from_errno();
347 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
348 err = got_error_from_errno();
353 sort(ds->sfile[0], ds->slen[0]);
354 sort(ds->sfile[1], ds->slen[1]);
356 ds->member = (int *)ds->file[1];
357 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
358 ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
359 if (ds->member == NULL) {
360 err = got_error_from_errno();
364 ds->class = (int *)ds->file[0];
365 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
366 err = got_error_from_errno();
369 ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
370 if (ds->class == NULL) {
371 err = got_error_from_errno();
375 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
376 if (ds->klist == NULL) {
377 err = got_error_from_errno();
382 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
383 if (ds->clist == NULL) {
384 err = got_error_from_errno();
387 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
389 err = got_error_from_errno();
397 ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
399 err = got_error_from_errno();
402 unravel(ds, ds->klist[i]);
408 ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
409 if (ds->ixold == NULL) {
410 err = got_error_from_errno();
413 ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
414 if (ds->ixnew == NULL) {
415 err = got_error_from_errno();
418 check(ds, f1, f2, flags);
419 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
421 err = got_error_from_errno();
437 * Check to see if the given files differ.
438 * Returns 0 if they are the same, 1 if different, and -1 on error.
439 * XXX - could use code from cmp(1) [faster]
442 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
444 char buf1[BUFSIZ], buf2[BUFSIZ];
447 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
448 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
451 i = fread(buf1, 1, sizeof(buf1), f1);
452 j = fread(buf2, 1, sizeof(buf2), f2);
453 if ((!i && ferror(f1)) || (!j && ferror(f2)))
459 if (memcmp(buf1, buf2, i) != 0)
465 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
473 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
477 p = calloc(sz + 3, sizeof(*p));
480 for (j = 0; (h = readhash(ds, fd, flags));) {
483 p = reallocarray(p, sz + 3, sizeof(*p));
496 prune(struct got_diff_state *ds)
500 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
501 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
504 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
505 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
508 for (j = 0; j < 2; j++) {
509 ds->sfile[j] = ds->file[j] + ds->pref;
510 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
511 for (i = 0; i <= ds->slen[j]; i++)
512 ds->sfile[j][i].serial = i;
517 equiv(struct line *a, int n, struct line *b, int m, int *c)
522 while (i <= n && j <= m) {
523 if (a[i].value < b[j].value)
525 else if (a[i].value == b[j].value)
536 while (b[j + 1].value == b[j].value) {
544 /* Code taken from ping.c */
553 do { /* newton was a stinker */
558 } while ((x - y) > 1 || (x - y) < -1);
564 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
567 int oldc, tc, oldl, sq;
568 u_int numtries, bound;
571 if (flags & D_MINIMAL)
575 bound = MAXIMUM(256, sq);
579 c[0] = newcand(ds, 0, 0, 0, &error);
582 for (i = 1; i <= n; i++) {
591 if (y <= ds->clist[oldc].y)
593 l = search(ds, c, k, y);
597 if (ds->clist[c[l]].y <= y)
600 c[l] = newcand(ds, i, y, oldc, &error);
607 c[l] = newcand(ds, i, y, oldc, &error);
613 } while ((y = b[++j]) > 0 && numtries < bound);
619 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
623 if (ds->clen == ds->clistlen) {
624 ds->clistlen = ds->clistlen * 11 / 10;
625 ds->clist = reallocarray(ds->clist, ds->clistlen,
627 if (ds->clist == NULL) {
632 q = ds->clist + ds->clen;
641 search(struct got_diff_state *ds, int *c, int k, int y)
645 if (ds->clist[c[k]].y < y) /* quick look for typical case */
653 t = ds->clist[c[l]].y;
665 unravel(struct got_diff_state *ds, int p)
670 for (i = 0; i <= ds->len[0]; i++)
671 ds->J[i] = i <= ds->pref ? i :
672 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
673 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
674 ds->J[q->x + ds->pref] = q->y + ds->pref;
678 * Check does double duty:
679 * 1. ferret out any fortuitous correspondences due
680 * to confounding by hashing (which result in "jackpot")
681 * 2. collect random access indexes to the two files
684 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
686 int i, j, jackpot, c, d;
692 ds->ixold[0] = ds->ixnew[0] = 0;
695 for (i = 1; i <= ds->len[0]; i++) {
697 ds->ixold[i] = ctold += skipline(f1);
700 while (j < ds->J[i]) {
701 ds->ixnew[j] = ctnew += skipline(f2);
704 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
709 * GNU diff ignores a missing newline
710 * in one file for -b or -w.
712 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
713 if (c == EOF && d == '\n') {
716 } else if (c == '\n' && d == EOF) {
723 if ((flags & D_FOLDBLANKS) && isspace(c) &&
729 } while (isspace(c = getc(f1)));
734 } while (isspace(d = getc(f2)));
735 } else if ((flags & D_IGNOREBLANKS)) {
736 while (isspace(c) && c != '\n') {
740 while (isspace(d) && d != '\n') {
745 if (ds->chrtran[c] != ds->chrtran[d]) {
748 if (c != '\n' && c != EOF)
749 ctold += skipline(f1);
750 if (d != '\n' && c != EOF)
751 ctnew += skipline(f2);
754 if (c == '\n' || c == EOF)
761 if ((c = getc(f1)) != (d = getc(f2))) {
764 if (c != '\n' && c != EOF)
765 ctold += skipline(f1);
766 if (d != '\n' && c != EOF)
767 ctnew += skipline(f2);
770 if (c == '\n' || c == EOF)
774 ds->ixold[i] = ctold;
775 ds->ixnew[j] = ctnew;
778 for (; j <= ds->len[1]; j++)
779 ds->ixnew[j] = ctnew += skipline(f2);
782 * fprintf(stderr, "jackpot\n");
786 /* shellsort CACM #201 */
788 sort(struct line *a, int n)
790 struct line *ai, *aim, w;
795 for (j = 1; j <= n; j *= 2)
797 for (m /= 2; m != 0; m /= 2) {
799 for (j = 1; j <= k; j++) {
800 for (ai = &a[j]; ai > a; ai -= m) {
803 break; /* wraparound */
804 if (aim->value > ai[0].value ||
805 (aim->value == ai[0].value &&
806 aim->serial > ai[0].serial))
808 w.value = ai[0].value;
809 ai[0].value = aim->value;
810 aim->value = w.value;
811 w.serial = ai[0].serial;
812 ai[0].serial = aim->serial;
813 aim->serial = w.serial;
820 unsort(struct line *f, int l, int *b)
824 a = calloc(l + 1, sizeof(*a));
827 for (i = 1; i <= l; i++)
828 a[f[i].serial] = f[i].value;
829 for (i = 1; i <= l; i++)
841 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
847 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
848 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
850 int m, i0, i1, j0, j1;
857 ds->J[m + 1] = ds->len[1] + 1;
858 if (args->diff_format != D_EDIT) {
859 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
860 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
862 j0 = ds->J[i0 - 1] + 1;
864 while (i1 < m && ds->J[i1 + 1] == 0)
866 j1 = ds->J[i1 + 1] - 1;
868 error = change(outfile, ds, args, file1, f1, file2, f2,
869 i0, i1, j0, j1, &flags);
874 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
875 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
877 j0 = ds->J[i0 + 1] - 1;
879 while (i1 > 1 && ds->J[i1 - 1] == 0)
881 j1 = ds->J[i1 - 1] + 1;
883 change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
890 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
891 1, ds->len[1], &flags);
895 if (args->diff_format == D_IFDEF) {
898 if ((c = getc(f1)) == EOF)
900 diff_output(outfile, "%c", c);
904 if (ds->anychange != 0) {
905 if (args->diff_format == D_CONTEXT)
906 dump_context_vec(outfile, ds, args, f1, f2, flags);
907 else if (args->diff_format == D_UNIFIED)
908 dump_unified_vec(outfile, ds, args, f1, f2, flags);
915 range(FILE *outfile, int a, int b, char *separator)
917 diff_output(outfile, "%d", a > b ? b : a);
919 diff_output(outfile, "%s%d", separator, b);
923 uni_range(FILE *outfile, int a, int b)
926 diff_output(outfile, "%d,%d", a, b - a + 1);
928 diff_output(outfile, "%d", b);
930 diff_output(outfile, "%d,0", b);
934 preadline(int fd, size_t rlen, off_t off)
939 line = malloc(rlen + 1);
942 if ((nr = pread(fd, line, rlen, off)) < 0) {
946 if (nr > 0 && line[nr-1] == '\n')
953 ignoreline(char *line)
955 return 0; /* do not ignore any lines */
959 * Indicate that there is a difference between lines a and b of the from file
960 * to get to lines c to d of the to file. If a is greater then b then there
961 * are no lines in the from file involved and this means that there were
962 * lines appended (beginning at b). If c is greater than d then there are
963 * lines missing from the to file.
966 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
967 const char *file1, FILE *f1, const char *file2, FILE *f2,
968 int a, int b, int c, int d, int *pflags)
973 if (args->diff_format != D_IFDEF && a > b && c > d)
975 if (args->ignore_pats != NULL) {
978 * All lines in the change, insert, or delete must
979 * match an ignore pattern for the change to be
982 if (a <= b) { /* Changes and deletes. */
983 for (i = a; i <= b; i++) {
984 line = preadline(fileno(f1),
985 ds->ixold[i] - ds->ixold[i - 1],
989 if (!ignoreline(line))
993 if (a > b || c <= d) { /* Changes and inserts. */
994 for (i = c; i <= d; i++) {
995 line = preadline(fileno(f2),
996 ds->ixnew[i] - ds->ixnew[i - 1],
1000 if (!ignoreline(line))
1007 if (*pflags & D_HEADER) {
1008 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1009 *pflags &= ~D_HEADER;
1011 if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1013 * Allocate change records as needed.
1015 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1017 offset = ds->context_vec_ptr - ds->context_vec_start;
1018 ds->max_context <<= 1;
1019 ds->context_vec_start =
1020 reallocarray(ds->context_vec_start, ds->max_context,
1021 sizeof(*ds->context_vec_start));
1022 if (ds->context_vec_start == NULL)
1024 ds->context_vec_end = ds->context_vec_start +
1026 ds->context_vec_ptr = ds->context_vec_start + offset;
1028 if (ds->anychange == 0) {
1030 * Print the context/unidiff header first time through.
1032 print_header(outfile, ds, args, file1, file2);
1034 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1035 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1037 * If this change is more than 'diff_context' lines from the
1038 * previous change, dump the record and reset it.
1040 if (args->diff_format == D_CONTEXT)
1041 dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1043 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1045 ds->context_vec_ptr++;
1046 ds->context_vec_ptr->a = a;
1047 ds->context_vec_ptr->b = b;
1048 ds->context_vec_ptr->c = c;
1049 ds->context_vec_ptr->d = d;
1052 if (ds->anychange == 0)
1054 switch (args->diff_format) {
1059 range(outfile, a, b, ",");
1060 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1061 if (args->diff_format == D_NORMAL)
1062 range(outfile, c, d, ",");
1063 diff_output(outfile, "\n");
1066 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1067 range(outfile, a, b, " ");
1068 diff_output(outfile, "\n");
1072 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1074 diff_output(outfile, "d%d %d\n", a, b - a + 1);
1076 /* add changed lines */
1077 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1081 if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1082 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1083 if (a <= b && c <= d && args->diff_format == D_NORMAL)
1084 diff_output(outfile, "---\n");
1086 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1087 if (i != 0 && args->diff_format == D_EDIT) {
1089 * A non-zero return value for D_EDIT indicates that the
1090 * last line printed was a bare dot (".") that has been
1091 * escaped as ".." to prevent ed(1) from misinterpreting
1092 * it. We have to add a substitute command to change this
1093 * back and restart where we left off.
1095 diff_output(outfile, ".\n");
1096 diff_output(outfile, "%ds/.//\n", a + i - 1);
1102 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1103 diff_output(outfile, ".\n");
1105 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1113 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1114 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1116 int i, j, c, lastc, col, nc;
1119 * When doing #ifdef's, copy down to current line
1120 * if this is the first file, so that stuff makes it to output.
1122 if (args->diff_format == D_IFDEF && oldfile) {
1123 long curpos = ftell(lb);
1124 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1125 nc = f[a > b ? b : a - 1] - curpos;
1126 for (i = 0; i < nc; i++)
1127 diff_output(outfile, "%c", getc(lb));
1131 if (args->diff_format == D_IFDEF) {
1133 diff_output(outfile, "#else /* %s%s */\n",
1134 oldfile == 1 ? "!" : "", args->ifdefname);
1137 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1139 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1141 ds->inifdef = 1 + oldfile;
1143 for (i = a; i <= b; i++) {
1144 fseek(lb, f[i - 1], SEEK_SET);
1145 nc = f[i] - f[i - 1];
1146 if (args->diff_format != D_IFDEF && ch != '\0') {
1147 diff_output(outfile, "%c", ch);
1148 if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1149 || args->diff_format == D_UNIFIED))
1150 diff_output(outfile, "\t");
1151 else if (args->diff_format != D_UNIFIED)
1152 diff_output(outfile, " ");
1155 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1156 if ((c = getc(lb)) == EOF) {
1157 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1158 args->diff_format == D_NREVERSE)
1159 warnx("No newline at end of file");
1161 diff_output(outfile, "\n\\ No newline at end of "
1165 if (c == '\t' && (flags & D_EXPANDTABS)) {
1167 diff_output(outfile, " ");
1168 } while (++col & 7);
1170 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1173 * Don't print a bare "." line
1174 * since that will confuse ed(1).
1175 * Print ".." instead and return,
1176 * giving the caller an offset
1177 * from which to restart.
1179 diff_output(outfile, ".\n");
1182 diff_output(outfile, "%c", c);
1191 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1194 readhash(struct got_diff_state *ds, FILE *f, int flags)
1201 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1202 if (flags & D_IGNORECASE)
1203 for (i = 0; (t = getc(f)) != '\n'; i++) {
1209 sum = sum * 127 + ds->chrtran[t];
1212 for (i = 0; (t = getc(f)) != '\n'; i++) {
1218 sum = sum * 127 + t;
1222 switch (t = getc(f)) {
1231 if (space && (flags & D_IGNOREBLANKS) == 0) {
1235 sum = sum * 127 + ds->chrtran[t];
1249 * There is a remote possibility that we end up with a zero sum.
1250 * Zero is used as an EOF marker, so return 1 instead.
1252 return (sum == 0 ? 1 : sum);
1258 unsigned char buf[BUFSIZ];
1265 cnt = fread(buf, 1, sizeof(buf), f);
1266 return (memchr(buf, '\0', cnt) == NULL);
1269 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1272 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1274 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1276 int last = ds->lastline;
1280 while (pos > last) {
1281 fseek(fp, f[pos - 1], SEEK_SET);
1282 nc = f[pos] - f[pos - 1];
1283 if (nc >= sizeof(buf))
1284 nc = sizeof(buf) - 1;
1285 nc = fread(buf, 1, nc, fp);
1288 buf[strcspn(buf, "\n")] = '\0';
1289 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1290 if (begins_with(buf, "private:")) {
1292 state = " (private)";
1293 } else if (begins_with(buf, "protected:")) {
1295 state = " (protected)";
1296 } else if (begins_with(buf, "public:")) {
1298 state = " (public)";
1300 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1302 strlcat(ds->lastbuf, state,
1303 sizeof ds->lastbuf);
1304 ds->lastmatchline = pos;
1311 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1314 /* dump accumulated "context" diff changes */
1316 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1317 FILE *f1, FILE *f2, int flags)
1319 struct context_vec *cvp = ds->context_vec_start;
1320 int lowa, upb, lowc, upd, do_output;
1324 if (ds->context_vec_start > ds->context_vec_ptr)
1327 b = d = 0; /* gcc */
1328 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1329 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1330 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1331 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1333 diff_output(outfile, "***************");
1334 if ((flags & D_PROTOTYPE)) {
1335 f = match_function(ds, ds->ixold, lowa-1, f1);
1337 diff_output(outfile, " %s", f);
1339 diff_output(outfile, "\n*** ");
1340 range(outfile, lowa, upb, ",");
1341 diff_output(outfile, " ****\n");
1344 * Output changes to the "old" file. The first loop suppresses
1345 * output if there were no changes to the "old" file (we'll see
1346 * the "old" lines as context in the "new" list).
1349 for (; cvp <= ds->context_vec_ptr; cvp++)
1350 if (cvp->a <= cvp->b) {
1351 cvp = ds->context_vec_start;
1356 while (cvp <= ds->context_vec_ptr) {
1362 if (a <= b && c <= d)
1365 ch = (a <= b) ? 'd' : 'a';
1368 fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1370 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1371 fetch(outfile, ds, args, ds->ixold, a, b, f1,
1372 ch == 'c' ? '!' : '-', 0, flags);
1377 fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1379 /* output changes to the "new" file */
1380 diff_output(outfile, "--- ");
1381 range(outfile, lowc, upd, ",");
1382 diff_output(outfile, " ----\n");
1385 for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1386 if (cvp->c <= cvp->d) {
1387 cvp = ds->context_vec_start;
1392 while (cvp <= ds->context_vec_ptr) {
1398 if (a <= b && c <= d)
1401 ch = (a <= b) ? 'd' : 'a';
1404 fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1406 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1407 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1408 ch == 'c' ? '!' : '+', 0, flags);
1413 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1415 ds->context_vec_ptr = ds->context_vec_start - 1;
1418 /* dump accumulated "unified" diff changes */
1420 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1421 FILE *f1, FILE *f2, int flags)
1423 struct context_vec *cvp = ds->context_vec_start;
1424 int lowa, upb, lowc, upd;
1428 if (ds->context_vec_start > ds->context_vec_ptr)
1431 b = d = 0; /* gcc */
1432 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1433 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1434 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1435 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1437 diff_output(outfile, "@@ -");
1438 uni_range(outfile, lowa, upb);
1439 diff_output(outfile, " +");
1440 uni_range(outfile, lowc, upd);
1441 diff_output(outfile, " @@");
1442 if ((flags & D_PROTOTYPE)) {
1443 f = match_function(ds, ds->ixold, lowa-1, f1);
1445 diff_output(outfile, " %s", f);
1447 diff_output(outfile, "\n");
1450 * Output changes in "unified" diff format--the old and new lines
1451 * are printed together.
1453 for (; cvp <= ds->context_vec_ptr; cvp++) {
1460 * c: both new and old changes
1461 * d: only changes in the old file
1462 * a: only changes in the new file
1464 if (a <= b && c <= d)
1467 ch = (a <= b) ? 'd' : 'a';
1471 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1472 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1473 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1476 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1477 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1480 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1481 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1487 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1489 ds->context_vec_ptr = ds->context_vec_start - 1;
1493 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1494 const char *file1, const char *file2)
1496 if (args->label[0] != NULL)
1497 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1500 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1501 file1, ctime(&ds->stb1.st_mtime));
1502 if (args->label[1] != NULL)
1503 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1506 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1507 file2, ctime(&ds->stb2.st_mtime));