1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
92 #include "xmalloc.h" /* XXX should return GOT_ERR_NO_MEM instead of exiting */
94 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
95 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
98 * diff - compare two files.
102 * Uses an algorithm due to Harold Stone, which finds
103 * a pair of longest identical subsequences in the two
106 * The major goal is to generate the match vector J.
107 * J[i] is the index of the line in file1 corresponding
108 * to line i file0. J[i] = 0 if there is no
109 * such line in file1.
111 * Lines are hashed so as to work in core. All potential
112 * matches are located by sorting the lines of each file
113 * on the hash (called ``value''). In particular, this
114 * collects the equivalence classes in file1 together.
115 * Subroutine equiv replaces the value of each line in
116 * file0 by the index of the first element of its
117 * matching equivalence in (the reordered) file1.
118 * To save space equiv squeezes file1 into a single
119 * array member in which the equivalence classes
120 * are simply concatenated, except that their first
121 * members are flagged by changing sign.
123 * Next the indices that point into member are unsorted into
124 * array class according to the original order of file0.
126 * The cleverness lies in routine stone. This marches
127 * through the lines of file0, developing a vector klist
128 * of "k-candidates". At step i a k-candidate is a matched
129 * pair of lines x,y (x in file0 y in file1) such that
130 * there is a common subsequence of length k
131 * between the first i lines of file0 and the first y
132 * lines of file1, but there is no such subsequence for
133 * any smaller y. x is the earliest possible mate to y
134 * that occurs in such a subsequence.
136 * Whenever any of the members of the equivalence class of
137 * lines in file1 matable to a line in file0 has serial number
138 * less than the y of some k-candidate, that k-candidate
139 * with the smallest such y is replaced. The new
140 * k-candidate is chained (via pred) to the current
141 * k-1 candidate so that the actual subsequence can
142 * be recovered. When a member has serial number greater
143 * that the y of all k-candidates, the klist is extended.
144 * At the end, the longest subsequence is pulled out
145 * and placed in the array J by unravel
147 * With J in hand, the matches there recorded are
148 * check'ed against reality to assure that no spurious
149 * matches have crept in due to hashing. If they have,
150 * they are broken, and "jackpot" is recorded--a harmless
151 * matter except that a true match for a spuriously
152 * mated line may now be unnecessarily reported as a change.
154 * Much of the complexity of the program comes simply
155 * from trying to minimize core utilization and
156 * maximize the range of doable problems by dynamically
157 * allocating what is needed and reusing what is not.
158 * The core requirements for problems larger than somewhat
159 * are (in words) 2*length(file0) + length(file1) +
160 * 3*(number of k-candidates installed), typically about
161 * 6n words for files of length n.
176 * The following struct is used to record change information when
177 * doing a "context" or "unified" diff. (see routine "change" to
178 * understand the highly mnemonic field names)
181 int a; /* start line in old file */
182 int b; /* end line in old file */
183 int c; /* start line in new file */
184 int d; /* end line in new file */
187 static void diff_output(FILE *, const char *, ...);
188 static void output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
189 static void check(struct got_diff_state *, FILE *, FILE *, int);
190 static void range(FILE *, int, int, char *);
191 static void uni_range(FILE *, int, int);
192 static void dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
194 static void prepare(struct got_diff_state *, int, FILE *, off_t, int);
195 static void prune(struct got_diff_state *);
196 static void equiv(struct line *, int, struct line *, int, int *);
197 static void unravel(struct got_diff_state *, int);
198 static void unsort(struct line *, int, int *);
199 static void change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
200 static void sort(struct line *, int);
201 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
202 static int ignoreline(char *);
203 static int asciifile(FILE *);
204 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
205 static int newcand(struct got_diff_state *, int, int, int);
206 static int search(struct got_diff_state *, int *, int, int);
207 static int skipline(FILE *);
208 static int isqrt(int);
209 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
210 static int readhash(struct got_diff_state *, FILE *, int);
211 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
212 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
213 static char *preadline(int, size_t, off_t);
216 * chrtran points to one of 2 translation tables: cup2low if folding upper to
217 * lower case clow2low if not folding case
219 u_char clow2low[256] = {
220 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
221 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
222 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
223 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
224 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
225 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
226 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
227 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
228 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
229 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
230 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
231 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
232 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
233 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
234 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
235 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
236 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
237 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
238 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
239 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
240 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
241 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
242 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
246 u_char cup2low[256] = {
247 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
248 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
249 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
250 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
251 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
252 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
253 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
254 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
255 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
256 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
257 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
258 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
259 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
260 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
261 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
262 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
263 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
264 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
265 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
266 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
267 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
268 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
269 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
274 diff_output(FILE *outfile, const char *fmt, ...)
279 vfprintf(outfile, fmt, ap);
283 const struct got_error *
284 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
285 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
287 const struct got_error *err = NULL;
293 ds->lastmatchline = 0;
294 ds->context_vec_ptr = ds->context_vec_start - 1;
295 if (flags & D_IGNORECASE)
296 ds->chrtran = cup2low;
298 ds->chrtran = clow2low;
299 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
300 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
303 if (flags & D_EMPTY1)
304 f1 = fopen(_PATH_DEVNULL, "r");
305 else if (f1 == NULL) {
310 if (flags & D_EMPTY2)
311 f2 = fopen(_PATH_DEVNULL, "r");
312 else if (f2 == NULL) {
317 switch (files_differ(ds, f1, f2, flags)) {
328 if ((flags & D_FORCEASCII) == 0 &&
329 (!asciifile(f1) || !asciifile(f2))) {
334 prepare(ds, 0, f1, ds->stb1.st_size, flags);
335 prepare(ds, 1, f2, ds->stb2.st_size, flags);
338 sort(ds->sfile[0], ds->slen[0]);
339 sort(ds->sfile[1], ds->slen[1]);
341 ds->member = (int *)ds->file[1];
342 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
343 ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
344 if (ds->member == NULL) {
345 err = got_error(GOT_ERR_NO_MEM);
349 ds->class = (int *)ds->file[0];
350 unsort(ds->sfile[0], ds->slen[0], ds->class);
351 ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
352 if (ds->class == NULL) {
353 err = got_error(GOT_ERR_NO_MEM);
357 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
358 if (ds->klist == NULL) {
359 err = got_error(GOT_ERR_NO_MEM);
364 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
365 if (ds->clist == NULL) {
366 err = got_error(GOT_ERR_NO_MEM);
369 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
373 ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
375 err = got_error(GOT_ERR_NO_MEM);
378 unravel(ds, ds->klist[i]);
384 ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
385 if (ds->ixold == NULL) {
386 err = got_error(GOT_ERR_NO_MEM);
389 ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
390 if (ds->ixnew == NULL) {
391 err = got_error(GOT_ERR_NO_MEM);
394 check(ds, f1, f2, flags);
395 output(outfile, ds, args, args->label[0], f1, args->label[1], f2, flags);
411 * Check to see if the given files differ.
412 * Returns 0 if they are the same, 1 if different, and -1 on error.
413 * XXX - could use code from cmp(1) [faster]
416 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
418 char buf1[BUFSIZ], buf2[BUFSIZ];
421 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
422 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
425 i = fread(buf1, 1, sizeof(buf1), f1);
426 j = fread(buf2, 1, sizeof(buf2), f2);
427 if ((!i && ferror(f1)) || (!j && ferror(f2)))
433 if (memcmp(buf1, buf2, i) != 0)
439 splice(char *dir, char *file)
444 dirlen = strlen(dir);
445 while (dirlen != 0 && dir[dirlen - 1] == '/')
447 if ((tail = strrchr(file, '/')) == NULL)
451 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
456 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
464 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
468 p = xcalloc(sz + 3, sizeof(*p));
469 for (j = 0; (h = readhash(ds, fd, flags));) {
472 p = xreallocarray(p, sz + 3, sizeof(*p));
481 prune(struct got_diff_state *ds)
485 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
486 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
489 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
490 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
493 for (j = 0; j < 2; j++) {
494 ds->sfile[j] = ds->file[j] + ds->pref;
495 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
496 for (i = 0; i <= ds->slen[j]; i++)
497 ds->sfile[j][i].serial = i;
502 equiv(struct line *a, int n, struct line *b, int m, int *c)
507 while (i <= n && j <= m) {
508 if (a[i].value < b[j].value)
510 else if (a[i].value == b[j].value)
521 while (b[j + 1].value == b[j].value) {
529 /* Code taken from ping.c */
538 do { /* newton was a stinker */
543 } while ((x - y) > 1 || (x - y) < -1);
549 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
552 int oldc, tc, oldl, sq;
553 u_int numtries, bound;
555 if (flags & D_MINIMAL)
559 bound = MAXIMUM(256, sq);
563 c[0] = newcand(ds, 0, 0, 0);
564 for (i = 1; i <= n; i++) {
573 if (y <= ds->clist[oldc].y)
575 l = search(ds, c, k, y);
579 if (ds->clist[c[l]].y <= y)
582 c[l] = newcand(ds, i, y, oldc);
587 c[l] = newcand(ds, i, y, oldc);
591 } while ((y = b[++j]) > 0 && numtries < bound);
597 newcand(struct got_diff_state *ds, int x, int y, int pred)
601 if (ds->clen == ds->clistlen) {
602 ds->clistlen = ds->clistlen * 11 / 10;
603 ds->clist = xreallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
605 q = ds->clist + ds->clen;
613 search(struct got_diff_state *ds, int *c, int k, int y)
617 if (ds->clist[c[k]].y < y) /* quick look for typical case */
625 t = ds->clist[c[l]].y;
637 unravel(struct got_diff_state *ds, int p)
642 for (i = 0; i <= ds->len[0]; i++)
643 ds->J[i] = i <= ds->pref ? i :
644 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
645 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
646 ds->J[q->x + ds->pref] = q->y + ds->pref;
650 * Check does double duty:
651 * 1. ferret out any fortuitous correspondences due
652 * to confounding by hashing (which result in "jackpot")
653 * 2. collect random access indexes to the two files
656 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
658 int i, j, jackpot, c, d;
664 ds->ixold[0] = ds->ixnew[0] = 0;
667 for (i = 1; i <= ds->len[0]; i++) {
669 ds->ixold[i] = ctold += skipline(f1);
672 while (j < ds->J[i]) {
673 ds->ixnew[j] = ctnew += skipline(f2);
676 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
681 * GNU diff ignores a missing newline
682 * in one file for -b or -w.
684 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
685 if (c == EOF && d == '\n') {
688 } else if (c == '\n' && d == EOF) {
695 if ((flags & D_FOLDBLANKS) && isspace(c) &&
701 } while (isspace(c = getc(f1)));
706 } while (isspace(d = getc(f2)));
707 } else if ((flags & D_IGNOREBLANKS)) {
708 while (isspace(c) && c != '\n') {
712 while (isspace(d) && d != '\n') {
717 if (ds->chrtran[c] != ds->chrtran[d]) {
720 if (c != '\n' && c != EOF)
721 ctold += skipline(f1);
722 if (d != '\n' && c != EOF)
723 ctnew += skipline(f2);
726 if (c == '\n' || c == EOF)
733 if ((c = getc(f1)) != (d = getc(f2))) {
736 if (c != '\n' && c != EOF)
737 ctold += skipline(f1);
738 if (d != '\n' && c != EOF)
739 ctnew += skipline(f2);
742 if (c == '\n' || c == EOF)
746 ds->ixold[i] = ctold;
747 ds->ixnew[j] = ctnew;
750 for (; j <= ds->len[1]; j++)
751 ds->ixnew[j] = ctnew += skipline(f2);
754 * fprintf(stderr, "jackpot\n");
758 /* shellsort CACM #201 */
760 sort(struct line *a, int n)
762 struct line *ai, *aim, w;
767 for (j = 1; j <= n; j *= 2)
769 for (m /= 2; m != 0; m /= 2) {
771 for (j = 1; j <= k; j++) {
772 for (ai = &a[j]; ai > a; ai -= m) {
775 break; /* wraparound */
776 if (aim->value > ai[0].value ||
777 (aim->value == ai[0].value &&
778 aim->serial > ai[0].serial))
780 w.value = ai[0].value;
781 ai[0].value = aim->value;
782 aim->value = w.value;
783 w.serial = ai[0].serial;
784 ai[0].serial = aim->serial;
785 aim->serial = w.serial;
792 unsort(struct line *f, int l, int *b)
796 a = xcalloc(l + 1, sizeof(*a));
797 for (i = 1; i <= l; i++)
798 a[f[i].serial] = f[i].value;
799 for (i = 1; i <= l; i++)
809 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
815 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
816 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
818 int m, i0, i1, j0, j1;
824 ds->J[m + 1] = ds->len[1] + 1;
825 if (args->diff_format != D_EDIT) {
826 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
827 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
829 j0 = ds->J[i0 - 1] + 1;
831 while (i1 < m && ds->J[i1 + 1] == 0)
833 j1 = ds->J[i1 + 1] - 1;
835 change(outfile, ds, args, file1, f1, file2, f2, i0, i1, j0, j1, &flags);
838 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
839 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
841 j0 = ds->J[i0 + 1] - 1;
843 while (i1 > 1 && ds->J[i1 - 1] == 0)
845 j1 = ds->J[i1 - 1] + 1;
847 change(outfile, ds, args, file1, f1, file2, f2, i1, i0, j1, j0, &flags);
851 change(outfile, ds, args, file1, f1, file2, f2, 1, 0, 1, ds->len[1], &flags);
852 if (args->diff_format == D_IFDEF) {
855 if ((c = getc(f1)) == EOF)
857 diff_output(outfile, "%c", c);
861 if (ds->anychange != 0) {
862 if (args->diff_format == D_CONTEXT)
863 dump_context_vec(outfile, ds, args, f1, f2, flags);
864 else if (args->diff_format == D_UNIFIED)
865 dump_unified_vec(outfile, ds, args, f1, f2, flags);
870 range(FILE *outfile, int a, int b, char *separator)
872 diff_output(outfile, "%d", a > b ? b : a);
874 diff_output(outfile, "%s%d", separator, b);
878 uni_range(FILE *outfile, int a, int b)
881 diff_output(outfile, "%d,%d", a, b - a + 1);
883 diff_output(outfile, "%d", b);
885 diff_output(outfile, "%d,0", b);
889 preadline(int fd, size_t rlen, off_t off)
894 line = xmalloc(rlen + 1);
895 if ((nr = pread(fd, line, rlen, off)) < 0)
897 if (nr > 0 && line[nr-1] == '\n')
904 ignoreline(char *line)
906 return 0; /* do not ignore any lines */
910 * Indicate that there is a difference between lines a and b of the from file
911 * to get to lines c to d of the to file. If a is greater then b then there
912 * are no lines in the from file involved and this means that there were
913 * lines appended (beginning at b). If c is greater than d then there are
914 * lines missing from the to file.
917 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
918 const char *file1, FILE *f1, const char *file2, FILE *f2,
919 int a, int b, int c, int d, int *pflags)
921 size_t max_context = 64;
925 if (args->diff_format != D_IFDEF && a > b && c > d)
927 if (args->ignore_pats != NULL) {
930 * All lines in the change, insert, or delete must
931 * match an ignore pattern for the change to be
934 if (a <= b) { /* Changes and deletes. */
935 for (i = a; i <= b; i++) {
936 line = preadline(fileno(f1),
937 ds->ixold[i] - ds->ixold[i - 1], ds->ixold[i - 1]);
938 if (!ignoreline(line))
942 if (a > b || c <= d) { /* Changes and inserts. */
943 for (i = c; i <= d; i++) {
944 line = preadline(fileno(f2),
945 ds->ixnew[i] - ds->ixnew[i - 1], ds->ixnew[i - 1]);
946 if (!ignoreline(line))
953 if (*pflags & D_HEADER) {
954 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
955 *pflags &= ~D_HEADER;
957 if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
959 * Allocate change records as needed.
961 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
962 ptrdiff_t offset = ds->context_vec_ptr - ds->context_vec_start;
964 ds->context_vec_start = xreallocarray(ds->context_vec_start,
965 max_context, sizeof(*ds->context_vec_start));
966 ds->context_vec_end = ds->context_vec_start + max_context;
967 ds->context_vec_ptr = ds->context_vec_start + offset;
969 if (ds->anychange == 0) {
971 * Print the context/unidiff header first time through.
973 print_header(outfile, ds, args, file1, file2);
975 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
976 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
978 * If this change is more than 'diff_context' lines from the
979 * previous change, dump the record and reset it.
981 if (args->diff_format == D_CONTEXT)
982 dump_context_vec(outfile, ds, args, f1, f2, *pflags);
984 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
986 ds->context_vec_ptr++;
987 ds->context_vec_ptr->a = a;
988 ds->context_vec_ptr->b = b;
989 ds->context_vec_ptr->c = c;
990 ds->context_vec_ptr->d = d;
993 if (ds->anychange == 0)
995 switch (args->diff_format) {
1000 range(outfile, a, b, ",");
1001 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1002 if (args->diff_format == D_NORMAL)
1003 range(outfile, c, d, ",");
1004 diff_output(outfile, "\n");
1007 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1008 range(outfile, a, b, " ");
1009 diff_output(outfile, "\n");
1013 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1015 diff_output(outfile, "d%d %d\n", a, b - a + 1);
1017 /* add changed lines */
1018 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1022 if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1023 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1024 if (a <= b && c <= d && args->diff_format == D_NORMAL)
1025 diff_output(outfile, "---\n");
1027 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1028 if (i != 0 && args->diff_format == D_EDIT) {
1030 * A non-zero return value for D_EDIT indicates that the
1031 * last line printed was a bare dot (".") that has been
1032 * escaped as ".." to prevent ed(1) from misinterpreting
1033 * it. We have to add a substitute command to change this
1034 * back and restart where we left off.
1036 diff_output(outfile, ".\n");
1037 diff_output(outfile, "%ds/.//\n", a + i - 1);
1043 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1044 diff_output(outfile, ".\n");
1046 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1052 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1053 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1055 int i, j, c, lastc, col, nc;
1058 * When doing #ifdef's, copy down to current line
1059 * if this is the first file, so that stuff makes it to output.
1061 if (args->diff_format == D_IFDEF && oldfile) {
1062 long curpos = ftell(lb);
1063 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1064 nc = f[a > b ? b : a - 1] - curpos;
1065 for (i = 0; i < nc; i++)
1066 diff_output(outfile, "%c", getc(lb));
1070 if (args->diff_format == D_IFDEF) {
1072 diff_output(outfile, "#else /* %s%s */\n",
1073 oldfile == 1 ? "!" : "", args->ifdefname);
1076 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1078 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1080 ds->inifdef = 1 + oldfile;
1082 for (i = a; i <= b; i++) {
1083 fseek(lb, f[i - 1], SEEK_SET);
1084 nc = f[i] - f[i - 1];
1085 if (args->diff_format != D_IFDEF && ch != '\0') {
1086 diff_output(outfile, "%c", ch);
1087 if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1088 || args->diff_format == D_UNIFIED))
1089 diff_output(outfile, "\t");
1090 else if (args->diff_format != D_UNIFIED)
1091 diff_output(outfile, " ");
1094 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1095 if ((c = getc(lb)) == EOF) {
1096 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1097 args->diff_format == D_NREVERSE)
1098 warnx("No newline at end of file");
1100 diff_output(outfile, "\n\\ No newline at end of "
1104 if (c == '\t' && (flags & D_EXPANDTABS)) {
1106 diff_output(outfile, " ");
1107 } while (++col & 7);
1109 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1112 * Don't print a bare "." line
1113 * since that will confuse ed(1).
1114 * Print ".." instead and return,
1115 * giving the caller an offset
1116 * from which to restart.
1118 diff_output(outfile, ".\n");
1121 diff_output(outfile, "%c", c);
1130 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1133 readhash(struct got_diff_state *ds, FILE *f, int flags)
1140 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1141 if (flags & D_IGNORECASE)
1142 for (i = 0; (t = getc(f)) != '\n'; i++) {
1148 sum = sum * 127 + ds->chrtran[t];
1151 for (i = 0; (t = getc(f)) != '\n'; i++) {
1157 sum = sum * 127 + t;
1161 switch (t = getc(f)) {
1170 if (space && (flags & D_IGNOREBLANKS) == 0) {
1174 sum = sum * 127 + ds->chrtran[t];
1188 * There is a remote possibility that we end up with a zero sum.
1189 * Zero is used as an EOF marker, so return 1 instead.
1191 return (sum == 0 ? 1 : sum);
1197 unsigned char buf[BUFSIZ];
1204 cnt = fread(buf, 1, sizeof(buf), f);
1205 return (memchr(buf, '\0', cnt) == NULL);
1208 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1211 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1213 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1215 int last = ds->lastline;
1219 while (pos > last) {
1220 fseek(fp, f[pos - 1], SEEK_SET);
1221 nc = f[pos] - f[pos - 1];
1222 if (nc >= sizeof(buf))
1223 nc = sizeof(buf) - 1;
1224 nc = fread(buf, 1, nc, fp);
1227 buf[strcspn(buf, "\n")] = '\0';
1228 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1229 if (begins_with(buf, "private:")) {
1231 state = " (private)";
1232 } else if (begins_with(buf, "protected:")) {
1234 state = " (protected)";
1235 } else if (begins_with(buf, "public:")) {
1237 state = " (public)";
1239 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1241 strlcat(ds->lastbuf, state,
1242 sizeof ds->lastbuf);
1243 ds->lastmatchline = pos;
1250 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1253 /* dump accumulated "context" diff changes */
1255 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1256 FILE *f1, FILE *f2, int flags)
1258 struct context_vec *cvp = ds->context_vec_start;
1259 int lowa, upb, lowc, upd, do_output;
1263 if (ds->context_vec_start > ds->context_vec_ptr)
1266 b = d = 0; /* gcc */
1267 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1268 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1269 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1270 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1272 diff_output(outfile, "***************");
1273 if ((flags & D_PROTOTYPE)) {
1274 f = match_function(ds, ds->ixold, lowa-1, f1);
1276 diff_output(outfile, " %s", f);
1278 diff_output(outfile, "\n*** ");
1279 range(outfile, lowa, upb, ",");
1280 diff_output(outfile, " ****\n");
1283 * Output changes to the "old" file. The first loop suppresses
1284 * output if there were no changes to the "old" file (we'll see
1285 * the "old" lines as context in the "new" list).
1288 for (; cvp <= ds->context_vec_ptr; cvp++)
1289 if (cvp->a <= cvp->b) {
1290 cvp = ds->context_vec_start;
1295 while (cvp <= ds->context_vec_ptr) {
1301 if (a <= b && c <= d)
1304 ch = (a <= b) ? 'd' : 'a';
1307 fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1309 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1310 fetch(outfile, ds, args, ds->ixold, a, b, f1,
1311 ch == 'c' ? '!' : '-', 0, flags);
1316 fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1318 /* output changes to the "new" file */
1319 diff_output(outfile, "--- ");
1320 range(outfile, lowc, upd, ",");
1321 diff_output(outfile, " ----\n");
1324 for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1325 if (cvp->c <= cvp->d) {
1326 cvp = ds->context_vec_start;
1331 while (cvp <= ds->context_vec_ptr) {
1337 if (a <= b && c <= d)
1340 ch = (a <= b) ? 'd' : 'a';
1343 fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1345 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1346 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1347 ch == 'c' ? '!' : '+', 0, flags);
1352 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1354 ds->context_vec_ptr = ds->context_vec_start - 1;
1357 /* dump accumulated "unified" diff changes */
1359 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1360 FILE *f1, FILE *f2, int flags)
1362 struct context_vec *cvp = ds->context_vec_start;
1363 int lowa, upb, lowc, upd;
1367 if (ds->context_vec_start > ds->context_vec_ptr)
1370 b = d = 0; /* gcc */
1371 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1372 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1373 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1374 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1376 diff_output(outfile, "@@ -");
1377 uni_range(outfile, lowa, upb);
1378 diff_output(outfile, " +");
1379 uni_range(outfile, lowc, upd);
1380 diff_output(outfile, " @@");
1381 if ((flags & D_PROTOTYPE)) {
1382 f = match_function(ds, ds->ixold, lowa-1, f1);
1384 diff_output(outfile, " %s", f);
1386 diff_output(outfile, "\n");
1389 * Output changes in "unified" diff format--the old and new lines
1390 * are printed together.
1392 for (; cvp <= ds->context_vec_ptr; cvp++) {
1399 * c: both new and old changes
1400 * d: only changes in the old file
1401 * a: only changes in the new file
1403 if (a <= b && c <= d)
1406 ch = (a <= b) ? 'd' : 'a';
1410 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1411 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1412 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1415 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1416 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1419 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1420 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1426 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1428 ds->context_vec_ptr = ds->context_vec_start - 1;
1432 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1433 const char *file1, const char *file2)
1435 if (args->label[0] != NULL)
1436 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1439 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1440 file1, ctime(&ds->stb1.st_mtime));
1441 if (args->label[1] != NULL)
1442 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1445 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1446 file2, ctime(&ds->stb2.st_mtime));