1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void uni_range(FILE *, int, int);
190 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
191 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
192 static void prune(struct got_diff_state *);
193 static void equiv(struct line *, int, struct line *, int, int *);
194 static void unravel(struct got_diff_state *, int);
195 static int unsort(struct line *, int, int *);
196 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
197 static void sort(struct line *, int);
198 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
199 static int asciifile(FILE *);
200 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
201 static int newcand(struct got_diff_state *, int, int, int, int *);
202 static int search(struct got_diff_state *, int *, int, int);
203 static int skipline(FILE *);
204 static int isqrt(int);
205 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
206 static int readhash(struct got_diff_state *, FILE *, int);
207 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
208 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
211 * chrtran points to one of 2 translation tables: cup2low if folding upper to
212 * lower case clow2low if not folding case
214 u_char clow2low[256] = {
215 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
216 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
217 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
218 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
219 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
220 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
221 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
222 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
223 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
224 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
225 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
226 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
227 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
228 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
229 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
230 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
231 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
232 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
233 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
234 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
235 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
236 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
237 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
241 u_char cup2low[256] = {
242 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
243 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
244 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
245 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
246 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
247 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
248 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
249 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
250 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
251 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
252 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
253 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
254 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
255 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
256 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
257 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
258 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
259 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
260 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
261 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
262 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
263 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
264 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 diff_output(FILE *outfile, const char *fmt, ...)
274 vfprintf(outfile, fmt, ap);
278 const struct got_error *
279 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
280 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
282 const struct got_error *err = NULL;
289 ds->lastmatchline = 0;
290 ds->context_vec_ptr = ds->context_vec_start - 1;
291 ds->max_context = 64;
292 if (flags & D_IGNORECASE)
293 ds->chrtran = cup2low;
295 ds->chrtran = clow2low;
296 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
297 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
300 if (flags & D_EMPTY1) {
301 f1 = fopen(_PATH_DEVNULL, "r");
303 err = got_error_from_errno();
307 else if (f1 == NULL) {
312 if (flags & D_EMPTY2) {
313 f2 = fopen(_PATH_DEVNULL, "r");
315 err = got_error_from_errno();
318 } else if (f2 == NULL) {
323 switch (files_differ(ds, f1, f2, flags)) {
334 if ((flags & D_FORCEASCII) == 0 &&
335 (!asciifile(f1) || !asciifile(f2))) {
340 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
341 err = got_error_from_errno();
344 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
345 err = got_error_from_errno();
350 sort(ds->sfile[0], ds->slen[0]);
351 sort(ds->sfile[1], ds->slen[1]);
353 ds->member = (int *)ds->file[1];
354 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
355 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
357 err = got_error_from_errno();
362 ds->class = (int *)ds->file[0];
363 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
364 err = got_error_from_errno();
367 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
369 err = got_error_from_errno();
374 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
375 if (ds->klist == NULL) {
376 err = got_error_from_errno();
381 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
382 if (ds->clist == NULL) {
383 err = got_error_from_errno();
386 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
388 err = got_error_from_errno();
392 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
394 err = got_error_from_errno();
398 unravel(ds, ds->klist[i]);
400 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
402 err = got_error_from_errno();
406 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
408 err = got_error_from_errno();
412 check(ds, f1, f2, flags);
413 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
415 err = got_error_from_errno();
438 * Check to see if the given files differ.
439 * Returns 0 if they are the same, 1 if different, and -1 on error.
440 * XXX - could use code from cmp(1) [faster]
443 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
445 char buf1[BUFSIZ], buf2[BUFSIZ];
448 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
449 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
452 i = fread(buf1, 1, sizeof(buf1), f1);
453 j = fread(buf2, 1, sizeof(buf2), f2);
454 if ((!i && ferror(f1)) || (!j && ferror(f2)))
460 if (memcmp(buf1, buf2, i) != 0)
466 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
474 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
478 p = calloc(sz + 3, sizeof(*p));
481 for (j = 0; (h = readhash(ds, fd, flags));) {
484 q = reallocarray(p, sz + 3, sizeof(*p));
500 prune(struct got_diff_state *ds)
504 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
505 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
508 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
509 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
512 for (j = 0; j < 2; j++) {
513 ds->sfile[j] = ds->file[j] + ds->pref;
514 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
515 for (i = 0; i <= ds->slen[j]; i++)
516 ds->sfile[j][i].serial = i;
521 equiv(struct line *a, int n, struct line *b, int m, int *c)
526 while (i <= n && j <= m) {
527 if (a[i].value < b[j].value)
529 else if (a[i].value == b[j].value)
540 while (b[j + 1].value == b[j].value) {
548 /* Code taken from ping.c */
557 do { /* newton was a stinker */
562 } while ((x - y) > 1 || (x - y) < -1);
568 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
571 int oldc, tc, oldl, sq;
572 u_int numtries, bound;
575 if (flags & D_MINIMAL)
579 bound = MAXIMUM(256, sq);
583 c[0] = newcand(ds, 0, 0, 0, &error);
586 for (i = 1; i <= n; i++) {
595 if (y <= ds->clist[oldc].y)
597 l = search(ds, c, k, y);
601 if (ds->clist[c[l]].y <= y)
604 c[l] = newcand(ds, i, y, oldc, &error);
611 c[l] = newcand(ds, i, y, oldc, &error);
617 } while ((y = b[++j]) > 0 && numtries < bound);
623 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
627 if (ds->clen == ds->clistlen) {
628 ds->clistlen = ds->clistlen * 11 / 10;
629 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
638 q = ds->clist + ds->clen;
647 search(struct got_diff_state *ds, int *c, int k, int y)
651 if (ds->clist[c[k]].y < y) /* quick look for typical case */
659 t = ds->clist[c[l]].y;
671 unravel(struct got_diff_state *ds, int p)
676 for (i = 0; i <= ds->len[0]; i++)
677 ds->J[i] = i <= ds->pref ? i :
678 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
679 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
680 ds->J[q->x + ds->pref] = q->y + ds->pref;
684 * Check does double duty:
685 * 1. ferret out any fortuitous correspondences due
686 * to confounding by hashing (which result in "jackpot")
687 * 2. collect random access indexes to the two files
690 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
692 int i, j, jackpot, c, d;
698 ds->ixold[0] = ds->ixnew[0] = 0;
701 for (i = 1; i <= ds->len[0]; i++) {
703 ds->ixold[i] = ctold += skipline(f1);
706 while (j < ds->J[i]) {
707 ds->ixnew[j] = ctnew += skipline(f2);
710 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
715 * GNU diff ignores a missing newline
716 * in one file for -b or -w.
718 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
719 if (c == EOF && d == '\n') {
722 } else if (c == '\n' && d == EOF) {
729 if ((flags & D_FOLDBLANKS) && isspace(c) &&
735 } while (isspace(c = getc(f1)));
740 } while (isspace(d = getc(f2)));
741 } else if ((flags & D_IGNOREBLANKS)) {
742 while (isspace(c) && c != '\n') {
746 while (isspace(d) && d != '\n') {
751 if (ds->chrtran[c] != ds->chrtran[d]) {
754 if (c != '\n' && c != EOF)
755 ctold += skipline(f1);
756 if (d != '\n' && c != EOF)
757 ctnew += skipline(f2);
760 if (c == '\n' || c == EOF)
767 if ((c = getc(f1)) != (d = getc(f2))) {
770 if (c != '\n' && c != EOF)
771 ctold += skipline(f1);
772 if (d != '\n' && c != EOF)
773 ctnew += skipline(f2);
776 if (c == '\n' || c == EOF)
780 ds->ixold[i] = ctold;
781 ds->ixnew[j] = ctnew;
784 for (; j <= ds->len[1]; j++)
785 ds->ixnew[j] = ctnew += skipline(f2);
788 * fprintf(stderr, "jackpot\n");
792 /* shellsort CACM #201 */
794 sort(struct line *a, int n)
796 struct line *ai, *aim, w;
801 for (j = 1; j <= n; j *= 2)
803 for (m /= 2; m != 0; m /= 2) {
805 for (j = 1; j <= k; j++) {
806 for (ai = &a[j]; ai > a; ai -= m) {
809 break; /* wraparound */
810 if (aim->value > ai[0].value ||
811 (aim->value == ai[0].value &&
812 aim->serial > ai[0].serial))
814 w.value = ai[0].value;
815 ai[0].value = aim->value;
816 aim->value = w.value;
817 w.serial = ai[0].serial;
818 ai[0].serial = aim->serial;
819 aim->serial = w.serial;
826 unsort(struct line *f, int l, int *b)
830 a = calloc(l + 1, sizeof(*a));
833 for (i = 1; i <= l; i++)
834 a[f[i].serial] = f[i].value;
835 for (i = 1; i <= l; i++)
847 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
853 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
854 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
856 int m, i0, i1, j0, j1;
863 ds->J[m + 1] = ds->len[1] + 1;
864 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
867 j0 = ds->J[i0 - 1] + 1;
869 while (i1 < m && ds->J[i1 + 1] == 0)
871 j1 = ds->J[i1 + 1] - 1;
873 error = change(outfile, ds, args, file1, f1, file2, f2,
874 i0, i1, j0, j1, &flags);
879 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
880 1, ds->len[1], &flags);
884 if (args->diff_format == D_IFDEF) {
887 if ((c = getc(f1)) == EOF)
889 diff_output(outfile, "%c", c);
893 if (ds->anychange != 0)
894 dump_unified_vec(outfile, ds, args, f1, f2, flags);
900 uni_range(FILE *outfile, int a, int b)
903 diff_output(outfile, "%d,%d", a, b - a + 1);
905 diff_output(outfile, "%d", b);
907 diff_output(outfile, "%d,0", b);
911 * Indicate that there is a difference between lines a and b of the from file
912 * to get to lines c to d of the to file. If a is greater then b then there
913 * are no lines in the from file involved and this means that there were
914 * lines appended (beginning at b). If c is greater than d then there are
915 * lines missing from the to file.
918 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
919 const char *file1, FILE *f1, const char *file2, FILE *f2,
920 int a, int b, int c, int d, int *pflags)
924 if (args->diff_format != D_IFDEF && a > b && c > d)
927 if (*pflags & D_HEADER) {
928 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
929 *pflags &= ~D_HEADER;
931 if (args->diff_format == D_UNIFIED) {
933 * Allocate change records as needed.
935 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
936 struct context_vec *cvp;
938 offset = ds->context_vec_ptr - ds->context_vec_start;
939 ds->max_context <<= 1;
940 ds->context_vec_start =
941 cvp = reallocarray(ds->context_vec_start,
942 ds->max_context, sizeof(*ds->context_vec_start));
944 free(ds->context_vec_start);
947 ds->context_vec_start = cvp;
948 ds->context_vec_end = ds->context_vec_start +
950 ds->context_vec_ptr = ds->context_vec_start + offset;
952 if (ds->anychange == 0) {
954 * Print the context/unidiff header first time through.
956 print_header(outfile, ds, args, file1, file2);
958 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
959 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
961 * If this change is more than 'diff_context' lines from the
962 * previous change, dump the record and reset it.
964 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
966 ds->context_vec_ptr++;
967 ds->context_vec_ptr->a = a;
968 ds->context_vec_ptr->b = b;
969 ds->context_vec_ptr->c = c;
970 ds->context_vec_ptr->d = d;
973 if (ds->anychange == 0)
975 if (args->diff_format == D_BRIEF)
977 if (args->diff_format == D_IFDEF)
978 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
979 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
981 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
989 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
990 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
992 int i, j, c, lastc, col, nc;
995 * When doing #ifdef's, copy down to current line
996 * if this is the first file, so that stuff makes it to output.
998 if (args->diff_format == D_IFDEF && oldfile) {
999 long curpos = ftell(lb);
1000 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1001 nc = f[a > b ? b : a - 1] - curpos;
1002 for (i = 0; i < nc; i++)
1003 diff_output(outfile, "%c", getc(lb));
1007 if (args->diff_format == D_IFDEF) {
1009 diff_output(outfile, "#else /* %s%s */\n",
1010 oldfile == 1 ? "!" : "", args->ifdefname);
1013 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1015 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1017 ds->inifdef = 1 + oldfile;
1019 for (i = a; i <= b; i++) {
1020 fseek(lb, f[i - 1], SEEK_SET);
1021 nc = f[i] - f[i - 1];
1022 if (args->diff_format != D_IFDEF && ch != '\0') {
1023 diff_output(outfile, "%c", ch);
1024 if (args->Tflag && args->diff_format == D_UNIFIED)
1025 diff_output(outfile, "\t");
1026 else if (args->diff_format != D_UNIFIED)
1027 diff_output(outfile, " ");
1030 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1031 if ((c = getc(lb)) == EOF) {
1032 diff_output(outfile, "\n\\ No newline at end of "
1036 if (c == '\t' && (flags & D_EXPANDTABS)) {
1038 diff_output(outfile, " ");
1039 } while (++col & 7);
1041 diff_output(outfile, "%c", c);
1050 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1053 readhash(struct got_diff_state *ds, FILE *f, int flags)
1060 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1061 if (flags & D_IGNORECASE)
1062 for (i = 0; (t = getc(f)) != '\n'; i++) {
1068 sum = sum * 127 + ds->chrtran[t];
1071 for (i = 0; (t = getc(f)) != '\n'; i++) {
1077 sum = sum * 127 + t;
1081 switch (t = getc(f)) {
1090 if (space && (flags & D_IGNOREBLANKS) == 0) {
1094 sum = sum * 127 + ds->chrtran[t];
1108 * There is a remote possibility that we end up with a zero sum.
1109 * Zero is used as an EOF marker, so return 1 instead.
1111 return (sum == 0 ? 1 : sum);
1117 unsigned char buf[BUFSIZ];
1124 cnt = fread(buf, 1, sizeof(buf), f);
1125 return (memchr(buf, '\0', cnt) == NULL);
1128 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1131 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1133 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1135 int last = ds->lastline;
1139 while (pos > last) {
1140 fseek(fp, f[pos - 1], SEEK_SET);
1141 nc = f[pos] - f[pos - 1];
1142 if (nc >= sizeof(buf))
1143 nc = sizeof(buf) - 1;
1144 nc = fread(buf, 1, nc, fp);
1147 buf[strcspn(buf, "\n")] = '\0';
1148 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1149 if (begins_with(buf, "private:")) {
1151 state = " (private)";
1152 } else if (begins_with(buf, "protected:")) {
1154 state = " (protected)";
1155 } else if (begins_with(buf, "public:")) {
1157 state = " (public)";
1159 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1161 strlcat(ds->lastbuf, state,
1162 sizeof ds->lastbuf);
1163 ds->lastmatchline = pos;
1170 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1173 /* dump accumulated "unified" diff changes */
1175 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1176 FILE *f1, FILE *f2, int flags)
1178 struct context_vec *cvp = ds->context_vec_start;
1179 int lowa, upb, lowc, upd;
1183 if (ds->context_vec_start > ds->context_vec_ptr)
1186 b = d = 0; /* gcc */
1187 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1188 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1189 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1190 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1192 diff_output(outfile, "@@ -");
1193 uni_range(outfile, lowa, upb);
1194 diff_output(outfile, " +");
1195 uni_range(outfile, lowc, upd);
1196 diff_output(outfile, " @@");
1197 if ((flags & D_PROTOTYPE)) {
1198 f = match_function(ds, ds->ixold, lowa-1, f1);
1200 diff_output(outfile, " %s", f);
1202 diff_output(outfile, "\n");
1205 * Output changes in "unified" diff format--the old and new lines
1206 * are printed together.
1208 for (; cvp <= ds->context_vec_ptr; cvp++) {
1215 * c: both new and old changes
1216 * d: only changes in the old file
1217 * a: only changes in the new file
1219 if (a <= b && c <= d)
1222 ch = (a <= b) ? 'd' : 'a';
1226 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1227 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1228 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1231 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1232 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1235 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1236 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1242 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1244 ds->context_vec_ptr = ds->context_vec_start - 1;
1248 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1249 const char *file1, const char *file2)
1251 if (args->label[0] != NULL)
1252 diff_output(outfile, "--- %s\n", args->label[0]);
1254 diff_output(outfile, "--- %s\t%s", file1,
1255 ctime(&ds->stb1.st_mtime));
1256 if (args->label[1] != NULL)
1257 diff_output(outfile, "+++ %s\n", args->label[1]);
1259 diff_output(outfile, "+++ %s\t%s", file2,
1260 ctime(&ds->stb2.st_mtime));