Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 /*
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
178 */
179 struct context_vec {
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
184 };
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void uni_range(FILE *, int, int);
190 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
191 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
192 static void prune(struct got_diff_state *);
193 static void equiv(struct line *, int, struct line *, int, int *);
194 static void unravel(struct got_diff_state *, int);
195 static int unsort(struct line *, int, int *);
196 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
197 static void sort(struct line *, int);
198 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
199 static int asciifile(FILE *);
200 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
201 static int newcand(struct got_diff_state *, int, int, int, int *);
202 static int search(struct got_diff_state *, int *, int, int);
203 static int skipline(FILE *);
204 static int isqrt(int);
205 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
206 static int readhash(struct got_diff_state *, FILE *, int);
207 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
208 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
210 /*
211 * chrtran points to one of 2 translation tables: cup2low if folding upper to
212 * lower case clow2low if not folding case
213 */
214 u_char clow2low[256] = {
215 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
216 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
217 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
218 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
219 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
220 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
221 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
222 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
223 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
224 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
225 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
226 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
227 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
228 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
229 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
230 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
231 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
232 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
233 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
234 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
235 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
236 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
237 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
238 0xfd, 0xfe, 0xff
239 };
241 u_char cup2low[256] = {
242 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
243 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
244 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
245 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
246 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
247 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
248 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
249 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
250 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
251 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
252 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
253 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
254 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
255 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
256 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
257 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
258 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
259 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
260 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
261 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
262 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
263 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
264 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
265 0xfd, 0xfe, 0xff
266 };
268 static void
269 diff_output(FILE *outfile, const char *fmt, ...)
271 va_list ap;
273 va_start(ap, fmt);
274 vfprintf(outfile, fmt, ap);
275 va_end(ap);
278 const struct got_error *
279 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
280 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
282 const struct got_error *err = NULL;
283 int i, *p;
284 long *lp;
286 *rval = D_SAME;
287 ds->anychange = 0;
288 ds->lastline = 0;
289 ds->lastmatchline = 0;
290 ds->context_vec_ptr = ds->context_vec_start - 1;
291 ds->max_context = 64;
292 if (flags & D_IGNORECASE)
293 ds->chrtran = cup2low;
294 else
295 ds->chrtran = clow2low;
296 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
297 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
298 return NULL;
300 if (flags & D_EMPTY1) {
301 f1 = fopen(_PATH_DEVNULL, "r");
302 if (f1 == NULL) {
303 err = got_error_from_errno();
304 goto closem;
307 else if (f1 == NULL) {
308 args->status |= 2;
309 goto closem;
312 if (flags & D_EMPTY2) {
313 f2 = fopen(_PATH_DEVNULL, "r");
314 if (f2 == NULL) {
315 err = got_error_from_errno();
316 goto closem;
318 } else if (f2 == NULL) {
319 args->status |= 2;
320 goto closem;
323 switch (files_differ(ds, f1, f2, flags)) {
324 case 0:
325 goto closem;
326 case 1:
327 break;
328 default:
329 /* error */
330 args->status |= 2;
331 goto closem;
334 if ((flags & D_FORCEASCII) == 0 &&
335 (!asciifile(f1) || !asciifile(f2))) {
336 *rval = D_BINARY;
337 args->status |= 1;
338 goto closem;
340 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
341 err = got_error_from_errno();
342 goto closem;
344 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
345 err = got_error_from_errno();
346 goto closem;
349 prune(ds);
350 sort(ds->sfile[0], ds->slen[0]);
351 sort(ds->sfile[1], ds->slen[1]);
353 ds->member = (int *)ds->file[1];
354 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
355 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
356 if (p == NULL) {
357 err = got_error_from_errno();
358 goto closem;
360 ds->member = p;
362 ds->class = (int *)ds->file[0];
363 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
364 err = got_error_from_errno();
365 goto closem;
367 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
368 if (p == NULL) {
369 err = got_error_from_errno();
370 goto closem;
372 ds->class = p;
374 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
375 if (ds->klist == NULL) {
376 err = got_error_from_errno();
377 goto closem;
379 ds->clen = 0;
380 ds->clistlen = 100;
381 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
382 if (ds->clist == NULL) {
383 err = got_error_from_errno();
384 goto closem;
386 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
387 if (i < 0) {
388 err = got_error_from_errno();
389 goto closem;
392 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
393 if (p == NULL) {
394 err = got_error_from_errno();
395 goto closem;
397 ds->J = p;
398 unravel(ds, ds->klist[i]);
400 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
401 if (lp == NULL) {
402 err = got_error_from_errno();
403 goto closem;
405 ds->ixold = lp;
406 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
407 if (lp == NULL) {
408 err = got_error_from_errno();
409 goto closem;
411 ds->ixnew = lp;
412 check(ds, f1, f2, flags);
413 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
414 flags))
415 err = got_error_from_errno();
416 closem:
417 free(ds->J);
418 free(ds->member);
419 free(ds->class);
420 free(ds->clist);
421 free(ds->klist);
422 free(ds->ixold);
423 free(ds->ixnew);
424 if (ds->anychange) {
425 args->status |= 1;
426 if (*rval == D_SAME)
427 *rval = D_DIFFER;
429 if (f1 != NULL)
430 fclose(f1);
431 if (f2 != NULL)
432 fclose(f2);
434 return (err);
437 /*
438 * Check to see if the given files differ.
439 * Returns 0 if they are the same, 1 if different, and -1 on error.
440 * XXX - could use code from cmp(1) [faster]
441 */
442 static int
443 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
445 char buf1[BUFSIZ], buf2[BUFSIZ];
446 size_t i, j;
448 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
449 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
450 return (1);
451 for (;;) {
452 i = fread(buf1, 1, sizeof(buf1), f1);
453 j = fread(buf2, 1, sizeof(buf2), f2);
454 if ((!i && ferror(f1)) || (!j && ferror(f2)))
455 return (-1);
456 if (i != j)
457 return (1);
458 if (i == 0)
459 return (0);
460 if (memcmp(buf1, buf2, i) != 0)
461 return (1);
465 static int
466 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
468 struct line *p, *q;
469 int j, h;
470 size_t sz;
472 rewind(fd);
474 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
475 if (sz < 100)
476 sz = 100;
478 p = calloc(sz + 3, sizeof(*p));
479 if (p == NULL)
480 return (-1);
481 for (j = 0; (h = readhash(ds, fd, flags));) {
482 if (j == sz) {
483 sz = sz * 3 / 2;
484 q = reallocarray(p, sz + 3, sizeof(*p));
485 if (q == NULL) {
486 free(p);
487 return (-1);
489 p = q;
491 p[++j].value = h;
493 ds->len[i] = j;
494 ds->file[i] = p;
496 return (0);
499 static void
500 prune(struct got_diff_state *ds)
502 int i, j;
504 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
505 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
506 ds->pref++)
508 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
509 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
510 ds->suff++)
512 for (j = 0; j < 2; j++) {
513 ds->sfile[j] = ds->file[j] + ds->pref;
514 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
515 for (i = 0; i <= ds->slen[j]; i++)
516 ds->sfile[j][i].serial = i;
520 static void
521 equiv(struct line *a, int n, struct line *b, int m, int *c)
523 int i, j;
525 i = j = 1;
526 while (i <= n && j <= m) {
527 if (a[i].value < b[j].value)
528 a[i++].value = 0;
529 else if (a[i].value == b[j].value)
530 a[i++].value = j;
531 else
532 j++;
534 while (i <= n)
535 a[i++].value = 0;
536 b[m + 1].value = 0;
537 j = 0;
538 while (++j <= m) {
539 c[j] = -b[j].serial;
540 while (b[j + 1].value == b[j].value) {
541 j++;
542 c[j] = b[j].serial;
545 c[j] = -1;
548 /* Code taken from ping.c */
549 static int
550 isqrt(int n)
552 int y, x = 1;
554 if (n == 0)
555 return (0);
557 do { /* newton was a stinker */
558 y = x;
559 x = n / x;
560 x += y;
561 x /= 2;
562 } while ((x - y) > 1 || (x - y) < -1);
564 return (x);
567 static int
568 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
570 int i, k, y, j, l;
571 int oldc, tc, oldl, sq;
572 u_int numtries, bound;
573 int error;
575 if (flags & D_MINIMAL)
576 bound = UINT_MAX;
577 else {
578 sq = isqrt(n);
579 bound = MAXIMUM(256, sq);
582 k = 0;
583 c[0] = newcand(ds, 0, 0, 0, &error);
584 if (error)
585 return -1;
586 for (i = 1; i <= n; i++) {
587 j = a[i];
588 if (j == 0)
589 continue;
590 y = -b[j];
591 oldl = 0;
592 oldc = c[0];
593 numtries = 0;
594 do {
595 if (y <= ds->clist[oldc].y)
596 continue;
597 l = search(ds, c, k, y);
598 if (l != oldl + 1)
599 oldc = c[l - 1];
600 if (l <= k) {
601 if (ds->clist[c[l]].y <= y)
602 continue;
603 tc = c[l];
604 c[l] = newcand(ds, i, y, oldc, &error);
605 if (error)
606 return -1;
607 oldc = tc;
608 oldl = l;
609 numtries++;
610 } else {
611 c[l] = newcand(ds, i, y, oldc, &error);
612 if (error)
613 return -1;
614 k++;
615 break;
617 } while ((y = b[++j]) > 0 && numtries < bound);
619 return (k);
622 static int
623 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
625 struct cand *q;
627 if (ds->clen == ds->clistlen) {
628 ds->clistlen = ds->clistlen * 11 / 10;
629 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
630 if (q == NULL) {
631 *errorp = -1;
632 free(ds->clist);
633 ds->clist = NULL;
634 return 0;
636 ds->clist = q;
638 q = ds->clist + ds->clen;
639 q->x = x;
640 q->y = y;
641 q->pred = pred;
642 *errorp = 0;
643 return (ds->clen++);
646 static int
647 search(struct got_diff_state *ds, int *c, int k, int y)
649 int i, j, l, t;
651 if (ds->clist[c[k]].y < y) /* quick look for typical case */
652 return (k + 1);
653 i = 0;
654 j = k + 1;
655 for (;;) {
656 l = (i + j) / 2;
657 if (l <= i)
658 break;
659 t = ds->clist[c[l]].y;
660 if (t > y)
661 j = l;
662 else if (t < y)
663 i = l;
664 else
665 return (l);
667 return (l + 1);
670 static void
671 unravel(struct got_diff_state *ds, int p)
673 struct cand *q;
674 int i;
676 for (i = 0; i <= ds->len[0]; i++)
677 ds->J[i] = i <= ds->pref ? i :
678 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
679 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
680 ds->J[q->x + ds->pref] = q->y + ds->pref;
683 /*
684 * Check does double duty:
685 * 1. ferret out any fortuitous correspondences due
686 * to confounding by hashing (which result in "jackpot")
687 * 2. collect random access indexes to the two files
688 */
689 static void
690 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
692 int i, j, jackpot, c, d;
693 long ctold, ctnew;
695 rewind(f1);
696 rewind(f2);
697 j = 1;
698 ds->ixold[0] = ds->ixnew[0] = 0;
699 jackpot = 0;
700 ctold = ctnew = 0;
701 for (i = 1; i <= ds->len[0]; i++) {
702 if (ds->J[i] == 0) {
703 ds->ixold[i] = ctold += skipline(f1);
704 continue;
706 while (j < ds->J[i]) {
707 ds->ixnew[j] = ctnew += skipline(f2);
708 j++;
710 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
711 for (;;) {
712 c = getc(f1);
713 d = getc(f2);
714 /*
715 * GNU diff ignores a missing newline
716 * in one file for -b or -w.
717 */
718 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
719 if (c == EOF && d == '\n') {
720 ctnew++;
721 break;
722 } else if (c == '\n' && d == EOF) {
723 ctold++;
724 break;
727 ctold++;
728 ctnew++;
729 if ((flags & D_FOLDBLANKS) && isspace(c) &&
730 isspace(d)) {
731 do {
732 if (c == '\n')
733 break;
734 ctold++;
735 } while (isspace(c = getc(f1)));
736 do {
737 if (d == '\n')
738 break;
739 ctnew++;
740 } while (isspace(d = getc(f2)));
741 } else if ((flags & D_IGNOREBLANKS)) {
742 while (isspace(c) && c != '\n') {
743 c = getc(f1);
744 ctold++;
746 while (isspace(d) && d != '\n') {
747 d = getc(f2);
748 ctnew++;
751 if (ds->chrtran[c] != ds->chrtran[d]) {
752 jackpot++;
753 ds->J[i] = 0;
754 if (c != '\n' && c != EOF)
755 ctold += skipline(f1);
756 if (d != '\n' && c != EOF)
757 ctnew += skipline(f2);
758 break;
760 if (c == '\n' || c == EOF)
761 break;
763 } else {
764 for (;;) {
765 ctold++;
766 ctnew++;
767 if ((c = getc(f1)) != (d = getc(f2))) {
768 /* jackpot++; */
769 ds->J[i] = 0;
770 if (c != '\n' && c != EOF)
771 ctold += skipline(f1);
772 if (d != '\n' && c != EOF)
773 ctnew += skipline(f2);
774 break;
776 if (c == '\n' || c == EOF)
777 break;
780 ds->ixold[i] = ctold;
781 ds->ixnew[j] = ctnew;
782 j++;
784 for (; j <= ds->len[1]; j++)
785 ds->ixnew[j] = ctnew += skipline(f2);
786 /*
787 * if (jackpot)
788 * fprintf(stderr, "jackpot\n");
789 */
792 /* shellsort CACM #201 */
793 static void
794 sort(struct line *a, int n)
796 struct line *ai, *aim, w;
797 int j, m = 0, k;
799 if (n == 0)
800 return;
801 for (j = 1; j <= n; j *= 2)
802 m = 2 * j - 1;
803 for (m /= 2; m != 0; m /= 2) {
804 k = n - m;
805 for (j = 1; j <= k; j++) {
806 for (ai = &a[j]; ai > a; ai -= m) {
807 aim = &ai[m];
808 if (aim < ai)
809 break; /* wraparound */
810 if (aim->value > ai[0].value ||
811 (aim->value == ai[0].value &&
812 aim->serial > ai[0].serial))
813 break;
814 w.value = ai[0].value;
815 ai[0].value = aim->value;
816 aim->value = w.value;
817 w.serial = ai[0].serial;
818 ai[0].serial = aim->serial;
819 aim->serial = w.serial;
825 static int
826 unsort(struct line *f, int l, int *b)
828 int *a, i;
830 a = calloc(l + 1, sizeof(*a));
831 if (a == NULL)
832 return (-1);
833 for (i = 1; i <= l; i++)
834 a[f[i].serial] = f[i].value;
835 for (i = 1; i <= l; i++)
836 b[i] = a[i];
837 free(a);
839 return (0);
842 static int
843 skipline(FILE *f)
845 int i, c;
847 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
848 continue;
849 return (i);
852 static int
853 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
854 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
856 int m, i0, i1, j0, j1;
857 int error = 0;
859 rewind(f1);
860 rewind(f2);
861 m = ds->len[0];
862 ds->J[0] = 0;
863 ds->J[m + 1] = ds->len[1] + 1;
864 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
866 i0++;
867 j0 = ds->J[i0 - 1] + 1;
868 i1 = i0 - 1;
869 while (i1 < m && ds->J[i1 + 1] == 0)
870 i1++;
871 j1 = ds->J[i1 + 1] - 1;
872 ds->J[i1] = j1;
873 error = change(outfile, ds, args, file1, f1, file2, f2,
874 i0, i1, j0, j1, &flags);
875 if (error)
876 return (error);
878 if (m == 0) {
879 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
880 1, ds->len[1], &flags);
881 if (error)
882 return (error);
884 if (ds->anychange != 0)
885 dump_unified_vec(outfile, ds, args, f1, f2, flags);
887 return (0);
890 static void
891 uni_range(FILE *outfile, int a, int b)
893 if (a < b)
894 diff_output(outfile, "%d,%d", a, b - a + 1);
895 else if (a == b)
896 diff_output(outfile, "%d", b);
897 else
898 diff_output(outfile, "%d,0", b);
901 /*
902 * Indicate that there is a difference between lines a and b of the from file
903 * to get to lines c to d of the to file. If a is greater then b then there
904 * are no lines in the from file involved and this means that there were
905 * lines appended (beginning at b). If c is greater than d then there are
906 * lines missing from the to file.
907 */
908 static int
909 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
910 const char *file1, FILE *f1, const char *file2, FILE *f2,
911 int a, int b, int c, int d, int *pflags)
913 int i;
915 if (a > b && c > d)
916 return (0);
918 if (*pflags & D_HEADER) {
919 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
920 *pflags &= ~D_HEADER;
922 if (args->diff_format == D_UNIFIED) {
923 /*
924 * Allocate change records as needed.
925 */
926 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
927 struct context_vec *cvp;
928 ptrdiff_t offset;
929 offset = ds->context_vec_ptr - ds->context_vec_start;
930 ds->max_context <<= 1;
931 ds->context_vec_start =
932 cvp = reallocarray(ds->context_vec_start,
933 ds->max_context, sizeof(*ds->context_vec_start));
934 if (cvp == NULL) {
935 free(ds->context_vec_start);
936 return (-1);
938 ds->context_vec_start = cvp;
939 ds->context_vec_end = ds->context_vec_start +
940 ds->max_context;
941 ds->context_vec_ptr = ds->context_vec_start + offset;
943 if (ds->anychange == 0) {
944 /*
945 * Print the context/unidiff header first time through.
946 */
947 print_header(outfile, ds, args, file1, file2);
948 ds->anychange = 1;
949 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
950 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
951 /*
952 * If this change is more than 'diff_context' lines from the
953 * previous change, dump the record and reset it.
954 */
955 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
957 ds->context_vec_ptr++;
958 ds->context_vec_ptr->a = a;
959 ds->context_vec_ptr->b = b;
960 ds->context_vec_ptr->c = c;
961 ds->context_vec_ptr->d = d;
962 return (0);
964 if (ds->anychange == 0)
965 ds->anychange = 1;
966 if (args->diff_format == D_BRIEF)
967 return (0);
968 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
969 return (0);
972 static int
973 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
974 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
976 int i, j, c, lastc, col, nc;
978 if (a > b)
979 return (0);
980 for (i = a; i <= b; i++) {
981 fseek(lb, f[i - 1], SEEK_SET);
982 nc = f[i] - f[i - 1];
983 if (ch != '\0') {
984 diff_output(outfile, "%c", ch);
985 if (args->Tflag && args->diff_format == D_UNIFIED)
986 diff_output(outfile, "\t");
987 else if (args->diff_format != D_UNIFIED)
988 diff_output(outfile, " ");
990 col = 0;
991 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
992 if ((c = getc(lb)) == EOF) {
993 diff_output(outfile, "\n\\ No newline at end of "
994 "file\n");
995 return (0);
997 if (c == '\t' && (flags & D_EXPANDTABS)) {
998 do {
999 diff_output(outfile, " ");
1000 } while (++col & 7);
1001 } else {
1002 diff_output(outfile, "%c", c);
1003 col++;
1007 return (0);
1011 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1013 static int
1014 readhash(struct got_diff_state *ds, FILE *f, int flags)
1016 int i, t, space;
1017 int sum;
1019 sum = 1;
1020 space = 0;
1021 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1022 if (flags & D_IGNORECASE)
1023 for (i = 0; (t = getc(f)) != '\n'; i++) {
1024 if (t == EOF) {
1025 if (i == 0)
1026 return (0);
1027 break;
1029 sum = sum * 127 + ds->chrtran[t];
1031 else
1032 for (i = 0; (t = getc(f)) != '\n'; i++) {
1033 if (t == EOF) {
1034 if (i == 0)
1035 return (0);
1036 break;
1038 sum = sum * 127 + t;
1040 } else {
1041 for (i = 0;;) {
1042 switch (t = getc(f)) {
1043 case '\t':
1044 case '\r':
1045 case '\v':
1046 case '\f':
1047 case ' ':
1048 space++;
1049 continue;
1050 default:
1051 if (space && (flags & D_IGNOREBLANKS) == 0) {
1052 i++;
1053 space = 0;
1055 sum = sum * 127 + ds->chrtran[t];
1056 i++;
1057 continue;
1058 case EOF:
1059 if (i == 0)
1060 return (0);
1061 /* FALLTHROUGH */
1062 case '\n':
1063 break;
1065 break;
1069 * There is a remote possibility that we end up with a zero sum.
1070 * Zero is used as an EOF marker, so return 1 instead.
1072 return (sum == 0 ? 1 : sum);
1075 static int
1076 asciifile(FILE *f)
1078 unsigned char buf[BUFSIZ];
1079 size_t cnt;
1081 if (f == NULL)
1082 return (1);
1084 rewind(f);
1085 cnt = fread(buf, 1, sizeof(buf), f);
1086 return (memchr(buf, '\0', cnt) == NULL);
1089 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1091 static char *
1092 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1094 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1095 size_t nc;
1096 int last = ds->lastline;
1097 char *state = NULL;
1099 ds->lastline = pos;
1100 while (pos > last) {
1101 fseek(fp, f[pos - 1], SEEK_SET);
1102 nc = f[pos] - f[pos - 1];
1103 if (nc >= sizeof(buf))
1104 nc = sizeof(buf) - 1;
1105 nc = fread(buf, 1, nc, fp);
1106 if (nc > 0) {
1107 buf[nc] = '\0';
1108 buf[strcspn(buf, "\n")] = '\0';
1109 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1110 if (begins_with(buf, "private:")) {
1111 if (!state)
1112 state = " (private)";
1113 } else if (begins_with(buf, "protected:")) {
1114 if (!state)
1115 state = " (protected)";
1116 } else if (begins_with(buf, "public:")) {
1117 if (!state)
1118 state = " (public)";
1119 } else {
1120 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1121 if (state)
1122 strlcat(ds->lastbuf, state,
1123 sizeof ds->lastbuf);
1124 ds->lastmatchline = pos;
1125 return ds->lastbuf;
1129 pos--;
1131 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1134 /* dump accumulated "unified" diff changes */
1135 static void
1136 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1137 FILE *f1, FILE *f2, int flags)
1139 struct context_vec *cvp = ds->context_vec_start;
1140 int lowa, upb, lowc, upd;
1141 int a, b, c, d;
1142 char ch, *f;
1144 if (ds->context_vec_start > ds->context_vec_ptr)
1145 return;
1147 b = d = 0; /* gcc */
1148 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1149 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1150 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1151 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1153 diff_output(outfile, "@@ -");
1154 uni_range(outfile, lowa, upb);
1155 diff_output(outfile, " +");
1156 uni_range(outfile, lowc, upd);
1157 diff_output(outfile, " @@");
1158 if ((flags & D_PROTOTYPE)) {
1159 f = match_function(ds, ds->ixold, lowa-1, f1);
1160 if (f != NULL)
1161 diff_output(outfile, " %s", f);
1163 diff_output(outfile, "\n");
1166 * Output changes in "unified" diff format--the old and new lines
1167 * are printed together.
1169 for (; cvp <= ds->context_vec_ptr; cvp++) {
1170 a = cvp->a;
1171 b = cvp->b;
1172 c = cvp->c;
1173 d = cvp->d;
1176 * c: both new and old changes
1177 * d: only changes in the old file
1178 * a: only changes in the new file
1180 if (a <= b && c <= d)
1181 ch = 'c';
1182 else
1183 ch = (a <= b) ? 'd' : 'a';
1185 switch (ch) {
1186 case 'c':
1187 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1188 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1189 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1190 break;
1191 case 'd':
1192 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1193 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1194 break;
1195 case 'a':
1196 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1197 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1198 break;
1200 lowa = b + 1;
1201 lowc = d + 1;
1203 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1205 ds->context_vec_ptr = ds->context_vec_start - 1;
1208 static void
1209 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1210 const char *file1, const char *file2)
1212 if (args->label[0] != NULL)
1213 diff_output(outfile, "--- %s\n", args->label[0]);
1214 else
1215 diff_output(outfile, "--- %s\t%s", file1,
1216 ctime(&ds->stb1.st_mtime));
1217 if (args->label[1] != NULL)
1218 diff_output(outfile, "+++ %s\n", args->label[1]);
1219 else
1220 diff_output(outfile, "+++ %s\t%s", file2,
1221 ctime(&ds->stb2.st_mtime));