Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 /*
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
178 */
179 struct context_vec {
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
184 };
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void range(FILE *, int, int, char *);
190 static void uni_range(FILE *, int, int);
191 static void dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void prune(struct got_diff_state *);
195 static void equiv(struct line *, int, struct line *, int, int *);
196 static void unravel(struct got_diff_state *, int);
197 static int unsort(struct line *, int, int *);
198 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void sort(struct line *, int);
200 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int ignoreline(char *);
202 static int asciifile(FILE *);
203 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int newcand(struct got_diff_state *, int, int, int, int *);
205 static int search(struct got_diff_state *, int *, int, int);
206 static int skipline(FILE *);
207 static int isqrt(int);
208 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int readhash(struct got_diff_state *, FILE *, int);
210 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char *preadline(int, size_t, off_t);
214 /*
215 * chrtran points to one of 2 translation tables: cup2low if folding upper to
216 * lower case clow2low if not folding case
217 */
218 u_char clow2low[256] = {
219 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 0xfd, 0xfe, 0xff
243 };
245 u_char cup2low[256] = {
246 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 0xfd, 0xfe, 0xff
270 };
272 static void
273 diff_output(FILE *outfile, const char *fmt, ...)
275 va_list ap;
277 va_start(ap, fmt);
278 vfprintf(outfile, fmt, ap);
279 va_end(ap);
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
286 const struct got_error *err = NULL;
287 int i, *p;
288 long *lp;
290 *rval = D_SAME;
291 ds->anychange = 0;
292 ds->lastline = 0;
293 ds->lastmatchline = 0;
294 ds->context_vec_ptr = ds->context_vec_start - 1;
295 ds->max_context = 64;
296 if (flags & D_IGNORECASE)
297 ds->chrtran = cup2low;
298 else
299 ds->chrtran = clow2low;
300 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
301 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
302 return NULL;
304 if (flags & D_EMPTY1) {
305 f1 = fopen(_PATH_DEVNULL, "r");
306 if (f1 == NULL) {
307 err = got_error_from_errno();
308 goto closem;
311 else if (f1 == NULL) {
312 args->status |= 2;
313 goto closem;
316 if (flags & D_EMPTY2) {
317 f2 = fopen(_PATH_DEVNULL, "r");
318 if (f2 == NULL) {
319 err = got_error_from_errno();
320 goto closem;
322 } else if (f2 == NULL) {
323 args->status |= 2;
324 goto closem;
327 switch (files_differ(ds, f1, f2, flags)) {
328 case 0:
329 goto closem;
330 case 1:
331 break;
332 default:
333 /* error */
334 args->status |= 2;
335 goto closem;
338 if ((flags & D_FORCEASCII) == 0 &&
339 (!asciifile(f1) || !asciifile(f2))) {
340 *rval = D_BINARY;
341 args->status |= 1;
342 goto closem;
344 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
345 err = got_error_from_errno();
346 goto closem;
348 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
349 err = got_error_from_errno();
350 goto closem;
353 prune(ds);
354 sort(ds->sfile[0], ds->slen[0]);
355 sort(ds->sfile[1], ds->slen[1]);
357 ds->member = (int *)ds->file[1];
358 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
359 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
360 if (p == NULL) {
361 err = got_error_from_errno();
362 goto closem;
364 ds->member = p;
366 ds->class = (int *)ds->file[0];
367 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
368 err = got_error_from_errno();
369 goto closem;
371 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
372 if (p == NULL) {
373 err = got_error_from_errno();
374 goto closem;
376 ds->class = p;
378 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
379 if (ds->klist == NULL) {
380 err = got_error_from_errno();
381 goto closem;
383 ds->clen = 0;
384 ds->clistlen = 100;
385 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
386 if (ds->clist == NULL) {
387 err = got_error_from_errno();
388 goto closem;
390 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
391 if (i < 0) {
392 err = got_error_from_errno();
393 goto closem;
396 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
397 if (p == NULL) {
398 err = got_error_from_errno();
399 goto closem;
401 ds->J = p;
402 unravel(ds, ds->klist[i]);
404 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
405 if (lp == NULL) {
406 err = got_error_from_errno();
407 goto closem;
409 ds->ixold = lp;
410 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
411 if (lp == NULL) {
412 err = got_error_from_errno();
413 goto closem;
415 ds->ixnew = lp;
416 check(ds, f1, f2, flags);
417 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
418 flags))
419 err = got_error_from_errno();
420 closem:
421 free(ds->J);
422 free(ds->member);
423 free(ds->class);
424 free(ds->clist);
425 free(ds->klist);
426 free(ds->ixold);
427 free(ds->ixnew);
428 if (ds->anychange) {
429 args->status |= 1;
430 if (*rval == D_SAME)
431 *rval = D_DIFFER;
433 if (f1 != NULL)
434 fclose(f1);
435 if (f2 != NULL)
436 fclose(f2);
438 return (err);
441 /*
442 * Check to see if the given files differ.
443 * Returns 0 if they are the same, 1 if different, and -1 on error.
444 * XXX - could use code from cmp(1) [faster]
445 */
446 static int
447 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
449 char buf1[BUFSIZ], buf2[BUFSIZ];
450 size_t i, j;
452 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
453 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
454 return (1);
455 for (;;) {
456 i = fread(buf1, 1, sizeof(buf1), f1);
457 j = fread(buf2, 1, sizeof(buf2), f2);
458 if ((!i && ferror(f1)) || (!j && ferror(f2)))
459 return (-1);
460 if (i != j)
461 return (1);
462 if (i == 0)
463 return (0);
464 if (memcmp(buf1, buf2, i) != 0)
465 return (1);
469 static int
470 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
472 struct line *p, *q;
473 int j, h;
474 size_t sz;
476 rewind(fd);
478 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
479 if (sz < 100)
480 sz = 100;
482 p = calloc(sz + 3, sizeof(*p));
483 if (p == NULL)
484 return (-1);
485 for (j = 0; (h = readhash(ds, fd, flags));) {
486 if (j == sz) {
487 sz = sz * 3 / 2;
488 q = reallocarray(p, sz + 3, sizeof(*p));
489 if (q == NULL) {
490 free(p);
491 return (-1);
493 p = q;
495 p[++j].value = h;
497 ds->len[i] = j;
498 ds->file[i] = p;
500 return (0);
503 static void
504 prune(struct got_diff_state *ds)
506 int i, j;
508 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
509 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
510 ds->pref++)
512 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
513 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
514 ds->suff++)
516 for (j = 0; j < 2; j++) {
517 ds->sfile[j] = ds->file[j] + ds->pref;
518 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
519 for (i = 0; i <= ds->slen[j]; i++)
520 ds->sfile[j][i].serial = i;
524 static void
525 equiv(struct line *a, int n, struct line *b, int m, int *c)
527 int i, j;
529 i = j = 1;
530 while (i <= n && j <= m) {
531 if (a[i].value < b[j].value)
532 a[i++].value = 0;
533 else if (a[i].value == b[j].value)
534 a[i++].value = j;
535 else
536 j++;
538 while (i <= n)
539 a[i++].value = 0;
540 b[m + 1].value = 0;
541 j = 0;
542 while (++j <= m) {
543 c[j] = -b[j].serial;
544 while (b[j + 1].value == b[j].value) {
545 j++;
546 c[j] = b[j].serial;
549 c[j] = -1;
552 /* Code taken from ping.c */
553 static int
554 isqrt(int n)
556 int y, x = 1;
558 if (n == 0)
559 return (0);
561 do { /* newton was a stinker */
562 y = x;
563 x = n / x;
564 x += y;
565 x /= 2;
566 } while ((x - y) > 1 || (x - y) < -1);
568 return (x);
571 static int
572 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
574 int i, k, y, j, l;
575 int oldc, tc, oldl, sq;
576 u_int numtries, bound;
577 int error;
579 if (flags & D_MINIMAL)
580 bound = UINT_MAX;
581 else {
582 sq = isqrt(n);
583 bound = MAXIMUM(256, sq);
586 k = 0;
587 c[0] = newcand(ds, 0, 0, 0, &error);
588 if (error)
589 return -1;
590 for (i = 1; i <= n; i++) {
591 j = a[i];
592 if (j == 0)
593 continue;
594 y = -b[j];
595 oldl = 0;
596 oldc = c[0];
597 numtries = 0;
598 do {
599 if (y <= ds->clist[oldc].y)
600 continue;
601 l = search(ds, c, k, y);
602 if (l != oldl + 1)
603 oldc = c[l - 1];
604 if (l <= k) {
605 if (ds->clist[c[l]].y <= y)
606 continue;
607 tc = c[l];
608 c[l] = newcand(ds, i, y, oldc, &error);
609 if (error)
610 return -1;
611 oldc = tc;
612 oldl = l;
613 numtries++;
614 } else {
615 c[l] = newcand(ds, i, y, oldc, &error);
616 if (error)
617 return -1;
618 k++;
619 break;
621 } while ((y = b[++j]) > 0 && numtries < bound);
623 return (k);
626 static int
627 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
629 struct cand *q;
631 if (ds->clen == ds->clistlen) {
632 ds->clistlen = ds->clistlen * 11 / 10;
633 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
634 if (q == NULL) {
635 *errorp = -1;
636 free(ds->clist);
637 ds->clist = NULL;
638 return 0;
640 ds->clist = q;
642 q = ds->clist + ds->clen;
643 q->x = x;
644 q->y = y;
645 q->pred = pred;
646 *errorp = 0;
647 return (ds->clen++);
650 static int
651 search(struct got_diff_state *ds, int *c, int k, int y)
653 int i, j, l, t;
655 if (ds->clist[c[k]].y < y) /* quick look for typical case */
656 return (k + 1);
657 i = 0;
658 j = k + 1;
659 for (;;) {
660 l = (i + j) / 2;
661 if (l <= i)
662 break;
663 t = ds->clist[c[l]].y;
664 if (t > y)
665 j = l;
666 else if (t < y)
667 i = l;
668 else
669 return (l);
671 return (l + 1);
674 static void
675 unravel(struct got_diff_state *ds, int p)
677 struct cand *q;
678 int i;
680 for (i = 0; i <= ds->len[0]; i++)
681 ds->J[i] = i <= ds->pref ? i :
682 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
683 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
684 ds->J[q->x + ds->pref] = q->y + ds->pref;
687 /*
688 * Check does double duty:
689 * 1. ferret out any fortuitous correspondences due
690 * to confounding by hashing (which result in "jackpot")
691 * 2. collect random access indexes to the two files
692 */
693 static void
694 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
696 int i, j, jackpot, c, d;
697 long ctold, ctnew;
699 rewind(f1);
700 rewind(f2);
701 j = 1;
702 ds->ixold[0] = ds->ixnew[0] = 0;
703 jackpot = 0;
704 ctold = ctnew = 0;
705 for (i = 1; i <= ds->len[0]; i++) {
706 if (ds->J[i] == 0) {
707 ds->ixold[i] = ctold += skipline(f1);
708 continue;
710 while (j < ds->J[i]) {
711 ds->ixnew[j] = ctnew += skipline(f2);
712 j++;
714 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
715 for (;;) {
716 c = getc(f1);
717 d = getc(f2);
718 /*
719 * GNU diff ignores a missing newline
720 * in one file for -b or -w.
721 */
722 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
723 if (c == EOF && d == '\n') {
724 ctnew++;
725 break;
726 } else if (c == '\n' && d == EOF) {
727 ctold++;
728 break;
731 ctold++;
732 ctnew++;
733 if ((flags & D_FOLDBLANKS) && isspace(c) &&
734 isspace(d)) {
735 do {
736 if (c == '\n')
737 break;
738 ctold++;
739 } while (isspace(c = getc(f1)));
740 do {
741 if (d == '\n')
742 break;
743 ctnew++;
744 } while (isspace(d = getc(f2)));
745 } else if ((flags & D_IGNOREBLANKS)) {
746 while (isspace(c) && c != '\n') {
747 c = getc(f1);
748 ctold++;
750 while (isspace(d) && d != '\n') {
751 d = getc(f2);
752 ctnew++;
755 if (ds->chrtran[c] != ds->chrtran[d]) {
756 jackpot++;
757 ds->J[i] = 0;
758 if (c != '\n' && c != EOF)
759 ctold += skipline(f1);
760 if (d != '\n' && c != EOF)
761 ctnew += skipline(f2);
762 break;
764 if (c == '\n' || c == EOF)
765 break;
767 } else {
768 for (;;) {
769 ctold++;
770 ctnew++;
771 if ((c = getc(f1)) != (d = getc(f2))) {
772 /* jackpot++; */
773 ds->J[i] = 0;
774 if (c != '\n' && c != EOF)
775 ctold += skipline(f1);
776 if (d != '\n' && c != EOF)
777 ctnew += skipline(f2);
778 break;
780 if (c == '\n' || c == EOF)
781 break;
784 ds->ixold[i] = ctold;
785 ds->ixnew[j] = ctnew;
786 j++;
788 for (; j <= ds->len[1]; j++)
789 ds->ixnew[j] = ctnew += skipline(f2);
790 /*
791 * if (jackpot)
792 * fprintf(stderr, "jackpot\n");
793 */
796 /* shellsort CACM #201 */
797 static void
798 sort(struct line *a, int n)
800 struct line *ai, *aim, w;
801 int j, m = 0, k;
803 if (n == 0)
804 return;
805 for (j = 1; j <= n; j *= 2)
806 m = 2 * j - 1;
807 for (m /= 2; m != 0; m /= 2) {
808 k = n - m;
809 for (j = 1; j <= k; j++) {
810 for (ai = &a[j]; ai > a; ai -= m) {
811 aim = &ai[m];
812 if (aim < ai)
813 break; /* wraparound */
814 if (aim->value > ai[0].value ||
815 (aim->value == ai[0].value &&
816 aim->serial > ai[0].serial))
817 break;
818 w.value = ai[0].value;
819 ai[0].value = aim->value;
820 aim->value = w.value;
821 w.serial = ai[0].serial;
822 ai[0].serial = aim->serial;
823 aim->serial = w.serial;
829 static int
830 unsort(struct line *f, int l, int *b)
832 int *a, i;
834 a = calloc(l + 1, sizeof(*a));
835 if (a == NULL)
836 return (-1);
837 for (i = 1; i <= l; i++)
838 a[f[i].serial] = f[i].value;
839 for (i = 1; i <= l; i++)
840 b[i] = a[i];
841 free(a);
843 return (0);
846 static int
847 skipline(FILE *f)
849 int i, c;
851 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
852 continue;
853 return (i);
856 static int
857 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
858 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
860 int m, i0, i1, j0, j1;
861 int error = 0;
863 rewind(f1);
864 rewind(f2);
865 m = ds->len[0];
866 ds->J[0] = 0;
867 ds->J[m + 1] = ds->len[1] + 1;
868 if (args->diff_format != D_EDIT) {
869 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
870 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
871 i0++;
872 j0 = ds->J[i0 - 1] + 1;
873 i1 = i0 - 1;
874 while (i1 < m && ds->J[i1 + 1] == 0)
875 i1++;
876 j1 = ds->J[i1 + 1] - 1;
877 ds->J[i1] = j1;
878 error = change(outfile, ds, args, file1, f1, file2, f2,
879 i0, i1, j0, j1, &flags);
880 if (error)
881 return (error);
883 } else {
884 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
885 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
886 i0--;
887 j0 = ds->J[i0 + 1] - 1;
888 i1 = i0 + 1;
889 while (i1 > 1 && ds->J[i1 - 1] == 0)
890 i1--;
891 j1 = ds->J[i1 - 1] + 1;
892 ds->J[i1] = j1;
893 change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
894 j1, j0, &flags);
895 if (error)
896 return (error);
899 if (m == 0) {
900 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
901 1, ds->len[1], &flags);
902 if (error)
903 return (error);
905 if (args->diff_format == D_IFDEF) {
906 for (;;) {
907 #define c i0
908 if ((c = getc(f1)) == EOF)
909 return (0);
910 diff_output(outfile, "%c", c);
912 #undef c
914 if (ds->anychange != 0) {
915 if (args->diff_format == D_CONTEXT)
916 dump_context_vec(outfile, ds, args, f1, f2, flags);
917 else if (args->diff_format == D_UNIFIED)
918 dump_unified_vec(outfile, ds, args, f1, f2, flags);
921 return (0);
924 static void
925 range(FILE *outfile, int a, int b, char *separator)
927 diff_output(outfile, "%d", a > b ? b : a);
928 if (a < b)
929 diff_output(outfile, "%s%d", separator, b);
932 static void
933 uni_range(FILE *outfile, int a, int b)
935 if (a < b)
936 diff_output(outfile, "%d,%d", a, b - a + 1);
937 else if (a == b)
938 diff_output(outfile, "%d", b);
939 else
940 diff_output(outfile, "%d,0", b);
943 static char *
944 preadline(int fd, size_t rlen, off_t off)
946 char *line;
947 ssize_t nr;
949 line = malloc(rlen + 1);
950 if (line == NULL)
951 return NULL;
952 if ((nr = pread(fd, line, rlen, off)) < 0) {
953 free(line);
954 return NULL;
956 if (nr > 0 && line[nr-1] == '\n')
957 nr--;
958 line[nr] = '\0';
959 return (line);
962 static int
963 ignoreline(char *line)
965 return 0; /* do not ignore any lines */
968 /*
969 * Indicate that there is a difference between lines a and b of the from file
970 * to get to lines c to d of the to file. If a is greater then b then there
971 * are no lines in the from file involved and this means that there were
972 * lines appended (beginning at b). If c is greater than d then there are
973 * lines missing from the to file.
974 */
975 static int
976 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
977 const char *file1, FILE *f1, const char *file2, FILE *f2,
978 int a, int b, int c, int d, int *pflags)
980 int i;
982 restart:
983 if (args->diff_format != D_IFDEF && a > b && c > d)
984 return (0);
985 if (args->ignore_pats != NULL) {
986 /*
987 * All lines in the change, insert, or delete must
988 * match an ignore pattern for the change to be
989 * ignored.
990 */
991 if (a <= b) { /* Changes and deletes. */
992 for (i = a; i <= b; i++) {
993 char *line = preadline(fileno(f1),
994 ds->ixold[i] - ds->ixold[i - 1],
995 ds->ixold[i - 1]);
996 if (line == NULL)
997 return (-1);
998 if (!ignoreline(line)) {
999 free(line);
1000 goto proceed;
1002 free(line);
1005 if (a > b || c <= d) { /* Changes and inserts. */
1006 for (i = c; i <= d; i++) {
1007 char *line = preadline(fileno(f2),
1008 ds->ixnew[i] - ds->ixnew[i - 1],
1009 ds->ixnew[i - 1]);
1010 if (line == NULL)
1011 return (-1);
1012 if (!ignoreline(line)) {
1013 free(line);
1014 goto proceed;
1016 free(line);
1019 return (0);
1021 proceed:
1022 if (*pflags & D_HEADER) {
1023 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1024 *pflags &= ~D_HEADER;
1026 if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1028 * Allocate change records as needed.
1030 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1031 struct context_vec *cvp;
1032 ptrdiff_t offset;
1033 offset = ds->context_vec_ptr - ds->context_vec_start;
1034 ds->max_context <<= 1;
1035 ds->context_vec_start =
1036 cvp = reallocarray(ds->context_vec_start,
1037 ds->max_context, sizeof(*ds->context_vec_start));
1038 if (cvp == NULL) {
1039 free(ds->context_vec_start);
1040 return (-1);
1042 ds->context_vec_start = cvp;
1043 ds->context_vec_end = ds->context_vec_start +
1044 ds->max_context;
1045 ds->context_vec_ptr = ds->context_vec_start + offset;
1047 if (ds->anychange == 0) {
1049 * Print the context/unidiff header first time through.
1051 print_header(outfile, ds, args, file1, file2);
1052 ds->anychange = 1;
1053 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1054 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1056 * If this change is more than 'diff_context' lines from the
1057 * previous change, dump the record and reset it.
1059 if (args->diff_format == D_CONTEXT)
1060 dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1061 else
1062 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1064 ds->context_vec_ptr++;
1065 ds->context_vec_ptr->a = a;
1066 ds->context_vec_ptr->b = b;
1067 ds->context_vec_ptr->c = c;
1068 ds->context_vec_ptr->d = d;
1069 return (0);
1071 if (ds->anychange == 0)
1072 ds->anychange = 1;
1073 switch (args->diff_format) {
1074 case D_BRIEF:
1075 return (0);
1076 case D_NORMAL:
1077 case D_EDIT:
1078 range(outfile, a, b, ",");
1079 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1080 if (args->diff_format == D_NORMAL)
1081 range(outfile, c, d, ",");
1082 diff_output(outfile, "\n");
1083 break;
1084 case D_REVERSE:
1085 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1086 range(outfile, a, b, " ");
1087 diff_output(outfile, "\n");
1088 break;
1089 case D_NREVERSE:
1090 if (a > b)
1091 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1092 else {
1093 diff_output(outfile, "d%d %d\n", a, b - a + 1);
1094 if (!(c > d))
1095 /* add changed lines */
1096 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1098 break;
1100 if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1101 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1102 if (a <= b && c <= d && args->diff_format == D_NORMAL)
1103 diff_output(outfile, "---\n");
1105 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1106 if (i != 0 && args->diff_format == D_EDIT) {
1108 * A non-zero return value for D_EDIT indicates that the
1109 * last line printed was a bare dot (".") that has been
1110 * escaped as ".." to prevent ed(1) from misinterpreting
1111 * it. We have to add a substitute command to change this
1112 * back and restart where we left off.
1114 diff_output(outfile, ".\n");
1115 diff_output(outfile, "%ds/.//\n", a + i - 1);
1116 b = a + i - 1;
1117 a = b + 1;
1118 c += i;
1119 goto restart;
1121 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1122 diff_output(outfile, ".\n");
1123 if (ds->inifdef) {
1124 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1125 ds->inifdef = 0;
1128 return (0);
1131 static int
1132 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1133 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1135 int i, j, c, lastc, col, nc;
1138 * When doing #ifdef's, copy down to current line
1139 * if this is the first file, so that stuff makes it to output.
1141 if (args->diff_format == D_IFDEF && oldfile) {
1142 long curpos = ftell(lb);
1143 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1144 nc = f[a > b ? b : a - 1] - curpos;
1145 for (i = 0; i < nc; i++)
1146 diff_output(outfile, "%c", getc(lb));
1148 if (a > b)
1149 return (0);
1150 if (args->diff_format == D_IFDEF) {
1151 if (ds->inifdef) {
1152 diff_output(outfile, "#else /* %s%s */\n",
1153 oldfile == 1 ? "!" : "", args->ifdefname);
1154 } else {
1155 if (oldfile)
1156 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1157 else
1158 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1160 ds->inifdef = 1 + oldfile;
1162 for (i = a; i <= b; i++) {
1163 fseek(lb, f[i - 1], SEEK_SET);
1164 nc = f[i] - f[i - 1];
1165 if (args->diff_format != D_IFDEF && ch != '\0') {
1166 diff_output(outfile, "%c", ch);
1167 if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1168 || args->diff_format == D_UNIFIED))
1169 diff_output(outfile, "\t");
1170 else if (args->diff_format != D_UNIFIED)
1171 diff_output(outfile, " ");
1173 col = 0;
1174 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1175 if ((c = getc(lb)) == EOF) {
1176 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1177 args->diff_format == D_NREVERSE)
1178 warnx("No newline at end of file");
1179 else
1180 diff_output(outfile, "\n\\ No newline at end of "
1181 "file\n");
1182 return (0);
1184 if (c == '\t' && (flags & D_EXPANDTABS)) {
1185 do {
1186 diff_output(outfile, " ");
1187 } while (++col & 7);
1188 } else {
1189 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1190 && lastc == '.') {
1192 * Don't print a bare "." line
1193 * since that will confuse ed(1).
1194 * Print ".." instead and return,
1195 * giving the caller an offset
1196 * from which to restart.
1198 diff_output(outfile, ".\n");
1199 return (i - a + 1);
1201 diff_output(outfile, "%c", c);
1202 col++;
1206 return (0);
1210 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1212 static int
1213 readhash(struct got_diff_state *ds, FILE *f, int flags)
1215 int i, t, space;
1216 int sum;
1218 sum = 1;
1219 space = 0;
1220 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1221 if (flags & D_IGNORECASE)
1222 for (i = 0; (t = getc(f)) != '\n'; i++) {
1223 if (t == EOF) {
1224 if (i == 0)
1225 return (0);
1226 break;
1228 sum = sum * 127 + ds->chrtran[t];
1230 else
1231 for (i = 0; (t = getc(f)) != '\n'; i++) {
1232 if (t == EOF) {
1233 if (i == 0)
1234 return (0);
1235 break;
1237 sum = sum * 127 + t;
1239 } else {
1240 for (i = 0;;) {
1241 switch (t = getc(f)) {
1242 case '\t':
1243 case '\r':
1244 case '\v':
1245 case '\f':
1246 case ' ':
1247 space++;
1248 continue;
1249 default:
1250 if (space && (flags & D_IGNOREBLANKS) == 0) {
1251 i++;
1252 space = 0;
1254 sum = sum * 127 + ds->chrtran[t];
1255 i++;
1256 continue;
1257 case EOF:
1258 if (i == 0)
1259 return (0);
1260 /* FALLTHROUGH */
1261 case '\n':
1262 break;
1264 break;
1268 * There is a remote possibility that we end up with a zero sum.
1269 * Zero is used as an EOF marker, so return 1 instead.
1271 return (sum == 0 ? 1 : sum);
1274 static int
1275 asciifile(FILE *f)
1277 unsigned char buf[BUFSIZ];
1278 size_t cnt;
1280 if (f == NULL)
1281 return (1);
1283 rewind(f);
1284 cnt = fread(buf, 1, sizeof(buf), f);
1285 return (memchr(buf, '\0', cnt) == NULL);
1288 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1290 static char *
1291 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1293 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1294 size_t nc;
1295 int last = ds->lastline;
1296 char *state = NULL;
1298 ds->lastline = pos;
1299 while (pos > last) {
1300 fseek(fp, f[pos - 1], SEEK_SET);
1301 nc = f[pos] - f[pos - 1];
1302 if (nc >= sizeof(buf))
1303 nc = sizeof(buf) - 1;
1304 nc = fread(buf, 1, nc, fp);
1305 if (nc > 0) {
1306 buf[nc] = '\0';
1307 buf[strcspn(buf, "\n")] = '\0';
1308 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1309 if (begins_with(buf, "private:")) {
1310 if (!state)
1311 state = " (private)";
1312 } else if (begins_with(buf, "protected:")) {
1313 if (!state)
1314 state = " (protected)";
1315 } else if (begins_with(buf, "public:")) {
1316 if (!state)
1317 state = " (public)";
1318 } else {
1319 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1320 if (state)
1321 strlcat(ds->lastbuf, state,
1322 sizeof ds->lastbuf);
1323 ds->lastmatchline = pos;
1324 return ds->lastbuf;
1328 pos--;
1330 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1333 /* dump accumulated "context" diff changes */
1334 static void
1335 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1336 FILE *f1, FILE *f2, int flags)
1338 struct context_vec *cvp = ds->context_vec_start;
1339 int lowa, upb, lowc, upd, do_output;
1340 int a, b, c, d;
1341 char ch, *f;
1343 if (ds->context_vec_start > ds->context_vec_ptr)
1344 return;
1346 b = d = 0; /* gcc */
1347 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1348 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1349 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1350 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1352 diff_output(outfile, "***************");
1353 if ((flags & D_PROTOTYPE)) {
1354 f = match_function(ds, ds->ixold, lowa-1, f1);
1355 if (f != NULL)
1356 diff_output(outfile, " %s", f);
1358 diff_output(outfile, "\n*** ");
1359 range(outfile, lowa, upb, ",");
1360 diff_output(outfile, " ****\n");
1363 * Output changes to the "old" file. The first loop suppresses
1364 * output if there were no changes to the "old" file (we'll see
1365 * the "old" lines as context in the "new" list).
1367 do_output = 0;
1368 for (; cvp <= ds->context_vec_ptr; cvp++)
1369 if (cvp->a <= cvp->b) {
1370 cvp = ds->context_vec_start;
1371 do_output++;
1372 break;
1374 if (do_output) {
1375 while (cvp <= ds->context_vec_ptr) {
1376 a = cvp->a;
1377 b = cvp->b;
1378 c = cvp->c;
1379 d = cvp->d;
1381 if (a <= b && c <= d)
1382 ch = 'c';
1383 else
1384 ch = (a <= b) ? 'd' : 'a';
1386 if (ch == 'a')
1387 fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1388 else {
1389 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1390 fetch(outfile, ds, args, ds->ixold, a, b, f1,
1391 ch == 'c' ? '!' : '-', 0, flags);
1393 lowa = b + 1;
1394 cvp++;
1396 fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1398 /* output changes to the "new" file */
1399 diff_output(outfile, "--- ");
1400 range(outfile, lowc, upd, ",");
1401 diff_output(outfile, " ----\n");
1403 do_output = 0;
1404 for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1405 if (cvp->c <= cvp->d) {
1406 cvp = ds->context_vec_start;
1407 do_output++;
1408 break;
1410 if (do_output) {
1411 while (cvp <= ds->context_vec_ptr) {
1412 a = cvp->a;
1413 b = cvp->b;
1414 c = cvp->c;
1415 d = cvp->d;
1417 if (a <= b && c <= d)
1418 ch = 'c';
1419 else
1420 ch = (a <= b) ? 'd' : 'a';
1422 if (ch == 'd')
1423 fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1424 else {
1425 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1426 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1427 ch == 'c' ? '!' : '+', 0, flags);
1429 lowc = d + 1;
1430 cvp++;
1432 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1434 ds->context_vec_ptr = ds->context_vec_start - 1;
1437 /* dump accumulated "unified" diff changes */
1438 static void
1439 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1440 FILE *f1, FILE *f2, int flags)
1442 struct context_vec *cvp = ds->context_vec_start;
1443 int lowa, upb, lowc, upd;
1444 int a, b, c, d;
1445 char ch, *f;
1447 if (ds->context_vec_start > ds->context_vec_ptr)
1448 return;
1450 b = d = 0; /* gcc */
1451 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1452 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1453 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1454 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1456 diff_output(outfile, "@@ -");
1457 uni_range(outfile, lowa, upb);
1458 diff_output(outfile, " +");
1459 uni_range(outfile, lowc, upd);
1460 diff_output(outfile, " @@");
1461 if ((flags & D_PROTOTYPE)) {
1462 f = match_function(ds, ds->ixold, lowa-1, f1);
1463 if (f != NULL)
1464 diff_output(outfile, " %s", f);
1466 diff_output(outfile, "\n");
1469 * Output changes in "unified" diff format--the old and new lines
1470 * are printed together.
1472 for (; cvp <= ds->context_vec_ptr; cvp++) {
1473 a = cvp->a;
1474 b = cvp->b;
1475 c = cvp->c;
1476 d = cvp->d;
1479 * c: both new and old changes
1480 * d: only changes in the old file
1481 * a: only changes in the new file
1483 if (a <= b && c <= d)
1484 ch = 'c';
1485 else
1486 ch = (a <= b) ? 'd' : 'a';
1488 switch (ch) {
1489 case 'c':
1490 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1491 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1492 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1493 break;
1494 case 'd':
1495 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1496 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1497 break;
1498 case 'a':
1499 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1500 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1501 break;
1503 lowa = b + 1;
1504 lowc = d + 1;
1506 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1508 ds->context_vec_ptr = ds->context_vec_start - 1;
1511 static void
1512 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1513 const char *file1, const char *file2)
1515 if (args->label[0] != NULL)
1516 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1517 args->label[0]);
1518 else
1519 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1520 file1, ctime(&ds->stb1.st_mtime));
1521 if (args->label[1] != NULL)
1522 diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1523 args->label[1]);
1524 else
1525 diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1526 file2, ctime(&ds->stb2.st_mtime));