Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stddef.h>
77 #include <stdint.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82 #include <limits.h>
83 #include <sha1.h>
84 #include <zlib.h>
86 #include "got_error.h"
87 #include "got_object.h"
88 #include "got_diff.h"
90 #include "diff.h"
91 #include "xmalloc.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 /*
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
178 */
179 struct context_vec {
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
184 };
186 #define diff_output printf
187 static FILE *opentemp(const char *);
188 static void output(struct got_diff_state *, struct got_diff_args *, char *, FILE *, char *, FILE *, int);
189 static void check(struct got_diff_state *, FILE *, FILE *, int);
190 static void range(int, int, char *);
191 static void uni_range(int, int);
192 static void dump_context_vec(struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static void dump_unified_vec(struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
194 static void prepare(struct got_diff_state *, int, FILE *, off_t, int);
195 static void prune(struct got_diff_state *);
196 static void equiv(struct line *, int, struct line *, int, int *);
197 static void unravel(struct got_diff_state *, int);
198 static void unsort(struct line *, int, int *);
199 static void change(struct got_diff_state *, struct got_diff_args *, char *, FILE *, char *, FILE *, int, int, int, int, int *);
200 static void sort(struct line *, int);
201 static void print_header(struct got_diff_state *, struct got_diff_args *, const char *, const char *);
202 static int ignoreline(char *);
203 static int asciifile(FILE *);
204 static int fetch(struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
205 static int newcand(struct got_diff_state *, int, int, int);
206 static int search(struct got_diff_state *, int *, int, int);
207 static int skipline(FILE *);
208 static int isqrt(int);
209 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
210 static int readhash(struct got_diff_state *, FILE *, int);
211 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
212 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
213 static char *preadline(int, size_t, off_t);
215 /*
216 * chrtran points to one of 2 translation tables: cup2low if folding upper to
217 * lower case clow2low if not folding case
218 */
219 u_char clow2low[256] = {
220 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
221 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
222 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
223 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
224 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
225 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
226 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
227 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
228 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
229 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
230 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
231 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
232 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
233 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
234 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
235 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
236 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
237 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
238 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
239 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
240 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
241 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
242 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
243 0xfd, 0xfe, 0xff
244 };
246 u_char cup2low[256] = {
247 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
248 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
249 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
250 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
251 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
252 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
253 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
254 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
255 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
256 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
257 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
258 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
259 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
260 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
261 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
262 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
263 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
264 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
265 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
266 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
267 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
268 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
269 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
270 0xfd, 0xfe, 0xff
271 };
273 const struct got_error *
274 got_diffreg(int *rval, char *file1, char *file2, int flags,
275 struct got_diff_args *args, struct got_diff_state *ds)
277 const struct got_error *err = NULL;
278 FILE *f1, *f2;
279 int i;
281 if (strcmp(file1, "-") == 0)
282 fstat(STDIN_FILENO, &ds->stb1);
283 else if (stat(file1, &ds->stb1) != 0)
284 return got_error(GOT_ERR_BAD_PATH);
286 if (strcmp(file2, "-") == 0)
287 fstat(STDIN_FILENO, &ds->stb2);
288 else if (stat(file2, &ds->stb2) != 0)
289 return got_error(GOT_ERR_BAD_PATH);
291 f1 = f2 = NULL;
292 *rval = D_SAME;
293 ds->anychange = 0;
294 ds->lastline = 0;
295 ds->lastmatchline = 0;
296 ds->context_vec_ptr = ds->context_vec_start - 1;
297 if (flags & D_IGNORECASE)
298 ds->chrtran = cup2low;
299 else
300 ds->chrtran = clow2low;
301 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
302 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
303 return NULL;
305 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
306 goto closem;
308 if (flags & D_EMPTY1)
309 f1 = fopen(_PATH_DEVNULL, "r");
310 else {
311 if (!S_ISREG(ds->stb1.st_mode)) {
312 if ((f1 = opentemp(file1)) == NULL ||
313 fstat(fileno(f1), &ds->stb1) < 0) {
314 args->status |= 2;
315 goto closem;
317 } else if (strcmp(file1, "-") == 0)
318 f1 = stdin;
319 else
320 f1 = fopen(file1, "r");
322 if (f1 == NULL) {
323 args->status |= 2;
324 goto closem;
327 if (flags & D_EMPTY2)
328 f2 = fopen(_PATH_DEVNULL, "r");
329 else {
330 if (!S_ISREG(ds->stb2.st_mode)) {
331 if ((f2 = opentemp(file2)) == NULL ||
332 fstat(fileno(f2), &ds->stb2) < 0) {
333 args->status |= 2;
334 goto closem;
336 } else if (strcmp(file2, "-") == 0)
337 f2 = stdin;
338 else
339 f2 = fopen(file2, "r");
341 if (f2 == NULL) {
342 args->status |= 2;
343 goto closem;
346 switch (files_differ(ds, f1, f2, flags)) {
347 case 0:
348 goto closem;
349 case 1:
350 break;
351 default:
352 /* error */
353 args->status |= 2;
354 goto closem;
357 if ((flags & D_FORCEASCII) == 0 &&
358 (!asciifile(f1) || !asciifile(f2))) {
359 *rval = D_BINARY;
360 args->status |= 1;
361 goto closem;
363 prepare(ds, 0, f1, ds->stb1.st_size, flags);
364 prepare(ds, 1, f2, ds->stb2.st_size, flags);
366 prune(ds);
367 sort(ds->sfile[0], ds->slen[0]);
368 sort(ds->sfile[1], ds->slen[1]);
370 ds->member = (int *)ds->file[1];
371 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
372 ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
373 if (ds->member == NULL) {
374 err = got_error(GOT_ERR_NO_MEM);
375 goto closem;
378 ds->class = (int *)ds->file[0];
379 unsort(ds->sfile[0], ds->slen[0], ds->class);
380 ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
381 if (ds->class == NULL) {
382 err = got_error(GOT_ERR_NO_MEM);
383 goto closem;
386 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
387 if (ds->klist == NULL) {
388 err = got_error(GOT_ERR_NO_MEM);
389 goto closem;
391 ds->clen = 0;
392 ds->clistlen = 100;
393 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
394 if (ds->clist == NULL) {
395 err = got_error(GOT_ERR_NO_MEM);
396 goto closem;
398 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
399 free(ds->member);
400 free(ds->class);
402 ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
403 if (ds->J == NULL) {
404 err = got_error(GOT_ERR_NO_MEM);
405 goto closem;
407 unravel(ds, ds->klist[i]);
408 free(ds->clist);
409 ds->clist = NULL;
410 free(ds->klist);
411 ds->klist = NULL;
413 ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
414 if (ds->ixold == NULL) {
415 err = got_error(GOT_ERR_NO_MEM);
416 goto closem;
418 ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
419 if (ds->ixnew == NULL) {
420 err = got_error(GOT_ERR_NO_MEM);
421 goto closem;
423 check(ds, f1, f2, flags);
424 output(ds, args, file1, f1, file2, f2, flags);
425 closem:
426 if (ds->anychange) {
427 args->status |= 1;
428 if (*rval == D_SAME)
429 *rval = D_DIFFER;
431 if (f1 != NULL)
432 fclose(f1);
433 if (f2 != NULL)
434 fclose(f2);
436 return (err);
439 /*
440 * Check to see if the given files differ.
441 * Returns 0 if they are the same, 1 if different, and -1 on error.
442 * XXX - could use code from cmp(1) [faster]
443 */
444 static int
445 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
447 char buf1[BUFSIZ], buf2[BUFSIZ];
448 size_t i, j;
450 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
451 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
452 return (1);
453 for (;;) {
454 i = fread(buf1, 1, sizeof(buf1), f1);
455 j = fread(buf2, 1, sizeof(buf2), f2);
456 if ((!i && ferror(f1)) || (!j && ferror(f2)))
457 return (-1);
458 if (i != j)
459 return (1);
460 if (i == 0)
461 return (0);
462 if (memcmp(buf1, buf2, i) != 0)
463 return (1);
467 static FILE *
468 opentemp(const char *file)
470 char buf[BUFSIZ], tempfile[PATH_MAX];
471 ssize_t nread;
472 int ifd, ofd;
474 if (strcmp(file, "-") == 0)
475 ifd = STDIN_FILENO;
476 else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
477 return (NULL);
479 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
481 if ((ofd = mkstemp(tempfile)) < 0) {
482 close(ifd);
483 return (NULL);
485 unlink(tempfile);
486 while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
487 if (write(ofd, buf, nread) != nread) {
488 close(ifd);
489 close(ofd);
490 return (NULL);
493 close(ifd);
494 lseek(ofd, (off_t)0, SEEK_SET);
495 return (fdopen(ofd, "r"));
498 char *
499 splice(char *dir, char *file)
501 char *tail, *buf;
502 size_t dirlen;
504 dirlen = strlen(dir);
505 while (dirlen != 0 && dir[dirlen - 1] == '/')
506 dirlen--;
507 if ((tail = strrchr(file, '/')) == NULL)
508 tail = file;
509 else
510 tail++;
511 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
512 return (buf);
515 static void
516 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
518 struct line *p;
519 int j, h;
520 size_t sz;
522 rewind(fd);
524 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
525 if (sz < 100)
526 sz = 100;
528 p = xcalloc(sz + 3, sizeof(*p));
529 for (j = 0; (h = readhash(ds, fd, flags));) {
530 if (j == sz) {
531 sz = sz * 3 / 2;
532 p = xreallocarray(p, sz + 3, sizeof(*p));
534 p[++j].value = h;
536 ds->len[i] = j;
537 ds->file[i] = p;
540 static void
541 prune(struct got_diff_state *ds)
543 int i, j;
545 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
546 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
547 ds->pref++)
549 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
550 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
551 ds->suff++)
553 for (j = 0; j < 2; j++) {
554 ds->sfile[j] = ds->file[j] + ds->pref;
555 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
556 for (i = 0; i <= ds->slen[j]; i++)
557 ds->sfile[j][i].serial = i;
561 static void
562 equiv(struct line *a, int n, struct line *b, int m, int *c)
564 int i, j;
566 i = j = 1;
567 while (i <= n && j <= m) {
568 if (a[i].value < b[j].value)
569 a[i++].value = 0;
570 else if (a[i].value == b[j].value)
571 a[i++].value = j;
572 else
573 j++;
575 while (i <= n)
576 a[i++].value = 0;
577 b[m + 1].value = 0;
578 j = 0;
579 while (++j <= m) {
580 c[j] = -b[j].serial;
581 while (b[j + 1].value == b[j].value) {
582 j++;
583 c[j] = b[j].serial;
586 c[j] = -1;
589 /* Code taken from ping.c */
590 static int
591 isqrt(int n)
593 int y, x = 1;
595 if (n == 0)
596 return (0);
598 do { /* newton was a stinker */
599 y = x;
600 x = n / x;
601 x += y;
602 x /= 2;
603 } while ((x - y) > 1 || (x - y) < -1);
605 return (x);
608 static int
609 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
611 int i, k, y, j, l;
612 int oldc, tc, oldl, sq;
613 u_int numtries, bound;
615 if (flags & D_MINIMAL)
616 bound = UINT_MAX;
617 else {
618 sq = isqrt(n);
619 bound = MAXIMUM(256, sq);
622 k = 0;
623 c[0] = newcand(ds, 0, 0, 0);
624 for (i = 1; i <= n; i++) {
625 j = a[i];
626 if (j == 0)
627 continue;
628 y = -b[j];
629 oldl = 0;
630 oldc = c[0];
631 numtries = 0;
632 do {
633 if (y <= ds->clist[oldc].y)
634 continue;
635 l = search(ds, c, k, y);
636 if (l != oldl + 1)
637 oldc = c[l - 1];
638 if (l <= k) {
639 if (ds->clist[c[l]].y <= y)
640 continue;
641 tc = c[l];
642 c[l] = newcand(ds, i, y, oldc);
643 oldc = tc;
644 oldl = l;
645 numtries++;
646 } else {
647 c[l] = newcand(ds, i, y, oldc);
648 k++;
649 break;
651 } while ((y = b[++j]) > 0 && numtries < bound);
653 return (k);
656 static int
657 newcand(struct got_diff_state *ds, int x, int y, int pred)
659 struct cand *q;
661 if (ds->clen == ds->clistlen) {
662 ds->clistlen = ds->clistlen * 11 / 10;
663 ds->clist = xreallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
665 q = ds->clist + ds->clen;
666 q->x = x;
667 q->y = y;
668 q->pred = pred;
669 return (ds->clen++);
672 static int
673 search(struct got_diff_state *ds, int *c, int k, int y)
675 int i, j, l, t;
677 if (ds->clist[c[k]].y < y) /* quick look for typical case */
678 return (k + 1);
679 i = 0;
680 j = k + 1;
681 for (;;) {
682 l = (i + j) / 2;
683 if (l <= i)
684 break;
685 t = ds->clist[c[l]].y;
686 if (t > y)
687 j = l;
688 else if (t < y)
689 i = l;
690 else
691 return (l);
693 return (l + 1);
696 static void
697 unravel(struct got_diff_state *ds, int p)
699 struct cand *q;
700 int i;
702 for (i = 0; i <= ds->len[0]; i++)
703 ds->J[i] = i <= ds->pref ? i :
704 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
705 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
706 ds->J[q->x + ds->pref] = q->y + ds->pref;
709 /*
710 * Check does double duty:
711 * 1. ferret out any fortuitous correspondences due
712 * to confounding by hashing (which result in "jackpot")
713 * 2. collect random access indexes to the two files
714 */
715 static void
716 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
718 int i, j, jackpot, c, d;
719 long ctold, ctnew;
721 rewind(f1);
722 rewind(f2);
723 j = 1;
724 ds->ixold[0] = ds->ixnew[0] = 0;
725 jackpot = 0;
726 ctold = ctnew = 0;
727 for (i = 1; i <= ds->len[0]; i++) {
728 if (ds->J[i] == 0) {
729 ds->ixold[i] = ctold += skipline(f1);
730 continue;
732 while (j < ds->J[i]) {
733 ds->ixnew[j] = ctnew += skipline(f2);
734 j++;
736 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
737 for (;;) {
738 c = getc(f1);
739 d = getc(f2);
740 /*
741 * GNU diff ignores a missing newline
742 * in one file for -b or -w.
743 */
744 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
745 if (c == EOF && d == '\n') {
746 ctnew++;
747 break;
748 } else if (c == '\n' && d == EOF) {
749 ctold++;
750 break;
753 ctold++;
754 ctnew++;
755 if ((flags & D_FOLDBLANKS) && isspace(c) &&
756 isspace(d)) {
757 do {
758 if (c == '\n')
759 break;
760 ctold++;
761 } while (isspace(c = getc(f1)));
762 do {
763 if (d == '\n')
764 break;
765 ctnew++;
766 } while (isspace(d = getc(f2)));
767 } else if ((flags & D_IGNOREBLANKS)) {
768 while (isspace(c) && c != '\n') {
769 c = getc(f1);
770 ctold++;
772 while (isspace(d) && d != '\n') {
773 d = getc(f2);
774 ctnew++;
777 if (ds->chrtran[c] != ds->chrtran[d]) {
778 jackpot++;
779 ds->J[i] = 0;
780 if (c != '\n' && c != EOF)
781 ctold += skipline(f1);
782 if (d != '\n' && c != EOF)
783 ctnew += skipline(f2);
784 break;
786 if (c == '\n' || c == EOF)
787 break;
789 } else {
790 for (;;) {
791 ctold++;
792 ctnew++;
793 if ((c = getc(f1)) != (d = getc(f2))) {
794 /* jackpot++; */
795 ds->J[i] = 0;
796 if (c != '\n' && c != EOF)
797 ctold += skipline(f1);
798 if (d != '\n' && c != EOF)
799 ctnew += skipline(f2);
800 break;
802 if (c == '\n' || c == EOF)
803 break;
806 ds->ixold[i] = ctold;
807 ds->ixnew[j] = ctnew;
808 j++;
810 for (; j <= ds->len[1]; j++)
811 ds->ixnew[j] = ctnew += skipline(f2);
812 /*
813 * if (jackpot)
814 * fprintf(stderr, "jackpot\n");
815 */
818 /* shellsort CACM #201 */
819 static void
820 sort(struct line *a, int n)
822 struct line *ai, *aim, w;
823 int j, m = 0, k;
825 if (n == 0)
826 return;
827 for (j = 1; j <= n; j *= 2)
828 m = 2 * j - 1;
829 for (m /= 2; m != 0; m /= 2) {
830 k = n - m;
831 for (j = 1; j <= k; j++) {
832 for (ai = &a[j]; ai > a; ai -= m) {
833 aim = &ai[m];
834 if (aim < ai)
835 break; /* wraparound */
836 if (aim->value > ai[0].value ||
837 (aim->value == ai[0].value &&
838 aim->serial > ai[0].serial))
839 break;
840 w.value = ai[0].value;
841 ai[0].value = aim->value;
842 aim->value = w.value;
843 w.serial = ai[0].serial;
844 ai[0].serial = aim->serial;
845 aim->serial = w.serial;
851 static void
852 unsort(struct line *f, int l, int *b)
854 int *a, i;
856 a = xcalloc(l + 1, sizeof(*a));
857 for (i = 1; i <= l; i++)
858 a[f[i].serial] = f[i].value;
859 for (i = 1; i <= l; i++)
860 b[i] = a[i];
861 free(a);
864 static int
865 skipline(FILE *f)
867 int i, c;
869 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
870 continue;
871 return (i);
874 static void
875 output(struct got_diff_state *ds, struct got_diff_args *args,
876 char *file1, FILE *f1, char *file2, FILE *f2, int flags)
878 int m, i0, i1, j0, j1;
880 rewind(f1);
881 rewind(f2);
882 m = ds->len[0];
883 ds->J[0] = 0;
884 ds->J[m + 1] = ds->len[1] + 1;
885 if (args->diff_format != D_EDIT) {
886 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
887 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
888 i0++;
889 j0 = ds->J[i0 - 1] + 1;
890 i1 = i0 - 1;
891 while (i1 < m && ds->J[i1 + 1] == 0)
892 i1++;
893 j1 = ds->J[i1 + 1] - 1;
894 ds->J[i1] = j1;
895 change(ds, args, file1, f1, file2, f2, i0, i1, j0, j1, &flags);
897 } else {
898 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
899 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
900 i0--;
901 j0 = ds->J[i0 + 1] - 1;
902 i1 = i0 + 1;
903 while (i1 > 1 && ds->J[i1 - 1] == 0)
904 i1--;
905 j1 = ds->J[i1 - 1] + 1;
906 ds->J[i1] = j1;
907 change(ds, args, file1, f1, file2, f2, i1, i0, j1, j0, &flags);
910 if (m == 0)
911 change(ds, args, file1, f1, file2, f2, 1, 0, 1, ds->len[1], &flags);
912 if (args->diff_format == D_IFDEF) {
913 for (;;) {
914 #define c i0
915 if ((c = getc(f1)) == EOF)
916 return;
917 diff_output("%c", c);
919 #undef c
921 if (ds->anychange != 0) {
922 if (args->diff_format == D_CONTEXT)
923 dump_context_vec(ds, args, f1, f2, flags);
924 else if (args->diff_format == D_UNIFIED)
925 dump_unified_vec(ds, args, f1, f2, flags);
929 static void
930 range(int a, int b, char *separator)
932 diff_output("%d", a > b ? b : a);
933 if (a < b)
934 diff_output("%s%d", separator, b);
937 static void
938 uni_range(int a, int b)
940 if (a < b)
941 diff_output("%d,%d", a, b - a + 1);
942 else if (a == b)
943 diff_output("%d", b);
944 else
945 diff_output("%d,0", b);
948 static char *
949 preadline(int fd, size_t rlen, off_t off)
951 char *line;
952 ssize_t nr;
954 line = xmalloc(rlen + 1);
955 if ((nr = pread(fd, line, rlen, off)) < 0)
956 err(2, "preadline");
957 if (nr > 0 && line[nr-1] == '\n')
958 nr--;
959 line[nr] = '\0';
960 return (line);
963 static int
964 ignoreline(char *line)
966 return 0; /* do not ignore any lines */
969 /*
970 * Indicate that there is a difference between lines a and b of the from file
971 * to get to lines c to d of the to file. If a is greater then b then there
972 * are no lines in the from file involved and this means that there were
973 * lines appended (beginning at b). If c is greater than d then there are
974 * lines missing from the to file.
975 */
976 static void
977 change(struct got_diff_state *ds, struct got_diff_args *args,
978 char *file1, FILE *f1, char *file2, FILE *f2,
979 int a, int b, int c, int d, int *pflags)
981 static size_t max_context = 64;
982 int i;
984 restart:
985 if (args->diff_format != D_IFDEF && a > b && c > d)
986 return;
987 if (args->ignore_pats != NULL) {
988 char *line;
989 /*
990 * All lines in the change, insert, or delete must
991 * match an ignore pattern for the change to be
992 * ignored.
993 */
994 if (a <= b) { /* Changes and deletes. */
995 for (i = a; i <= b; i++) {
996 line = preadline(fileno(f1),
997 ds->ixold[i] - ds->ixold[i - 1], ds->ixold[i - 1]);
998 if (!ignoreline(line))
999 goto proceed;
1002 if (a > b || c <= d) { /* Changes and inserts. */
1003 for (i = c; i <= d; i++) {
1004 line = preadline(fileno(f2),
1005 ds->ixnew[i] - ds->ixnew[i - 1], ds->ixnew[i - 1]);
1006 if (!ignoreline(line))
1007 goto proceed;
1010 return;
1012 proceed:
1013 if (*pflags & D_HEADER) {
1014 diff_output("%s %s %s\n", args->diffargs, file1, file2);
1015 *pflags &= ~D_HEADER;
1017 if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1019 * Allocate change records as needed.
1021 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1022 ptrdiff_t offset = ds->context_vec_ptr - ds->context_vec_start;
1023 max_context <<= 1;
1024 ds->context_vec_start = xreallocarray(ds->context_vec_start,
1025 max_context, sizeof(*ds->context_vec_start));
1026 ds->context_vec_end = ds->context_vec_start + max_context;
1027 ds->context_vec_ptr = ds->context_vec_start + offset;
1029 if (ds->anychange == 0) {
1031 * Print the context/unidiff header first time through.
1033 print_header(ds, args, file1, file2);
1034 ds->anychange = 1;
1035 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1036 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1038 * If this change is more than 'diff_context' lines from the
1039 * previous change, dump the record and reset it.
1041 if (args->diff_format == D_CONTEXT)
1042 dump_context_vec(ds, args, f1, f2, *pflags);
1043 else
1044 dump_unified_vec(ds, args, f1, f2, *pflags);
1046 ds->context_vec_ptr++;
1047 ds->context_vec_ptr->a = a;
1048 ds->context_vec_ptr->b = b;
1049 ds->context_vec_ptr->c = c;
1050 ds->context_vec_ptr->d = d;
1051 return;
1053 if (ds->anychange == 0)
1054 ds->anychange = 1;
1055 switch (args->diff_format) {
1056 case D_BRIEF:
1057 return;
1058 case D_NORMAL:
1059 case D_EDIT:
1060 range(a, b, ",");
1061 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1062 if (args->diff_format == D_NORMAL)
1063 range(c, d, ",");
1064 diff_output("\n");
1065 break;
1066 case D_REVERSE:
1067 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1068 range(a, b, " ");
1069 diff_output("\n");
1070 break;
1071 case D_NREVERSE:
1072 if (a > b)
1073 diff_output("a%d %d\n", b, d - c + 1);
1074 else {
1075 diff_output("d%d %d\n", a, b - a + 1);
1076 if (!(c > d))
1077 /* add changed lines */
1078 diff_output("a%d %d\n", b, d - c + 1);
1080 break;
1082 if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1083 fetch(ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1084 if (a <= b && c <= d && args->diff_format == D_NORMAL)
1085 diff_output("---\n");
1087 i = fetch(ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1088 if (i != 0 && args->diff_format == D_EDIT) {
1090 * A non-zero return value for D_EDIT indicates that the
1091 * last line printed was a bare dot (".") that has been
1092 * escaped as ".." to prevent ed(1) from misinterpreting
1093 * it. We have to add a substitute command to change this
1094 * back and restart where we left off.
1096 diff_output(".\n");
1097 diff_output("%ds/.//\n", a + i - 1);
1098 b = a + i - 1;
1099 a = b + 1;
1100 c += i;
1101 goto restart;
1103 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1104 diff_output(".\n");
1105 if (ds->inifdef) {
1106 diff_output("#endif /* %s */\n", args->ifdefname);
1107 ds->inifdef = 0;
1111 static int
1112 fetch(struct got_diff_state *ds, struct got_diff_args *args,
1113 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1115 int i, j, c, lastc, col, nc;
1118 * When doing #ifdef's, copy down to current line
1119 * if this is the first file, so that stuff makes it to output.
1121 if (args->diff_format == D_IFDEF && oldfile) {
1122 long curpos = ftell(lb);
1123 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1124 nc = f[a > b ? b : a - 1] - curpos;
1125 for (i = 0; i < nc; i++)
1126 diff_output("%c", getc(lb));
1128 if (a > b)
1129 return (0);
1130 if (args->diff_format == D_IFDEF) {
1131 if (ds->inifdef) {
1132 diff_output("#else /* %s%s */\n",
1133 oldfile == 1 ? "!" : "", args->ifdefname);
1134 } else {
1135 if (oldfile)
1136 diff_output("#ifndef %s\n", args->ifdefname);
1137 else
1138 diff_output("#ifdef %s\n", args->ifdefname);
1140 ds->inifdef = 1 + oldfile;
1142 for (i = a; i <= b; i++) {
1143 fseek(lb, f[i - 1], SEEK_SET);
1144 nc = f[i] - f[i - 1];
1145 if (args->diff_format != D_IFDEF && ch != '\0') {
1146 diff_output("%c", ch);
1147 if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1148 || args->diff_format == D_UNIFIED))
1149 diff_output("\t");
1150 else if (args->diff_format != D_UNIFIED)
1151 diff_output(" ");
1153 col = 0;
1154 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1155 if ((c = getc(lb)) == EOF) {
1156 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1157 args->diff_format == D_NREVERSE)
1158 warnx("No newline at end of file");
1159 else
1160 diff_output("\n\\ No newline at end of "
1161 "file\n");
1162 return (0);
1164 if (c == '\t' && (flags & D_EXPANDTABS)) {
1165 do {
1166 diff_output(" ");
1167 } while (++col & 7);
1168 } else {
1169 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1170 && lastc == '.') {
1172 * Don't print a bare "." line
1173 * since that will confuse ed(1).
1174 * Print ".." instead and return,
1175 * giving the caller an offset
1176 * from which to restart.
1178 diff_output(".\n");
1179 return (i - a + 1);
1181 diff_output("%c", c);
1182 col++;
1186 return (0);
1190 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1192 static int
1193 readhash(struct got_diff_state *ds, FILE *f, int flags)
1195 int i, t, space;
1196 int sum;
1198 sum = 1;
1199 space = 0;
1200 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1201 if (flags & D_IGNORECASE)
1202 for (i = 0; (t = getc(f)) != '\n'; i++) {
1203 if (t == EOF) {
1204 if (i == 0)
1205 return (0);
1206 break;
1208 sum = sum * 127 + ds->chrtran[t];
1210 else
1211 for (i = 0; (t = getc(f)) != '\n'; i++) {
1212 if (t == EOF) {
1213 if (i == 0)
1214 return (0);
1215 break;
1217 sum = sum * 127 + t;
1219 } else {
1220 for (i = 0;;) {
1221 switch (t = getc(f)) {
1222 case '\t':
1223 case '\r':
1224 case '\v':
1225 case '\f':
1226 case ' ':
1227 space++;
1228 continue;
1229 default:
1230 if (space && (flags & D_IGNOREBLANKS) == 0) {
1231 i++;
1232 space = 0;
1234 sum = sum * 127 + ds->chrtran[t];
1235 i++;
1236 continue;
1237 case EOF:
1238 if (i == 0)
1239 return (0);
1240 /* FALLTHROUGH */
1241 case '\n':
1242 break;
1244 break;
1248 * There is a remote possibility that we end up with a zero sum.
1249 * Zero is used as an EOF marker, so return 1 instead.
1251 return (sum == 0 ? 1 : sum);
1254 static int
1255 asciifile(FILE *f)
1257 unsigned char buf[BUFSIZ];
1258 size_t cnt;
1260 if (f == NULL)
1261 return (1);
1263 rewind(f);
1264 cnt = fread(buf, 1, sizeof(buf), f);
1265 return (memchr(buf, '\0', cnt) == NULL);
1268 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1270 static char *
1271 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1273 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1274 size_t nc;
1275 int last = ds->lastline;
1276 char *state = NULL;
1278 ds->lastline = pos;
1279 while (pos > last) {
1280 fseek(fp, f[pos - 1], SEEK_SET);
1281 nc = f[pos] - f[pos - 1];
1282 if (nc >= sizeof(buf))
1283 nc = sizeof(buf) - 1;
1284 nc = fread(buf, 1, nc, fp);
1285 if (nc > 0) {
1286 buf[nc] = '\0';
1287 buf[strcspn(buf, "\n")] = '\0';
1288 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1289 if (begins_with(buf, "private:")) {
1290 if (!state)
1291 state = " (private)";
1292 } else if (begins_with(buf, "protected:")) {
1293 if (!state)
1294 state = " (protected)";
1295 } else if (begins_with(buf, "public:")) {
1296 if (!state)
1297 state = " (public)";
1298 } else {
1299 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1300 if (state)
1301 strlcat(ds->lastbuf, state,
1302 sizeof ds->lastbuf);
1303 ds->lastmatchline = pos;
1304 return ds->lastbuf;
1308 pos--;
1310 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1313 /* dump accumulated "context" diff changes */
1314 static void
1315 dump_context_vec(struct got_diff_state *ds, struct got_diff_args *args,
1316 FILE *f1, FILE *f2, int flags)
1318 struct context_vec *cvp = ds->context_vec_start;
1319 int lowa, upb, lowc, upd, do_output;
1320 int a, b, c, d;
1321 char ch, *f;
1323 if (ds->context_vec_start > ds->context_vec_ptr)
1324 return;
1326 b = d = 0; /* gcc */
1327 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1328 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1329 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1330 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1332 diff_output("***************");
1333 if ((flags & D_PROTOTYPE)) {
1334 f = match_function(ds, ds->ixold, lowa-1, f1);
1335 if (f != NULL)
1336 diff_output(" %s", f);
1338 diff_output("\n*** ");
1339 range(lowa, upb, ",");
1340 diff_output(" ****\n");
1343 * Output changes to the "old" file. The first loop suppresses
1344 * output if there were no changes to the "old" file (we'll see
1345 * the "old" lines as context in the "new" list).
1347 do_output = 0;
1348 for (; cvp <= ds->context_vec_ptr; cvp++)
1349 if (cvp->a <= cvp->b) {
1350 cvp = ds->context_vec_start;
1351 do_output++;
1352 break;
1354 if (do_output) {
1355 while (cvp <= ds->context_vec_ptr) {
1356 a = cvp->a;
1357 b = cvp->b;
1358 c = cvp->c;
1359 d = cvp->d;
1361 if (a <= b && c <= d)
1362 ch = 'c';
1363 else
1364 ch = (a <= b) ? 'd' : 'a';
1366 if (ch == 'a')
1367 fetch(ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1368 else {
1369 fetch(ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1370 fetch(ds, args, ds->ixold, a, b, f1,
1371 ch == 'c' ? '!' : '-', 0, flags);
1373 lowa = b + 1;
1374 cvp++;
1376 fetch(ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1378 /* output changes to the "new" file */
1379 diff_output("--- ");
1380 range(lowc, upd, ",");
1381 diff_output(" ----\n");
1383 do_output = 0;
1384 for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1385 if (cvp->c <= cvp->d) {
1386 cvp = ds->context_vec_start;
1387 do_output++;
1388 break;
1390 if (do_output) {
1391 while (cvp <= ds->context_vec_ptr) {
1392 a = cvp->a;
1393 b = cvp->b;
1394 c = cvp->c;
1395 d = cvp->d;
1397 if (a <= b && c <= d)
1398 ch = 'c';
1399 else
1400 ch = (a <= b) ? 'd' : 'a';
1402 if (ch == 'd')
1403 fetch(ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1404 else {
1405 fetch(ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1406 fetch(ds, args, ds->ixnew, c, d, f2,
1407 ch == 'c' ? '!' : '+', 0, flags);
1409 lowc = d + 1;
1410 cvp++;
1412 fetch(ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1414 ds->context_vec_ptr = ds->context_vec_start - 1;
1417 /* dump accumulated "unified" diff changes */
1418 static void
1419 dump_unified_vec(struct got_diff_state *ds, struct got_diff_args *args,
1420 FILE *f1, FILE *f2, int flags)
1422 struct context_vec *cvp = ds->context_vec_start;
1423 int lowa, upb, lowc, upd;
1424 int a, b, c, d;
1425 char ch, *f;
1427 if (ds->context_vec_start > ds->context_vec_ptr)
1428 return;
1430 b = d = 0; /* gcc */
1431 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1432 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1433 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1434 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1436 diff_output("@@ -");
1437 uni_range(lowa, upb);
1438 diff_output(" +");
1439 uni_range(lowc, upd);
1440 diff_output(" @@");
1441 if ((flags & D_PROTOTYPE)) {
1442 f = match_function(ds, ds->ixold, lowa-1, f1);
1443 if (f != NULL)
1444 diff_output(" %s", f);
1446 diff_output("\n");
1449 * Output changes in "unified" diff format--the old and new lines
1450 * are printed together.
1452 for (; cvp <= ds->context_vec_ptr; cvp++) {
1453 a = cvp->a;
1454 b = cvp->b;
1455 c = cvp->c;
1456 d = cvp->d;
1459 * c: both new and old changes
1460 * d: only changes in the old file
1461 * a: only changes in the new file
1463 if (a <= b && c <= d)
1464 ch = 'c';
1465 else
1466 ch = (a <= b) ? 'd' : 'a';
1468 switch (ch) {
1469 case 'c':
1470 fetch(ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1471 fetch(ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1472 fetch(ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1473 break;
1474 case 'd':
1475 fetch(ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1476 fetch(ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1477 break;
1478 case 'a':
1479 fetch(ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1480 fetch(ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1481 break;
1483 lowa = b + 1;
1484 lowc = d + 1;
1486 fetch(ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1488 ds->context_vec_ptr = ds->context_vec_start - 1;
1491 static void
1492 print_header(struct got_diff_state *ds, struct got_diff_args *args,
1493 const char *file1, const char *file2)
1495 if (args->label[0] != NULL)
1496 diff_output("%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1497 args->label[0]);
1498 else
1499 diff_output("%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1500 file1, ctime(&ds->stb1.st_mtime));
1501 if (args->label[1] != NULL)
1502 diff_output("%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1503 args->label[1]);
1504 else
1505 diff_output("%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1506 file2, ctime(&ds->stb2.st_mtime));