Gotweb

Blob

Date:: Mon Apr 2 18:49:11 2018 UTC
Message:: remove support for "normal" diff output from diffreg
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_lib_diff.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 };
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 static void	 diff_output(FILE *, const char *, ...);
187 static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
189 static void	 range(FILE *, int, int, char *);
190 static void	 uni_range(FILE *, int, int);
191 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
193 static void	 prune(struct got_diff_state *);
194 static void	 equiv(struct line *, int, struct line *, int, int *);
195 static void	 unravel(struct got_diff_state *, int);
196 static int	 unsort(struct line *, int, int *);
197 static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
198 static void	 sort(struct line *, int);
199 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
200 static int	 asciifile(FILE *);
201 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
202 static int	 newcand(struct got_diff_state *, int, int, int, int *);
203 static int	 search(struct got_diff_state *, int *, int, int);
204 static int	 skipline(FILE *);
205 static int	 isqrt(int);
206 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
207 static int	 readhash(struct got_diff_state *, FILE *, int);
208 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
209 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
210 
211 /*
212  * chrtran points to one of 2 translation tables: cup2low if folding upper to
213  * lower case clow2low if not folding case
214  */
215 u_char clow2low[256] = {
216 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
217 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
218 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
219 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
220 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
221 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
222 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
223 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
224 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
225 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
226 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
227 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
228 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
229 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
230 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
231 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
232 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
233 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
234 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
235 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
236 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
237 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
238 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
239 	0xfd, 0xfe, 0xff
240 };
241 
242 u_char cup2low[256] = {
243 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
244 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
245 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
246 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
247 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
248 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
249 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
250 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
251 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
252 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
253 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
254 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
255 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
256 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
257 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
258 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
259 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
260 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
261 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
262 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
263 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
264 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
265 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
266 	0xfd, 0xfe, 0xff
267 };
268 
269 static void
270 diff_output(FILE *outfile, const char *fmt, ...)
271 {
272 	va_list ap;
273 
274 	va_start(ap, fmt);
275 	vfprintf(outfile, fmt, ap);
276 	va_end(ap);
277 }
278 
279 const struct got_error *
280 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
281     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
282 {
283 	const struct got_error *err = NULL;
284 	int i, *p;
285 	long *lp;
286 
287 	*rval = D_SAME;
288 	ds->anychange = 0;
289 	ds->lastline = 0;
290 	ds->lastmatchline = 0;
291 	ds->context_vec_ptr = ds->context_vec_start - 1;
292 	ds->max_context = 64;
293 	if (flags & D_IGNORECASE)
294 		ds->chrtran = cup2low;
295 	else
296 		ds->chrtran = clow2low;
297 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
298 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
299 		return NULL;
300 	}
301 	if (flags & D_EMPTY1) {
302 		f1 = fopen(_PATH_DEVNULL, "r");
303 		if (f1 == NULL) {
304 			err = got_error_from_errno();
305 			goto closem;
306 		}
307 	}
308 	else if (f1 == NULL) {
309 		args->status |= 2;
310 		goto closem;
311 	}
312 
313 	if (flags & D_EMPTY2) {
314 		f2 = fopen(_PATH_DEVNULL, "r");
315 		if (f2 == NULL) {
316 			err = got_error_from_errno();
317 			goto closem;
318 		}
319 	} else if (f2 == NULL) {
320 		args->status |= 2;
321 		goto closem;
322 	}
323 
324 	switch (files_differ(ds, f1, f2, flags)) {
325 	case 0:
326 		goto closem;
327 	case 1:
328 		break;
329 	default:
330 		/* error */
331 		args->status |= 2;
332 		goto closem;
333 	}
334 
335 	if ((flags & D_FORCEASCII) == 0 &&
336 	    (!asciifile(f1) || !asciifile(f2))) {
337 		*rval = D_BINARY;
338 		args->status |= 1;
339 		goto closem;
340 	}
341 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
342 		err = got_error_from_errno();
343 		goto closem;
344 	}
345 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
346 		err = got_error_from_errno();
347 		goto closem;
348 	}
349 
350 	prune(ds);
351 	sort(ds->sfile[0], ds->slen[0]);
352 	sort(ds->sfile[1], ds->slen[1]);
353 
354 	ds->member = (int *)ds->file[1];
355 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
356 	p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
357 	if (p == NULL) {
358 		err = got_error_from_errno();
359 		goto closem;
360 	}
361 	ds->member = p;
362 
363 	ds->class = (int *)ds->file[0];
364 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
365 		err = got_error_from_errno();
366 		goto closem;
367 	}
368 	p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
369 	if (p == NULL) {
370 		err = got_error_from_errno();
371 		goto closem;
372 	}
373 	ds->class = p;
374 
375 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
376 	if (ds->klist == NULL) {
377 		err = got_error_from_errno();
378 		goto closem;
379 	}
380 	ds->clen = 0;
381 	ds->clistlen = 100;
382 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
383 	if (ds->clist == NULL) {
384 		err = got_error_from_errno();
385 		goto closem;
386 	}
387 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
388 	if (i < 0) {
389 		err = got_error_from_errno();
390 		goto closem;
391 	}
392 
393 	p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
394 	if (p == NULL) {
395 		err = got_error_from_errno();
396 		goto closem;
397 	}
398 	ds->J = p;
399 	unravel(ds, ds->klist[i]);
400 
401 	lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
402 	if (lp == NULL) {
403 		err = got_error_from_errno();
404 		goto closem;
405 	}
406 	ds->ixold = lp;
407 	lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
408 	if (lp == NULL) {
409 		err = got_error_from_errno();
410 		goto closem;
411 	}
412 	ds->ixnew = lp;
413 	check(ds, f1, f2, flags);
414 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
415 	    flags))
416 		err = got_error_from_errno();
417 closem:
418 	free(ds->J);
419 	free(ds->member);
420 	free(ds->class);
421 	free(ds->clist);
422 	free(ds->klist);
423 	free(ds->ixold);
424 	free(ds->ixnew);
425 	if (ds->anychange) {
426 		args->status |= 1;
427 		if (*rval == D_SAME)
428 			*rval = D_DIFFER;
429 	}
430 	if (f1 != NULL)
431 		fclose(f1);
432 	if (f2 != NULL)
433 		fclose(f2);
434 
435 	return (err);
436 }
437 
438 /*
439  * Check to see if the given files differ.
440  * Returns 0 if they are the same, 1 if different, and -1 on error.
441  * XXX - could use code from cmp(1) [faster]
442  */
443 static int
444 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
445 {
446 	char buf1[BUFSIZ], buf2[BUFSIZ];
447 	size_t i, j;
448 
449 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
450 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
451 		return (1);
452 	for (;;) {
453 		i = fread(buf1, 1, sizeof(buf1), f1);
454 		j = fread(buf2, 1, sizeof(buf2), f2);
455 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
456 			return (-1);
457 		if (i != j)
458 			return (1);
459 		if (i == 0)
460 			return (0);
461 		if (memcmp(buf1, buf2, i) != 0)
462 			return (1);
463 	}
464 }
465 
466 static int
467 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
468 {
469 	struct line *p, *q;
470 	int j, h;
471 	size_t sz;
472 
473 	rewind(fd);
474 
475 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
476 	if (sz < 100)
477 		sz = 100;
478 
479 	p = calloc(sz + 3, sizeof(*p));
480 	if (p == NULL)
481 		return (-1);
482 	for (j = 0; (h = readhash(ds, fd, flags));) {
483 		if (j == sz) {
484 			sz = sz * 3 / 2;
485 			q = reallocarray(p, sz + 3, sizeof(*p));
486 			if (q == NULL) {
487 				free(p);
488 				return (-1);
489 			}
490 			p = q;
491 		}
492 		p[++j].value = h;
493 	}
494 	ds->len[i] = j;
495 	ds->file[i] = p;
496 
497 	return (0);
498 }
499 
500 static void
501 prune(struct got_diff_state *ds)
502 {
503 	int i, j;
504 
505 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
506 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
507 	    ds->pref++)
508 		;
509 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
510 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
511 	    ds->suff++)
512 		;
513 	for (j = 0; j < 2; j++) {
514 		ds->sfile[j] = ds->file[j] + ds->pref;
515 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
516 		for (i = 0; i <= ds->slen[j]; i++)
517 			ds->sfile[j][i].serial = i;
518 	}
519 }
520 
521 static void
522 equiv(struct line *a, int n, struct line *b, int m, int *c)
523 {
524 	int i, j;
525 
526 	i = j = 1;
527 	while (i <= n && j <= m) {
528 		if (a[i].value < b[j].value)
529 			a[i++].value = 0;
530 		else if (a[i].value == b[j].value)
531 			a[i++].value = j;
532 		else
533 			j++;
534 	}
535 	while (i <= n)
536 		a[i++].value = 0;
537 	b[m + 1].value = 0;
538 	j = 0;
539 	while (++j <= m) {
540 		c[j] = -b[j].serial;
541 		while (b[j + 1].value == b[j].value) {
542 			j++;
543 			c[j] = b[j].serial;
544 		}
545 	}
546 	c[j] = -1;
547 }
548 
549 /* Code taken from ping.c */
550 static int
551 isqrt(int n)
552 {
553 	int y, x = 1;
554 
555 	if (n == 0)
556 		return (0);
557 
558 	do { /* newton was a stinker */
559 		y = x;
560 		x = n / x;
561 		x += y;
562 		x /= 2;
563 	} while ((x - y) > 1 || (x - y) < -1);
564 
565 	return (x);
566 }
567 
568 static int
569 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
570 {
571 	int i, k, y, j, l;
572 	int oldc, tc, oldl, sq;
573 	u_int numtries, bound;
574 	int error;
575 
576 	if (flags & D_MINIMAL)
577 		bound = UINT_MAX;
578 	else {
579 		sq = isqrt(n);
580 		bound = MAXIMUM(256, sq);
581 	}
582 
583 	k = 0;
584 	c[0] = newcand(ds, 0, 0, 0, &error);
585 	if (error)
586 		return -1;
587 	for (i = 1; i <= n; i++) {
588 		j = a[i];
589 		if (j == 0)
590 			continue;
591 		y = -b[j];
592 		oldl = 0;
593 		oldc = c[0];
594 		numtries = 0;
595 		do {
596 			if (y <= ds->clist[oldc].y)
597 				continue;
598 			l = search(ds, c, k, y);
599 			if (l != oldl + 1)
600 				oldc = c[l - 1];
601 			if (l <= k) {
602 				if (ds->clist[c[l]].y <= y)
603 					continue;
604 				tc = c[l];
605 				c[l] = newcand(ds, i, y, oldc, &error);
606 				if (error)
607 					return -1;
608 				oldc = tc;
609 				oldl = l;
610 				numtries++;
611 			} else {
612 				c[l] = newcand(ds, i, y, oldc, &error);
613 				if (error)
614 					return -1;
615 				k++;
616 				break;
617 			}
618 		} while ((y = b[++j]) > 0 && numtries < bound);
619 	}
620 	return (k);
621 }
622 
623 static int
624 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
625 {
626 	struct cand *q;
627 
628 	if (ds->clen == ds->clistlen) {
629 		ds->clistlen = ds->clistlen * 11 / 10;
630 		q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
631 		if (q == NULL) {
632 			*errorp = -1;
633 			free(ds->clist);
634 			ds->clist = NULL;
635 			return 0;
636 		}
637 		ds->clist = q;
638 	}
639 	q = ds->clist + ds->clen;
640 	q->x = x;
641 	q->y = y;
642 	q->pred = pred;
643 	*errorp = 0;
644 	return (ds->clen++);
645 }
646 
647 static int
648 search(struct got_diff_state *ds, int *c, int k, int y)
649 {
650 	int i, j, l, t;
651 
652 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
653 		return (k + 1);
654 	i = 0;
655 	j = k + 1;
656 	for (;;) {
657 		l = (i + j) / 2;
658 		if (l <= i)
659 			break;
660 		t = ds->clist[c[l]].y;
661 		if (t > y)
662 			j = l;
663 		else if (t < y)
664 			i = l;
665 		else
666 			return (l);
667 	}
668 	return (l + 1);
669 }
670 
671 static void
672 unravel(struct got_diff_state *ds, int p)
673 {
674 	struct cand *q;
675 	int i;
676 
677 	for (i = 0; i <= ds->len[0]; i++)
678 		ds->J[i] = i <= ds->pref ? i :
679 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
680 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
681 		ds->J[q->x + ds->pref] = q->y + ds->pref;
682 }
683 
684 /*
685  * Check does double duty:
686  *  1.	ferret out any fortuitous correspondences due
687  *	to confounding by hashing (which result in "jackpot")
688  *  2.  collect random access indexes to the two files
689  */
690 static void
691 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
692 {
693 	int i, j, jackpot, c, d;
694 	long ctold, ctnew;
695 
696 	rewind(f1);
697 	rewind(f2);
698 	j = 1;
699 	ds->ixold[0] = ds->ixnew[0] = 0;
700 	jackpot = 0;
701 	ctold = ctnew = 0;
702 	for (i = 1; i <= ds->len[0]; i++) {
703 		if (ds->J[i] == 0) {
704 			ds->ixold[i] = ctold += skipline(f1);
705 			continue;
706 		}
707 		while (j < ds->J[i]) {
708 			ds->ixnew[j] = ctnew += skipline(f2);
709 			j++;
710 		}
711 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
712 			for (;;) {
713 				c = getc(f1);
714 				d = getc(f2);
715 				/*
716 				 * GNU diff ignores a missing newline
717 				 * in one file for -b or -w.
718 				 */
719 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
720 					if (c == EOF && d == '\n') {
721 						ctnew++;
722 						break;
723 					} else if (c == '\n' && d == EOF) {
724 						ctold++;
725 						break;
726 					}
727 				}
728 				ctold++;
729 				ctnew++;
730 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
731 				    isspace(d)) {
732 					do {
733 						if (c == '\n')
734 							break;
735 						ctold++;
736 					} while (isspace(c = getc(f1)));
737 					do {
738 						if (d == '\n')
739 							break;
740 						ctnew++;
741 					} while (isspace(d = getc(f2)));
742 				} else if ((flags & D_IGNOREBLANKS)) {
743 					while (isspace(c) && c != '\n') {
744 						c = getc(f1);
745 						ctold++;
746 					}
747 					while (isspace(d) && d != '\n') {
748 						d = getc(f2);
749 						ctnew++;
750 					}
751 				}
752 				if (ds->chrtran[c] != ds->chrtran[d]) {
753 					jackpot++;
754 					ds->J[i] = 0;
755 					if (c != '\n' && c != EOF)
756 						ctold += skipline(f1);
757 					if (d != '\n' && c != EOF)
758 						ctnew += skipline(f2);
759 					break;
760 				}
761 				if (c == '\n' || c == EOF)
762 					break;
763 			}
764 		} else {
765 			for (;;) {
766 				ctold++;
767 				ctnew++;
768 				if ((c = getc(f1)) != (d = getc(f2))) {
769 					/* jackpot++; */
770 					ds->J[i] = 0;
771 					if (c != '\n' && c != EOF)
772 						ctold += skipline(f1);
773 					if (d != '\n' && c != EOF)
774 						ctnew += skipline(f2);
775 					break;
776 				}
777 				if (c == '\n' || c == EOF)
778 					break;
779 			}
780 		}
781 		ds->ixold[i] = ctold;
782 		ds->ixnew[j] = ctnew;
783 		j++;
784 	}
785 	for (; j <= ds->len[1]; j++)
786 		ds->ixnew[j] = ctnew += skipline(f2);
787 	/*
788 	 * if (jackpot)
789 	 *	fprintf(stderr, "jackpot\n");
790 	 */
791 }
792 
793 /* shellsort CACM #201 */
794 static void
795 sort(struct line *a, int n)
796 {
797 	struct line *ai, *aim, w;
798 	int j, m = 0, k;
799 
800 	if (n == 0)
801 		return;
802 	for (j = 1; j <= n; j *= 2)
803 		m = 2 * j - 1;
804 	for (m /= 2; m != 0; m /= 2) {
805 		k = n - m;
806 		for (j = 1; j <= k; j++) {
807 			for (ai = &a[j]; ai > a; ai -= m) {
808 				aim = &ai[m];
809 				if (aim < ai)
810 					break;	/* wraparound */
811 				if (aim->value > ai[0].value ||
812 				    (aim->value == ai[0].value &&
813 					aim->serial > ai[0].serial))
814 					break;
815 				w.value = ai[0].value;
816 				ai[0].value = aim->value;
817 				aim->value = w.value;
818 				w.serial = ai[0].serial;
819 				ai[0].serial = aim->serial;
820 				aim->serial = w.serial;
821 			}
822 		}
823 	}
824 }
825 
826 static int
827 unsort(struct line *f, int l, int *b)
828 {
829 	int *a, i;
830 
831 	a = calloc(l + 1, sizeof(*a));
832 	if (a == NULL)
833 		return (-1);
834 	for (i = 1; i <= l; i++)
835 		a[f[i].serial] = f[i].value;
836 	for (i = 1; i <= l; i++)
837 		b[i] = a[i];
838 	free(a);
839 
840 	return (0);
841 }
842 
843 static int
844 skipline(FILE *f)
845 {
846 	int i, c;
847 
848 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849 		continue;
850 	return (i);
851 }
852 
853 static int
854 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
855     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
856 {
857 	int m, i0, i1, j0, j1;
858 	int error = 0;
859 
860 	rewind(f1);
861 	rewind(f2);
862 	m = ds->len[0];
863 	ds->J[0] = 0;
864 	ds->J[m + 1] = ds->len[1] + 1;
865 	if (args->diff_format != D_EDIT) {
866 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
867 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
868 				i0++;
869 			j0 = ds->J[i0 - 1] + 1;
870 			i1 = i0 - 1;
871 			while (i1 < m && ds->J[i1 + 1] == 0)
872 				i1++;
873 			j1 = ds->J[i1 + 1] - 1;
874 			ds->J[i1] = j1;
875 			error = change(outfile, ds, args, file1, f1, file2, f2,
876 			    i0, i1, j0, j1, &flags);
877 			if (error)
878 				return (error);
879 		}
880 	} else {
881 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
882 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
883 				i0--;
884 			j0 = ds->J[i0 + 1] - 1;
885 			i1 = i0 + 1;
886 			while (i1 > 1 && ds->J[i1 - 1] == 0)
887 				i1--;
888 			j1 = ds->J[i1 - 1] + 1;
889 			ds->J[i1] = j1;
890 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
891 			    j1, j0, &flags);
892 			if (error)
893 				return (error);
894 		}
895 	}
896 	if (m == 0) {
897 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
898 		    1, ds->len[1], &flags);
899 		if (error)
900 			return (error);
901 	}
902 	if (args->diff_format == D_IFDEF) {
903 		for (;;) {
904 #define	c i0
905 			if ((c = getc(f1)) == EOF)
906 				return (0);
907 			diff_output(outfile, "%c", c);
908 		}
909 #undef c
910 	}
911 	if (ds->anychange != 0)
912 		dump_unified_vec(outfile, ds, args, f1, f2, flags);
913 
914 	return (0);
915 }
916 
917 static void
918 range(FILE *outfile, int a, int b, char *separator)
919 {
920 	diff_output(outfile, "%d", a > b ? b : a);
921 	if (a < b)
922 		diff_output(outfile, "%s%d", separator, b);
923 }
924 
925 static void
926 uni_range(FILE *outfile, int a, int b)
927 {
928 	if (a < b)
929 		diff_output(outfile, "%d,%d", a, b - a + 1);
930 	else if (a == b)
931 		diff_output(outfile, "%d", b);
932 	else
933 		diff_output(outfile, "%d,0", b);
934 }
935 
936 /*
937  * Indicate that there is a difference between lines a and b of the from file
938  * to get to lines c to d of the to file.  If a is greater then b then there
939  * are no lines in the from file involved and this means that there were
940  * lines appended (beginning at b).  If c is greater than d then there are
941  * lines missing from the to file.
942  */
943 static int
944 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
945     const char *file1, FILE *f1, const char *file2, FILE *f2,
946     int a, int b, int c, int d, int *pflags)
947 {
948 	int i;
949 
950 restart:
951 	if (args->diff_format != D_IFDEF && a > b && c > d)
952 		return (0);
953 
954 	if (*pflags & D_HEADER) {
955 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
956 		*pflags &= ~D_HEADER;
957 	}
958 	if (args->diff_format == D_UNIFIED) {
959 		/*
960 		 * Allocate change records as needed.
961 		 */
962 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
963 			struct context_vec *cvp;
964 			ptrdiff_t offset;
965 			offset = ds->context_vec_ptr - ds->context_vec_start;
966 			ds->max_context <<= 1;
967 			ds->context_vec_start =
968 			cvp = reallocarray(ds->context_vec_start,
969 			    ds->max_context, sizeof(*ds->context_vec_start));
970 			if (cvp == NULL) {
971 				free(ds->context_vec_start);
972 				return (-1);
973 			}
974 			ds->context_vec_start = cvp;
975 			ds->context_vec_end = ds->context_vec_start +
976 			    ds->max_context;
977 			ds->context_vec_ptr = ds->context_vec_start + offset;
978 		}
979 		if (ds->anychange == 0) {
980 			/*
981 			 * Print the context/unidiff header first time through.
982 			 */
983 			print_header(outfile, ds, args, file1, file2);
984 			ds->anychange = 1;
985 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
986 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
987 			/*
988 			 * If this change is more than 'diff_context' lines from the
989 			 * previous change, dump the record and reset it.
990 			 */
991 			dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
992 		}
993 		ds->context_vec_ptr++;
994 		ds->context_vec_ptr->a = a;
995 		ds->context_vec_ptr->b = b;
996 		ds->context_vec_ptr->c = c;
997 		ds->context_vec_ptr->d = d;
998 		return (0);
999 	}
1000 	if (ds->anychange == 0)
1001 		ds->anychange = 1;
1002 	switch (args->diff_format) {
1003 	case D_BRIEF:
1004 		return (0);
1005 	case D_EDIT:
1006 		range(outfile, a, b, ",");
1007 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1008 		diff_output(outfile, "\n");
1009 		break;
1010 	case D_REVERSE:
1011 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1012 		range(outfile, a, b, " ");
1013 		diff_output(outfile, "\n");
1014 		break;
1015 	case D_NREVERSE:
1016 		if (a > b)
1017 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1018 		else {
1019 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1020 			if (!(c > d))
1021 				/* add changed lines */
1022 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1023 		}
1024 		break;
1025 	}
1026 	if (args->diff_format == D_IFDEF)
1027 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1028 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
1029 	if (i != 0 && args->diff_format == D_EDIT) {
1030 		/*
1031 		 * A non-zero return value for D_EDIT indicates that the
1032 		 * last line printed was a bare dot (".") that has been
1033 		 * escaped as ".." to prevent ed(1) from misinterpreting
1034 		 * it.  We have to add a substitute command to change this
1035 		 * back and restart where we left off.
1036 		 */
1037 		diff_output(outfile, ".\n");
1038 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1039 		b = a + i - 1;
1040 		a = b + 1;
1041 		c += i;
1042 		goto restart;
1043 	}
1044 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1045 		diff_output(outfile, ".\n");
1046 	if (ds->inifdef) {
1047 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1048 		ds->inifdef = 0;
1049 	}
1050 
1051 	return (0);
1052 }
1053 
1054 static int
1055 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1056     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1057 {
1058 	int i, j, c, lastc, col, nc;
1059 
1060 	/*
1061 	 * When doing #ifdef's, copy down to current line
1062 	 * if this is the first file, so that stuff makes it to output.
1063 	 */
1064 	if (args->diff_format == D_IFDEF && oldfile) {
1065 		long curpos = ftell(lb);
1066 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1067 		nc = f[a > b ? b : a - 1] - curpos;
1068 		for (i = 0; i < nc; i++)
1069 			diff_output(outfile, "%c", getc(lb));
1070 	}
1071 	if (a > b)
1072 		return (0);
1073 	if (args->diff_format == D_IFDEF) {
1074 		if (ds->inifdef) {
1075 			diff_output(outfile, "#else /* %s%s */\n",
1076 			    oldfile == 1 ? "!" : "", args->ifdefname);
1077 		} else {
1078 			if (oldfile)
1079 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1080 			else
1081 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1082 		}
1083 		ds->inifdef = 1 + oldfile;
1084 	}
1085 	for (i = a; i <= b; i++) {
1086 		fseek(lb, f[i - 1], SEEK_SET);
1087 		nc = f[i] - f[i - 1];
1088 		if (args->diff_format != D_IFDEF && ch != '\0') {
1089 			diff_output(outfile, "%c", ch);
1090 			if (args->Tflag && args->diff_format == D_UNIFIED)
1091 				diff_output(outfile, "\t");
1092 			else if (args->diff_format != D_UNIFIED)
1093 				diff_output(outfile, " ");
1094 		}
1095 		col = 0;
1096 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1097 			if ((c = getc(lb)) == EOF) {
1098 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1099 				    args->diff_format == D_NREVERSE)
1100 					warnx("No newline at end of file");
1101 				else
1102 					diff_output(outfile, "\n\\ No newline at end of "
1103 					    "file\n");
1104 				return (0);
1105 			}
1106 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1107 				do {
1108 					diff_output(outfile, " ");
1109 				} while (++col & 7);
1110 			} else {
1111 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1112 				    && lastc == '.') {
1113 					/*
1114 					 * Don't print a bare "." line
1115 					 * since that will confuse ed(1).
1116 					 * Print ".." instead and return,
1117 					 * giving the caller an offset
1118 					 * from which to restart.
1119 					 */
1120 					diff_output(outfile, ".\n");
1121 					return (i - a + 1);
1122 				}
1123 				diff_output(outfile, "%c", c);
1124 				col++;
1125 			}
1126 		}
1127 	}
1128 	return (0);
1129 }
1130 
1131 /*
1132  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1133  */
1134 static int
1135 readhash(struct got_diff_state *ds, FILE *f, int flags)
1136 {
1137 	int i, t, space;
1138 	int sum;
1139 
1140 	sum = 1;
1141 	space = 0;
1142 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1143 		if (flags & D_IGNORECASE)
1144 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1145 				if (t == EOF) {
1146 					if (i == 0)
1147 						return (0);
1148 					break;
1149 				}
1150 				sum = sum * 127 + ds->chrtran[t];
1151 			}
1152 		else
1153 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1154 				if (t == EOF) {
1155 					if (i == 0)
1156 						return (0);
1157 					break;
1158 				}
1159 				sum = sum * 127 + t;
1160 			}
1161 	} else {
1162 		for (i = 0;;) {
1163 			switch (t = getc(f)) {
1164 			case '\t':
1165 			case '\r':
1166 			case '\v':
1167 			case '\f':
1168 			case ' ':
1169 				space++;
1170 				continue;
1171 			default:
1172 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1173 					i++;
1174 					space = 0;
1175 				}
1176 				sum = sum * 127 + ds->chrtran[t];
1177 				i++;
1178 				continue;
1179 			case EOF:
1180 				if (i == 0)
1181 					return (0);
1182 				/* FALLTHROUGH */
1183 			case '\n':
1184 				break;
1185 			}
1186 			break;
1187 		}
1188 	}
1189 	/*
1190 	 * There is a remote possibility that we end up with a zero sum.
1191 	 * Zero is used as an EOF marker, so return 1 instead.
1192 	 */
1193 	return (sum == 0 ? 1 : sum);
1194 }
1195 
1196 static int
1197 asciifile(FILE *f)
1198 {
1199 	unsigned char buf[BUFSIZ];
1200 	size_t cnt;
1201 
1202 	if (f == NULL)
1203 		return (1);
1204 
1205 	rewind(f);
1206 	cnt = fread(buf, 1, sizeof(buf), f);
1207 	return (memchr(buf, '\0', cnt) == NULL);
1208 }
1209 
1210 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1211 
1212 static char *
1213 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1214 {
1215 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1216 	size_t nc;
1217 	int last = ds->lastline;
1218 	char *state = NULL;
1219 
1220 	ds->lastline = pos;
1221 	while (pos > last) {
1222 		fseek(fp, f[pos - 1], SEEK_SET);
1223 		nc = f[pos] - f[pos - 1];
1224 		if (nc >= sizeof(buf))
1225 			nc = sizeof(buf) - 1;
1226 		nc = fread(buf, 1, nc, fp);
1227 		if (nc > 0) {
1228 			buf[nc] = '\0';
1229 			buf[strcspn(buf, "\n")] = '\0';
1230 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1231 				if (begins_with(buf, "private:")) {
1232 					if (!state)
1233 						state = " (private)";
1234 				} else if (begins_with(buf, "protected:")) {
1235 					if (!state)
1236 						state = " (protected)";
1237 				} else if (begins_with(buf, "public:")) {
1238 					if (!state)
1239 						state = " (public)";
1240 				} else {
1241 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1242 					if (state)
1243 						strlcat(ds->lastbuf, state,
1244 						    sizeof ds->lastbuf);
1245 					ds->lastmatchline = pos;
1246 					return ds->lastbuf;
1247 				}
1248 			}
1249 		}
1250 		pos--;
1251 	}
1252 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1253 }
1254 
1255 /* dump accumulated "unified" diff changes */
1256 static void
1257 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1258     FILE *f1, FILE *f2, int flags)
1259 {
1260 	struct context_vec *cvp = ds->context_vec_start;
1261 	int lowa, upb, lowc, upd;
1262 	int a, b, c, d;
1263 	char ch, *f;
1264 
1265 	if (ds->context_vec_start > ds->context_vec_ptr)
1266 		return;
1267 
1268 	b = d = 0;		/* gcc */
1269 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1270 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1271 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1272 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1273 
1274 	diff_output(outfile, "@@ -");
1275 	uni_range(outfile, lowa, upb);
1276 	diff_output(outfile, " +");
1277 	uni_range(outfile, lowc, upd);
1278 	diff_output(outfile, " @@");
1279 	if ((flags & D_PROTOTYPE)) {
1280 		f = match_function(ds, ds->ixold, lowa-1, f1);
1281 		if (f != NULL)
1282 			diff_output(outfile, " %s", f);
1283 	}
1284 	diff_output(outfile, "\n");
1285 
1286 	/*
1287 	 * Output changes in "unified" diff format--the old and new lines
1288 	 * are printed together.
1289 	 */
1290 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1291 		a = cvp->a;
1292 		b = cvp->b;
1293 		c = cvp->c;
1294 		d = cvp->d;
1295 
1296 		/*
1297 		 * c: both new and old changes
1298 		 * d: only changes in the old file
1299 		 * a: only changes in the new file
1300 		 */
1301 		if (a <= b && c <= d)
1302 			ch = 'c';
1303 		else
1304 			ch = (a <= b) ? 'd' : 'a';
1305 
1306 		switch (ch) {
1307 		case 'c':
1308 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1309 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1310 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1311 			break;
1312 		case 'd':
1313 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1314 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1315 			break;
1316 		case 'a':
1317 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1318 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1319 			break;
1320 		}
1321 		lowa = b + 1;
1322 		lowc = d + 1;
1323 	}
1324 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1325 
1326 	ds->context_vec_ptr = ds->context_vec_start - 1;
1327 }
1328 
1329 static void
1330 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1331     const char *file1, const char *file2)
1332 {
1333 	if (args->label[0] != NULL)
1334 		diff_output(outfile, "--- %s\n", args->label[0]);
1335 	else
1336 		diff_output(outfile, "--- %s\t%s", file1,
1337 		    ctime(&ds->stb1.st_mtime));
1338 	if (args->label[1] != NULL)
1339 		diff_output(outfile, "+++ %s\n", args->label[1]);
1340 	else
1341 		diff_output(outfile, "+++ %s\t%s", file2,
1342 		    ctime(&ds->stb2.st_mtime));
1343 }