Gotweb

Blob

Date:: Mon Apr 2 18:42:06 2018 UTC
Message:: adjust preadline() and ignoreline() to match diff(1) again
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_lib_diff.h"
92 
/*
 * Smaller / larger of two values.  These are plain macros: the selected
 * argument is evaluated twice, so do not pass expressions with side effects.
 */
#define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
#define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i of file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	than the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
/*
 * One k-candidate as built by stone() and stored in the clist array.
 * unravel() follows the pred links and records J[x + pref] = y + pref.
 */
struct cand {
	int	x;	/* line number in file0 (relative to the pruned prefix) */
	int	y;	/* line number in file1 (relative to the pruned prefix) */
	int	pred;	/* clist index of the preceding candidate in the chain */
};
168 
/*
 * Per-line record used while matching.
 */
struct line {
	int	serial;	/* position within the pruned file, assigned by prune() */
	int	value;	/* line hash from readhash(); equiv() later overwrites it */
};
173 
/*
 * The following struct is used to record change information when
 * doing a "context" or "unified" diff.  One record is filled in by
 * change() for every change it is handed (see routine "change" to
 * understand the highly mnemonic field names).
 */
struct context_vec {
	int	a;		/* start line in old file */
	int	b;		/* end line in old file */
	int	c;		/* start line in new file */
	int	d;		/* end line in new file */
};
185 
/* Forward declarations of the file-local helpers defined below. */
static void	 diff_output(FILE *, const char *, ...);
static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
static void	 check(struct got_diff_state *, FILE *, FILE *, int);
static void	 range(FILE *, int, int, char *);
static void	 uni_range(FILE *, int, int);
static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
static void	 prune(struct got_diff_state *);
static void	 equiv(struct line *, int, struct line *, int, int *);
static void	 unravel(struct got_diff_state *, int);
static int	 unsort(struct line *, int, int *);
static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
static void	 sort(struct line *, int);
static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
static int	 ignoreline(char *);
static int	 asciifile(FILE *);
static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
static int	 newcand(struct got_diff_state *, int, int, int, int *);
static int	 search(struct got_diff_state *, int *, int, int);
static int	 skipline(FILE *);
static int	 isqrt(int);
static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
static int	 readhash(struct got_diff_state *, FILE *, int);
static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
static char	*preadline(int, size_t, off_t);
213 
/*
 * chrtran points to one of 2 translation tables:
 *   cup2low  if folding upper to lower case (D_IGNORECASE),
 *   clow2low if not folding case.
 *
 * clow2low is the identity mapping: every byte translates to itself.
 */
u_char clow2low[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
	0xfd, 0xfe, 0xff
};
244 
/*
 * Case-folding translation table selected when D_IGNORECASE is set:
 * 'A'-'Z' (0x41-0x5a) map to 'a'-'z', every other byte maps to itself.
 *
 * The previous table also folded the six non-letters sharing the
 * 0x40-0x5f column ('@', '[', '\\', ']', '^', '_') onto '`', '{', '|',
 * '}', '~' and DEL, which made those distinct characters compare equal
 * when ignoring case.
 *
 * Declared as unsigned char, the type u_char is an alias for, so the
 * table does not depend on the BSD typedef being visible.
 */
unsigned char cup2low[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
	/* '@' stays, 'A'-'Z' fold to 'a'-'z', '[' .. '_' stay */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
271 
/*
 * printf(3)-style helper: format the variable arguments according to
 * fmt and write the result to outfile.  The vfprintf() result is not
 * reported to the caller.
 */
static void
diff_output(FILE *outfile, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	(void)vfprintf(outfile, fmt, args);
	va_end(args);
}
281 
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
285 {
286 	const struct got_error *err = NULL;
287 	int i, *p;
288 	long *lp;
289 
290 	*rval = D_SAME;
291 	ds->anychange = 0;
292 	ds->lastline = 0;
293 	ds->lastmatchline = 0;
294 	ds->context_vec_ptr = ds->context_vec_start - 1;
295 	ds->max_context = 64;
296 	if (flags & D_IGNORECASE)
297 		ds->chrtran = cup2low;
298 	else
299 		ds->chrtran = clow2low;
300 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
301 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
302 		return NULL;
303 	}
304 	if (flags & D_EMPTY1) {
305 		f1 = fopen(_PATH_DEVNULL, "r");
306 		if (f1 == NULL) {
307 			err = got_error_from_errno();
308 			goto closem;
309 		}
310 	}
311 	else if (f1 == NULL) {
312 		args->status |= 2;
313 		goto closem;
314 	}
315 
316 	if (flags & D_EMPTY2) {
317 		f2 = fopen(_PATH_DEVNULL, "r");
318 		if (f2 == NULL) {
319 			err = got_error_from_errno();
320 			goto closem;
321 		}
322 	} else if (f2 == NULL) {
323 		args->status |= 2;
324 		goto closem;
325 	}
326 
327 	switch (files_differ(ds, f1, f2, flags)) {
328 	case 0:
329 		goto closem;
330 	case 1:
331 		break;
332 	default:
333 		/* error */
334 		args->status |= 2;
335 		goto closem;
336 	}
337 
338 	if ((flags & D_FORCEASCII) == 0 &&
339 	    (!asciifile(f1) || !asciifile(f2))) {
340 		*rval = D_BINARY;
341 		args->status |= 1;
342 		goto closem;
343 	}
344 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
345 		err = got_error_from_errno();
346 		goto closem;
347 	}
348 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
349 		err = got_error_from_errno();
350 		goto closem;
351 	}
352 
353 	prune(ds);
354 	sort(ds->sfile[0], ds->slen[0]);
355 	sort(ds->sfile[1], ds->slen[1]);
356 
357 	ds->member = (int *)ds->file[1];
358 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
359 	p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
360 	if (p == NULL) {
361 		err = got_error_from_errno();
362 		goto closem;
363 	}
364 	ds->member = p;
365 
366 	ds->class = (int *)ds->file[0];
367 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
368 		err = got_error_from_errno();
369 		goto closem;
370 	}
371 	p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
372 	if (p == NULL) {
373 		err = got_error_from_errno();
374 		goto closem;
375 	}
376 	ds->class = p;
377 
378 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
379 	if (ds->klist == NULL) {
380 		err = got_error_from_errno();
381 		goto closem;
382 	}
383 	ds->clen = 0;
384 	ds->clistlen = 100;
385 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
386 	if (ds->clist == NULL) {
387 		err = got_error_from_errno();
388 		goto closem;
389 	}
390 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
391 	if (i < 0) {
392 		err = got_error_from_errno();
393 		goto closem;
394 	}
395 
396 	p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
397 	if (p == NULL) {
398 		err = got_error_from_errno();
399 		goto closem;
400 	}
401 	ds->J = p;
402 	unravel(ds, ds->klist[i]);
403 
404 	lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
405 	if (lp == NULL) {
406 		err = got_error_from_errno();
407 		goto closem;
408 	}
409 	ds->ixold = lp;
410 	lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
411 	if (lp == NULL) {
412 		err = got_error_from_errno();
413 		goto closem;
414 	}
415 	ds->ixnew = lp;
416 	check(ds, f1, f2, flags);
417 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
418 	    flags))
419 		err = got_error_from_errno();
420 closem:
421 	free(ds->J);
422 	free(ds->member);
423 	free(ds->class);
424 	free(ds->clist);
425 	free(ds->klist);
426 	free(ds->ixold);
427 	free(ds->ixnew);
428 	if (ds->anychange) {
429 		args->status |= 1;
430 		if (*rval == D_SAME)
431 			*rval = D_DIFFER;
432 	}
433 	if (f1 != NULL)
434 		fclose(f1);
435 	if (f2 != NULL)
436 		fclose(f2);
437 
438 	return (err);
439 }
440 
441 /*
442  * Check to see if the given files differ.
443  * Returns 0 if they are the same, 1 if different, and -1 on error.
444  * XXX - could use code from cmp(1) [faster]
445  */
446 static int
447 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
448 {
449 	char buf1[BUFSIZ], buf2[BUFSIZ];
450 	size_t i, j;
451 
452 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
453 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
454 		return (1);
455 	for (;;) {
456 		i = fread(buf1, 1, sizeof(buf1), f1);
457 		j = fread(buf2, 1, sizeof(buf2), f2);
458 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
459 			return (-1);
460 		if (i != j)
461 			return (1);
462 		if (i == 0)
463 			return (0);
464 		if (memcmp(buf1, buf2, i) != 0)
465 			return (1);
466 	}
467 }
468 
469 static int
470 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
471 {
472 	struct line *p, *q;
473 	int j, h;
474 	size_t sz;
475 
476 	rewind(fd);
477 
478 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
479 	if (sz < 100)
480 		sz = 100;
481 
482 	p = calloc(sz + 3, sizeof(*p));
483 	if (p == NULL)
484 		return (-1);
485 	for (j = 0; (h = readhash(ds, fd, flags));) {
486 		if (j == sz) {
487 			sz = sz * 3 / 2;
488 			q = reallocarray(p, sz + 3, sizeof(*p));
489 			if (q == NULL) {
490 				free(p);
491 				return (-1);
492 			}
493 			p = q;
494 		}
495 		p[++j].value = h;
496 	}
497 	ds->len[i] = j;
498 	ds->file[i] = p;
499 
500 	return (0);
501 }
502 
503 static void
504 prune(struct got_diff_state *ds)
505 {
506 	int i, j;
507 
508 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
509 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
510 	    ds->pref++)
511 		;
512 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
513 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
514 	    ds->suff++)
515 		;
516 	for (j = 0; j < 2; j++) {
517 		ds->sfile[j] = ds->file[j] + ds->pref;
518 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
519 		for (i = 0; i <= ds->slen[j]; i++)
520 			ds->sfile[j][i].serial = i;
521 	}
522 }
523 
524 static void
525 equiv(struct line *a, int n, struct line *b, int m, int *c)
526 {
527 	int i, j;
528 
529 	i = j = 1;
530 	while (i <= n && j <= m) {
531 		if (a[i].value < b[j].value)
532 			a[i++].value = 0;
533 		else if (a[i].value == b[j].value)
534 			a[i++].value = j;
535 		else
536 			j++;
537 	}
538 	while (i <= n)
539 		a[i++].value = 0;
540 	b[m + 1].value = 0;
541 	j = 0;
542 	while (++j <= m) {
543 		c[j] = -b[j].serial;
544 		while (b[j + 1].value == b[j].value) {
545 			j++;
546 			c[j] = b[j].serial;
547 		}
548 	}
549 	c[j] = -1;
550 }
551 
/*
 * Approximate integer square root by Newton iteration (code taken from
 * ping.c).  The iteration stops as soon as two successive estimates are
 * within one of each other, so the result can be off by one from the
 * exact floor (e.g. isqrt(3) == 2).
 *
 * Returns 0 for n <= 0.  Negative input used to drive the iteration
 * into an endless loop or a division by zero.
 */
static int
isqrt(int n)
{
	int y, x = 1;

	if (n <= 0)
		return (0);

	do { /* newton was a stinker */
		y = x;
		x = n / x;
		x += y;
		x /= 2;
	} while ((x - y) > 1 || (x - y) < -1);

	return (x);
}
570 
/*
 * Build the k-candidate list (see the algorithm description at the top
 * of this file).
 *
 *  a	per-line class indices for file0, entries 1..n; 0 means the line
 *	has no counterpart in file1
 *  b	member array produced by equiv(): the first entry of every
 *	class is stored negated
 *  c	klist; c[k] is the clist index of the current k-candidate
 *
 * Returns the length k of the longest chain found, or -1 if newcand()
 * could not allocate memory.
 */
static int
stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
{
	int i, k, y, j, l;
	int oldc, tc, oldl, sq;
	u_int numtries, bound;
	int error;

	/*
	 * Limit the number of candidate replacements tried per line unless
	 * a minimal diff was requested.
	 */
	if (flags & D_MINIMAL)
		bound = UINT_MAX;
	else {
		sq = isqrt(n);
		bound = MAXIMUM(256, sq);
	}

	k = 0;
	/* Candidate 0 (x = 0, y = 0) terminates every chain. */
	c[0] = newcand(ds, 0, 0, 0, &error);
	if (error)
		return -1;
	for (i = 1; i <= n; i++) {
		j = a[i];
		if (j == 0)
			continue;
		/* The first member of a class is stored negated. */
		y = -b[j];
		oldl = 0;
		oldc = c[0];
		numtries = 0;
		do {
			/* "continue" jumps to the loop condition below. */
			if (y <= ds->clist[oldc].y)
				continue;
			l = search(ds, c, k, y);
			if (l != oldl + 1)
				oldc = c[l - 1];
			if (l <= k) {
				if (ds->clist[c[l]].y <= y)
					continue;
				/* Replace the l-candidate by one with a smaller y. */
				tc = c[l];
				c[l] = newcand(ds, i, y, oldc, &error);
				if (error)
					return -1;
				oldc = tc;
				oldl = l;
				numtries++;
			} else {
				/* y lies beyond every candidate: extend the klist. */
				c[l] = newcand(ds, i, y, oldc, &error);
				if (error)
					return -1;
				k++;
				break;
			}
		/* Remaining members of the class are positive entries of b. */
		} while ((y = b[++j]) > 0 && numtries < bound);
	}
	return (k);
}
625 
626 static int
627 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
628 {
629 	struct cand *q;
630 
631 	if (ds->clen == ds->clistlen) {
632 		ds->clistlen = ds->clistlen * 11 / 10;
633 		q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
634 		if (q == NULL) {
635 			*errorp = -1;
636 			free(ds->clist);
637 			ds->clist = NULL;
638 			return 0;
639 		}
640 		ds->clist = q;
641 	}
642 	q = ds->clist + ds->clen;
643 	q->x = x;
644 	q->y = y;
645 	q->pred = pred;
646 	*errorp = 0;
647 	return (ds->clen++);
648 }
649 
650 static int
651 search(struct got_diff_state *ds, int *c, int k, int y)
652 {
653 	int i, j, l, t;
654 
655 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
656 		return (k + 1);
657 	i = 0;
658 	j = k + 1;
659 	for (;;) {
660 		l = (i + j) / 2;
661 		if (l <= i)
662 			break;
663 		t = ds->clist[c[l]].y;
664 		if (t > y)
665 			j = l;
666 		else if (t < y)
667 			i = l;
668 		else
669 			return (l);
670 	}
671 	return (l + 1);
672 }
673 
674 static void
675 unravel(struct got_diff_state *ds, int p)
676 {
677 	struct cand *q;
678 	int i;
679 
680 	for (i = 0; i <= ds->len[0]; i++)
681 		ds->J[i] = i <= ds->pref ? i :
682 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
683 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
684 		ds->J[q->x + ds->pref] = q->y + ds->pref;
685 }
686 
687 /*
688  * Check does double duty:
689  *  1.	ferret out any fortuitous correspondences due
690  *	to confounding by hashing (which result in "jackpot")
691  *  2.  collect random access indexes to the two files
692  */
693 static void
694 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
695 {
696 	int i, j, jackpot, c, d;
697 	long ctold, ctnew;
698 
699 	rewind(f1);
700 	rewind(f2);
701 	j = 1;
702 	ds->ixold[0] = ds->ixnew[0] = 0;
703 	jackpot = 0;
704 	ctold = ctnew = 0;
705 	for (i = 1; i <= ds->len[0]; i++) {
706 		if (ds->J[i] == 0) {
707 			ds->ixold[i] = ctold += skipline(f1);
708 			continue;
709 		}
710 		while (j < ds->J[i]) {
711 			ds->ixnew[j] = ctnew += skipline(f2);
712 			j++;
713 		}
714 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
715 			for (;;) {
716 				c = getc(f1);
717 				d = getc(f2);
718 				/*
719 				 * GNU diff ignores a missing newline
720 				 * in one file for -b or -w.
721 				 */
722 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
723 					if (c == EOF && d == '\n') {
724 						ctnew++;
725 						break;
726 					} else if (c == '\n' && d == EOF) {
727 						ctold++;
728 						break;
729 					}
730 				}
731 				ctold++;
732 				ctnew++;
733 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
734 				    isspace(d)) {
735 					do {
736 						if (c == '\n')
737 							break;
738 						ctold++;
739 					} while (isspace(c = getc(f1)));
740 					do {
741 						if (d == '\n')
742 							break;
743 						ctnew++;
744 					} while (isspace(d = getc(f2)));
745 				} else if ((flags & D_IGNOREBLANKS)) {
746 					while (isspace(c) && c != '\n') {
747 						c = getc(f1);
748 						ctold++;
749 					}
750 					while (isspace(d) && d != '\n') {
751 						d = getc(f2);
752 						ctnew++;
753 					}
754 				}
755 				if (ds->chrtran[c] != ds->chrtran[d]) {
756 					jackpot++;
757 					ds->J[i] = 0;
758 					if (c != '\n' && c != EOF)
759 						ctold += skipline(f1);
760 					if (d != '\n' && c != EOF)
761 						ctnew += skipline(f2);
762 					break;
763 				}
764 				if (c == '\n' || c == EOF)
765 					break;
766 			}
767 		} else {
768 			for (;;) {
769 				ctold++;
770 				ctnew++;
771 				if ((c = getc(f1)) != (d = getc(f2))) {
772 					/* jackpot++; */
773 					ds->J[i] = 0;
774 					if (c != '\n' && c != EOF)
775 						ctold += skipline(f1);
776 					if (d != '\n' && c != EOF)
777 						ctnew += skipline(f2);
778 					break;
779 				}
780 				if (c == '\n' || c == EOF)
781 					break;
782 			}
783 		}
784 		ds->ixold[i] = ctold;
785 		ds->ixnew[j] = ctnew;
786 		j++;
787 	}
788 	for (; j <= ds->len[1]; j++)
789 		ds->ixnew[j] = ctnew += skipline(f2);
790 	/*
791 	 * if (jackpot)
792 	 *	fprintf(stderr, "jackpot\n");
793 	 */
794 }
795 
796 /* shellsort CACM #201 */
797 static void
798 sort(struct line *a, int n)
799 {
800 	struct line *ai, *aim, w;
801 	int j, m = 0, k;
802 
803 	if (n == 0)
804 		return;
805 	for (j = 1; j <= n; j *= 2)
806 		m = 2 * j - 1;
807 	for (m /= 2; m != 0; m /= 2) {
808 		k = n - m;
809 		for (j = 1; j <= k; j++) {
810 			for (ai = &a[j]; ai > a; ai -= m) {
811 				aim = &ai[m];
812 				if (aim < ai)
813 					break;	/* wraparound */
814 				if (aim->value > ai[0].value ||
815 				    (aim->value == ai[0].value &&
816 					aim->serial > ai[0].serial))
817 					break;
818 				w.value = ai[0].value;
819 				ai[0].value = aim->value;
820 				aim->value = w.value;
821 				w.serial = ai[0].serial;
822 				ai[0].serial = aim->serial;
823 				aim->serial = w.serial;
824 			}
825 		}
826 	}
827 }
828 
829 static int
830 unsort(struct line *f, int l, int *b)
831 {
832 	int *a, i;
833 
834 	a = calloc(l + 1, sizeof(*a));
835 	if (a == NULL)
836 		return (-1);
837 	for (i = 1; i <= l; i++)
838 		a[f[i].serial] = f[i].value;
839 	for (i = 1; i <= l; i++)
840 		b[i] = a[i];
841 	free(a);
842 
843 	return (0);
844 }
845 
/*
 * Read from f up to and including the next '\n' or until EOF.
 * Returns the number of characters consumed, counting the terminator
 * (newline or EOF) as one; the result is therefore always at least 1.
 */
static int
skipline(FILE *f)
{
	int count = 0, ch;

	do {
		ch = getc(f);
		count++;
	} while (ch != '\n' && ch != EOF);

	return (count);
}
855 
856 static int
857 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
858     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
859 {
860 	int m, i0, i1, j0, j1;
861 	int error = 0;
862 
863 	rewind(f1);
864 	rewind(f2);
865 	m = ds->len[0];
866 	ds->J[0] = 0;
867 	ds->J[m + 1] = ds->len[1] + 1;
868 	if (args->diff_format != D_EDIT) {
869 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
870 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
871 				i0++;
872 			j0 = ds->J[i0 - 1] + 1;
873 			i1 = i0 - 1;
874 			while (i1 < m && ds->J[i1 + 1] == 0)
875 				i1++;
876 			j1 = ds->J[i1 + 1] - 1;
877 			ds->J[i1] = j1;
878 			error = change(outfile, ds, args, file1, f1, file2, f2,
879 			    i0, i1, j0, j1, &flags);
880 			if (error)
881 				return (error);
882 		}
883 	} else {
884 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
885 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
886 				i0--;
887 			j0 = ds->J[i0 + 1] - 1;
888 			i1 = i0 + 1;
889 			while (i1 > 1 && ds->J[i1 - 1] == 0)
890 				i1--;
891 			j1 = ds->J[i1 - 1] + 1;
892 			ds->J[i1] = j1;
893 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
894 			    j1, j0, &flags);
895 			if (error)
896 				return (error);
897 		}
898 	}
899 	if (m == 0) {
900 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
901 		    1, ds->len[1], &flags);
902 		if (error)
903 			return (error);
904 	}
905 	if (args->diff_format == D_IFDEF) {
906 		for (;;) {
907 #define	c i0
908 			if ((c = getc(f1)) == EOF)
909 				return (0);
910 			diff_output(outfile, "%c", c);
911 		}
912 #undef c
913 	}
914 	if (ds->anychange != 0) {
915 		if (args->diff_format == D_CONTEXT)
916 			dump_context_vec(outfile, ds, args, f1, f2, flags);
917 		else if (args->diff_format == D_UNIFIED)
918 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
919 	}
920 
921 	return (0);
922 }
923 
/*
 * Print a line range: "a<separator>b" when a < b, otherwise just the
 * single number b (which equals a when the two are the same).
 */
static void
range(FILE *outfile, int a, int b, char *separator)
{
	if (a < b) {
		diff_output(outfile, "%d", a);
		diff_output(outfile, "%s%d", separator, b);
	} else
		diff_output(outfile, "%d", b);
}
931 
/*
 * Print a range in unified diff notation: "start,count" for several
 * lines, the bare line number for exactly one line, and "b,0" for an
 * empty range (a > b).
 */
static void
uni_range(FILE *outfile, int a, int b)
{
	if (a == b) {
		diff_output(outfile, "%d", b);
		return;
	}
	if (a > b) {
		diff_output(outfile, "%d,0", b);
		return;
	}
	diff_output(outfile, "%d,%d", a, b - a + 1);
}
942 
/*
 * Read up to rlen bytes from fd at offset off into a freshly allocated,
 * NUL-terminated buffer.  A newline at the very end of the data read is
 * dropped.
 *
 * Returns the buffer, which the caller must free(), or NULL if the
 * allocation or the read fails.
 */
static char *
preadline(int fd, size_t rlen, off_t off)
{
	char *buf = malloc(rlen + 1);
	ssize_t got;

	if (buf == NULL)
		return NULL;

	got = pread(fd, buf, rlen, off);
	if (got < 0) {
		free(buf);
		return NULL;
	}

	if (got > 0 && buf[got - 1] == '\n')
		got--;
	buf[got] = '\0';
	return (buf);
}
961 
/*
 * Decide whether a line should be ignored when looking for changes.
 * Takes ownership of line and frees it.  No line is ever ignored, so
 * the result is always 0.
 */
static int
ignoreline(char *line)
{
	const int ignore = 0;	/* do not ignore any lines */

	free(line);
	return (ignore);
}
968 
969 /*
970  * Indicate that there is a difference between lines a and b of the from file
971  * to get to lines c to d of the to file.  If a is greater than b then there
972  * are no lines in the from file involved and this means that there were
973  * lines appended (beginning at b).  If c is greater than d then there are
974  * lines missing from the to file.
975  */
976 static int
977 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
978     const char *file1, FILE *f1, const char *file2, FILE *f2,
979     int a, int b, int c, int d, int *pflags)
980 {
981 	int i;
982 
983 restart:
984 	if (args->diff_format != D_IFDEF && a > b && c > d)
985 		return (0);
986 	if (args->ignore_pats != NULL) {
987 		char *line;
988 		/*
989 		 * All lines in the change, insert, or delete must
990 		 * match an ignore pattern for the change to be
991 		 * ignored.
992 		 */
993 		if (a <= b) {		/* Changes and deletes. */
994 			for (i = a; i <= b; i++) {
995 				line = preadline(fileno(f1),
996 				    ds->ixold[i] - ds->ixold[i - 1],
997 				    ds->ixold[i - 1]);
998 				if (line == NULL)
999 					return (-1);
1000 				if (!ignoreline(line))
1001 					goto proceed;
1002 			}
1003 		}
1004 		if (a > b || c <= d) {	/* Changes and inserts. */
1005 			for (i = c; i <= d; i++) {
1006 				line = preadline(fileno(f2),
1007 				    ds->ixnew[i] - ds->ixnew[i - 1],
1008 				    ds->ixnew[i - 1]);
1009 				if (line == NULL)
1010 					return (-1);
1011 				if (!ignoreline(line))
1012 					goto proceed;
1013 			}
1014 		}
1015 		return (0);
1016 	}
1017 proceed:
1018 	if (*pflags & D_HEADER) {
1019 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1020 		*pflags &= ~D_HEADER;
1021 	}
1022 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1023 		/*
1024 		 * Allocate change records as needed.
1025 		 */
1026 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1027 			struct context_vec *cvp;
1028 			ptrdiff_t offset;
1029 			offset = ds->context_vec_ptr - ds->context_vec_start;
1030 			ds->max_context <<= 1;
1031 			ds->context_vec_start =
1032 			cvp = reallocarray(ds->context_vec_start,
1033 			    ds->max_context, sizeof(*ds->context_vec_start));
1034 			if (cvp == NULL) {
1035 				free(ds->context_vec_start);
1036 				return (-1);
1037 			}
1038 			ds->context_vec_start = cvp;
1039 			ds->context_vec_end = ds->context_vec_start +
1040 			    ds->max_context;
1041 			ds->context_vec_ptr = ds->context_vec_start + offset;
1042 		}
1043 		if (ds->anychange == 0) {
1044 			/*
1045 			 * Print the context/unidiff header first time through.
1046 			 */
1047 			print_header(outfile, ds, args, file1, file2);
1048 			ds->anychange = 1;
1049 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1050 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1051 			/*
1052 			 * If this change is more than 'diff_context' lines from the
1053 			 * previous change, dump the record and reset it.
1054 			 */
1055 			if (args->diff_format == D_CONTEXT)
1056 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1057 			else
1058 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1059 		}
1060 		ds->context_vec_ptr++;
1061 		ds->context_vec_ptr->a = a;
1062 		ds->context_vec_ptr->b = b;
1063 		ds->context_vec_ptr->c = c;
1064 		ds->context_vec_ptr->d = d;
1065 		return (0);
1066 	}
1067 	if (ds->anychange == 0)
1068 		ds->anychange = 1;
1069 	switch (args->diff_format) {
1070 	case D_BRIEF:
1071 		return (0);
1072 	case D_NORMAL:
1073 	case D_EDIT:
1074 		range(outfile, a, b, ",");
1075 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1076 		if (args->diff_format == D_NORMAL)
1077 			range(outfile, c, d, ",");
1078 		diff_output(outfile, "\n");
1079 		break;
1080 	case D_REVERSE:
1081 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1082 		range(outfile, a, b, " ");
1083 		diff_output(outfile, "\n");
1084 		break;
1085 	case D_NREVERSE:
1086 		if (a > b)
1087 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1088 		else {
1089 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1090 			if (!(c > d))
1091 				/* add changed lines */
1092 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1093 		}
1094 		break;
1095 	}
1096 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1097 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1098 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1099 			diff_output(outfile, "---\n");
1100 	}
1101 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1102 	if (i != 0 && args->diff_format == D_EDIT) {
1103 		/*
1104 		 * A non-zero return value for D_EDIT indicates that the
1105 		 * last line printed was a bare dot (".") that has been
1106 		 * escaped as ".." to prevent ed(1) from misinterpreting
1107 		 * it.  We have to add a substitute command to change this
1108 		 * back and restart where we left off.
1109 		 */
1110 		diff_output(outfile, ".\n");
1111 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1112 		b = a + i - 1;
1113 		a = b + 1;
1114 		c += i;
1115 		goto restart;
1116 	}
1117 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1118 		diff_output(outfile, ".\n");
1119 	if (ds->inifdef) {
1120 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1121 		ds->inifdef = 0;
1122 	}
1123 
1124 	return (0);
1125 }
1126 
1127 static int
1128 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1129     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1130 {
1131 	int i, j, c, lastc, col, nc;
1132 
1133 	/*
1134 	 * When doing #ifdef's, copy down to current line
1135 	 * if this is the first file, so that stuff makes it to output.
1136 	 */
1137 	if (args->diff_format == D_IFDEF && oldfile) {
1138 		long curpos = ftell(lb);
1139 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1140 		nc = f[a > b ? b : a - 1] - curpos;
1141 		for (i = 0; i < nc; i++)
1142 			diff_output(outfile, "%c", getc(lb));
1143 	}
1144 	if (a > b)
1145 		return (0);
1146 	if (args->diff_format == D_IFDEF) {
1147 		if (ds->inifdef) {
1148 			diff_output(outfile, "#else /* %s%s */\n",
1149 			    oldfile == 1 ? "!" : "", args->ifdefname);
1150 		} else {
1151 			if (oldfile)
1152 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1153 			else
1154 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1155 		}
1156 		ds->inifdef = 1 + oldfile;
1157 	}
1158 	for (i = a; i <= b; i++) {
1159 		fseek(lb, f[i - 1], SEEK_SET);
1160 		nc = f[i] - f[i - 1];
1161 		if (args->diff_format != D_IFDEF && ch != '\0') {
1162 			diff_output(outfile, "%c", ch);
1163 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1164 			    || args->diff_format == D_UNIFIED))
1165 				diff_output(outfile, "\t");
1166 			else if (args->diff_format != D_UNIFIED)
1167 				diff_output(outfile, " ");
1168 		}
1169 		col = 0;
1170 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1171 			if ((c = getc(lb)) == EOF) {
1172 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1173 				    args->diff_format == D_NREVERSE)
1174 					warnx("No newline at end of file");
1175 				else
1176 					diff_output(outfile, "\n\\ No newline at end of "
1177 					    "file\n");
1178 				return (0);
1179 			}
1180 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1181 				do {
1182 					diff_output(outfile, " ");
1183 				} while (++col & 7);
1184 			} else {
1185 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1186 				    && lastc == '.') {
1187 					/*
1188 					 * Don't print a bare "." line
1189 					 * since that will confuse ed(1).
1190 					 * Print ".." instead and return,
1191 					 * giving the caller an offset
1192 					 * from which to restart.
1193 					 */
1194 					diff_output(outfile, ".\n");
1195 					return (i - a + 1);
1196 				}
1197 				diff_output(outfile, "%c", c);
1198 				col++;
1199 			}
1200 		}
1201 	}
1202 	return (0);
1203 }
1204 
1205 /*
1206  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1207  */
1208 static int
1209 readhash(struct got_diff_state *ds, FILE *f, int flags)
1210 {
1211 	int i, t, space;
1212 	int sum;
1213 
1214 	sum = 1;
1215 	space = 0;
1216 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1217 		if (flags & D_IGNORECASE)
1218 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1219 				if (t == EOF) {
1220 					if (i == 0)
1221 						return (0);
1222 					break;
1223 				}
1224 				sum = sum * 127 + ds->chrtran[t];
1225 			}
1226 		else
1227 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1228 				if (t == EOF) {
1229 					if (i == 0)
1230 						return (0);
1231 					break;
1232 				}
1233 				sum = sum * 127 + t;
1234 			}
1235 	} else {
1236 		for (i = 0;;) {
1237 			switch (t = getc(f)) {
1238 			case '\t':
1239 			case '\r':
1240 			case '\v':
1241 			case '\f':
1242 			case ' ':
1243 				space++;
1244 				continue;
1245 			default:
1246 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1247 					i++;
1248 					space = 0;
1249 				}
1250 				sum = sum * 127 + ds->chrtran[t];
1251 				i++;
1252 				continue;
1253 			case EOF:
1254 				if (i == 0)
1255 					return (0);
1256 				/* FALLTHROUGH */
1257 			case '\n':
1258 				break;
1259 			}
1260 			break;
1261 		}
1262 	}
1263 	/*
1264 	 * There is a remote possibility that we end up with a zero sum.
1265 	 * Zero is used as an EOF marker, so return 1 instead.
1266 	 */
1267 	return (sum == 0 ? 1 : sum);
1268 }
1269 
/*
 * Report whether f looks like a text file: rewind it, read up to BUFSIZ
 * bytes from the start and return 0 if any of them is a NUL byte,
 * 1 otherwise.  A NULL stream is reported as text.
 */
static int
asciifile(FILE *f)
{
	unsigned char head[BUFSIZ];
	size_t got;

	if (f != NULL) {
		rewind(f);
		got = fread(head, 1, sizeof(head), f);
		if (memchr(head, '\0', got) != NULL)
			return (0);
	}
	return (1);
}
1283 
1284 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1285 
1286 static char *
1287 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1288 {
1289 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1290 	size_t nc;
1291 	int last = ds->lastline;
1292 	char *state = NULL;
1293 
1294 	ds->lastline = pos;
1295 	while (pos > last) {
1296 		fseek(fp, f[pos - 1], SEEK_SET);
1297 		nc = f[pos] - f[pos - 1];
1298 		if (nc >= sizeof(buf))
1299 			nc = sizeof(buf) - 1;
1300 		nc = fread(buf, 1, nc, fp);
1301 		if (nc > 0) {
1302 			buf[nc] = '\0';
1303 			buf[strcspn(buf, "\n")] = '\0';
1304 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1305 				if (begins_with(buf, "private:")) {
1306 					if (!state)
1307 						state = " (private)";
1308 				} else if (begins_with(buf, "protected:")) {
1309 					if (!state)
1310 						state = " (protected)";
1311 				} else if (begins_with(buf, "public:")) {
1312 					if (!state)
1313 						state = " (public)";
1314 				} else {
1315 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1316 					if (state)
1317 						strlcat(ds->lastbuf, state,
1318 						    sizeof ds->lastbuf);
1319 					ds->lastmatchline = pos;
1320 					return ds->lastbuf;
1321 				}
1322 			}
1323 		}
1324 		pos--;
1325 	}
1326 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1327 }
1328 
1329 /* dump accumulated "context" diff changes */
1330 static void
1331 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1332     FILE *f1, FILE *f2, int flags)
1333 {
1334 	struct context_vec *cvp = ds->context_vec_start;
1335 	int lowa, upb, lowc, upd, do_output;
1336 	int a, b, c, d;
1337 	char ch, *f;
1338 
1339 	if (ds->context_vec_start > ds->context_vec_ptr)
1340 		return;
1341 
1342 	b = d = 0;		/* gcc */
1343 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1344 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1345 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1346 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1347 
1348 	diff_output(outfile, "***************");
1349 	if ((flags & D_PROTOTYPE)) {
1350 		f = match_function(ds, ds->ixold, lowa-1, f1);
1351 		if (f != NULL)
1352 			diff_output(outfile, " %s", f);
1353 	}
1354 	diff_output(outfile, "\n*** ");
1355 	range(outfile, lowa, upb, ",");
1356 	diff_output(outfile, " ****\n");
1357 
1358 	/*
1359 	 * Output changes to the "old" file.  The first loop suppresses
1360 	 * output if there were no changes to the "old" file (we'll see
1361 	 * the "old" lines as context in the "new" list).
1362 	 */
1363 	do_output = 0;
1364 	for (; cvp <= ds->context_vec_ptr; cvp++)
1365 		if (cvp->a <= cvp->b) {
1366 			cvp = ds->context_vec_start;
1367 			do_output++;
1368 			break;
1369 		}
1370 	if (do_output) {
1371 		while (cvp <= ds->context_vec_ptr) {
1372 			a = cvp->a;
1373 			b = cvp->b;
1374 			c = cvp->c;
1375 			d = cvp->d;
1376 
1377 			if (a <= b && c <= d)
1378 				ch = 'c';
1379 			else
1380 				ch = (a <= b) ? 'd' : 'a';
1381 
1382 			if (ch == 'a')
1383 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1384 			else {
1385 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1386 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1387 				    ch == 'c' ? '!' : '-', 0, flags);
1388 			}
1389 			lowa = b + 1;
1390 			cvp++;
1391 		}
1392 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1393 	}
1394 	/* output changes to the "new" file */
1395 	diff_output(outfile, "--- ");
1396 	range(outfile, lowc, upd, ",");
1397 	diff_output(outfile, " ----\n");
1398 
1399 	do_output = 0;
1400 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1401 		if (cvp->c <= cvp->d) {
1402 			cvp = ds->context_vec_start;
1403 			do_output++;
1404 			break;
1405 		}
1406 	if (do_output) {
1407 		while (cvp <= ds->context_vec_ptr) {
1408 			a = cvp->a;
1409 			b = cvp->b;
1410 			c = cvp->c;
1411 			d = cvp->d;
1412 
1413 			if (a <= b && c <= d)
1414 				ch = 'c';
1415 			else
1416 				ch = (a <= b) ? 'd' : 'a';
1417 
1418 			if (ch == 'd')
1419 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1420 			else {
1421 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1422 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1423 				    ch == 'c' ? '!' : '+', 0, flags);
1424 			}
1425 			lowc = d + 1;
1426 			cvp++;
1427 		}
1428 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1429 	}
1430 	ds->context_vec_ptr = ds->context_vec_start - 1;
1431 }
1432 
1433 /* dump accumulated "unified" diff changes */
1434 static void
1435 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1436     FILE *f1, FILE *f2, int flags)
1437 {
1438 	struct context_vec *cvp = ds->context_vec_start;
1439 	int lowa, upb, lowc, upd;
1440 	int a, b, c, d;
1441 	char ch, *f;
1442 
1443 	if (ds->context_vec_start > ds->context_vec_ptr)
1444 		return;
1445 
1446 	b = d = 0;		/* gcc */
1447 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1448 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1449 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1450 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1451 
1452 	diff_output(outfile, "@@ -");
1453 	uni_range(outfile, lowa, upb);
1454 	diff_output(outfile, " +");
1455 	uni_range(outfile, lowc, upd);
1456 	diff_output(outfile, " @@");
1457 	if ((flags & D_PROTOTYPE)) {
1458 		f = match_function(ds, ds->ixold, lowa-1, f1);
1459 		if (f != NULL)
1460 			diff_output(outfile, " %s", f);
1461 	}
1462 	diff_output(outfile, "\n");
1463 
1464 	/*
1465 	 * Output changes in "unified" diff format--the old and new lines
1466 	 * are printed together.
1467 	 */
1468 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1469 		a = cvp->a;
1470 		b = cvp->b;
1471 		c = cvp->c;
1472 		d = cvp->d;
1473 
1474 		/*
1475 		 * c: both new and old changes
1476 		 * d: only changes in the old file
1477 		 * a: only changes in the new file
1478 		 */
1479 		if (a <= b && c <= d)
1480 			ch = 'c';
1481 		else
1482 			ch = (a <= b) ? 'd' : 'a';
1483 
1484 		switch (ch) {
1485 		case 'c':
1486 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1487 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1488 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1489 			break;
1490 		case 'd':
1491 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1492 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1493 			break;
1494 		case 'a':
1495 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1496 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1497 			break;
1498 		}
1499 		lowa = b + 1;
1500 		lowc = d + 1;
1501 	}
1502 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1503 
1504 	ds->context_vec_ptr = ds->context_vec_start - 1;
1505 }
1506 
1507 static void
1508 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1509     const char *file1, const char *file2)
1510 {
1511 	if (args->label[0] != NULL)
1512 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1513 		    args->label[0]);
1514 	else
1515 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1516 		    file1, ctime(&ds->stb1.st_mtime));
1517 	if (args->label[1] != NULL)
1518 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1519 		    args->label[1]);
1520 	else
1521 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1522 		    file2, ctime(&ds->stb2.st_mtime));
1523 }