Gotweb

Blob

Date:: Mon Apr 2 17:31:10 2018 UTC
Message:: in diffreg.c, return value from preadline() must be freed
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_lib_diff.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 };
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 static void	 diff_output(FILE *, const char *, ...);
187 static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
189 static void	 range(FILE *, int, int, char *);
190 static void	 uni_range(FILE *, int, int);
191 static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void	 prune(struct got_diff_state *);
195 static void	 equiv(struct line *, int, struct line *, int, int *);
196 static void	 unravel(struct got_diff_state *, int);
197 static int	 unsort(struct line *, int, int *);
198 static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void	 sort(struct line *, int);
200 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int	 ignoreline(char *);
202 static int	 asciifile(FILE *);
203 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int	 newcand(struct got_diff_state *, int, int, int, int *);
205 static int	 search(struct got_diff_state *, int *, int, int);
206 static int	 skipline(FILE *);
207 static int	 isqrt(int);
208 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int	 readhash(struct got_diff_state *, FILE *, int);
210 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char	*preadline(int, size_t, off_t);
213 
214 /*
215  * chrtran points to one of 2 translation tables: cup2low if folding upper to
216  * lower case clow2low if not folding case
217  */
218 u_char clow2low[256] = {
219 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 	0xfd, 0xfe, 0xff
243 };
244 
245 u_char cup2low[256] = {
246 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 	0xfd, 0xfe, 0xff
270 };
271 
272 static void
273 diff_output(FILE *outfile, const char *fmt, ...)
274 {
275 	va_list ap;
276 
277 	va_start(ap, fmt);
278 	vfprintf(outfile, fmt, ap);
279 	va_end(ap);
280 }
281 
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
285 {
286 	const struct got_error *err = NULL;
287 	int i, *p;
288 	long *lp;
289 
290 	*rval = D_SAME;
291 	ds->anychange = 0;
292 	ds->lastline = 0;
293 	ds->lastmatchline = 0;
294 	ds->context_vec_ptr = ds->context_vec_start - 1;
295 	ds->max_context = 64;
296 	if (flags & D_IGNORECASE)
297 		ds->chrtran = cup2low;
298 	else
299 		ds->chrtran = clow2low;
300 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
301 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
302 		return NULL;
303 	}
304 	if (flags & D_EMPTY1) {
305 		f1 = fopen(_PATH_DEVNULL, "r");
306 		if (f1 == NULL) {
307 			err = got_error_from_errno();
308 			goto closem;
309 		}
310 	}
311 	else if (f1 == NULL) {
312 		args->status |= 2;
313 		goto closem;
314 	}
315 
316 	if (flags & D_EMPTY2) {
317 		f2 = fopen(_PATH_DEVNULL, "r");
318 		if (f2 == NULL) {
319 			err = got_error_from_errno();
320 			goto closem;
321 		}
322 	} else if (f2 == NULL) {
323 		args->status |= 2;
324 		goto closem;
325 	}
326 
327 	switch (files_differ(ds, f1, f2, flags)) {
328 	case 0:
329 		goto closem;
330 	case 1:
331 		break;
332 	default:
333 		/* error */
334 		args->status |= 2;
335 		goto closem;
336 	}
337 
338 	if ((flags & D_FORCEASCII) == 0 &&
339 	    (!asciifile(f1) || !asciifile(f2))) {
340 		*rval = D_BINARY;
341 		args->status |= 1;
342 		goto closem;
343 	}
344 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
345 		err = got_error_from_errno();
346 		goto closem;
347 	}
348 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
349 		err = got_error_from_errno();
350 		goto closem;
351 	}
352 
353 	prune(ds);
354 	sort(ds->sfile[0], ds->slen[0]);
355 	sort(ds->sfile[1], ds->slen[1]);
356 
357 	ds->member = (int *)ds->file[1];
358 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
359 	p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
360 	if (p == NULL) {
361 		err = got_error_from_errno();
362 		goto closem;
363 	}
364 	ds->member = p;
365 
366 	ds->class = (int *)ds->file[0];
367 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
368 		err = got_error_from_errno();
369 		goto closem;
370 	}
371 	p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
372 	if (p == NULL) {
373 		err = got_error_from_errno();
374 		goto closem;
375 	}
376 	ds->class = p;
377 
378 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
379 	if (ds->klist == NULL) {
380 		err = got_error_from_errno();
381 		goto closem;
382 	}
383 	ds->clen = 0;
384 	ds->clistlen = 100;
385 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
386 	if (ds->clist == NULL) {
387 		err = got_error_from_errno();
388 		goto closem;
389 	}
390 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
391 	if (i < 0) {
392 		err = got_error_from_errno();
393 		goto closem;
394 	}
395 
396 	p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
397 	if (p == NULL) {
398 		err = got_error_from_errno();
399 		goto closem;
400 	}
401 	ds->J = p;
402 	unravel(ds, ds->klist[i]);
403 
404 	lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
405 	if (lp == NULL) {
406 		err = got_error_from_errno();
407 		goto closem;
408 	}
409 	ds->ixold = lp;
410 	lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
411 	if (lp == NULL) {
412 		err = got_error_from_errno();
413 		goto closem;
414 	}
415 	ds->ixnew = lp;
416 	check(ds, f1, f2, flags);
417 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
418 	    flags))
419 		err = got_error_from_errno();
420 closem:
421 	free(ds->J);
422 	free(ds->member);
423 	free(ds->class);
424 	free(ds->clist);
425 	free(ds->klist);
426 	free(ds->ixold);
427 	free(ds->ixnew);
428 	if (ds->anychange) {
429 		args->status |= 1;
430 		if (*rval == D_SAME)
431 			*rval = D_DIFFER;
432 	}
433 	if (f1 != NULL)
434 		fclose(f1);
435 	if (f2 != NULL)
436 		fclose(f2);
437 
438 	return (err);
439 }
440 
441 /*
442  * Check to see if the given files differ.
443  * Returns 0 if they are the same, 1 if different, and -1 on error.
444  * XXX - could use code from cmp(1) [faster]
445  */
446 static int
447 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
448 {
449 	char buf1[BUFSIZ], buf2[BUFSIZ];
450 	size_t i, j;
451 
452 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
453 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
454 		return (1);
455 	for (;;) {
456 		i = fread(buf1, 1, sizeof(buf1), f1);
457 		j = fread(buf2, 1, sizeof(buf2), f2);
458 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
459 			return (-1);
460 		if (i != j)
461 			return (1);
462 		if (i == 0)
463 			return (0);
464 		if (memcmp(buf1, buf2, i) != 0)
465 			return (1);
466 	}
467 }
468 
469 static int
470 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
471 {
472 	struct line *p, *q;
473 	int j, h;
474 	size_t sz;
475 
476 	rewind(fd);
477 
478 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
479 	if (sz < 100)
480 		sz = 100;
481 
482 	p = calloc(sz + 3, sizeof(*p));
483 	if (p == NULL)
484 		return (-1);
485 	for (j = 0; (h = readhash(ds, fd, flags));) {
486 		if (j == sz) {
487 			sz = sz * 3 / 2;
488 			q = reallocarray(p, sz + 3, sizeof(*p));
489 			if (q == NULL) {
490 				free(p);
491 				return (-1);
492 			}
493 			p = q;
494 		}
495 		p[++j].value = h;
496 	}
497 	ds->len[i] = j;
498 	ds->file[i] = p;
499 
500 	return (0);
501 }
502 
503 static void
504 prune(struct got_diff_state *ds)
505 {
506 	int i, j;
507 
508 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
509 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
510 	    ds->pref++)
511 		;
512 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
513 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
514 	    ds->suff++)
515 		;
516 	for (j = 0; j < 2; j++) {
517 		ds->sfile[j] = ds->file[j] + ds->pref;
518 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
519 		for (i = 0; i <= ds->slen[j]; i++)
520 			ds->sfile[j][i].serial = i;
521 	}
522 }
523 
524 static void
525 equiv(struct line *a, int n, struct line *b, int m, int *c)
526 {
527 	int i, j;
528 
529 	i = j = 1;
530 	while (i <= n && j <= m) {
531 		if (a[i].value < b[j].value)
532 			a[i++].value = 0;
533 		else if (a[i].value == b[j].value)
534 			a[i++].value = j;
535 		else
536 			j++;
537 	}
538 	while (i <= n)
539 		a[i++].value = 0;
540 	b[m + 1].value = 0;
541 	j = 0;
542 	while (++j <= m) {
543 		c[j] = -b[j].serial;
544 		while (b[j + 1].value == b[j].value) {
545 			j++;
546 			c[j] = b[j].serial;
547 		}
548 	}
549 	c[j] = -1;
550 }
551 
552 /* Code taken from ping.c */
553 static int
554 isqrt(int n)
555 {
556 	int y, x = 1;
557 
558 	if (n == 0)
559 		return (0);
560 
561 	do { /* newton was a stinker */
562 		y = x;
563 		x = n / x;
564 		x += y;
565 		x /= 2;
566 	} while ((x - y) > 1 || (x - y) < -1);
567 
568 	return (x);
569 }
570 
571 static int
572 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
573 {
574 	int i, k, y, j, l;
575 	int oldc, tc, oldl, sq;
576 	u_int numtries, bound;
577 	int error;
578 
579 	if (flags & D_MINIMAL)
580 		bound = UINT_MAX;
581 	else {
582 		sq = isqrt(n);
583 		bound = MAXIMUM(256, sq);
584 	}
585 
586 	k = 0;
587 	c[0] = newcand(ds, 0, 0, 0, &error);
588 	if (error)
589 		return -1;
590 	for (i = 1; i <= n; i++) {
591 		j = a[i];
592 		if (j == 0)
593 			continue;
594 		y = -b[j];
595 		oldl = 0;
596 		oldc = c[0];
597 		numtries = 0;
598 		do {
599 			if (y <= ds->clist[oldc].y)
600 				continue;
601 			l = search(ds, c, k, y);
602 			if (l != oldl + 1)
603 				oldc = c[l - 1];
604 			if (l <= k) {
605 				if (ds->clist[c[l]].y <= y)
606 					continue;
607 				tc = c[l];
608 				c[l] = newcand(ds, i, y, oldc, &error);
609 				if (error)
610 					return -1;
611 				oldc = tc;
612 				oldl = l;
613 				numtries++;
614 			} else {
615 				c[l] = newcand(ds, i, y, oldc, &error);
616 				if (error)
617 					return -1;
618 				k++;
619 				break;
620 			}
621 		} while ((y = b[++j]) > 0 && numtries < bound);
622 	}
623 	return (k);
624 }
625 
626 static int
627 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
628 {
629 	struct cand *q;
630 
631 	if (ds->clen == ds->clistlen) {
632 		ds->clistlen = ds->clistlen * 11 / 10;
633 		q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
634 		if (q == NULL) {
635 			*errorp = -1;
636 			free(ds->clist);
637 			ds->clist = NULL;
638 			return 0;
639 		}
640 		ds->clist = q;
641 	}
642 	q = ds->clist + ds->clen;
643 	q->x = x;
644 	q->y = y;
645 	q->pred = pred;
646 	*errorp = 0;
647 	return (ds->clen++);
648 }
649 
650 static int
651 search(struct got_diff_state *ds, int *c, int k, int y)
652 {
653 	int i, j, l, t;
654 
655 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
656 		return (k + 1);
657 	i = 0;
658 	j = k + 1;
659 	for (;;) {
660 		l = (i + j) / 2;
661 		if (l <= i)
662 			break;
663 		t = ds->clist[c[l]].y;
664 		if (t > y)
665 			j = l;
666 		else if (t < y)
667 			i = l;
668 		else
669 			return (l);
670 	}
671 	return (l + 1);
672 }
673 
674 static void
675 unravel(struct got_diff_state *ds, int p)
676 {
677 	struct cand *q;
678 	int i;
679 
680 	for (i = 0; i <= ds->len[0]; i++)
681 		ds->J[i] = i <= ds->pref ? i :
682 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
683 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
684 		ds->J[q->x + ds->pref] = q->y + ds->pref;
685 }
686 
687 /*
688  * Check does double duty:
689  *  1.	ferret out any fortuitous correspondences due
690  *	to confounding by hashing (which result in "jackpot")
691  *  2.  collect random access indexes to the two files
692  */
693 static void
694 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
695 {
696 	int i, j, jackpot, c, d;
697 	long ctold, ctnew;
698 
699 	rewind(f1);
700 	rewind(f2);
701 	j = 1;
702 	ds->ixold[0] = ds->ixnew[0] = 0;
703 	jackpot = 0;
704 	ctold = ctnew = 0;
705 	for (i = 1; i <= ds->len[0]; i++) {
706 		if (ds->J[i] == 0) {
707 			ds->ixold[i] = ctold += skipline(f1);
708 			continue;
709 		}
710 		while (j < ds->J[i]) {
711 			ds->ixnew[j] = ctnew += skipline(f2);
712 			j++;
713 		}
714 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
715 			for (;;) {
716 				c = getc(f1);
717 				d = getc(f2);
718 				/*
719 				 * GNU diff ignores a missing newline
720 				 * in one file for -b or -w.
721 				 */
722 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
723 					if (c == EOF && d == '\n') {
724 						ctnew++;
725 						break;
726 					} else if (c == '\n' && d == EOF) {
727 						ctold++;
728 						break;
729 					}
730 				}
731 				ctold++;
732 				ctnew++;
733 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
734 				    isspace(d)) {
735 					do {
736 						if (c == '\n')
737 							break;
738 						ctold++;
739 					} while (isspace(c = getc(f1)));
740 					do {
741 						if (d == '\n')
742 							break;
743 						ctnew++;
744 					} while (isspace(d = getc(f2)));
745 				} else if ((flags & D_IGNOREBLANKS)) {
746 					while (isspace(c) && c != '\n') {
747 						c = getc(f1);
748 						ctold++;
749 					}
750 					while (isspace(d) && d != '\n') {
751 						d = getc(f2);
752 						ctnew++;
753 					}
754 				}
755 				if (ds->chrtran[c] != ds->chrtran[d]) {
756 					jackpot++;
757 					ds->J[i] = 0;
758 					if (c != '\n' && c != EOF)
759 						ctold += skipline(f1);
760 					if (d != '\n' && c != EOF)
761 						ctnew += skipline(f2);
762 					break;
763 				}
764 				if (c == '\n' || c == EOF)
765 					break;
766 			}
767 		} else {
768 			for (;;) {
769 				ctold++;
770 				ctnew++;
771 				if ((c = getc(f1)) != (d = getc(f2))) {
772 					/* jackpot++; */
773 					ds->J[i] = 0;
774 					if (c != '\n' && c != EOF)
775 						ctold += skipline(f1);
776 					if (d != '\n' && c != EOF)
777 						ctnew += skipline(f2);
778 					break;
779 				}
780 				if (c == '\n' || c == EOF)
781 					break;
782 			}
783 		}
784 		ds->ixold[i] = ctold;
785 		ds->ixnew[j] = ctnew;
786 		j++;
787 	}
788 	for (; j <= ds->len[1]; j++)
789 		ds->ixnew[j] = ctnew += skipline(f2);
790 	/*
791 	 * if (jackpot)
792 	 *	fprintf(stderr, "jackpot\n");
793 	 */
794 }
795 
796 /* shellsort CACM #201 */
797 static void
798 sort(struct line *a, int n)
799 {
800 	struct line *ai, *aim, w;
801 	int j, m = 0, k;
802 
803 	if (n == 0)
804 		return;
805 	for (j = 1; j <= n; j *= 2)
806 		m = 2 * j - 1;
807 	for (m /= 2; m != 0; m /= 2) {
808 		k = n - m;
809 		for (j = 1; j <= k; j++) {
810 			for (ai = &a[j]; ai > a; ai -= m) {
811 				aim = &ai[m];
812 				if (aim < ai)
813 					break;	/* wraparound */
814 				if (aim->value > ai[0].value ||
815 				    (aim->value == ai[0].value &&
816 					aim->serial > ai[0].serial))
817 					break;
818 				w.value = ai[0].value;
819 				ai[0].value = aim->value;
820 				aim->value = w.value;
821 				w.serial = ai[0].serial;
822 				ai[0].serial = aim->serial;
823 				aim->serial = w.serial;
824 			}
825 		}
826 	}
827 }
828 
829 static int
830 unsort(struct line *f, int l, int *b)
831 {
832 	int *a, i;
833 
834 	a = calloc(l + 1, sizeof(*a));
835 	if (a == NULL)
836 		return (-1);
837 	for (i = 1; i <= l; i++)
838 		a[f[i].serial] = f[i].value;
839 	for (i = 1; i <= l; i++)
840 		b[i] = a[i];
841 	free(a);
842 
843 	return (0);
844 }
845 
846 static int
847 skipline(FILE *f)
848 {
849 	int i, c;
850 
851 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
852 		continue;
853 	return (i);
854 }
855 
856 static int
857 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
858     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
859 {
860 	int m, i0, i1, j0, j1;
861 	int error = 0;
862 
863 	rewind(f1);
864 	rewind(f2);
865 	m = ds->len[0];
866 	ds->J[0] = 0;
867 	ds->J[m + 1] = ds->len[1] + 1;
868 	if (args->diff_format != D_EDIT) {
869 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
870 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
871 				i0++;
872 			j0 = ds->J[i0 - 1] + 1;
873 			i1 = i0 - 1;
874 			while (i1 < m && ds->J[i1 + 1] == 0)
875 				i1++;
876 			j1 = ds->J[i1 + 1] - 1;
877 			ds->J[i1] = j1;
878 			error = change(outfile, ds, args, file1, f1, file2, f2,
879 			    i0, i1, j0, j1, &flags);
880 			if (error)
881 				return (error);
882 		}
883 	} else {
884 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
885 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
886 				i0--;
887 			j0 = ds->J[i0 + 1] - 1;
888 			i1 = i0 + 1;
889 			while (i1 > 1 && ds->J[i1 - 1] == 0)
890 				i1--;
891 			j1 = ds->J[i1 - 1] + 1;
892 			ds->J[i1] = j1;
893 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
894 			    j1, j0, &flags);
895 			if (error)
896 				return (error);
897 		}
898 	}
899 	if (m == 0) {
900 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
901 		    1, ds->len[1], &flags);
902 		if (error)
903 			return (error);
904 	}
905 	if (args->diff_format == D_IFDEF) {
906 		for (;;) {
907 #define	c i0
908 			if ((c = getc(f1)) == EOF)
909 				return (0);
910 			diff_output(outfile, "%c", c);
911 		}
912 #undef c
913 	}
914 	if (ds->anychange != 0) {
915 		if (args->diff_format == D_CONTEXT)
916 			dump_context_vec(outfile, ds, args, f1, f2, flags);
917 		else if (args->diff_format == D_UNIFIED)
918 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
919 	}
920 
921 	return (0);
922 }
923 
924 static void
925 range(FILE *outfile, int a, int b, char *separator)
926 {
927 	diff_output(outfile, "%d", a > b ? b : a);
928 	if (a < b)
929 		diff_output(outfile, "%s%d", separator, b);
930 }
931 
932 static void
933 uni_range(FILE *outfile, int a, int b)
934 {
935 	if (a < b)
936 		diff_output(outfile, "%d,%d", a, b - a + 1);
937 	else if (a == b)
938 		diff_output(outfile, "%d", b);
939 	else
940 		diff_output(outfile, "%d,0", b);
941 }
942 
943 static char *
944 preadline(int fd, size_t rlen, off_t off)
945 {
946 	char *line;
947 	ssize_t nr;
948 
949 	line = malloc(rlen + 1);
950 	if (line == NULL)
951 		return NULL;
952 	if ((nr = pread(fd, line, rlen, off)) < 0) {
953 		free(line);
954 		return NULL;
955 	}
956 	if (nr > 0 && line[nr-1] == '\n')
957 		nr--;
958 	line[nr] = '\0';
959 	return (line);
960 }
961 
962 static int
963 ignoreline(char *line)
964 {
965 	return 0; /* do not ignore any lines */
966 }
967 
968 /*
969  * Indicate that there is a difference between lines a and b of the from file
970  * to get to lines c to d of the to file.  If a is greater then b then there
971  * are no lines in the from file involved and this means that there were
972  * lines appended (beginning at b).  If c is greater than d then there are
973  * lines missing from the to file.
974  */
975 static int
976 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
977     const char *file1, FILE *f1, const char *file2, FILE *f2,
978     int a, int b, int c, int d, int *pflags)
979 {
980 	int i;
981 
982 restart:
983 	if (args->diff_format != D_IFDEF && a > b && c > d)
984 		return (0);
985 	if (args->ignore_pats != NULL) {
986 		/*
987 		 * All lines in the change, insert, or delete must
988 		 * match an ignore pattern for the change to be
989 		 * ignored.
990 		 */
991 		if (a <= b) {		/* Changes and deletes. */
992 			for (i = a; i <= b; i++) {
993 				char *line = preadline(fileno(f1),
994 				    ds->ixold[i] - ds->ixold[i - 1],
995 				    ds->ixold[i - 1]);
996 				if (line == NULL)
997 					return (-1);
998 				if (!ignoreline(line)) {
999 					free(line);
1000 					goto proceed;
1001 				}
1002 				free(line);
1003 			}
1004 		}
1005 		if (a > b || c <= d) {	/* Changes and inserts. */
1006 			for (i = c; i <= d; i++) {
1007 				char *line = preadline(fileno(f2),
1008 				    ds->ixnew[i] - ds->ixnew[i - 1],
1009 				    ds->ixnew[i - 1]);
1010 				if (line == NULL)
1011 					return (-1);
1012 				if (!ignoreline(line)) {
1013 					free(line);
1014 					goto proceed;
1015 				}
1016 				free(line);
1017 			}
1018 		}
1019 		return (0);
1020 	}
1021 proceed:
1022 	if (*pflags & D_HEADER) {
1023 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1024 		*pflags &= ~D_HEADER;
1025 	}
1026 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1027 		/*
1028 		 * Allocate change records as needed.
1029 		 */
1030 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1031 			struct context_vec *cvp;
1032 			ptrdiff_t offset;
1033 			offset = ds->context_vec_ptr - ds->context_vec_start;
1034 			ds->max_context <<= 1;
1035 			ds->context_vec_start =
1036 			cvp = reallocarray(ds->context_vec_start,
1037 			    ds->max_context, sizeof(*ds->context_vec_start));
1038 			if (cvp == NULL) {
1039 				free(ds->context_vec_start);
1040 				return (-1);
1041 			}
1042 			ds->context_vec_start = cvp;
1043 			ds->context_vec_end = ds->context_vec_start +
1044 			    ds->max_context;
1045 			ds->context_vec_ptr = ds->context_vec_start + offset;
1046 		}
1047 		if (ds->anychange == 0) {
1048 			/*
1049 			 * Print the context/unidiff header first time through.
1050 			 */
1051 			print_header(outfile, ds, args, file1, file2);
1052 			ds->anychange = 1;
1053 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1054 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1055 			/*
1056 			 * If this change is more than 'diff_context' lines from the
1057 			 * previous change, dump the record and reset it.
1058 			 */
1059 			if (args->diff_format == D_CONTEXT)
1060 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1061 			else
1062 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1063 		}
1064 		ds->context_vec_ptr++;
1065 		ds->context_vec_ptr->a = a;
1066 		ds->context_vec_ptr->b = b;
1067 		ds->context_vec_ptr->c = c;
1068 		ds->context_vec_ptr->d = d;
1069 		return (0);
1070 	}
1071 	if (ds->anychange == 0)
1072 		ds->anychange = 1;
1073 	switch (args->diff_format) {
1074 	case D_BRIEF:
1075 		return (0);
1076 	case D_NORMAL:
1077 	case D_EDIT:
1078 		range(outfile, a, b, ",");
1079 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1080 		if (args->diff_format == D_NORMAL)
1081 			range(outfile, c, d, ",");
1082 		diff_output(outfile, "\n");
1083 		break;
1084 	case D_REVERSE:
1085 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1086 		range(outfile, a, b, " ");
1087 		diff_output(outfile, "\n");
1088 		break;
1089 	case D_NREVERSE:
1090 		if (a > b)
1091 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1092 		else {
1093 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1094 			if (!(c > d))
1095 				/* add changed lines */
1096 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1097 		}
1098 		break;
1099 	}
1100 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1101 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1102 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1103 			diff_output(outfile, "---\n");
1104 	}
1105 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1106 	if (i != 0 && args->diff_format == D_EDIT) {
1107 		/*
1108 		 * A non-zero return value for D_EDIT indicates that the
1109 		 * last line printed was a bare dot (".") that has been
1110 		 * escaped as ".." to prevent ed(1) from misinterpreting
1111 		 * it.  We have to add a substitute command to change this
1112 		 * back and restart where we left off.
1113 		 */
1114 		diff_output(outfile, ".\n");
1115 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1116 		b = a + i - 1;
1117 		a = b + 1;
1118 		c += i;
1119 		goto restart;
1120 	}
1121 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1122 		diff_output(outfile, ".\n");
1123 	if (ds->inifdef) {
1124 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1125 		ds->inifdef = 0;
1126 	}
1127 
1128 	return (0);
1129 }
1130 
1131 static int
1132 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1133     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1134 {
1135 	int i, j, c, lastc, col, nc;
1136 
1137 	/*
1138 	 * When doing #ifdef's, copy down to current line
1139 	 * if this is the first file, so that stuff makes it to output.
1140 	 */
1141 	if (args->diff_format == D_IFDEF && oldfile) {
1142 		long curpos = ftell(lb);
1143 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1144 		nc = f[a > b ? b : a - 1] - curpos;
1145 		for (i = 0; i < nc; i++)
1146 			diff_output(outfile, "%c", getc(lb));
1147 	}
1148 	if (a > b)
1149 		return (0);
1150 	if (args->diff_format == D_IFDEF) {
1151 		if (ds->inifdef) {
1152 			diff_output(outfile, "#else /* %s%s */\n",
1153 			    oldfile == 1 ? "!" : "", args->ifdefname);
1154 		} else {
1155 			if (oldfile)
1156 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1157 			else
1158 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1159 		}
1160 		ds->inifdef = 1 + oldfile;
1161 	}
1162 	for (i = a; i <= b; i++) {
1163 		fseek(lb, f[i - 1], SEEK_SET);
1164 		nc = f[i] - f[i - 1];
1165 		if (args->diff_format != D_IFDEF && ch != '\0') {
1166 			diff_output(outfile, "%c", ch);
1167 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1168 			    || args->diff_format == D_UNIFIED))
1169 				diff_output(outfile, "\t");
1170 			else if (args->diff_format != D_UNIFIED)
1171 				diff_output(outfile, " ");
1172 		}
1173 		col = 0;
1174 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1175 			if ((c = getc(lb)) == EOF) {
1176 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1177 				    args->diff_format == D_NREVERSE)
1178 					warnx("No newline at end of file");
1179 				else
1180 					diff_output(outfile, "\n\\ No newline at end of "
1181 					    "file\n");
1182 				return (0);
1183 			}
1184 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1185 				do {
1186 					diff_output(outfile, " ");
1187 				} while (++col & 7);
1188 			} else {
1189 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1190 				    && lastc == '.') {
1191 					/*
1192 					 * Don't print a bare "." line
1193 					 * since that will confuse ed(1).
1194 					 * Print ".." instead and return,
1195 					 * giving the caller an offset
1196 					 * from which to restart.
1197 					 */
1198 					diff_output(outfile, ".\n");
1199 					return (i - a + 1);
1200 				}
1201 				diff_output(outfile, "%c", c);
1202 				col++;
1203 			}
1204 		}
1205 	}
1206 	return (0);
1207 }
1208 
1209 /*
1210  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1211  */
1212 static int
1213 readhash(struct got_diff_state *ds, FILE *f, int flags)
1214 {
1215 	int i, t, space;
1216 	int sum;
1217 
1218 	sum = 1;
1219 	space = 0;
1220 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1221 		if (flags & D_IGNORECASE)
1222 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1223 				if (t == EOF) {
1224 					if (i == 0)
1225 						return (0);
1226 					break;
1227 				}
1228 				sum = sum * 127 + ds->chrtran[t];
1229 			}
1230 		else
1231 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1232 				if (t == EOF) {
1233 					if (i == 0)
1234 						return (0);
1235 					break;
1236 				}
1237 				sum = sum * 127 + t;
1238 			}
1239 	} else {
1240 		for (i = 0;;) {
1241 			switch (t = getc(f)) {
1242 			case '\t':
1243 			case '\r':
1244 			case '\v':
1245 			case '\f':
1246 			case ' ':
1247 				space++;
1248 				continue;
1249 			default:
1250 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1251 					i++;
1252 					space = 0;
1253 				}
1254 				sum = sum * 127 + ds->chrtran[t];
1255 				i++;
1256 				continue;
1257 			case EOF:
1258 				if (i == 0)
1259 					return (0);
1260 				/* FALLTHROUGH */
1261 			case '\n':
1262 				break;
1263 			}
1264 			break;
1265 		}
1266 	}
1267 	/*
1268 	 * There is a remote possibility that we end up with a zero sum.
1269 	 * Zero is used as an EOF marker, so return 1 instead.
1270 	 */
1271 	return (sum == 0 ? 1 : sum);
1272 }
1273 
1274 static int
1275 asciifile(FILE *f)
1276 {
1277 	unsigned char buf[BUFSIZ];
1278 	size_t cnt;
1279 
1280 	if (f == NULL)
1281 		return (1);
1282 
1283 	rewind(f);
1284 	cnt = fread(buf, 1, sizeof(buf), f);
1285 	return (memchr(buf, '\0', cnt) == NULL);
1286 }
1287 
1288 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1289 
1290 static char *
1291 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1292 {
1293 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1294 	size_t nc;
1295 	int last = ds->lastline;
1296 	char *state = NULL;
1297 
1298 	ds->lastline = pos;
1299 	while (pos > last) {
1300 		fseek(fp, f[pos - 1], SEEK_SET);
1301 		nc = f[pos] - f[pos - 1];
1302 		if (nc >= sizeof(buf))
1303 			nc = sizeof(buf) - 1;
1304 		nc = fread(buf, 1, nc, fp);
1305 		if (nc > 0) {
1306 			buf[nc] = '\0';
1307 			buf[strcspn(buf, "\n")] = '\0';
1308 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1309 				if (begins_with(buf, "private:")) {
1310 					if (!state)
1311 						state = " (private)";
1312 				} else if (begins_with(buf, "protected:")) {
1313 					if (!state)
1314 						state = " (protected)";
1315 				} else if (begins_with(buf, "public:")) {
1316 					if (!state)
1317 						state = " (public)";
1318 				} else {
1319 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1320 					if (state)
1321 						strlcat(ds->lastbuf, state,
1322 						    sizeof ds->lastbuf);
1323 					ds->lastmatchline = pos;
1324 					return ds->lastbuf;
1325 				}
1326 			}
1327 		}
1328 		pos--;
1329 	}
1330 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1331 }
1332 
1333 /* dump accumulated "context" diff changes */
1334 static void
1335 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1336     FILE *f1, FILE *f2, int flags)
1337 {
1338 	struct context_vec *cvp = ds->context_vec_start;
1339 	int lowa, upb, lowc, upd, do_output;
1340 	int a, b, c, d;
1341 	char ch, *f;
1342 
1343 	if (ds->context_vec_start > ds->context_vec_ptr)
1344 		return;
1345 
1346 	b = d = 0;		/* gcc */
1347 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1348 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1349 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1350 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1351 
1352 	diff_output(outfile, "***************");
1353 	if ((flags & D_PROTOTYPE)) {
1354 		f = match_function(ds, ds->ixold, lowa-1, f1);
1355 		if (f != NULL)
1356 			diff_output(outfile, " %s", f);
1357 	}
1358 	diff_output(outfile, "\n*** ");
1359 	range(outfile, lowa, upb, ",");
1360 	diff_output(outfile, " ****\n");
1361 
1362 	/*
1363 	 * Output changes to the "old" file.  The first loop suppresses
1364 	 * output if there were no changes to the "old" file (we'll see
1365 	 * the "old" lines as context in the "new" list).
1366 	 */
1367 	do_output = 0;
1368 	for (; cvp <= ds->context_vec_ptr; cvp++)
1369 		if (cvp->a <= cvp->b) {
1370 			cvp = ds->context_vec_start;
1371 			do_output++;
1372 			break;
1373 		}
1374 	if (do_output) {
1375 		while (cvp <= ds->context_vec_ptr) {
1376 			a = cvp->a;
1377 			b = cvp->b;
1378 			c = cvp->c;
1379 			d = cvp->d;
1380 
1381 			if (a <= b && c <= d)
1382 				ch = 'c';
1383 			else
1384 				ch = (a <= b) ? 'd' : 'a';
1385 
1386 			if (ch == 'a')
1387 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1388 			else {
1389 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1390 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1391 				    ch == 'c' ? '!' : '-', 0, flags);
1392 			}
1393 			lowa = b + 1;
1394 			cvp++;
1395 		}
1396 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1397 	}
1398 	/* output changes to the "new" file */
1399 	diff_output(outfile, "--- ");
1400 	range(outfile, lowc, upd, ",");
1401 	diff_output(outfile, " ----\n");
1402 
1403 	do_output = 0;
1404 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1405 		if (cvp->c <= cvp->d) {
1406 			cvp = ds->context_vec_start;
1407 			do_output++;
1408 			break;
1409 		}
1410 	if (do_output) {
1411 		while (cvp <= ds->context_vec_ptr) {
1412 			a = cvp->a;
1413 			b = cvp->b;
1414 			c = cvp->c;
1415 			d = cvp->d;
1416 
1417 			if (a <= b && c <= d)
1418 				ch = 'c';
1419 			else
1420 				ch = (a <= b) ? 'd' : 'a';
1421 
1422 			if (ch == 'd')
1423 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1424 			else {
1425 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1426 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1427 				    ch == 'c' ? '!' : '+', 0, flags);
1428 			}
1429 			lowc = d + 1;
1430 			cvp++;
1431 		}
1432 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1433 	}
1434 	ds->context_vec_ptr = ds->context_vec_start - 1;
1435 }
1436 
1437 /* dump accumulated "unified" diff changes */
1438 static void
1439 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1440     FILE *f1, FILE *f2, int flags)
1441 {
1442 	struct context_vec *cvp = ds->context_vec_start;
1443 	int lowa, upb, lowc, upd;
1444 	int a, b, c, d;
1445 	char ch, *f;
1446 
1447 	if (ds->context_vec_start > ds->context_vec_ptr)
1448 		return;
1449 
1450 	b = d = 0;		/* gcc */
1451 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1452 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1453 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1454 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1455 
1456 	diff_output(outfile, "@@ -");
1457 	uni_range(outfile, lowa, upb);
1458 	diff_output(outfile, " +");
1459 	uni_range(outfile, lowc, upd);
1460 	diff_output(outfile, " @@");
1461 	if ((flags & D_PROTOTYPE)) {
1462 		f = match_function(ds, ds->ixold, lowa-1, f1);
1463 		if (f != NULL)
1464 			diff_output(outfile, " %s", f);
1465 	}
1466 	diff_output(outfile, "\n");
1467 
1468 	/*
1469 	 * Output changes in "unified" diff format--the old and new lines
1470 	 * are printed together.
1471 	 */
1472 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1473 		a = cvp->a;
1474 		b = cvp->b;
1475 		c = cvp->c;
1476 		d = cvp->d;
1477 
1478 		/*
1479 		 * c: both new and old changes
1480 		 * d: only changes in the old file
1481 		 * a: only changes in the new file
1482 		 */
1483 		if (a <= b && c <= d)
1484 			ch = 'c';
1485 		else
1486 			ch = (a <= b) ? 'd' : 'a';
1487 
1488 		switch (ch) {
1489 		case 'c':
1490 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1491 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1492 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1493 			break;
1494 		case 'd':
1495 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1496 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1497 			break;
1498 		case 'a':
1499 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1500 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1501 			break;
1502 		}
1503 		lowa = b + 1;
1504 		lowc = d + 1;
1505 	}
1506 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1507 
1508 	ds->context_vec_ptr = ds->context_vec_start - 1;
1509 }
1510 
1511 static void
1512 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1513     const char *file1, const char *file2)
1514 {
1515 	if (args->label[0] != NULL)
1516 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1517 		    args->label[0]);
1518 	else
1519 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1520 		    file1, ctime(&ds->stb1.st_mtime));
1521 	if (args->label[1] != NULL)
1522 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1523 		    args->label[1]);
1524 	else
1525 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1526 		    file2, ctime(&ds->stb2.st_mtime));
1527 }