Gotweb

Blob

Date:: Mon Apr 2 17:26:55 2018 UTC
Message:: always free ds->ixold and ds->ixmew in got_diffreg()
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_lib_diff.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 };
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 static void	 diff_output(FILE *, const char *, ...);
187 static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
189 static void	 range(FILE *, int, int, char *);
190 static void	 uni_range(FILE *, int, int);
191 static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void	 prune(struct got_diff_state *);
195 static void	 equiv(struct line *, int, struct line *, int, int *);
196 static void	 unravel(struct got_diff_state *, int);
197 static int	 unsort(struct line *, int, int *);
198 static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void	 sort(struct line *, int);
200 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int	 ignoreline(char *);
202 static int	 asciifile(FILE *);
203 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int	 newcand(struct got_diff_state *, int, int, int, int *);
205 static int	 search(struct got_diff_state *, int *, int, int);
206 static int	 skipline(FILE *);
207 static int	 isqrt(int);
208 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int	 readhash(struct got_diff_state *, FILE *, int);
210 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char	*preadline(int, size_t, off_t);
213 
214 /*
215  * chrtran points to one of 2 translation tables: cup2low if folding upper to
216  * lower case clow2low if not folding case
217  */
218 u_char clow2low[256] = {
219 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 	0xfd, 0xfe, 0xff
243 };
244 
245 u_char cup2low[256] = {
246 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 	0xfd, 0xfe, 0xff
270 };
271 
272 static void
273 diff_output(FILE *outfile, const char *fmt, ...)
274 {
275 	va_list ap;
276 
277 	va_start(ap, fmt);
278 	vfprintf(outfile, fmt, ap);
279 	va_end(ap);
280 }
281 
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
285 {
286 	const struct got_error *err = NULL;
287 	int i, *p;
288 	long *lp;
289 
290 	*rval = D_SAME;
291 	ds->anychange = 0;
292 	ds->lastline = 0;
293 	ds->lastmatchline = 0;
294 	ds->context_vec_ptr = ds->context_vec_start - 1;
295 	ds->max_context = 64;
296 	if (flags & D_IGNORECASE)
297 		ds->chrtran = cup2low;
298 	else
299 		ds->chrtran = clow2low;
300 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
301 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
302 		return NULL;
303 	}
304 	if (flags & D_EMPTY1) {
305 		f1 = fopen(_PATH_DEVNULL, "r");
306 		if (f1 == NULL) {
307 			err = got_error_from_errno();
308 			goto closem;
309 		}
310 	}
311 	else if (f1 == NULL) {
312 		args->status |= 2;
313 		goto closem;
314 	}
315 
316 	if (flags & D_EMPTY2) {
317 		f2 = fopen(_PATH_DEVNULL, "r");
318 		if (f2 == NULL) {
319 			err = got_error_from_errno();
320 			goto closem;
321 		}
322 	} else if (f2 == NULL) {
323 		args->status |= 2;
324 		goto closem;
325 	}
326 
327 	switch (files_differ(ds, f1, f2, flags)) {
328 	case 0:
329 		goto closem;
330 	case 1:
331 		break;
332 	default:
333 		/* error */
334 		args->status |= 2;
335 		goto closem;
336 	}
337 
338 	if ((flags & D_FORCEASCII) == 0 &&
339 	    (!asciifile(f1) || !asciifile(f2))) {
340 		*rval = D_BINARY;
341 		args->status |= 1;
342 		goto closem;
343 	}
344 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
345 		err = got_error_from_errno();
346 		goto closem;
347 	}
348 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
349 		err = got_error_from_errno();
350 		goto closem;
351 	}
352 
353 	prune(ds);
354 	sort(ds->sfile[0], ds->slen[0]);
355 	sort(ds->sfile[1], ds->slen[1]);
356 
357 	ds->member = (int *)ds->file[1];
358 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
359 	p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
360 	if (p == NULL) {
361 		err = got_error_from_errno();
362 		goto closem;
363 	}
364 	ds->member = p;
365 
366 	ds->class = (int *)ds->file[0];
367 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
368 		err = got_error_from_errno();
369 		goto closem;
370 	}
371 	p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
372 	if (p == NULL) {
373 		err = got_error_from_errno();
374 		goto closem;
375 	}
376 	ds->class = p;
377 
378 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
379 	if (ds->klist == NULL) {
380 		err = got_error_from_errno();
381 		goto closem;
382 	}
383 	ds->clen = 0;
384 	ds->clistlen = 100;
385 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
386 	if (ds->clist == NULL) {
387 		err = got_error_from_errno();
388 		goto closem;
389 	}
390 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
391 	if (i < 0) {
392 		err = got_error_from_errno();
393 		goto closem;
394 	}
395 
396 	p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
397 	if (p == NULL) {
398 		err = got_error_from_errno();
399 		goto closem;
400 	}
401 	ds->J = p;
402 	unravel(ds, ds->klist[i]);
403 
404 	lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
405 	if (lp == NULL) {
406 		err = got_error_from_errno();
407 		goto closem;
408 	}
409 	ds->ixold = lp;
410 	lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
411 	if (lp == NULL) {
412 		err = got_error_from_errno();
413 		goto closem;
414 	}
415 	ds->ixnew = lp;
416 	check(ds, f1, f2, flags);
417 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
418 	    flags))
419 		err = got_error_from_errno();
420 closem:
421 	free(ds->J);
422 	free(ds->member);
423 	free(ds->class);
424 	free(ds->clist);
425 	free(ds->klist);
426 	free(ds->ixold);
427 	free(ds->ixnew);
428 	if (ds->anychange) {
429 		args->status |= 1;
430 		if (*rval == D_SAME)
431 			*rval = D_DIFFER;
432 	}
433 	if (f1 != NULL)
434 		fclose(f1);
435 	if (f2 != NULL)
436 		fclose(f2);
437 
438 	return (err);
439 }
440 
441 /*
442  * Check to see if the given files differ.
443  * Returns 0 if they are the same, 1 if different, and -1 on error.
444  * XXX - could use code from cmp(1) [faster]
445  */
446 static int
447 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
448 {
449 	char buf1[BUFSIZ], buf2[BUFSIZ];
450 	size_t i, j;
451 
452 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
453 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
454 		return (1);
455 	for (;;) {
456 		i = fread(buf1, 1, sizeof(buf1), f1);
457 		j = fread(buf2, 1, sizeof(buf2), f2);
458 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
459 			return (-1);
460 		if (i != j)
461 			return (1);
462 		if (i == 0)
463 			return (0);
464 		if (memcmp(buf1, buf2, i) != 0)
465 			return (1);
466 	}
467 }
468 
469 static int
470 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
471 {
472 	struct line *p, *q;
473 	int j, h;
474 	size_t sz;
475 
476 	rewind(fd);
477 
478 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
479 	if (sz < 100)
480 		sz = 100;
481 
482 	p = calloc(sz + 3, sizeof(*p));
483 	if (p == NULL)
484 		return (-1);
485 	for (j = 0; (h = readhash(ds, fd, flags));) {
486 		if (j == sz) {
487 			sz = sz * 3 / 2;
488 			q = reallocarray(p, sz + 3, sizeof(*p));
489 			if (q == NULL) {
490 				free(p);
491 				return (-1);
492 			}
493 			p = q;
494 		}
495 		p[++j].value = h;
496 	}
497 	ds->len[i] = j;
498 	ds->file[i] = p;
499 
500 	return (0);
501 }
502 
503 static void
504 prune(struct got_diff_state *ds)
505 {
506 	int i, j;
507 
508 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
509 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
510 	    ds->pref++)
511 		;
512 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
513 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
514 	    ds->suff++)
515 		;
516 	for (j = 0; j < 2; j++) {
517 		ds->sfile[j] = ds->file[j] + ds->pref;
518 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
519 		for (i = 0; i <= ds->slen[j]; i++)
520 			ds->sfile[j][i].serial = i;
521 	}
522 }
523 
524 static void
525 equiv(struct line *a, int n, struct line *b, int m, int *c)
526 {
527 	int i, j;
528 
529 	i = j = 1;
530 	while (i <= n && j <= m) {
531 		if (a[i].value < b[j].value)
532 			a[i++].value = 0;
533 		else if (a[i].value == b[j].value)
534 			a[i++].value = j;
535 		else
536 			j++;
537 	}
538 	while (i <= n)
539 		a[i++].value = 0;
540 	b[m + 1].value = 0;
541 	j = 0;
542 	while (++j <= m) {
543 		c[j] = -b[j].serial;
544 		while (b[j + 1].value == b[j].value) {
545 			j++;
546 			c[j] = b[j].serial;
547 		}
548 	}
549 	c[j] = -1;
550 }
551 
552 /* Code taken from ping.c */
553 static int
554 isqrt(int n)
555 {
556 	int y, x = 1;
557 
558 	if (n == 0)
559 		return (0);
560 
561 	do { /* newton was a stinker */
562 		y = x;
563 		x = n / x;
564 		x += y;
565 		x /= 2;
566 	} while ((x - y) > 1 || (x - y) < -1);
567 
568 	return (x);
569 }
570 
571 static int
572 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
573 {
574 	int i, k, y, j, l;
575 	int oldc, tc, oldl, sq;
576 	u_int numtries, bound;
577 	int error;
578 
579 	if (flags & D_MINIMAL)
580 		bound = UINT_MAX;
581 	else {
582 		sq = isqrt(n);
583 		bound = MAXIMUM(256, sq);
584 	}
585 
586 	k = 0;
587 	c[0] = newcand(ds, 0, 0, 0, &error);
588 	if (error)
589 		return -1;
590 	for (i = 1; i <= n; i++) {
591 		j = a[i];
592 		if (j == 0)
593 			continue;
594 		y = -b[j];
595 		oldl = 0;
596 		oldc = c[0];
597 		numtries = 0;
598 		do {
599 			if (y <= ds->clist[oldc].y)
600 				continue;
601 			l = search(ds, c, k, y);
602 			if (l != oldl + 1)
603 				oldc = c[l - 1];
604 			if (l <= k) {
605 				if (ds->clist[c[l]].y <= y)
606 					continue;
607 				tc = c[l];
608 				c[l] = newcand(ds, i, y, oldc, &error);
609 				if (error)
610 					return -1;
611 				oldc = tc;
612 				oldl = l;
613 				numtries++;
614 			} else {
615 				c[l] = newcand(ds, i, y, oldc, &error);
616 				if (error)
617 					return -1;
618 				k++;
619 				break;
620 			}
621 		} while ((y = b[++j]) > 0 && numtries < bound);
622 	}
623 	return (k);
624 }
625 
626 static int
627 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
628 {
629 	struct cand *q;
630 
631 	if (ds->clen == ds->clistlen) {
632 		ds->clistlen = ds->clistlen * 11 / 10;
633 		q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
634 		if (q == NULL) {
635 			*errorp = -1;
636 			free(ds->clist);
637 			ds->clist = NULL;
638 			return 0;
639 		}
640 		ds->clist = q;
641 	}
642 	q = ds->clist + ds->clen;
643 	q->x = x;
644 	q->y = y;
645 	q->pred = pred;
646 	*errorp = 0;
647 	return (ds->clen++);
648 }
649 
650 static int
651 search(struct got_diff_state *ds, int *c, int k, int y)
652 {
653 	int i, j, l, t;
654 
655 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
656 		return (k + 1);
657 	i = 0;
658 	j = k + 1;
659 	for (;;) {
660 		l = (i + j) / 2;
661 		if (l <= i)
662 			break;
663 		t = ds->clist[c[l]].y;
664 		if (t > y)
665 			j = l;
666 		else if (t < y)
667 			i = l;
668 		else
669 			return (l);
670 	}
671 	return (l + 1);
672 }
673 
674 static void
675 unravel(struct got_diff_state *ds, int p)
676 {
677 	struct cand *q;
678 	int i;
679 
680 	for (i = 0; i <= ds->len[0]; i++)
681 		ds->J[i] = i <= ds->pref ? i :
682 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
683 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
684 		ds->J[q->x + ds->pref] = q->y + ds->pref;
685 }
686 
687 /*
688  * Check does double duty:
689  *  1.	ferret out any fortuitous correspondences due
690  *	to confounding by hashing (which result in "jackpot")
691  *  2.  collect random access indexes to the two files
692  */
693 static void
694 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
695 {
696 	int i, j, jackpot, c, d;
697 	long ctold, ctnew;
698 
699 	rewind(f1);
700 	rewind(f2);
701 	j = 1;
702 	ds->ixold[0] = ds->ixnew[0] = 0;
703 	jackpot = 0;
704 	ctold = ctnew = 0;
705 	for (i = 1; i <= ds->len[0]; i++) {
706 		if (ds->J[i] == 0) {
707 			ds->ixold[i] = ctold += skipline(f1);
708 			continue;
709 		}
710 		while (j < ds->J[i]) {
711 			ds->ixnew[j] = ctnew += skipline(f2);
712 			j++;
713 		}
714 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
715 			for (;;) {
716 				c = getc(f1);
717 				d = getc(f2);
718 				/*
719 				 * GNU diff ignores a missing newline
720 				 * in one file for -b or -w.
721 				 */
722 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
723 					if (c == EOF && d == '\n') {
724 						ctnew++;
725 						break;
726 					} else if (c == '\n' && d == EOF) {
727 						ctold++;
728 						break;
729 					}
730 				}
731 				ctold++;
732 				ctnew++;
733 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
734 				    isspace(d)) {
735 					do {
736 						if (c == '\n')
737 							break;
738 						ctold++;
739 					} while (isspace(c = getc(f1)));
740 					do {
741 						if (d == '\n')
742 							break;
743 						ctnew++;
744 					} while (isspace(d = getc(f2)));
745 				} else if ((flags & D_IGNOREBLANKS)) {
746 					while (isspace(c) && c != '\n') {
747 						c = getc(f1);
748 						ctold++;
749 					}
750 					while (isspace(d) && d != '\n') {
751 						d = getc(f2);
752 						ctnew++;
753 					}
754 				}
755 				if (ds->chrtran[c] != ds->chrtran[d]) {
756 					jackpot++;
757 					ds->J[i] = 0;
758 					if (c != '\n' && c != EOF)
759 						ctold += skipline(f1);
760 					if (d != '\n' && c != EOF)
761 						ctnew += skipline(f2);
762 					break;
763 				}
764 				if (c == '\n' || c == EOF)
765 					break;
766 			}
767 		} else {
768 			for (;;) {
769 				ctold++;
770 				ctnew++;
771 				if ((c = getc(f1)) != (d = getc(f2))) {
772 					/* jackpot++; */
773 					ds->J[i] = 0;
774 					if (c != '\n' && c != EOF)
775 						ctold += skipline(f1);
776 					if (d != '\n' && c != EOF)
777 						ctnew += skipline(f2);
778 					break;
779 				}
780 				if (c == '\n' || c == EOF)
781 					break;
782 			}
783 		}
784 		ds->ixold[i] = ctold;
785 		ds->ixnew[j] = ctnew;
786 		j++;
787 	}
788 	for (; j <= ds->len[1]; j++)
789 		ds->ixnew[j] = ctnew += skipline(f2);
790 	/*
791 	 * if (jackpot)
792 	 *	fprintf(stderr, "jackpot\n");
793 	 */
794 }
795 
796 /* shellsort CACM #201 */
797 static void
798 sort(struct line *a, int n)
799 {
800 	struct line *ai, *aim, w;
801 	int j, m = 0, k;
802 
803 	if (n == 0)
804 		return;
805 	for (j = 1; j <= n; j *= 2)
806 		m = 2 * j - 1;
807 	for (m /= 2; m != 0; m /= 2) {
808 		k = n - m;
809 		for (j = 1; j <= k; j++) {
810 			for (ai = &a[j]; ai > a; ai -= m) {
811 				aim = &ai[m];
812 				if (aim < ai)
813 					break;	/* wraparound */
814 				if (aim->value > ai[0].value ||
815 				    (aim->value == ai[0].value &&
816 					aim->serial > ai[0].serial))
817 					break;
818 				w.value = ai[0].value;
819 				ai[0].value = aim->value;
820 				aim->value = w.value;
821 				w.serial = ai[0].serial;
822 				ai[0].serial = aim->serial;
823 				aim->serial = w.serial;
824 			}
825 		}
826 	}
827 }
828 
829 static int
830 unsort(struct line *f, int l, int *b)
831 {
832 	int *a, i;
833 
834 	a = calloc(l + 1, sizeof(*a));
835 	if (a == NULL)
836 		return (-1);
837 	for (i = 1; i <= l; i++)
838 		a[f[i].serial] = f[i].value;
839 	for (i = 1; i <= l; i++)
840 		b[i] = a[i];
841 	free(a);
842 
843 	return (0);
844 }
845 
846 static int
847 skipline(FILE *f)
848 {
849 	int i, c;
850 
851 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
852 		continue;
853 	return (i);
854 }
855 
856 static int
857 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
858     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
859 {
860 	int m, i0, i1, j0, j1;
861 	int error = 0;
862 
863 	rewind(f1);
864 	rewind(f2);
865 	m = ds->len[0];
866 	ds->J[0] = 0;
867 	ds->J[m + 1] = ds->len[1] + 1;
868 	if (args->diff_format != D_EDIT) {
869 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
870 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
871 				i0++;
872 			j0 = ds->J[i0 - 1] + 1;
873 			i1 = i0 - 1;
874 			while (i1 < m && ds->J[i1 + 1] == 0)
875 				i1++;
876 			j1 = ds->J[i1 + 1] - 1;
877 			ds->J[i1] = j1;
878 			error = change(outfile, ds, args, file1, f1, file2, f2,
879 			    i0, i1, j0, j1, &flags);
880 			if (error)
881 				return (error);
882 		}
883 	} else {
884 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
885 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
886 				i0--;
887 			j0 = ds->J[i0 + 1] - 1;
888 			i1 = i0 + 1;
889 			while (i1 > 1 && ds->J[i1 - 1] == 0)
890 				i1--;
891 			j1 = ds->J[i1 - 1] + 1;
892 			ds->J[i1] = j1;
893 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
894 			    j1, j0, &flags);
895 			if (error)
896 				return (error);
897 		}
898 	}
899 	if (m == 0) {
900 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
901 		    1, ds->len[1], &flags);
902 		if (error)
903 			return (error);
904 	}
905 	if (args->diff_format == D_IFDEF) {
906 		for (;;) {
907 #define	c i0
908 			if ((c = getc(f1)) == EOF)
909 				return (0);
910 			diff_output(outfile, "%c", c);
911 		}
912 #undef c
913 	}
914 	if (ds->anychange != 0) {
915 		if (args->diff_format == D_CONTEXT)
916 			dump_context_vec(outfile, ds, args, f1, f2, flags);
917 		else if (args->diff_format == D_UNIFIED)
918 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
919 	}
920 
921 	return (0);
922 }
923 
924 static void
925 range(FILE *outfile, int a, int b, char *separator)
926 {
927 	diff_output(outfile, "%d", a > b ? b : a);
928 	if (a < b)
929 		diff_output(outfile, "%s%d", separator, b);
930 }
931 
932 static void
933 uni_range(FILE *outfile, int a, int b)
934 {
935 	if (a < b)
936 		diff_output(outfile, "%d,%d", a, b - a + 1);
937 	else if (a == b)
938 		diff_output(outfile, "%d", b);
939 	else
940 		diff_output(outfile, "%d,0", b);
941 }
942 
943 static char *
944 preadline(int fd, size_t rlen, off_t off)
945 {
946 	char *line;
947 	ssize_t nr;
948 
949 	line = malloc(rlen + 1);
950 	if (line == NULL)
951 		return NULL;
952 	if ((nr = pread(fd, line, rlen, off)) < 0) {
953 		free(line);
954 		return NULL;
955 	}
956 	if (nr > 0 && line[nr-1] == '\n')
957 		nr--;
958 	line[nr] = '\0';
959 	return (line);
960 }
961 
962 static int
963 ignoreline(char *line)
964 {
965 	return 0; /* do not ignore any lines */
966 }
967 
968 /*
969  * Indicate that there is a difference between lines a and b of the from file
970  * to get to lines c to d of the to file.  If a is greater then b then there
971  * are no lines in the from file involved and this means that there were
972  * lines appended (beginning at b).  If c is greater than d then there are
973  * lines missing from the to file.
974  */
975 static int
976 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
977     const char *file1, FILE *f1, const char *file2, FILE *f2,
978     int a, int b, int c, int d, int *pflags)
979 {
980 	int i;
981 
982 restart:
983 	if (args->diff_format != D_IFDEF && a > b && c > d)
984 		return (0);
985 	if (args->ignore_pats != NULL) {
986 		char *line;
987 		/*
988 		 * All lines in the change, insert, or delete must
989 		 * match an ignore pattern for the change to be
990 		 * ignored.
991 		 */
992 		if (a <= b) {		/* Changes and deletes. */
993 			for (i = a; i <= b; i++) {
994 				line = preadline(fileno(f1),
995 				    ds->ixold[i] - ds->ixold[i - 1],
996 				    ds->ixold[i - 1]);
997 				if (line == NULL)
998 					return (-1);
999 				if (!ignoreline(line))
1000 					goto proceed;
1001 			}
1002 		}
1003 		if (a > b || c <= d) {	/* Changes and inserts. */
1004 			for (i = c; i <= d; i++) {
1005 				line = preadline(fileno(f2),
1006 				    ds->ixnew[i] - ds->ixnew[i - 1],
1007 				    ds->ixnew[i - 1]);
1008 				if (line == NULL)
1009 					return (-1);
1010 				if (!ignoreline(line))
1011 					goto proceed;
1012 			}
1013 		}
1014 		return (0);
1015 	}
1016 proceed:
1017 	if (*pflags & D_HEADER) {
1018 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1019 		*pflags &= ~D_HEADER;
1020 	}
1021 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1022 		/*
1023 		 * Allocate change records as needed.
1024 		 */
1025 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1026 			struct context_vec *cvp;
1027 			ptrdiff_t offset;
1028 			offset = ds->context_vec_ptr - ds->context_vec_start;
1029 			ds->max_context <<= 1;
1030 			ds->context_vec_start =
1031 			cvp = reallocarray(ds->context_vec_start,
1032 			    ds->max_context, sizeof(*ds->context_vec_start));
1033 			if (cvp == NULL) {
1034 				free(ds->context_vec_start);
1035 				return (-1);
1036 			}
1037 			ds->context_vec_start = cvp;
1038 			ds->context_vec_end = ds->context_vec_start +
1039 			    ds->max_context;
1040 			ds->context_vec_ptr = ds->context_vec_start + offset;
1041 		}
1042 		if (ds->anychange == 0) {
1043 			/*
1044 			 * Print the context/unidiff header first time through.
1045 			 */
1046 			print_header(outfile, ds, args, file1, file2);
1047 			ds->anychange = 1;
1048 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1049 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1050 			/*
1051 			 * If this change is more than 'diff_context' lines from the
1052 			 * previous change, dump the record and reset it.
1053 			 */
1054 			if (args->diff_format == D_CONTEXT)
1055 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1056 			else
1057 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1058 		}
1059 		ds->context_vec_ptr++;
1060 		ds->context_vec_ptr->a = a;
1061 		ds->context_vec_ptr->b = b;
1062 		ds->context_vec_ptr->c = c;
1063 		ds->context_vec_ptr->d = d;
1064 		return (0);
1065 	}
1066 	if (ds->anychange == 0)
1067 		ds->anychange = 1;
1068 	switch (args->diff_format) {
1069 	case D_BRIEF:
1070 		return (0);
1071 	case D_NORMAL:
1072 	case D_EDIT:
1073 		range(outfile, a, b, ",");
1074 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1075 		if (args->diff_format == D_NORMAL)
1076 			range(outfile, c, d, ",");
1077 		diff_output(outfile, "\n");
1078 		break;
1079 	case D_REVERSE:
1080 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1081 		range(outfile, a, b, " ");
1082 		diff_output(outfile, "\n");
1083 		break;
1084 	case D_NREVERSE:
1085 		if (a > b)
1086 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1087 		else {
1088 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1089 			if (!(c > d))
1090 				/* add changed lines */
1091 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1092 		}
1093 		break;
1094 	}
1095 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1096 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1097 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1098 			diff_output(outfile, "---\n");
1099 	}
1100 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1101 	if (i != 0 && args->diff_format == D_EDIT) {
1102 		/*
1103 		 * A non-zero return value for D_EDIT indicates that the
1104 		 * last line printed was a bare dot (".") that has been
1105 		 * escaped as ".." to prevent ed(1) from misinterpreting
1106 		 * it.  We have to add a substitute command to change this
1107 		 * back and restart where we left off.
1108 		 */
1109 		diff_output(outfile, ".\n");
1110 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1111 		b = a + i - 1;
1112 		a = b + 1;
1113 		c += i;
1114 		goto restart;
1115 	}
1116 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1117 		diff_output(outfile, ".\n");
1118 	if (ds->inifdef) {
1119 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1120 		ds->inifdef = 0;
1121 	}
1122 
1123 	return (0);
1124 }
1125 
1126 static int
1127 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1128     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1129 {
1130 	int i, j, c, lastc, col, nc;
1131 
1132 	/*
1133 	 * When doing #ifdef's, copy down to current line
1134 	 * if this is the first file, so that stuff makes it to output.
1135 	 */
1136 	if (args->diff_format == D_IFDEF && oldfile) {
1137 		long curpos = ftell(lb);
1138 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1139 		nc = f[a > b ? b : a - 1] - curpos;
1140 		for (i = 0; i < nc; i++)
1141 			diff_output(outfile, "%c", getc(lb));
1142 	}
1143 	if (a > b)
1144 		return (0);
1145 	if (args->diff_format == D_IFDEF) {
1146 		if (ds->inifdef) {
1147 			diff_output(outfile, "#else /* %s%s */\n",
1148 			    oldfile == 1 ? "!" : "", args->ifdefname);
1149 		} else {
1150 			if (oldfile)
1151 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1152 			else
1153 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1154 		}
1155 		ds->inifdef = 1 + oldfile;
1156 	}
1157 	for (i = a; i <= b; i++) {
1158 		fseek(lb, f[i - 1], SEEK_SET);
1159 		nc = f[i] - f[i - 1];
1160 		if (args->diff_format != D_IFDEF && ch != '\0') {
1161 			diff_output(outfile, "%c", ch);
1162 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1163 			    || args->diff_format == D_UNIFIED))
1164 				diff_output(outfile, "\t");
1165 			else if (args->diff_format != D_UNIFIED)
1166 				diff_output(outfile, " ");
1167 		}
1168 		col = 0;
1169 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1170 			if ((c = getc(lb)) == EOF) {
1171 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1172 				    args->diff_format == D_NREVERSE)
1173 					warnx("No newline at end of file");
1174 				else
1175 					diff_output(outfile, "\n\\ No newline at end of "
1176 					    "file\n");
1177 				return (0);
1178 			}
1179 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1180 				do {
1181 					diff_output(outfile, " ");
1182 				} while (++col & 7);
1183 			} else {
1184 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1185 				    && lastc == '.') {
1186 					/*
1187 					 * Don't print a bare "." line
1188 					 * since that will confuse ed(1).
1189 					 * Print ".." instead and return,
1190 					 * giving the caller an offset
1191 					 * from which to restart.
1192 					 */
1193 					diff_output(outfile, ".\n");
1194 					return (i - a + 1);
1195 				}
1196 				diff_output(outfile, "%c", c);
1197 				col++;
1198 			}
1199 		}
1200 	}
1201 	return (0);
1202 }
1203 
1204 /*
1205  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1206  */
1207 static int
1208 readhash(struct got_diff_state *ds, FILE *f, int flags)
1209 {
1210 	int i, t, space;
1211 	int sum;
1212 
1213 	sum = 1;
1214 	space = 0;
1215 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1216 		if (flags & D_IGNORECASE)
1217 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1218 				if (t == EOF) {
1219 					if (i == 0)
1220 						return (0);
1221 					break;
1222 				}
1223 				sum = sum * 127 + ds->chrtran[t];
1224 			}
1225 		else
1226 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1227 				if (t == EOF) {
1228 					if (i == 0)
1229 						return (0);
1230 					break;
1231 				}
1232 				sum = sum * 127 + t;
1233 			}
1234 	} else {
1235 		for (i = 0;;) {
1236 			switch (t = getc(f)) {
1237 			case '\t':
1238 			case '\r':
1239 			case '\v':
1240 			case '\f':
1241 			case ' ':
1242 				space++;
1243 				continue;
1244 			default:
1245 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1246 					i++;
1247 					space = 0;
1248 				}
1249 				sum = sum * 127 + ds->chrtran[t];
1250 				i++;
1251 				continue;
1252 			case EOF:
1253 				if (i == 0)
1254 					return (0);
1255 				/* FALLTHROUGH */
1256 			case '\n':
1257 				break;
1258 			}
1259 			break;
1260 		}
1261 	}
1262 	/*
1263 	 * There is a remote possibility that we end up with a zero sum.
1264 	 * Zero is used as an EOF marker, so return 1 instead.
1265 	 */
1266 	return (sum == 0 ? 1 : sum);
1267 }
1268 
1269 static int
1270 asciifile(FILE *f)
1271 {
1272 	unsigned char buf[BUFSIZ];
1273 	size_t cnt;
1274 
1275 	if (f == NULL)
1276 		return (1);
1277 
1278 	rewind(f);
1279 	cnt = fread(buf, 1, sizeof(buf), f);
1280 	return (memchr(buf, '\0', cnt) == NULL);
1281 }
1282 
1283 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1284 
1285 static char *
1286 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1287 {
1288 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1289 	size_t nc;
1290 	int last = ds->lastline;
1291 	char *state = NULL;
1292 
1293 	ds->lastline = pos;
1294 	while (pos > last) {
1295 		fseek(fp, f[pos - 1], SEEK_SET);
1296 		nc = f[pos] - f[pos - 1];
1297 		if (nc >= sizeof(buf))
1298 			nc = sizeof(buf) - 1;
1299 		nc = fread(buf, 1, nc, fp);
1300 		if (nc > 0) {
1301 			buf[nc] = '\0';
1302 			buf[strcspn(buf, "\n")] = '\0';
1303 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1304 				if (begins_with(buf, "private:")) {
1305 					if (!state)
1306 						state = " (private)";
1307 				} else if (begins_with(buf, "protected:")) {
1308 					if (!state)
1309 						state = " (protected)";
1310 				} else if (begins_with(buf, "public:")) {
1311 					if (!state)
1312 						state = " (public)";
1313 				} else {
1314 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1315 					if (state)
1316 						strlcat(ds->lastbuf, state,
1317 						    sizeof ds->lastbuf);
1318 					ds->lastmatchline = pos;
1319 					return ds->lastbuf;
1320 				}
1321 			}
1322 		}
1323 		pos--;
1324 	}
1325 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1326 }
1327 
1328 /* dump accumulated "context" diff changes */
1329 static void
1330 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1331     FILE *f1, FILE *f2, int flags)
1332 {
1333 	struct context_vec *cvp = ds->context_vec_start;
1334 	int lowa, upb, lowc, upd, do_output;
1335 	int a, b, c, d;
1336 	char ch, *f;
1337 
1338 	if (ds->context_vec_start > ds->context_vec_ptr)
1339 		return;
1340 
1341 	b = d = 0;		/* gcc */
1342 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1343 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1344 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1345 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1346 
1347 	diff_output(outfile, "***************");
1348 	if ((flags & D_PROTOTYPE)) {
1349 		f = match_function(ds, ds->ixold, lowa-1, f1);
1350 		if (f != NULL)
1351 			diff_output(outfile, " %s", f);
1352 	}
1353 	diff_output(outfile, "\n*** ");
1354 	range(outfile, lowa, upb, ",");
1355 	diff_output(outfile, " ****\n");
1356 
1357 	/*
1358 	 * Output changes to the "old" file.  The first loop suppresses
1359 	 * output if there were no changes to the "old" file (we'll see
1360 	 * the "old" lines as context in the "new" list).
1361 	 */
1362 	do_output = 0;
1363 	for (; cvp <= ds->context_vec_ptr; cvp++)
1364 		if (cvp->a <= cvp->b) {
1365 			cvp = ds->context_vec_start;
1366 			do_output++;
1367 			break;
1368 		}
1369 	if (do_output) {
1370 		while (cvp <= ds->context_vec_ptr) {
1371 			a = cvp->a;
1372 			b = cvp->b;
1373 			c = cvp->c;
1374 			d = cvp->d;
1375 
1376 			if (a <= b && c <= d)
1377 				ch = 'c';
1378 			else
1379 				ch = (a <= b) ? 'd' : 'a';
1380 
1381 			if (ch == 'a')
1382 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1383 			else {
1384 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1385 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1386 				    ch == 'c' ? '!' : '-', 0, flags);
1387 			}
1388 			lowa = b + 1;
1389 			cvp++;
1390 		}
1391 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1392 	}
1393 	/* output changes to the "new" file */
1394 	diff_output(outfile, "--- ");
1395 	range(outfile, lowc, upd, ",");
1396 	diff_output(outfile, " ----\n");
1397 
1398 	do_output = 0;
1399 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1400 		if (cvp->c <= cvp->d) {
1401 			cvp = ds->context_vec_start;
1402 			do_output++;
1403 			break;
1404 		}
1405 	if (do_output) {
1406 		while (cvp <= ds->context_vec_ptr) {
1407 			a = cvp->a;
1408 			b = cvp->b;
1409 			c = cvp->c;
1410 			d = cvp->d;
1411 
1412 			if (a <= b && c <= d)
1413 				ch = 'c';
1414 			else
1415 				ch = (a <= b) ? 'd' : 'a';
1416 
1417 			if (ch == 'd')
1418 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1419 			else {
1420 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1421 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1422 				    ch == 'c' ? '!' : '+', 0, flags);
1423 			}
1424 			lowc = d + 1;
1425 			cvp++;
1426 		}
1427 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1428 	}
1429 	ds->context_vec_ptr = ds->context_vec_start - 1;
1430 }
1431 
1432 /* dump accumulated "unified" diff changes */
1433 static void
1434 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1435     FILE *f1, FILE *f2, int flags)
1436 {
1437 	struct context_vec *cvp = ds->context_vec_start;
1438 	int lowa, upb, lowc, upd;
1439 	int a, b, c, d;
1440 	char ch, *f;
1441 
1442 	if (ds->context_vec_start > ds->context_vec_ptr)
1443 		return;
1444 
1445 	b = d = 0;		/* gcc */
1446 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1447 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1448 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1449 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1450 
1451 	diff_output(outfile, "@@ -");
1452 	uni_range(outfile, lowa, upb);
1453 	diff_output(outfile, " +");
1454 	uni_range(outfile, lowc, upd);
1455 	diff_output(outfile, " @@");
1456 	if ((flags & D_PROTOTYPE)) {
1457 		f = match_function(ds, ds->ixold, lowa-1, f1);
1458 		if (f != NULL)
1459 			diff_output(outfile, " %s", f);
1460 	}
1461 	diff_output(outfile, "\n");
1462 
1463 	/*
1464 	 * Output changes in "unified" diff format--the old and new lines
1465 	 * are printed together.
1466 	 */
1467 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1468 		a = cvp->a;
1469 		b = cvp->b;
1470 		c = cvp->c;
1471 		d = cvp->d;
1472 
1473 		/*
1474 		 * c: both new and old changes
1475 		 * d: only changes in the old file
1476 		 * a: only changes in the new file
1477 		 */
1478 		if (a <= b && c <= d)
1479 			ch = 'c';
1480 		else
1481 			ch = (a <= b) ? 'd' : 'a';
1482 
1483 		switch (ch) {
1484 		case 'c':
1485 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1486 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1487 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1488 			break;
1489 		case 'd':
1490 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1491 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1492 			break;
1493 		case 'a':
1494 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1495 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1496 			break;
1497 		}
1498 		lowa = b + 1;
1499 		lowc = d + 1;
1500 	}
1501 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1502 
1503 	ds->context_vec_ptr = ds->context_vec_start - 1;
1504 }
1505 
1506 static void
1507 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1508     const char *file1, const char *file2)
1509 {
1510 	if (args->label[0] != NULL)
1511 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1512 		    args->label[0]);
1513 	else
1514 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1515 		    file1, ctime(&ds->stb1.st_mtime));
1516 	if (args->label[1] != NULL)
1517 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1518 		    args->label[1]);
1519 	else
1520 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1521 		    file2, ctime(&ds->stb2.st_mtime));
1522 }