Gotweb

Blob

Date:: Sat Mar 17 17:13:19 2018 UTC
Message:: remove GOT_ERR_NO_MEM, we can just use errno in those cases
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_lib_diff.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 };
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 static void	 diff_output(FILE *, const char *, ...);
187 static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
189 static void	 range(FILE *, int, int, char *);
190 static void	 uni_range(FILE *, int, int);
191 static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void	 prune(struct got_diff_state *);
195 static void	 equiv(struct line *, int, struct line *, int, int *);
196 static void	 unravel(struct got_diff_state *, int);
197 static int	 unsort(struct line *, int, int *);
198 static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void	 sort(struct line *, int);
200 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int	 ignoreline(char *);
202 static int	 asciifile(FILE *);
203 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int	 newcand(struct got_diff_state *, int, int, int, int *);
205 static int	 search(struct got_diff_state *, int *, int, int);
206 static int	 skipline(FILE *);
207 static int	 isqrt(int);
208 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int	 readhash(struct got_diff_state *, FILE *, int);
210 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char	*preadline(int, size_t, off_t);
213 
214 /*
215  * chrtran points to one of 2 translation tables: cup2low if folding upper to
216  * lower case clow2low if not folding case
217  */
218 u_char clow2low[256] = {
219 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 	0xfd, 0xfe, 0xff
243 };
244 
245 u_char cup2low[256] = {
246 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 	0xfd, 0xfe, 0xff
270 };
271 
272 static void
273 diff_output(FILE *outfile, const char *fmt, ...)
274 {
275 	va_list ap;
276 
277 	va_start(ap, fmt);
278 	vfprintf(outfile, fmt, ap);
279 	va_end(ap);
280 }
281 
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
285 {
286 	const struct got_error *err = NULL;
287 	int i;
288 
289 	*rval = D_SAME;
290 	ds->anychange = 0;
291 	ds->lastline = 0;
292 	ds->lastmatchline = 0;
293 	ds->context_vec_ptr = ds->context_vec_start - 1;
294 	if (flags & D_IGNORECASE)
295 		ds->chrtran = cup2low;
296 	else
297 		ds->chrtran = clow2low;
298 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
299 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
300 		return NULL;
301 	}
302 	if (flags & D_EMPTY1)
303 		f1 = fopen(_PATH_DEVNULL, "r");
304 	else if (f1 == NULL) {
305 		args->status |= 2;
306 		goto closem;
307 	}
308 
309 	if (flags & D_EMPTY2)
310 		f2 = fopen(_PATH_DEVNULL, "r");
311 	else if (f2 == NULL) {
312 		args->status |= 2;
313 		goto closem;
314 	}
315 
316 	switch (files_differ(ds, f1, f2, flags)) {
317 	case 0:
318 		goto closem;
319 	case 1:
320 		break;
321 	default:
322 		/* error */
323 		args->status |= 2;
324 		goto closem;
325 	}
326 
327 	if ((flags & D_FORCEASCII) == 0 &&
328 	    (!asciifile(f1) || !asciifile(f2))) {
329 		*rval = D_BINARY;
330 		args->status |= 1;
331 		goto closem;
332 	}
333 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 		err = got_error_from_errno();
335 		goto closem;
336 	}
337 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 		err = got_error_from_errno();
339 		goto closem;
340 	}
341 
342 	prune(ds);
343 	sort(ds->sfile[0], ds->slen[0]);
344 	sort(ds->sfile[1], ds->slen[1]);
345 
346 	ds->member = (int *)ds->file[1];
347 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 	ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
349 	if (ds->member == NULL) {
350 		err = got_error_from_errno();
351 		goto closem;
352 	}
353 
354 	ds->class = (int *)ds->file[0];
355 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
356 		err = got_error_from_errno();
357 		goto closem;
358 	}
359 	ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
360 	if (ds->class == NULL) {
361 		err = got_error_from_errno();
362 		goto closem;
363 	}
364 
365 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
366 	if (ds->klist == NULL) {
367 		err = got_error_from_errno();
368 		goto closem;
369 	}
370 	ds->clen = 0;
371 	ds->clistlen = 100;
372 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
373 	if (ds->clist == NULL) {
374 		err = got_error_from_errno();
375 		goto closem;
376 	}
377 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
378 	if (i < 0) {
379 		err = got_error_from_errno();
380 		free(ds->member);
381 		free(ds->class);
382 		goto closem;
383 	}
384 	free(ds->member);
385 	free(ds->class);
386 
387 	ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
388 	if (ds->J == NULL) {
389 		err = got_error_from_errno();
390 		goto closem;
391 	}
392 	unravel(ds, ds->klist[i]);
393 	free(ds->clist);
394 	ds->clist = NULL;
395 	free(ds->klist);
396 	ds->klist = NULL;
397 
398 	ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
399 	if (ds->ixold == NULL) {
400 		err = got_error_from_errno();
401 		goto closem;
402 	}
403 	ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
404 	if (ds->ixnew == NULL) {
405 		err = got_error_from_errno();
406 		goto closem;
407 	}
408 	check(ds, f1, f2, flags);
409 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
410 	    flags))
411 		err = got_error_from_errno();
412 closem:
413 	if (ds->anychange) {
414 		args->status |= 1;
415 		if (*rval == D_SAME)
416 			*rval = D_DIFFER;
417 	}
418 	if (f1 != NULL)
419 		fclose(f1);
420 	if (f2 != NULL)
421 		fclose(f2);
422 
423 	return (err);
424 }
425 
426 /*
427  * Check to see if the given files differ.
428  * Returns 0 if they are the same, 1 if different, and -1 on error.
429  * XXX - could use code from cmp(1) [faster]
430  */
431 static int
432 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
433 {
434 	char buf1[BUFSIZ], buf2[BUFSIZ];
435 	size_t i, j;
436 
437 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
438 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
439 		return (1);
440 	for (;;) {
441 		i = fread(buf1, 1, sizeof(buf1), f1);
442 		j = fread(buf2, 1, sizeof(buf2), f2);
443 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
444 			return (-1);
445 		if (i != j)
446 			return (1);
447 		if (i == 0)
448 			return (0);
449 		if (memcmp(buf1, buf2, i) != 0)
450 			return (1);
451 	}
452 }
453 
454 static int
455 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
456 {
457 	struct line *p;
458 	int j, h;
459 	size_t sz;
460 
461 	rewind(fd);
462 
463 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
464 	if (sz < 100)
465 		sz = 100;
466 
467 	p = calloc(sz + 3, sizeof(*p));
468 	if (p == NULL)
469 		return (-1);
470 	for (j = 0; (h = readhash(ds, fd, flags));) {
471 		if (j == sz) {
472 			sz = sz * 3 / 2;
473 			p = reallocarray(p, sz + 3, sizeof(*p));
474 			if (p == NULL)
475 				return (-1);
476 		}
477 		p[++j].value = h;
478 	}
479 	ds->len[i] = j;
480 	ds->file[i] = p;
481 
482 	return (0);
483 }
484 
485 static void
486 prune(struct got_diff_state *ds)
487 {
488 	int i, j;
489 
490 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
491 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
492 	    ds->pref++)
493 		;
494 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
495 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
496 	    ds->suff++)
497 		;
498 	for (j = 0; j < 2; j++) {
499 		ds->sfile[j] = ds->file[j] + ds->pref;
500 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
501 		for (i = 0; i <= ds->slen[j]; i++)
502 			ds->sfile[j][i].serial = i;
503 	}
504 }
505 
506 static void
507 equiv(struct line *a, int n, struct line *b, int m, int *c)
508 {
509 	int i, j;
510 
511 	i = j = 1;
512 	while (i <= n && j <= m) {
513 		if (a[i].value < b[j].value)
514 			a[i++].value = 0;
515 		else if (a[i].value == b[j].value)
516 			a[i++].value = j;
517 		else
518 			j++;
519 	}
520 	while (i <= n)
521 		a[i++].value = 0;
522 	b[m + 1].value = 0;
523 	j = 0;
524 	while (++j <= m) {
525 		c[j] = -b[j].serial;
526 		while (b[j + 1].value == b[j].value) {
527 			j++;
528 			c[j] = b[j].serial;
529 		}
530 	}
531 	c[j] = -1;
532 }
533 
534 /* Code taken from ping.c */
535 static int
536 isqrt(int n)
537 {
538 	int y, x = 1;
539 
540 	if (n == 0)
541 		return (0);
542 
543 	do { /* newton was a stinker */
544 		y = x;
545 		x = n / x;
546 		x += y;
547 		x /= 2;
548 	} while ((x - y) > 1 || (x - y) < -1);
549 
550 	return (x);
551 }
552 
553 static int
554 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
555 {
556 	int i, k, y, j, l;
557 	int oldc, tc, oldl, sq;
558 	u_int numtries, bound;
559 	int error;
560 
561 	if (flags & D_MINIMAL)
562 		bound = UINT_MAX;
563 	else {
564 		sq = isqrt(n);
565 		bound = MAXIMUM(256, sq);
566 	}
567 
568 	k = 0;
569 	c[0] = newcand(ds, 0, 0, 0, &error);
570 	if (error)
571 		return -1;
572 	for (i = 1; i <= n; i++) {
573 		j = a[i];
574 		if (j == 0)
575 			continue;
576 		y = -b[j];
577 		oldl = 0;
578 		oldc = c[0];
579 		numtries = 0;
580 		do {
581 			if (y <= ds->clist[oldc].y)
582 				continue;
583 			l = search(ds, c, k, y);
584 			if (l != oldl + 1)
585 				oldc = c[l - 1];
586 			if (l <= k) {
587 				if (ds->clist[c[l]].y <= y)
588 					continue;
589 				tc = c[l];
590 				c[l] = newcand(ds, i, y, oldc, &error);
591 				if (error)
592 					return -1;
593 				oldc = tc;
594 				oldl = l;
595 				numtries++;
596 			} else {
597 				c[l] = newcand(ds, i, y, oldc, &error);
598 				if (error)
599 					return -1;
600 				k++;
601 				break;
602 			}
603 		} while ((y = b[++j]) > 0 && numtries < bound);
604 	}
605 	return (k);
606 }
607 
608 static int
609 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
610 {
611 	struct cand *q;
612 
613 	if (ds->clen == ds->clistlen) {
614 		ds->clistlen = ds->clistlen * 11 / 10;
615 		ds->clist = reallocarray(ds->clist, ds->clistlen,
616 		    sizeof(*ds->clist));
617 		if (ds->clist == NULL) {
618 			*errorp = -1;
619 			return 0;
620 		}
621 	}
622 	q = ds->clist + ds->clen;
623 	q->x = x;
624 	q->y = y;
625 	q->pred = pred;
626 	*errorp = 0;
627 	return (ds->clen++);
628 }
629 
630 static int
631 search(struct got_diff_state *ds, int *c, int k, int y)
632 {
633 	int i, j, l, t;
634 
635 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
636 		return (k + 1);
637 	i = 0;
638 	j = k + 1;
639 	for (;;) {
640 		l = (i + j) / 2;
641 		if (l <= i)
642 			break;
643 		t = ds->clist[c[l]].y;
644 		if (t > y)
645 			j = l;
646 		else if (t < y)
647 			i = l;
648 		else
649 			return (l);
650 	}
651 	return (l + 1);
652 }
653 
654 static void
655 unravel(struct got_diff_state *ds, int p)
656 {
657 	struct cand *q;
658 	int i;
659 
660 	for (i = 0; i <= ds->len[0]; i++)
661 		ds->J[i] = i <= ds->pref ? i :
662 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
663 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
664 		ds->J[q->x + ds->pref] = q->y + ds->pref;
665 }
666 
667 /*
668  * Check does double duty:
669  *  1.	ferret out any fortuitous correspondences due
670  *	to confounding by hashing (which result in "jackpot")
671  *  2.  collect random access indexes to the two files
672  */
673 static void
674 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
675 {
676 	int i, j, jackpot, c, d;
677 	long ctold, ctnew;
678 
679 	rewind(f1);
680 	rewind(f2);
681 	j = 1;
682 	ds->ixold[0] = ds->ixnew[0] = 0;
683 	jackpot = 0;
684 	ctold = ctnew = 0;
685 	for (i = 1; i <= ds->len[0]; i++) {
686 		if (ds->J[i] == 0) {
687 			ds->ixold[i] = ctold += skipline(f1);
688 			continue;
689 		}
690 		while (j < ds->J[i]) {
691 			ds->ixnew[j] = ctnew += skipline(f2);
692 			j++;
693 		}
694 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
695 			for (;;) {
696 				c = getc(f1);
697 				d = getc(f2);
698 				/*
699 				 * GNU diff ignores a missing newline
700 				 * in one file for -b or -w.
701 				 */
702 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
703 					if (c == EOF && d == '\n') {
704 						ctnew++;
705 						break;
706 					} else if (c == '\n' && d == EOF) {
707 						ctold++;
708 						break;
709 					}
710 				}
711 				ctold++;
712 				ctnew++;
713 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
714 				    isspace(d)) {
715 					do {
716 						if (c == '\n')
717 							break;
718 						ctold++;
719 					} while (isspace(c = getc(f1)));
720 					do {
721 						if (d == '\n')
722 							break;
723 						ctnew++;
724 					} while (isspace(d = getc(f2)));
725 				} else if ((flags & D_IGNOREBLANKS)) {
726 					while (isspace(c) && c != '\n') {
727 						c = getc(f1);
728 						ctold++;
729 					}
730 					while (isspace(d) && d != '\n') {
731 						d = getc(f2);
732 						ctnew++;
733 					}
734 				}
735 				if (ds->chrtran[c] != ds->chrtran[d]) {
736 					jackpot++;
737 					ds->J[i] = 0;
738 					if (c != '\n' && c != EOF)
739 						ctold += skipline(f1);
740 					if (d != '\n' && c != EOF)
741 						ctnew += skipline(f2);
742 					break;
743 				}
744 				if (c == '\n' || c == EOF)
745 					break;
746 			}
747 		} else {
748 			for (;;) {
749 				ctold++;
750 				ctnew++;
751 				if ((c = getc(f1)) != (d = getc(f2))) {
752 					/* jackpot++; */
753 					ds->J[i] = 0;
754 					if (c != '\n' && c != EOF)
755 						ctold += skipline(f1);
756 					if (d != '\n' && c != EOF)
757 						ctnew += skipline(f2);
758 					break;
759 				}
760 				if (c == '\n' || c == EOF)
761 					break;
762 			}
763 		}
764 		ds->ixold[i] = ctold;
765 		ds->ixnew[j] = ctnew;
766 		j++;
767 	}
768 	for (; j <= ds->len[1]; j++)
769 		ds->ixnew[j] = ctnew += skipline(f2);
770 	/*
771 	 * if (jackpot)
772 	 *	fprintf(stderr, "jackpot\n");
773 	 */
774 }
775 
776 /* shellsort CACM #201 */
777 static void
778 sort(struct line *a, int n)
779 {
780 	struct line *ai, *aim, w;
781 	int j, m = 0, k;
782 
783 	if (n == 0)
784 		return;
785 	for (j = 1; j <= n; j *= 2)
786 		m = 2 * j - 1;
787 	for (m /= 2; m != 0; m /= 2) {
788 		k = n - m;
789 		for (j = 1; j <= k; j++) {
790 			for (ai = &a[j]; ai > a; ai -= m) {
791 				aim = &ai[m];
792 				if (aim < ai)
793 					break;	/* wraparound */
794 				if (aim->value > ai[0].value ||
795 				    (aim->value == ai[0].value &&
796 					aim->serial > ai[0].serial))
797 					break;
798 				w.value = ai[0].value;
799 				ai[0].value = aim->value;
800 				aim->value = w.value;
801 				w.serial = ai[0].serial;
802 				ai[0].serial = aim->serial;
803 				aim->serial = w.serial;
804 			}
805 		}
806 	}
807 }
808 
809 static int
810 unsort(struct line *f, int l, int *b)
811 {
812 	int *a, i;
813 
814 	a = calloc(l + 1, sizeof(*a));
815 	if (a == NULL)
816 		return (-1);
817 	for (i = 1; i <= l; i++)
818 		a[f[i].serial] = f[i].value;
819 	for (i = 1; i <= l; i++)
820 		b[i] = a[i];
821 	free(a);
822 
823 	return (0);
824 }
825 
826 static int
827 skipline(FILE *f)
828 {
829 	int i, c;
830 
831 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
832 		continue;
833 	return (i);
834 }
835 
836 static int
837 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
838     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
839 {
840 	int m, i0, i1, j0, j1;
841 	int error = 0;
842 
843 	rewind(f1);
844 	rewind(f2);
845 	m = ds->len[0];
846 	ds->J[0] = 0;
847 	ds->J[m + 1] = ds->len[1] + 1;
848 	if (args->diff_format != D_EDIT) {
849 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
850 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
851 				i0++;
852 			j0 = ds->J[i0 - 1] + 1;
853 			i1 = i0 - 1;
854 			while (i1 < m && ds->J[i1 + 1] == 0)
855 				i1++;
856 			j1 = ds->J[i1 + 1] - 1;
857 			ds->J[i1] = j1;
858 			error = change(outfile, ds, args, file1, f1, file2, f2,
859 			    i0, i1, j0, j1, &flags);
860 			if (error)
861 				return (error);
862 		}
863 	} else {
864 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
865 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
866 				i0--;
867 			j0 = ds->J[i0 + 1] - 1;
868 			i1 = i0 + 1;
869 			while (i1 > 1 && ds->J[i1 - 1] == 0)
870 				i1--;
871 			j1 = ds->J[i1 - 1] + 1;
872 			ds->J[i1] = j1;
873 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
874 			    j1, j0, &flags);
875 			if (error)
876 				return (error);
877 		}
878 	}
879 	if (m == 0) {
880 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
881 		    1, ds->len[1], &flags);
882 		if (error)
883 			return (error);
884 	}
885 	if (args->diff_format == D_IFDEF) {
886 		for (;;) {
887 #define	c i0
888 			if ((c = getc(f1)) == EOF)
889 				return (0);
890 			diff_output(outfile, "%c", c);
891 		}
892 #undef c
893 	}
894 	if (ds->anychange != 0) {
895 		if (args->diff_format == D_CONTEXT)
896 			dump_context_vec(outfile, ds, args, f1, f2, flags);
897 		else if (args->diff_format == D_UNIFIED)
898 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
899 	}
900 
901 	return (0);
902 }
903 
904 static void
905 range(FILE *outfile, int a, int b, char *separator)
906 {
907 	diff_output(outfile, "%d", a > b ? b : a);
908 	if (a < b)
909 		diff_output(outfile, "%s%d", separator, b);
910 }
911 
912 static void
913 uni_range(FILE *outfile, int a, int b)
914 {
915 	if (a < b)
916 		diff_output(outfile, "%d,%d", a, b - a + 1);
917 	else if (a == b)
918 		diff_output(outfile, "%d", b);
919 	else
920 		diff_output(outfile, "%d,0", b);
921 }
922 
923 static char *
924 preadline(int fd, size_t rlen, off_t off)
925 {
926 	char *line;
927 	ssize_t nr;
928 
929 	line = malloc(rlen + 1);
930 	if (line == NULL)
931 		return NULL;
932 	if ((nr = pread(fd, line, rlen, off)) < 0) {
933 		free(line);
934 		return NULL;
935 	}
936 	if (nr > 0 && line[nr-1] == '\n')
937 		nr--;
938 	line[nr] = '\0';
939 	return (line);
940 }
941 
942 static int
943 ignoreline(char *line)
944 {
945 	return 0; /* do not ignore any lines */
946 }
947 
948 /*
949  * Indicate that there is a difference between lines a and b of the from file
950  * to get to lines c to d of the to file.  If a is greater then b then there
951  * are no lines in the from file involved and this means that there were
952  * lines appended (beginning at b).  If c is greater than d then there are
953  * lines missing from the to file.
954  */
955 static int
956 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
957     const char *file1, FILE *f1, const char *file2, FILE *f2,
958     int a, int b, int c, int d, int *pflags)
959 {
960 	size_t max_context = 64;
961 	int i;
962 
963 restart:
964 	if (args->diff_format != D_IFDEF && a > b && c > d)
965 		return (0);
966 	if (args->ignore_pats != NULL) {
967 		char *line;
968 		/*
969 		 * All lines in the change, insert, or delete must
970 		 * match an ignore pattern for the change to be
971 		 * ignored.
972 		 */
973 		if (a <= b) {		/* Changes and deletes. */
974 			for (i = a; i <= b; i++) {
975 				line = preadline(fileno(f1),
976 				    ds->ixold[i] - ds->ixold[i - 1],
977 				    ds->ixold[i - 1]);
978 				if (line == NULL)
979 					return (-1);
980 				if (!ignoreline(line))
981 					goto proceed;
982 			}
983 		}
984 		if (a > b || c <= d) {	/* Changes and inserts. */
985 			for (i = c; i <= d; i++) {
986 				line = preadline(fileno(f2),
987 				    ds->ixnew[i] - ds->ixnew[i - 1],
988 				    ds->ixnew[i - 1]);
989 				if (line == NULL)
990 					return (-1);
991 				if (!ignoreline(line))
992 					goto proceed;
993 			}
994 		}
995 		return (0);
996 	}
997 proceed:
998 	if (*pflags & D_HEADER) {
999 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1000 		*pflags &= ~D_HEADER;
1001 	}
1002 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1003 		/*
1004 		 * Allocate change records as needed.
1005 		 */
1006 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1007 			ptrdiff_t offset = ds->context_vec_ptr - ds->context_vec_start;
1008 			max_context <<= 1;
1009 			ds->context_vec_start =
1010 			    reallocarray(ds->context_vec_start, max_context,
1011 				sizeof(*ds->context_vec_start));
1012 			if (ds->context_vec_start == NULL)
1013 				return (-1);
1014 			ds->context_vec_end = ds->context_vec_start + max_context;
1015 			ds->context_vec_ptr = ds->context_vec_start + offset;
1016 		}
1017 		if (ds->anychange == 0) {
1018 			/*
1019 			 * Print the context/unidiff header first time through.
1020 			 */
1021 			print_header(outfile, ds, args, file1, file2);
1022 			ds->anychange = 1;
1023 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1024 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1025 			/*
1026 			 * If this change is more than 'diff_context' lines from the
1027 			 * previous change, dump the record and reset it.
1028 			 */
1029 			if (args->diff_format == D_CONTEXT)
1030 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1031 			else
1032 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1033 		}
1034 		ds->context_vec_ptr++;
1035 		ds->context_vec_ptr->a = a;
1036 		ds->context_vec_ptr->b = b;
1037 		ds->context_vec_ptr->c = c;
1038 		ds->context_vec_ptr->d = d;
1039 		return (0);
1040 	}
1041 	if (ds->anychange == 0)
1042 		ds->anychange = 1;
1043 	switch (args->diff_format) {
1044 	case D_BRIEF:
1045 		return (0);
1046 	case D_NORMAL:
1047 	case D_EDIT:
1048 		range(outfile, a, b, ",");
1049 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1050 		if (args->diff_format == D_NORMAL)
1051 			range(outfile, c, d, ",");
1052 		diff_output(outfile, "\n");
1053 		break;
1054 	case D_REVERSE:
1055 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1056 		range(outfile, a, b, " ");
1057 		diff_output(outfile, "\n");
1058 		break;
1059 	case D_NREVERSE:
1060 		if (a > b)
1061 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1062 		else {
1063 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1064 			if (!(c > d))
1065 				/* add changed lines */
1066 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1067 		}
1068 		break;
1069 	}
1070 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1071 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1072 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1073 			diff_output(outfile, "---\n");
1074 	}
1075 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1076 	if (i != 0 && args->diff_format == D_EDIT) {
1077 		/*
1078 		 * A non-zero return value for D_EDIT indicates that the
1079 		 * last line printed was a bare dot (".") that has been
1080 		 * escaped as ".." to prevent ed(1) from misinterpreting
1081 		 * it.  We have to add a substitute command to change this
1082 		 * back and restart where we left off.
1083 		 */
1084 		diff_output(outfile, ".\n");
1085 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1086 		b = a + i - 1;
1087 		a = b + 1;
1088 		c += i;
1089 		goto restart;
1090 	}
1091 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1092 		diff_output(outfile, ".\n");
1093 	if (ds->inifdef) {
1094 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1095 		ds->inifdef = 0;
1096 	}
1097 
1098 	return (0);
1099 }
1100 
1101 static int
1102 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1103     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1104 {
1105 	int i, j, c, lastc, col, nc;
1106 
1107 	/*
1108 	 * When doing #ifdef's, copy down to current line
1109 	 * if this is the first file, so that stuff makes it to output.
1110 	 */
1111 	if (args->diff_format == D_IFDEF && oldfile) {
1112 		long curpos = ftell(lb);
1113 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1114 		nc = f[a > b ? b : a - 1] - curpos;
1115 		for (i = 0; i < nc; i++)
1116 			diff_output(outfile, "%c", getc(lb));
1117 	}
1118 	if (a > b)
1119 		return (0);
1120 	if (args->diff_format == D_IFDEF) {
1121 		if (ds->inifdef) {
1122 			diff_output(outfile, "#else /* %s%s */\n",
1123 			    oldfile == 1 ? "!" : "", args->ifdefname);
1124 		} else {
1125 			if (oldfile)
1126 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1127 			else
1128 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1129 		}
1130 		ds->inifdef = 1 + oldfile;
1131 	}
1132 	for (i = a; i <= b; i++) {
1133 		fseek(lb, f[i - 1], SEEK_SET);
1134 		nc = f[i] - f[i - 1];
1135 		if (args->diff_format != D_IFDEF && ch != '\0') {
1136 			diff_output(outfile, "%c", ch);
1137 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1138 			    || args->diff_format == D_UNIFIED))
1139 				diff_output(outfile, "\t");
1140 			else if (args->diff_format != D_UNIFIED)
1141 				diff_output(outfile, " ");
1142 		}
1143 		col = 0;
1144 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1145 			if ((c = getc(lb)) == EOF) {
1146 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1147 				    args->diff_format == D_NREVERSE)
1148 					warnx("No newline at end of file");
1149 				else
1150 					diff_output(outfile, "\n\\ No newline at end of "
1151 					    "file\n");
1152 				return (0);
1153 			}
1154 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1155 				do {
1156 					diff_output(outfile, " ");
1157 				} while (++col & 7);
1158 			} else {
1159 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1160 				    && lastc == '.') {
1161 					/*
1162 					 * Don't print a bare "." line
1163 					 * since that will confuse ed(1).
1164 					 * Print ".." instead and return,
1165 					 * giving the caller an offset
1166 					 * from which to restart.
1167 					 */
1168 					diff_output(outfile, ".\n");
1169 					return (i - a + 1);
1170 				}
1171 				diff_output(outfile, "%c", c);
1172 				col++;
1173 			}
1174 		}
1175 	}
1176 	return (0);
1177 }
1178 
1179 /*
1180  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1181  */
1182 static int
1183 readhash(struct got_diff_state *ds, FILE *f, int flags)
1184 {
1185 	int i, t, space;
1186 	int sum;
1187 
1188 	sum = 1;
1189 	space = 0;
1190 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1191 		if (flags & D_IGNORECASE)
1192 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1193 				if (t == EOF) {
1194 					if (i == 0)
1195 						return (0);
1196 					break;
1197 				}
1198 				sum = sum * 127 + ds->chrtran[t];
1199 			}
1200 		else
1201 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1202 				if (t == EOF) {
1203 					if (i == 0)
1204 						return (0);
1205 					break;
1206 				}
1207 				sum = sum * 127 + t;
1208 			}
1209 	} else {
1210 		for (i = 0;;) {
1211 			switch (t = getc(f)) {
1212 			case '\t':
1213 			case '\r':
1214 			case '\v':
1215 			case '\f':
1216 			case ' ':
1217 				space++;
1218 				continue;
1219 			default:
1220 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1221 					i++;
1222 					space = 0;
1223 				}
1224 				sum = sum * 127 + ds->chrtran[t];
1225 				i++;
1226 				continue;
1227 			case EOF:
1228 				if (i == 0)
1229 					return (0);
1230 				/* FALLTHROUGH */
1231 			case '\n':
1232 				break;
1233 			}
1234 			break;
1235 		}
1236 	}
1237 	/*
1238 	 * There is a remote possibility that we end up with a zero sum.
1239 	 * Zero is used as an EOF marker, so return 1 instead.
1240 	 */
1241 	return (sum == 0 ? 1 : sum);
1242 }
1243 
1244 static int
1245 asciifile(FILE *f)
1246 {
1247 	unsigned char buf[BUFSIZ];
1248 	size_t cnt;
1249 
1250 	if (f == NULL)
1251 		return (1);
1252 
1253 	rewind(f);
1254 	cnt = fread(buf, 1, sizeof(buf), f);
1255 	return (memchr(buf, '\0', cnt) == NULL);
1256 }
1257 
1258 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1259 
1260 static char *
1261 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1262 {
1263 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1264 	size_t nc;
1265 	int last = ds->lastline;
1266 	char *state = NULL;
1267 
1268 	ds->lastline = pos;
1269 	while (pos > last) {
1270 		fseek(fp, f[pos - 1], SEEK_SET);
1271 		nc = f[pos] - f[pos - 1];
1272 		if (nc >= sizeof(buf))
1273 			nc = sizeof(buf) - 1;
1274 		nc = fread(buf, 1, nc, fp);
1275 		if (nc > 0) {
1276 			buf[nc] = '\0';
1277 			buf[strcspn(buf, "\n")] = '\0';
1278 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1279 				if (begins_with(buf, "private:")) {
1280 					if (!state)
1281 						state = " (private)";
1282 				} else if (begins_with(buf, "protected:")) {
1283 					if (!state)
1284 						state = " (protected)";
1285 				} else if (begins_with(buf, "public:")) {
1286 					if (!state)
1287 						state = " (public)";
1288 				} else {
1289 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1290 					if (state)
1291 						strlcat(ds->lastbuf, state,
1292 						    sizeof ds->lastbuf);
1293 					ds->lastmatchline = pos;
1294 					return ds->lastbuf;
1295 				}
1296 			}
1297 		}
1298 		pos--;
1299 	}
1300 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1301 }
1302 
1303 /* dump accumulated "context" diff changes */
1304 static void
1305 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1306     FILE *f1, FILE *f2, int flags)
1307 {
1308 	struct context_vec *cvp = ds->context_vec_start;
1309 	int lowa, upb, lowc, upd, do_output;
1310 	int a, b, c, d;
1311 	char ch, *f;
1312 
1313 	if (ds->context_vec_start > ds->context_vec_ptr)
1314 		return;
1315 
1316 	b = d = 0;		/* gcc */
1317 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1318 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1319 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1320 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1321 
1322 	diff_output(outfile, "***************");
1323 	if ((flags & D_PROTOTYPE)) {
1324 		f = match_function(ds, ds->ixold, lowa-1, f1);
1325 		if (f != NULL)
1326 			diff_output(outfile, " %s", f);
1327 	}
1328 	diff_output(outfile, "\n*** ");
1329 	range(outfile, lowa, upb, ",");
1330 	diff_output(outfile, " ****\n");
1331 
1332 	/*
1333 	 * Output changes to the "old" file.  The first loop suppresses
1334 	 * output if there were no changes to the "old" file (we'll see
1335 	 * the "old" lines as context in the "new" list).
1336 	 */
1337 	do_output = 0;
1338 	for (; cvp <= ds->context_vec_ptr; cvp++)
1339 		if (cvp->a <= cvp->b) {
1340 			cvp = ds->context_vec_start;
1341 			do_output++;
1342 			break;
1343 		}
1344 	if (do_output) {
1345 		while (cvp <= ds->context_vec_ptr) {
1346 			a = cvp->a;
1347 			b = cvp->b;
1348 			c = cvp->c;
1349 			d = cvp->d;
1350 
1351 			if (a <= b && c <= d)
1352 				ch = 'c';
1353 			else
1354 				ch = (a <= b) ? 'd' : 'a';
1355 
1356 			if (ch == 'a')
1357 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1358 			else {
1359 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1360 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1361 				    ch == 'c' ? '!' : '-', 0, flags);
1362 			}
1363 			lowa = b + 1;
1364 			cvp++;
1365 		}
1366 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1367 	}
1368 	/* output changes to the "new" file */
1369 	diff_output(outfile, "--- ");
1370 	range(outfile, lowc, upd, ",");
1371 	diff_output(outfile, " ----\n");
1372 
1373 	do_output = 0;
1374 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1375 		if (cvp->c <= cvp->d) {
1376 			cvp = ds->context_vec_start;
1377 			do_output++;
1378 			break;
1379 		}
1380 	if (do_output) {
1381 		while (cvp <= ds->context_vec_ptr) {
1382 			a = cvp->a;
1383 			b = cvp->b;
1384 			c = cvp->c;
1385 			d = cvp->d;
1386 
1387 			if (a <= b && c <= d)
1388 				ch = 'c';
1389 			else
1390 				ch = (a <= b) ? 'd' : 'a';
1391 
1392 			if (ch == 'd')
1393 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1394 			else {
1395 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1396 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1397 				    ch == 'c' ? '!' : '+', 0, flags);
1398 			}
1399 			lowc = d + 1;
1400 			cvp++;
1401 		}
1402 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1403 	}
1404 	ds->context_vec_ptr = ds->context_vec_start - 1;
1405 }
1406 
1407 /* dump accumulated "unified" diff changes */
1408 static void
1409 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1410     FILE *f1, FILE *f2, int flags)
1411 {
1412 	struct context_vec *cvp = ds->context_vec_start;
1413 	int lowa, upb, lowc, upd;
1414 	int a, b, c, d;
1415 	char ch, *f;
1416 
1417 	if (ds->context_vec_start > ds->context_vec_ptr)
1418 		return;
1419 
1420 	b = d = 0;		/* gcc */
1421 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1422 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1423 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1424 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1425 
1426 	diff_output(outfile, "@@ -");
1427 	uni_range(outfile, lowa, upb);
1428 	diff_output(outfile, " +");
1429 	uni_range(outfile, lowc, upd);
1430 	diff_output(outfile, " @@");
1431 	if ((flags & D_PROTOTYPE)) {
1432 		f = match_function(ds, ds->ixold, lowa-1, f1);
1433 		if (f != NULL)
1434 			diff_output(outfile, " %s", f);
1435 	}
1436 	diff_output(outfile, "\n");
1437 
1438 	/*
1439 	 * Output changes in "unified" diff format--the old and new lines
1440 	 * are printed together.
1441 	 */
1442 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1443 		a = cvp->a;
1444 		b = cvp->b;
1445 		c = cvp->c;
1446 		d = cvp->d;
1447 
1448 		/*
1449 		 * c: both new and old changes
1450 		 * d: only changes in the old file
1451 		 * a: only changes in the new file
1452 		 */
1453 		if (a <= b && c <= d)
1454 			ch = 'c';
1455 		else
1456 			ch = (a <= b) ? 'd' : 'a';
1457 
1458 		switch (ch) {
1459 		case 'c':
1460 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1461 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1462 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1463 			break;
1464 		case 'd':
1465 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1466 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1467 			break;
1468 		case 'a':
1469 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1470 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1471 			break;
1472 		}
1473 		lowa = b + 1;
1474 		lowc = d + 1;
1475 	}
1476 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1477 
1478 	ds->context_vec_ptr = ds->context_vec_start - 1;
1479 }
1480 
1481 static void
1482 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1483     const char *file1, const char *file2)
1484 {
1485 	if (args->label[0] != NULL)
1486 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1487 		    args->label[0]);
1488 	else
1489 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1490 		    file1, ctime(&ds->stb1.st_mtime));
1491 	if (args->label[1] != NULL)
1492 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1493 		    args->label[1]);
1494 	else
1495 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1496 		    file2, ctime(&ds->stb2.st_mtime));
1497 }