Gotweb

Blob

Date:: Mon Feb 12 22:25:38 2018 UTC
Message:: rename private header filenames so they are in got_ namespace
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_diff_priv.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 };
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 static void	 diff_output(FILE *, const char *, ...);
187 static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
189 static void	 range(FILE *, int, int, char *);
190 static void	 uni_range(FILE *, int, int);
191 static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void	 prune(struct got_diff_state *);
195 static void	 equiv(struct line *, int, struct line *, int, int *);
196 static void	 unravel(struct got_diff_state *, int);
197 static int	 unsort(struct line *, int, int *);
198 static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void	 sort(struct line *, int);
200 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int	 ignoreline(char *);
202 static int	 asciifile(FILE *);
203 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int	 newcand(struct got_diff_state *, int, int, int, int *);
205 static int	 search(struct got_diff_state *, int *, int, int);
206 static int	 skipline(FILE *);
207 static int	 isqrt(int);
208 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int	 readhash(struct got_diff_state *, FILE *, int);
210 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char	*preadline(int, size_t, off_t);
213 
214 /*
215  * chrtran points to one of 2 translation tables: cup2low if folding upper to
216  * lower case clow2low if not folding case
217  */
218 u_char clow2low[256] = {
219 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 	0xfd, 0xfe, 0xff
243 };
244 
245 u_char cup2low[256] = {
246 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 	0xfd, 0xfe, 0xff
270 };
271 
272 static void
273 diff_output(FILE *outfile, const char *fmt, ...)
274 {
275 	va_list ap;
276 
277 	va_start(ap, fmt);
278 	vfprintf(outfile, fmt, ap);
279 	va_end(ap);
280 }
281 
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
285 {
286 	const struct got_error *err = NULL;
287 	int i;
288 
289 	*rval = D_SAME;
290 	ds->anychange = 0;
291 	ds->lastline = 0;
292 	ds->lastmatchline = 0;
293 	ds->context_vec_ptr = ds->context_vec_start - 1;
294 	if (flags & D_IGNORECASE)
295 		ds->chrtran = cup2low;
296 	else
297 		ds->chrtran = clow2low;
298 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
299 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
300 		return NULL;
301 	}
302 	if (flags & D_EMPTY1)
303 		f1 = fopen(_PATH_DEVNULL, "r");
304 	else if (f1 == NULL) {
305 		args->status |= 2;
306 		goto closem;
307 	}
308 
309 	if (flags & D_EMPTY2)
310 		f2 = fopen(_PATH_DEVNULL, "r");
311 	else if (f2 == NULL) {
312 		args->status |= 2;
313 		goto closem;
314 	}
315 
316 	switch (files_differ(ds, f1, f2, flags)) {
317 	case 0:
318 		goto closem;
319 	case 1:
320 		break;
321 	default:
322 		/* error */
323 		args->status |= 2;
324 		goto closem;
325 	}
326 
327 	if ((flags & D_FORCEASCII) == 0 &&
328 	    (!asciifile(f1) || !asciifile(f2))) {
329 		*rval = D_BINARY;
330 		args->status |= 1;
331 		goto closem;
332 	}
333 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 		err = got_error(GOT_ERR_NO_MEM);
335 		goto closem;
336 	}
337 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 		err = got_error(GOT_ERR_NO_MEM);
339 		goto closem;
340 	}
341 
342 	prune(ds);
343 	sort(ds->sfile[0], ds->slen[0]);
344 	sort(ds->sfile[1], ds->slen[1]);
345 
346 	ds->member = (int *)ds->file[1];
347 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 	ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
349 	if (ds->member == NULL) {
350 		err = got_error(GOT_ERR_NO_MEM);
351 		goto closem;
352 	}
353 
354 	ds->class = (int *)ds->file[0];
355 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
356 		err = got_error(GOT_ERR_NO_MEM);
357 		goto closem;
358 	}
359 	ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
360 	if (ds->class == NULL) {
361 		err = got_error(GOT_ERR_NO_MEM);
362 		goto closem;
363 	}
364 
365 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
366 	if (ds->klist == NULL) {
367 		err = got_error(GOT_ERR_NO_MEM);
368 		goto closem;
369 	}
370 	ds->clen = 0;
371 	ds->clistlen = 100;
372 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
373 	if (ds->clist == NULL) {
374 		err = got_error(GOT_ERR_NO_MEM);
375 		goto closem;
376 	}
377 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
378 	free(ds->member);
379 	free(ds->class);
380 	if (i < 0) {
381 		err = got_error(GOT_ERR_NO_MEM);
382 		goto closem;
383 	}
384 
385 	ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
386 	if (ds->J == NULL) {
387 		err = got_error(GOT_ERR_NO_MEM);
388 		goto closem;
389 	}
390 	unravel(ds, ds->klist[i]);
391 	free(ds->clist);
392 	ds->clist = NULL;
393 	free(ds->klist);
394 	ds->klist = NULL;
395 
396 	ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
397 	if (ds->ixold == NULL) {
398 		err = got_error(GOT_ERR_NO_MEM);
399 		goto closem;
400 	}
401 	ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
402 	if (ds->ixnew == NULL) {
403 		err = got_error(GOT_ERR_NO_MEM);
404 		goto closem;
405 	}
406 	check(ds, f1, f2, flags);
407 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
408 	    flags))
409 		err = got_error(GOT_ERR_NO_MEM);
410 closem:
411 	if (ds->anychange) {
412 		args->status |= 1;
413 		if (*rval == D_SAME)
414 			*rval = D_DIFFER;
415 	}
416 	if (f1 != NULL)
417 		fclose(f1);
418 	if (f2 != NULL)
419 		fclose(f2);
420 
421 	return (err);
422 }
423 
424 /*
425  * Check to see if the given files differ.
426  * Returns 0 if they are the same, 1 if different, and -1 on error.
427  * XXX - could use code from cmp(1) [faster]
428  */
429 static int
430 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
431 {
432 	char buf1[BUFSIZ], buf2[BUFSIZ];
433 	size_t i, j;
434 
435 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
436 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
437 		return (1);
438 	for (;;) {
439 		i = fread(buf1, 1, sizeof(buf1), f1);
440 		j = fread(buf2, 1, sizeof(buf2), f2);
441 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
442 			return (-1);
443 		if (i != j)
444 			return (1);
445 		if (i == 0)
446 			return (0);
447 		if (memcmp(buf1, buf2, i) != 0)
448 			return (1);
449 	}
450 }
451 
452 static int
453 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
454 {
455 	struct line *p;
456 	int j, h;
457 	size_t sz;
458 
459 	rewind(fd);
460 
461 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
462 	if (sz < 100)
463 		sz = 100;
464 
465 	p = calloc(sz + 3, sizeof(*p));
466 	if (p == NULL)
467 		return (-1);
468 	for (j = 0; (h = readhash(ds, fd, flags));) {
469 		if (j == sz) {
470 			sz = sz * 3 / 2;
471 			p = reallocarray(p, sz + 3, sizeof(*p));
472 			if (p == NULL)
473 				return (-1);
474 		}
475 		p[++j].value = h;
476 	}
477 	ds->len[i] = j;
478 	ds->file[i] = p;
479 
480 	return (0);
481 }
482 
483 static void
484 prune(struct got_diff_state *ds)
485 {
486 	int i, j;
487 
488 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
489 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
490 	    ds->pref++)
491 		;
492 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
493 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
494 	    ds->suff++)
495 		;
496 	for (j = 0; j < 2; j++) {
497 		ds->sfile[j] = ds->file[j] + ds->pref;
498 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
499 		for (i = 0; i <= ds->slen[j]; i++)
500 			ds->sfile[j][i].serial = i;
501 	}
502 }
503 
504 static void
505 equiv(struct line *a, int n, struct line *b, int m, int *c)
506 {
507 	int i, j;
508 
509 	i = j = 1;
510 	while (i <= n && j <= m) {
511 		if (a[i].value < b[j].value)
512 			a[i++].value = 0;
513 		else if (a[i].value == b[j].value)
514 			a[i++].value = j;
515 		else
516 			j++;
517 	}
518 	while (i <= n)
519 		a[i++].value = 0;
520 	b[m + 1].value = 0;
521 	j = 0;
522 	while (++j <= m) {
523 		c[j] = -b[j].serial;
524 		while (b[j + 1].value == b[j].value) {
525 			j++;
526 			c[j] = b[j].serial;
527 		}
528 	}
529 	c[j] = -1;
530 }
531 
532 /* Code taken from ping.c */
533 static int
534 isqrt(int n)
535 {
536 	int y, x = 1;
537 
538 	if (n == 0)
539 		return (0);
540 
541 	do { /* newton was a stinker */
542 		y = x;
543 		x = n / x;
544 		x += y;
545 		x /= 2;
546 	} while ((x - y) > 1 || (x - y) < -1);
547 
548 	return (x);
549 }
550 
551 static int
552 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
553 {
554 	int i, k, y, j, l;
555 	int oldc, tc, oldl, sq;
556 	u_int numtries, bound;
557 	int error;
558 
559 	if (flags & D_MINIMAL)
560 		bound = UINT_MAX;
561 	else {
562 		sq = isqrt(n);
563 		bound = MAXIMUM(256, sq);
564 	}
565 
566 	k = 0;
567 	c[0] = newcand(ds, 0, 0, 0, &error);
568 	if (error)
569 		return -1;
570 	for (i = 1; i <= n; i++) {
571 		j = a[i];
572 		if (j == 0)
573 			continue;
574 		y = -b[j];
575 		oldl = 0;
576 		oldc = c[0];
577 		numtries = 0;
578 		do {
579 			if (y <= ds->clist[oldc].y)
580 				continue;
581 			l = search(ds, c, k, y);
582 			if (l != oldl + 1)
583 				oldc = c[l - 1];
584 			if (l <= k) {
585 				if (ds->clist[c[l]].y <= y)
586 					continue;
587 				tc = c[l];
588 				c[l] = newcand(ds, i, y, oldc, &error);
589 				if (error)
590 					return -1;
591 				oldc = tc;
592 				oldl = l;
593 				numtries++;
594 			} else {
595 				c[l] = newcand(ds, i, y, oldc, &error);
596 				if (error)
597 					return -1;
598 				k++;
599 				break;
600 			}
601 		} while ((y = b[++j]) > 0 && numtries < bound);
602 	}
603 	return (k);
604 }
605 
606 static int
607 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
608 {
609 	struct cand *q;
610 
611 	if (ds->clen == ds->clistlen) {
612 		ds->clistlen = ds->clistlen * 11 / 10;
613 		ds->clist = reallocarray(ds->clist, ds->clistlen,
614 		    sizeof(*ds->clist));
615 		if (ds->clist == NULL) {
616 			*errorp = -1;
617 			return 0;
618 		}
619 	}
620 	q = ds->clist + ds->clen;
621 	q->x = x;
622 	q->y = y;
623 	q->pred = pred;
624 	*errorp = 0;
625 	return (ds->clen++);
626 }
627 
628 static int
629 search(struct got_diff_state *ds, int *c, int k, int y)
630 {
631 	int i, j, l, t;
632 
633 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
634 		return (k + 1);
635 	i = 0;
636 	j = k + 1;
637 	for (;;) {
638 		l = (i + j) / 2;
639 		if (l <= i)
640 			break;
641 		t = ds->clist[c[l]].y;
642 		if (t > y)
643 			j = l;
644 		else if (t < y)
645 			i = l;
646 		else
647 			return (l);
648 	}
649 	return (l + 1);
650 }
651 
652 static void
653 unravel(struct got_diff_state *ds, int p)
654 {
655 	struct cand *q;
656 	int i;
657 
658 	for (i = 0; i <= ds->len[0]; i++)
659 		ds->J[i] = i <= ds->pref ? i :
660 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
661 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
662 		ds->J[q->x + ds->pref] = q->y + ds->pref;
663 }
664 
665 /*
666  * Check does double duty:
667  *  1.	ferret out any fortuitous correspondences due
668  *	to confounding by hashing (which result in "jackpot")
669  *  2.  collect random access indexes to the two files
670  */
671 static void
672 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
673 {
674 	int i, j, jackpot, c, d;
675 	long ctold, ctnew;
676 
677 	rewind(f1);
678 	rewind(f2);
679 	j = 1;
680 	ds->ixold[0] = ds->ixnew[0] = 0;
681 	jackpot = 0;
682 	ctold = ctnew = 0;
683 	for (i = 1; i <= ds->len[0]; i++) {
684 		if (ds->J[i] == 0) {
685 			ds->ixold[i] = ctold += skipline(f1);
686 			continue;
687 		}
688 		while (j < ds->J[i]) {
689 			ds->ixnew[j] = ctnew += skipline(f2);
690 			j++;
691 		}
692 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
693 			for (;;) {
694 				c = getc(f1);
695 				d = getc(f2);
696 				/*
697 				 * GNU diff ignores a missing newline
698 				 * in one file for -b or -w.
699 				 */
700 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
701 					if (c == EOF && d == '\n') {
702 						ctnew++;
703 						break;
704 					} else if (c == '\n' && d == EOF) {
705 						ctold++;
706 						break;
707 					}
708 				}
709 				ctold++;
710 				ctnew++;
711 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
712 				    isspace(d)) {
713 					do {
714 						if (c == '\n')
715 							break;
716 						ctold++;
717 					} while (isspace(c = getc(f1)));
718 					do {
719 						if (d == '\n')
720 							break;
721 						ctnew++;
722 					} while (isspace(d = getc(f2)));
723 				} else if ((flags & D_IGNOREBLANKS)) {
724 					while (isspace(c) && c != '\n') {
725 						c = getc(f1);
726 						ctold++;
727 					}
728 					while (isspace(d) && d != '\n') {
729 						d = getc(f2);
730 						ctnew++;
731 					}
732 				}
733 				if (ds->chrtran[c] != ds->chrtran[d]) {
734 					jackpot++;
735 					ds->J[i] = 0;
736 					if (c != '\n' && c != EOF)
737 						ctold += skipline(f1);
738 					if (d != '\n' && c != EOF)
739 						ctnew += skipline(f2);
740 					break;
741 				}
742 				if (c == '\n' || c == EOF)
743 					break;
744 			}
745 		} else {
746 			for (;;) {
747 				ctold++;
748 				ctnew++;
749 				if ((c = getc(f1)) != (d = getc(f2))) {
750 					/* jackpot++; */
751 					ds->J[i] = 0;
752 					if (c != '\n' && c != EOF)
753 						ctold += skipline(f1);
754 					if (d != '\n' && c != EOF)
755 						ctnew += skipline(f2);
756 					break;
757 				}
758 				if (c == '\n' || c == EOF)
759 					break;
760 			}
761 		}
762 		ds->ixold[i] = ctold;
763 		ds->ixnew[j] = ctnew;
764 		j++;
765 	}
766 	for (; j <= ds->len[1]; j++)
767 		ds->ixnew[j] = ctnew += skipline(f2);
768 	/*
769 	 * if (jackpot)
770 	 *	fprintf(stderr, "jackpot\n");
771 	 */
772 }
773 
774 /* shellsort CACM #201 */
775 static void
776 sort(struct line *a, int n)
777 {
778 	struct line *ai, *aim, w;
779 	int j, m = 0, k;
780 
781 	if (n == 0)
782 		return;
783 	for (j = 1; j <= n; j *= 2)
784 		m = 2 * j - 1;
785 	for (m /= 2; m != 0; m /= 2) {
786 		k = n - m;
787 		for (j = 1; j <= k; j++) {
788 			for (ai = &a[j]; ai > a; ai -= m) {
789 				aim = &ai[m];
790 				if (aim < ai)
791 					break;	/* wraparound */
792 				if (aim->value > ai[0].value ||
793 				    (aim->value == ai[0].value &&
794 					aim->serial > ai[0].serial))
795 					break;
796 				w.value = ai[0].value;
797 				ai[0].value = aim->value;
798 				aim->value = w.value;
799 				w.serial = ai[0].serial;
800 				ai[0].serial = aim->serial;
801 				aim->serial = w.serial;
802 			}
803 		}
804 	}
805 }
806 
807 static int
808 unsort(struct line *f, int l, int *b)
809 {
810 	int *a, i;
811 
812 	a = calloc(l + 1, sizeof(*a));
813 	if (a == NULL)
814 		return (-1);
815 	for (i = 1; i <= l; i++)
816 		a[f[i].serial] = f[i].value;
817 	for (i = 1; i <= l; i++)
818 		b[i] = a[i];
819 	free(a);
820 
821 	return (0);
822 }
823 
824 static int
825 skipline(FILE *f)
826 {
827 	int i, c;
828 
829 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
830 		continue;
831 	return (i);
832 }
833 
834 static int
835 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
836     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
837 {
838 	int m, i0, i1, j0, j1;
839 	int error = 0;
840 
841 	rewind(f1);
842 	rewind(f2);
843 	m = ds->len[0];
844 	ds->J[0] = 0;
845 	ds->J[m + 1] = ds->len[1] + 1;
846 	if (args->diff_format != D_EDIT) {
847 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
848 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
849 				i0++;
850 			j0 = ds->J[i0 - 1] + 1;
851 			i1 = i0 - 1;
852 			while (i1 < m && ds->J[i1 + 1] == 0)
853 				i1++;
854 			j1 = ds->J[i1 + 1] - 1;
855 			ds->J[i1] = j1;
856 			error = change(outfile, ds, args, file1, f1, file2, f2,
857 			    i0, i1, j0, j1, &flags);
858 			if (error)
859 				return (error);
860 		}
861 	} else {
862 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
863 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
864 				i0--;
865 			j0 = ds->J[i0 + 1] - 1;
866 			i1 = i0 + 1;
867 			while (i1 > 1 && ds->J[i1 - 1] == 0)
868 				i1--;
869 			j1 = ds->J[i1 - 1] + 1;
870 			ds->J[i1] = j1;
871 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
872 			    j1, j0, &flags);
873 			if (error)
874 				return (error);
875 		}
876 	}
877 	if (m == 0) {
878 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
879 		    1, ds->len[1], &flags);
880 		if (error)
881 			return (error);
882 	}
883 	if (args->diff_format == D_IFDEF) {
884 		for (;;) {
885 #define	c i0
886 			if ((c = getc(f1)) == EOF)
887 				return (0);
888 			diff_output(outfile, "%c", c);
889 		}
890 #undef c
891 	}
892 	if (ds->anychange != 0) {
893 		if (args->diff_format == D_CONTEXT)
894 			dump_context_vec(outfile, ds, args, f1, f2, flags);
895 		else if (args->diff_format == D_UNIFIED)
896 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
897 	}
898 
899 	return (0);
900 }
901 
902 static void
903 range(FILE *outfile, int a, int b, char *separator)
904 {
905 	diff_output(outfile, "%d", a > b ? b : a);
906 	if (a < b)
907 		diff_output(outfile, "%s%d", separator, b);
908 }
909 
910 static void
911 uni_range(FILE *outfile, int a, int b)
912 {
913 	if (a < b)
914 		diff_output(outfile, "%d,%d", a, b - a + 1);
915 	else if (a == b)
916 		diff_output(outfile, "%d", b);
917 	else
918 		diff_output(outfile, "%d,0", b);
919 }
920 
921 static char *
922 preadline(int fd, size_t rlen, off_t off)
923 {
924 	char *line;
925 	ssize_t nr;
926 
927 	line = malloc(rlen + 1);
928 	if (line == NULL)
929 		return NULL;
930 	if ((nr = pread(fd, line, rlen, off)) < 0) {
931 		free(line);
932 		return NULL;
933 	}
934 	if (nr > 0 && line[nr-1] == '\n')
935 		nr--;
936 	line[nr] = '\0';
937 	return (line);
938 }
939 
940 static int
941 ignoreline(char *line)
942 {
943 	return 0; /* do not ignore any lines */
944 }
945 
946 /*
947  * Indicate that there is a difference between lines a and b of the from file
948  * to get to lines c to d of the to file.  If a is greater then b then there
949  * are no lines in the from file involved and this means that there were
950  * lines appended (beginning at b).  If c is greater than d then there are
951  * lines missing from the to file.
952  */
953 static int
954 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
955     const char *file1, FILE *f1, const char *file2, FILE *f2,
956     int a, int b, int c, int d, int *pflags)
957 {
958 	size_t max_context = 64;
959 	int i;
960 
961 restart:
962 	if (args->diff_format != D_IFDEF && a > b && c > d)
963 		return (0);
964 	if (args->ignore_pats != NULL) {
965 		char *line;
966 		/*
967 		 * All lines in the change, insert, or delete must
968 		 * match an ignore pattern for the change to be
969 		 * ignored.
970 		 */
971 		if (a <= b) {		/* Changes and deletes. */
972 			for (i = a; i <= b; i++) {
973 				line = preadline(fileno(f1),
974 				    ds->ixold[i] - ds->ixold[i - 1],
975 				    ds->ixold[i - 1]);
976 				if (line == NULL)
977 					return (-1);
978 				if (!ignoreline(line))
979 					goto proceed;
980 			}
981 		}
982 		if (a > b || c <= d) {	/* Changes and inserts. */
983 			for (i = c; i <= d; i++) {
984 				line = preadline(fileno(f2),
985 				    ds->ixnew[i] - ds->ixnew[i - 1],
986 				    ds->ixnew[i - 1]);
987 				if (line == NULL)
988 					return (-1);
989 				if (!ignoreline(line))
990 					goto proceed;
991 			}
992 		}
993 		return (0);
994 	}
995 proceed:
996 	if (*pflags & D_HEADER) {
997 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
998 		*pflags &= ~D_HEADER;
999 	}
1000 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1001 		/*
1002 		 * Allocate change records as needed.
1003 		 */
1004 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1005 			ptrdiff_t offset = ds->context_vec_ptr - ds->context_vec_start;
1006 			max_context <<= 1;
1007 			ds->context_vec_start =
1008 			    reallocarray(ds->context_vec_start, max_context,
1009 				sizeof(*ds->context_vec_start));
1010 			if (ds->context_vec_start == NULL)
1011 				return (-1);
1012 			ds->context_vec_end = ds->context_vec_start + max_context;
1013 			ds->context_vec_ptr = ds->context_vec_start + offset;
1014 		}
1015 		if (ds->anychange == 0) {
1016 			/*
1017 			 * Print the context/unidiff header first time through.
1018 			 */
1019 			print_header(outfile, ds, args, file1, file2);
1020 			ds->anychange = 1;
1021 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1022 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1023 			/*
1024 			 * If this change is more than 'diff_context' lines from the
1025 			 * previous change, dump the record and reset it.
1026 			 */
1027 			if (args->diff_format == D_CONTEXT)
1028 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1029 			else
1030 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1031 		}
1032 		ds->context_vec_ptr++;
1033 		ds->context_vec_ptr->a = a;
1034 		ds->context_vec_ptr->b = b;
1035 		ds->context_vec_ptr->c = c;
1036 		ds->context_vec_ptr->d = d;
1037 		return (0);
1038 	}
1039 	if (ds->anychange == 0)
1040 		ds->anychange = 1;
1041 	switch (args->diff_format) {
1042 	case D_BRIEF:
1043 		return (0);
1044 	case D_NORMAL:
1045 	case D_EDIT:
1046 		range(outfile, a, b, ",");
1047 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1048 		if (args->diff_format == D_NORMAL)
1049 			range(outfile, c, d, ",");
1050 		diff_output(outfile, "\n");
1051 		break;
1052 	case D_REVERSE:
1053 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1054 		range(outfile, a, b, " ");
1055 		diff_output(outfile, "\n");
1056 		break;
1057 	case D_NREVERSE:
1058 		if (a > b)
1059 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1060 		else {
1061 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1062 			if (!(c > d))
1063 				/* add changed lines */
1064 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1065 		}
1066 		break;
1067 	}
1068 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1069 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1070 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1071 			diff_output(outfile, "---\n");
1072 	}
1073 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1074 	if (i != 0 && args->diff_format == D_EDIT) {
1075 		/*
1076 		 * A non-zero return value for D_EDIT indicates that the
1077 		 * last line printed was a bare dot (".") that has been
1078 		 * escaped as ".." to prevent ed(1) from misinterpreting
1079 		 * it.  We have to add a substitute command to change this
1080 		 * back and restart where we left off.
1081 		 */
1082 		diff_output(outfile, ".\n");
1083 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1084 		b = a + i - 1;
1085 		a = b + 1;
1086 		c += i;
1087 		goto restart;
1088 	}
1089 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1090 		diff_output(outfile, ".\n");
1091 	if (ds->inifdef) {
1092 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1093 		ds->inifdef = 0;
1094 	}
1095 
1096 	return (0);
1097 }
1098 
1099 static int
1100 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1101     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1102 {
1103 	int i, j, c, lastc, col, nc;
1104 
1105 	/*
1106 	 * When doing #ifdef's, copy down to current line
1107 	 * if this is the first file, so that stuff makes it to output.
1108 	 */
1109 	if (args->diff_format == D_IFDEF && oldfile) {
1110 		long curpos = ftell(lb);
1111 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1112 		nc = f[a > b ? b : a - 1] - curpos;
1113 		for (i = 0; i < nc; i++)
1114 			diff_output(outfile, "%c", getc(lb));
1115 	}
1116 	if (a > b)
1117 		return (0);
1118 	if (args->diff_format == D_IFDEF) {
1119 		if (ds->inifdef) {
1120 			diff_output(outfile, "#else /* %s%s */\n",
1121 			    oldfile == 1 ? "!" : "", args->ifdefname);
1122 		} else {
1123 			if (oldfile)
1124 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1125 			else
1126 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1127 		}
1128 		ds->inifdef = 1 + oldfile;
1129 	}
1130 	for (i = a; i <= b; i++) {
1131 		fseek(lb, f[i - 1], SEEK_SET);
1132 		nc = f[i] - f[i - 1];
1133 		if (args->diff_format != D_IFDEF && ch != '\0') {
1134 			diff_output(outfile, "%c", ch);
1135 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1136 			    || args->diff_format == D_UNIFIED))
1137 				diff_output(outfile, "\t");
1138 			else if (args->diff_format != D_UNIFIED)
1139 				diff_output(outfile, " ");
1140 		}
1141 		col = 0;
1142 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1143 			if ((c = getc(lb)) == EOF) {
1144 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1145 				    args->diff_format == D_NREVERSE)
1146 					warnx("No newline at end of file");
1147 				else
1148 					diff_output(outfile, "\n\\ No newline at end of "
1149 					    "file\n");
1150 				return (0);
1151 			}
1152 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1153 				do {
1154 					diff_output(outfile, " ");
1155 				} while (++col & 7);
1156 			} else {
1157 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1158 				    && lastc == '.') {
1159 					/*
1160 					 * Don't print a bare "." line
1161 					 * since that will confuse ed(1).
1162 					 * Print ".." instead and return,
1163 					 * giving the caller an offset
1164 					 * from which to restart.
1165 					 */
1166 					diff_output(outfile, ".\n");
1167 					return (i - a + 1);
1168 				}
1169 				diff_output(outfile, "%c", c);
1170 				col++;
1171 			}
1172 		}
1173 	}
1174 	return (0);
1175 }
1176 
1177 /*
1178  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1179  */
1180 static int
1181 readhash(struct got_diff_state *ds, FILE *f, int flags)
1182 {
1183 	int i, t, space;
1184 	int sum;
1185 
1186 	sum = 1;
1187 	space = 0;
1188 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1189 		if (flags & D_IGNORECASE)
1190 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1191 				if (t == EOF) {
1192 					if (i == 0)
1193 						return (0);
1194 					break;
1195 				}
1196 				sum = sum * 127 + ds->chrtran[t];
1197 			}
1198 		else
1199 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1200 				if (t == EOF) {
1201 					if (i == 0)
1202 						return (0);
1203 					break;
1204 				}
1205 				sum = sum * 127 + t;
1206 			}
1207 	} else {
1208 		for (i = 0;;) {
1209 			switch (t = getc(f)) {
1210 			case '\t':
1211 			case '\r':
1212 			case '\v':
1213 			case '\f':
1214 			case ' ':
1215 				space++;
1216 				continue;
1217 			default:
1218 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1219 					i++;
1220 					space = 0;
1221 				}
1222 				sum = sum * 127 + ds->chrtran[t];
1223 				i++;
1224 				continue;
1225 			case EOF:
1226 				if (i == 0)
1227 					return (0);
1228 				/* FALLTHROUGH */
1229 			case '\n':
1230 				break;
1231 			}
1232 			break;
1233 		}
1234 	}
1235 	/*
1236 	 * There is a remote possibility that we end up with a zero sum.
1237 	 * Zero is used as an EOF marker, so return 1 instead.
1238 	 */
1239 	return (sum == 0 ? 1 : sum);
1240 }
1241 
1242 static int
1243 asciifile(FILE *f)
1244 {
1245 	unsigned char buf[BUFSIZ];
1246 	size_t cnt;
1247 
1248 	if (f == NULL)
1249 		return (1);
1250 
1251 	rewind(f);
1252 	cnt = fread(buf, 1, sizeof(buf), f);
1253 	return (memchr(buf, '\0', cnt) == NULL);
1254 }
1255 
1256 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1257 
1258 static char *
1259 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1260 {
1261 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1262 	size_t nc;
1263 	int last = ds->lastline;
1264 	char *state = NULL;
1265 
1266 	ds->lastline = pos;
1267 	while (pos > last) {
1268 		fseek(fp, f[pos - 1], SEEK_SET);
1269 		nc = f[pos] - f[pos - 1];
1270 		if (nc >= sizeof(buf))
1271 			nc = sizeof(buf) - 1;
1272 		nc = fread(buf, 1, nc, fp);
1273 		if (nc > 0) {
1274 			buf[nc] = '\0';
1275 			buf[strcspn(buf, "\n")] = '\0';
1276 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1277 				if (begins_with(buf, "private:")) {
1278 					if (!state)
1279 						state = " (private)";
1280 				} else if (begins_with(buf, "protected:")) {
1281 					if (!state)
1282 						state = " (protected)";
1283 				} else if (begins_with(buf, "public:")) {
1284 					if (!state)
1285 						state = " (public)";
1286 				} else {
1287 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1288 					if (state)
1289 						strlcat(ds->lastbuf, state,
1290 						    sizeof ds->lastbuf);
1291 					ds->lastmatchline = pos;
1292 					return ds->lastbuf;
1293 				}
1294 			}
1295 		}
1296 		pos--;
1297 	}
1298 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1299 }
1300 
1301 /* dump accumulated "context" diff changes */
1302 static void
1303 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1304     FILE *f1, FILE *f2, int flags)
1305 {
1306 	struct context_vec *cvp = ds->context_vec_start;
1307 	int lowa, upb, lowc, upd, do_output;
1308 	int a, b, c, d;
1309 	char ch, *f;
1310 
1311 	if (ds->context_vec_start > ds->context_vec_ptr)
1312 		return;
1313 
1314 	b = d = 0;		/* gcc */
1315 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1316 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1317 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1318 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1319 
1320 	diff_output(outfile, "***************");
1321 	if ((flags & D_PROTOTYPE)) {
1322 		f = match_function(ds, ds->ixold, lowa-1, f1);
1323 		if (f != NULL)
1324 			diff_output(outfile, " %s", f);
1325 	}
1326 	diff_output(outfile, "\n*** ");
1327 	range(outfile, lowa, upb, ",");
1328 	diff_output(outfile, " ****\n");
1329 
1330 	/*
1331 	 * Output changes to the "old" file.  The first loop suppresses
1332 	 * output if there were no changes to the "old" file (we'll see
1333 	 * the "old" lines as context in the "new" list).
1334 	 */
1335 	do_output = 0;
1336 	for (; cvp <= ds->context_vec_ptr; cvp++)
1337 		if (cvp->a <= cvp->b) {
1338 			cvp = ds->context_vec_start;
1339 			do_output++;
1340 			break;
1341 		}
1342 	if (do_output) {
1343 		while (cvp <= ds->context_vec_ptr) {
1344 			a = cvp->a;
1345 			b = cvp->b;
1346 			c = cvp->c;
1347 			d = cvp->d;
1348 
1349 			if (a <= b && c <= d)
1350 				ch = 'c';
1351 			else
1352 				ch = (a <= b) ? 'd' : 'a';
1353 
1354 			if (ch == 'a')
1355 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1356 			else {
1357 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1358 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1359 				    ch == 'c' ? '!' : '-', 0, flags);
1360 			}
1361 			lowa = b + 1;
1362 			cvp++;
1363 		}
1364 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1365 	}
1366 	/* output changes to the "new" file */
1367 	diff_output(outfile, "--- ");
1368 	range(outfile, lowc, upd, ",");
1369 	diff_output(outfile, " ----\n");
1370 
1371 	do_output = 0;
1372 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1373 		if (cvp->c <= cvp->d) {
1374 			cvp = ds->context_vec_start;
1375 			do_output++;
1376 			break;
1377 		}
1378 	if (do_output) {
1379 		while (cvp <= ds->context_vec_ptr) {
1380 			a = cvp->a;
1381 			b = cvp->b;
1382 			c = cvp->c;
1383 			d = cvp->d;
1384 
1385 			if (a <= b && c <= d)
1386 				ch = 'c';
1387 			else
1388 				ch = (a <= b) ? 'd' : 'a';
1389 
1390 			if (ch == 'd')
1391 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1392 			else {
1393 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1394 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1395 				    ch == 'c' ? '!' : '+', 0, flags);
1396 			}
1397 			lowc = d + 1;
1398 			cvp++;
1399 		}
1400 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1401 	}
1402 	ds->context_vec_ptr = ds->context_vec_start - 1;
1403 }
1404 
1405 /* dump accumulated "unified" diff changes */
1406 static void
1407 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1408     FILE *f1, FILE *f2, int flags)
1409 {
1410 	struct context_vec *cvp = ds->context_vec_start;
1411 	int lowa, upb, lowc, upd;
1412 	int a, b, c, d;
1413 	char ch, *f;
1414 
1415 	if (ds->context_vec_start > ds->context_vec_ptr)
1416 		return;
1417 
1418 	b = d = 0;		/* gcc */
1419 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1420 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1421 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1422 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1423 
1424 	diff_output(outfile, "@@ -");
1425 	uni_range(outfile, lowa, upb);
1426 	diff_output(outfile, " +");
1427 	uni_range(outfile, lowc, upd);
1428 	diff_output(outfile, " @@");
1429 	if ((flags & D_PROTOTYPE)) {
1430 		f = match_function(ds, ds->ixold, lowa-1, f1);
1431 		if (f != NULL)
1432 			diff_output(outfile, " %s", f);
1433 	}
1434 	diff_output(outfile, "\n");
1435 
1436 	/*
1437 	 * Output changes in "unified" diff format--the old and new lines
1438 	 * are printed together.
1439 	 */
1440 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1441 		a = cvp->a;
1442 		b = cvp->b;
1443 		c = cvp->c;
1444 		d = cvp->d;
1445 
1446 		/*
1447 		 * c: both new and old changes
1448 		 * d: only changes in the old file
1449 		 * a: only changes in the new file
1450 		 */
1451 		if (a <= b && c <= d)
1452 			ch = 'c';
1453 		else
1454 			ch = (a <= b) ? 'd' : 'a';
1455 
1456 		switch (ch) {
1457 		case 'c':
1458 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1459 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1460 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1461 			break;
1462 		case 'd':
1463 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1464 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1465 			break;
1466 		case 'a':
1467 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1468 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1469 			break;
1470 		}
1471 		lowa = b + 1;
1472 		lowc = d + 1;
1473 	}
1474 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1475 
1476 	ds->context_vec_ptr = ds->context_vec_start - 1;
1477 }
1478 
1479 static void
1480 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1481     const char *file1, const char *file2)
1482 {
1483 	if (args->label[0] != NULL)
1484 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1485 		    args->label[0]);
1486 	else
1487 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1488 		    file1, ctime(&ds->stb1.st_mtime));
1489 	if (args->label[1] != NULL)
1490 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1491 		    args->label[1]);
1492 	else
1493 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1494 		    file2, ctime(&ds->stb2.st_mtime));
1495 }