Gotweb

Blob

Date:: Mon Apr 2 14:37:53 2018 UTC
Message:: move diffreg's max_context into struct got_diff_state
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "got_lib_diff.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 };
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 static void	 diff_output(FILE *, const char *, ...);
187 static int	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
189 static void	 range(FILE *, int, int, char *);
190 static void	 uni_range(FILE *, int, int);
191 static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static int	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
194 static void	 prune(struct got_diff_state *);
195 static void	 equiv(struct line *, int, struct line *, int, int *);
196 static void	 unravel(struct got_diff_state *, int);
197 static int	 unsort(struct line *, int, int *);
198 static int	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
199 static void	 sort(struct line *, int);
200 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
201 static int	 ignoreline(char *);
202 static int	 asciifile(FILE *);
203 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
204 static int	 newcand(struct got_diff_state *, int, int, int, int *);
205 static int	 search(struct got_diff_state *, int *, int, int);
206 static int	 skipline(FILE *);
207 static int	 isqrt(int);
208 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
209 static int	 readhash(struct got_diff_state *, FILE *, int);
210 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
211 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
212 static char	*preadline(int, size_t, off_t);
213 
214 /*
215  * chrtran points to one of 2 translation tables: cup2low if folding upper to
216  * lower case clow2low if not folding case
217  */
218 u_char clow2low[256] = {
219 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
220 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
221 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
222 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
223 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
224 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
225 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
226 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
227 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
228 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
229 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
230 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
231 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
232 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
233 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
234 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
235 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
236 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
237 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
238 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
239 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
240 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
241 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
242 	0xfd, 0xfe, 0xff
243 };
244 
245 u_char cup2low[256] = {
246 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
247 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
248 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
249 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
250 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
251 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
252 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
253 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
254 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
255 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
256 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
257 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
258 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
259 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
260 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
261 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
262 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
263 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
264 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
265 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
266 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
267 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
268 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 	0xfd, 0xfe, 0xff
270 };
271 
272 static void
273 diff_output(FILE *outfile, const char *fmt, ...)
274 {
275 	va_list ap;
276 
277 	va_start(ap, fmt);
278 	vfprintf(outfile, fmt, ap);
279 	va_end(ap);
280 }
281 
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
285 {
286 	const struct got_error *err = NULL;
287 	int i;
288 
289 	*rval = D_SAME;
290 	ds->anychange = 0;
291 	ds->lastline = 0;
292 	ds->lastmatchline = 0;
293 	ds->context_vec_ptr = ds->context_vec_start - 1;
294 	ds->max_context = 64;
295 	if (flags & D_IGNORECASE)
296 		ds->chrtran = cup2low;
297 	else
298 		ds->chrtran = clow2low;
299 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
300 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
301 		return NULL;
302 	}
303 	if (flags & D_EMPTY1) {
304 		f1 = fopen(_PATH_DEVNULL, "r");
305 		if (f1 == NULL) {
306 			err = got_error_from_errno();
307 			goto closem;
308 		}
309 	}
310 	else if (f1 == NULL) {
311 		args->status |= 2;
312 		goto closem;
313 	}
314 
315 	if (flags & D_EMPTY2) {
316 		f2 = fopen(_PATH_DEVNULL, "r");
317 		if (f2 == NULL) {
318 			err = got_error_from_errno();
319 			goto closem;
320 		}
321 	} else if (f2 == NULL) {
322 		args->status |= 2;
323 		goto closem;
324 	}
325 
326 	switch (files_differ(ds, f1, f2, flags)) {
327 	case 0:
328 		goto closem;
329 	case 1:
330 		break;
331 	default:
332 		/* error */
333 		args->status |= 2;
334 		goto closem;
335 	}
336 
337 	if ((flags & D_FORCEASCII) == 0 &&
338 	    (!asciifile(f1) || !asciifile(f2))) {
339 		*rval = D_BINARY;
340 		args->status |= 1;
341 		goto closem;
342 	}
343 	if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
344 		err = got_error_from_errno();
345 		goto closem;
346 	}
347 	if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
348 		err = got_error_from_errno();
349 		goto closem;
350 	}
351 
352 	prune(ds);
353 	sort(ds->sfile[0], ds->slen[0]);
354 	sort(ds->sfile[1], ds->slen[1]);
355 
356 	ds->member = (int *)ds->file[1];
357 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
358 	ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
359 	if (ds->member == NULL) {
360 		err = got_error_from_errno();
361 		goto closem;
362 	}
363 
364 	ds->class = (int *)ds->file[0];
365 	if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
366 		err = got_error_from_errno();
367 		goto closem;
368 	}
369 	ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
370 	if (ds->class == NULL) {
371 		err = got_error_from_errno();
372 		goto closem;
373 	}
374 
375 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
376 	if (ds->klist == NULL) {
377 		err = got_error_from_errno();
378 		goto closem;
379 	}
380 	ds->clen = 0;
381 	ds->clistlen = 100;
382 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
383 	if (ds->clist == NULL) {
384 		err = got_error_from_errno();
385 		goto closem;
386 	}
387 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
388 	if (i < 0) {
389 		err = got_error_from_errno();
390 		free(ds->member);
391 		free(ds->class);
392 		goto closem;
393 	}
394 	free(ds->member);
395 	free(ds->class);
396 
397 	ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
398 	if (ds->J == NULL) {
399 		err = got_error_from_errno();
400 		goto closem;
401 	}
402 	unravel(ds, ds->klist[i]);
403 	free(ds->clist);
404 	ds->clist = NULL;
405 	free(ds->klist);
406 	ds->klist = NULL;
407 
408 	ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
409 	if (ds->ixold == NULL) {
410 		err = got_error_from_errno();
411 		goto closem;
412 	}
413 	ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
414 	if (ds->ixnew == NULL) {
415 		err = got_error_from_errno();
416 		goto closem;
417 	}
418 	check(ds, f1, f2, flags);
419 	if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
420 	    flags))
421 		err = got_error_from_errno();
422 closem:
423 	if (ds->anychange) {
424 		args->status |= 1;
425 		if (*rval == D_SAME)
426 			*rval = D_DIFFER;
427 	}
428 	if (f1 != NULL)
429 		fclose(f1);
430 	if (f2 != NULL)
431 		fclose(f2);
432 
433 	return (err);
434 }
435 
436 /*
437  * Check to see if the given files differ.
438  * Returns 0 if they are the same, 1 if different, and -1 on error.
439  * XXX - could use code from cmp(1) [faster]
440  */
441 static int
442 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
443 {
444 	char buf1[BUFSIZ], buf2[BUFSIZ];
445 	size_t i, j;
446 
447 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
448 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
449 		return (1);
450 	for (;;) {
451 		i = fread(buf1, 1, sizeof(buf1), f1);
452 		j = fread(buf2, 1, sizeof(buf2), f2);
453 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
454 			return (-1);
455 		if (i != j)
456 			return (1);
457 		if (i == 0)
458 			return (0);
459 		if (memcmp(buf1, buf2, i) != 0)
460 			return (1);
461 	}
462 }
463 
464 static int
465 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
466 {
467 	struct line *p;
468 	int j, h;
469 	size_t sz;
470 
471 	rewind(fd);
472 
473 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
474 	if (sz < 100)
475 		sz = 100;
476 
477 	p = calloc(sz + 3, sizeof(*p));
478 	if (p == NULL)
479 		return (-1);
480 	for (j = 0; (h = readhash(ds, fd, flags));) {
481 		if (j == sz) {
482 			sz = sz * 3 / 2;
483 			p = reallocarray(p, sz + 3, sizeof(*p));
484 			if (p == NULL)
485 				return (-1);
486 		}
487 		p[++j].value = h;
488 	}
489 	ds->len[i] = j;
490 	ds->file[i] = p;
491 
492 	return (0);
493 }
494 
495 static void
496 prune(struct got_diff_state *ds)
497 {
498 	int i, j;
499 
500 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
501 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
502 	    ds->pref++)
503 		;
504 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
505 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
506 	    ds->suff++)
507 		;
508 	for (j = 0; j < 2; j++) {
509 		ds->sfile[j] = ds->file[j] + ds->pref;
510 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
511 		for (i = 0; i <= ds->slen[j]; i++)
512 			ds->sfile[j][i].serial = i;
513 	}
514 }
515 
516 static void
517 equiv(struct line *a, int n, struct line *b, int m, int *c)
518 {
519 	int i, j;
520 
521 	i = j = 1;
522 	while (i <= n && j <= m) {
523 		if (a[i].value < b[j].value)
524 			a[i++].value = 0;
525 		else if (a[i].value == b[j].value)
526 			a[i++].value = j;
527 		else
528 			j++;
529 	}
530 	while (i <= n)
531 		a[i++].value = 0;
532 	b[m + 1].value = 0;
533 	j = 0;
534 	while (++j <= m) {
535 		c[j] = -b[j].serial;
536 		while (b[j + 1].value == b[j].value) {
537 			j++;
538 			c[j] = b[j].serial;
539 		}
540 	}
541 	c[j] = -1;
542 }
543 
544 /* Code taken from ping.c */
545 static int
546 isqrt(int n)
547 {
548 	int y, x = 1;
549 
550 	if (n == 0)
551 		return (0);
552 
553 	do { /* newton was a stinker */
554 		y = x;
555 		x = n / x;
556 		x += y;
557 		x /= 2;
558 	} while ((x - y) > 1 || (x - y) < -1);
559 
560 	return (x);
561 }
562 
563 static int
564 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
565 {
566 	int i, k, y, j, l;
567 	int oldc, tc, oldl, sq;
568 	u_int numtries, bound;
569 	int error;
570 
571 	if (flags & D_MINIMAL)
572 		bound = UINT_MAX;
573 	else {
574 		sq = isqrt(n);
575 		bound = MAXIMUM(256, sq);
576 	}
577 
578 	k = 0;
579 	c[0] = newcand(ds, 0, 0, 0, &error);
580 	if (error)
581 		return -1;
582 	for (i = 1; i <= n; i++) {
583 		j = a[i];
584 		if (j == 0)
585 			continue;
586 		y = -b[j];
587 		oldl = 0;
588 		oldc = c[0];
589 		numtries = 0;
590 		do {
591 			if (y <= ds->clist[oldc].y)
592 				continue;
593 			l = search(ds, c, k, y);
594 			if (l != oldl + 1)
595 				oldc = c[l - 1];
596 			if (l <= k) {
597 				if (ds->clist[c[l]].y <= y)
598 					continue;
599 				tc = c[l];
600 				c[l] = newcand(ds, i, y, oldc, &error);
601 				if (error)
602 					return -1;
603 				oldc = tc;
604 				oldl = l;
605 				numtries++;
606 			} else {
607 				c[l] = newcand(ds, i, y, oldc, &error);
608 				if (error)
609 					return -1;
610 				k++;
611 				break;
612 			}
613 		} while ((y = b[++j]) > 0 && numtries < bound);
614 	}
615 	return (k);
616 }
617 
618 static int
619 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
620 {
621 	struct cand *q;
622 
623 	if (ds->clen == ds->clistlen) {
624 		ds->clistlen = ds->clistlen * 11 / 10;
625 		ds->clist = reallocarray(ds->clist, ds->clistlen,
626 		    sizeof(*ds->clist));
627 		if (ds->clist == NULL) {
628 			*errorp = -1;
629 			return 0;
630 		}
631 	}
632 	q = ds->clist + ds->clen;
633 	q->x = x;
634 	q->y = y;
635 	q->pred = pred;
636 	*errorp = 0;
637 	return (ds->clen++);
638 }
639 
640 static int
641 search(struct got_diff_state *ds, int *c, int k, int y)
642 {
643 	int i, j, l, t;
644 
645 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
646 		return (k + 1);
647 	i = 0;
648 	j = k + 1;
649 	for (;;) {
650 		l = (i + j) / 2;
651 		if (l <= i)
652 			break;
653 		t = ds->clist[c[l]].y;
654 		if (t > y)
655 			j = l;
656 		else if (t < y)
657 			i = l;
658 		else
659 			return (l);
660 	}
661 	return (l + 1);
662 }
663 
664 static void
665 unravel(struct got_diff_state *ds, int p)
666 {
667 	struct cand *q;
668 	int i;
669 
670 	for (i = 0; i <= ds->len[0]; i++)
671 		ds->J[i] = i <= ds->pref ? i :
672 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
673 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
674 		ds->J[q->x + ds->pref] = q->y + ds->pref;
675 }
676 
677 /*
678  * Check does double duty:
679  *  1.	ferret out any fortuitous correspondences due
680  *	to confounding by hashing (which result in "jackpot")
681  *  2.  collect random access indexes to the two files
682  */
683 static void
684 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
685 {
686 	int i, j, jackpot, c, d;
687 	long ctold, ctnew;
688 
689 	rewind(f1);
690 	rewind(f2);
691 	j = 1;
692 	ds->ixold[0] = ds->ixnew[0] = 0;
693 	jackpot = 0;
694 	ctold = ctnew = 0;
695 	for (i = 1; i <= ds->len[0]; i++) {
696 		if (ds->J[i] == 0) {
697 			ds->ixold[i] = ctold += skipline(f1);
698 			continue;
699 		}
700 		while (j < ds->J[i]) {
701 			ds->ixnew[j] = ctnew += skipline(f2);
702 			j++;
703 		}
704 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
705 			for (;;) {
706 				c = getc(f1);
707 				d = getc(f2);
708 				/*
709 				 * GNU diff ignores a missing newline
710 				 * in one file for -b or -w.
711 				 */
712 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
713 					if (c == EOF && d == '\n') {
714 						ctnew++;
715 						break;
716 					} else if (c == '\n' && d == EOF) {
717 						ctold++;
718 						break;
719 					}
720 				}
721 				ctold++;
722 				ctnew++;
723 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
724 				    isspace(d)) {
725 					do {
726 						if (c == '\n')
727 							break;
728 						ctold++;
729 					} while (isspace(c = getc(f1)));
730 					do {
731 						if (d == '\n')
732 							break;
733 						ctnew++;
734 					} while (isspace(d = getc(f2)));
735 				} else if ((flags & D_IGNOREBLANKS)) {
736 					while (isspace(c) && c != '\n') {
737 						c = getc(f1);
738 						ctold++;
739 					}
740 					while (isspace(d) && d != '\n') {
741 						d = getc(f2);
742 						ctnew++;
743 					}
744 				}
745 				if (ds->chrtran[c] != ds->chrtran[d]) {
746 					jackpot++;
747 					ds->J[i] = 0;
748 					if (c != '\n' && c != EOF)
749 						ctold += skipline(f1);
750 					if (d != '\n' && c != EOF)
751 						ctnew += skipline(f2);
752 					break;
753 				}
754 				if (c == '\n' || c == EOF)
755 					break;
756 			}
757 		} else {
758 			for (;;) {
759 				ctold++;
760 				ctnew++;
761 				if ((c = getc(f1)) != (d = getc(f2))) {
762 					/* jackpot++; */
763 					ds->J[i] = 0;
764 					if (c != '\n' && c != EOF)
765 						ctold += skipline(f1);
766 					if (d != '\n' && c != EOF)
767 						ctnew += skipline(f2);
768 					break;
769 				}
770 				if (c == '\n' || c == EOF)
771 					break;
772 			}
773 		}
774 		ds->ixold[i] = ctold;
775 		ds->ixnew[j] = ctnew;
776 		j++;
777 	}
778 	for (; j <= ds->len[1]; j++)
779 		ds->ixnew[j] = ctnew += skipline(f2);
780 	/*
781 	 * if (jackpot)
782 	 *	fprintf(stderr, "jackpot\n");
783 	 */
784 }
785 
786 /* shellsort CACM #201 */
787 static void
788 sort(struct line *a, int n)
789 {
790 	struct line *ai, *aim, w;
791 	int j, m = 0, k;
792 
793 	if (n == 0)
794 		return;
795 	for (j = 1; j <= n; j *= 2)
796 		m = 2 * j - 1;
797 	for (m /= 2; m != 0; m /= 2) {
798 		k = n - m;
799 		for (j = 1; j <= k; j++) {
800 			for (ai = &a[j]; ai > a; ai -= m) {
801 				aim = &ai[m];
802 				if (aim < ai)
803 					break;	/* wraparound */
804 				if (aim->value > ai[0].value ||
805 				    (aim->value == ai[0].value &&
806 					aim->serial > ai[0].serial))
807 					break;
808 				w.value = ai[0].value;
809 				ai[0].value = aim->value;
810 				aim->value = w.value;
811 				w.serial = ai[0].serial;
812 				ai[0].serial = aim->serial;
813 				aim->serial = w.serial;
814 			}
815 		}
816 	}
817 }
818 
819 static int
820 unsort(struct line *f, int l, int *b)
821 {
822 	int *a, i;
823 
824 	a = calloc(l + 1, sizeof(*a));
825 	if (a == NULL)
826 		return (-1);
827 	for (i = 1; i <= l; i++)
828 		a[f[i].serial] = f[i].value;
829 	for (i = 1; i <= l; i++)
830 		b[i] = a[i];
831 	free(a);
832 
833 	return (0);
834 }
835 
836 static int
837 skipline(FILE *f)
838 {
839 	int i, c;
840 
841 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
842 		continue;
843 	return (i);
844 }
845 
846 static int
847 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
848     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
849 {
850 	int m, i0, i1, j0, j1;
851 	int error = 0;
852 
853 	rewind(f1);
854 	rewind(f2);
855 	m = ds->len[0];
856 	ds->J[0] = 0;
857 	ds->J[m + 1] = ds->len[1] + 1;
858 	if (args->diff_format != D_EDIT) {
859 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
860 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
861 				i0++;
862 			j0 = ds->J[i0 - 1] + 1;
863 			i1 = i0 - 1;
864 			while (i1 < m && ds->J[i1 + 1] == 0)
865 				i1++;
866 			j1 = ds->J[i1 + 1] - 1;
867 			ds->J[i1] = j1;
868 			error = change(outfile, ds, args, file1, f1, file2, f2,
869 			    i0, i1, j0, j1, &flags);
870 			if (error)
871 				return (error);
872 		}
873 	} else {
874 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
875 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
876 				i0--;
877 			j0 = ds->J[i0 + 1] - 1;
878 			i1 = i0 + 1;
879 			while (i1 > 1 && ds->J[i1 - 1] == 0)
880 				i1--;
881 			j1 = ds->J[i1 - 1] + 1;
882 			ds->J[i1] = j1;
883 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0,
884 			    j1, j0, &flags);
885 			if (error)
886 				return (error);
887 		}
888 	}
889 	if (m == 0) {
890 		error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
891 		    1, ds->len[1], &flags);
892 		if (error)
893 			return (error);
894 	}
895 	if (args->diff_format == D_IFDEF) {
896 		for (;;) {
897 #define	c i0
898 			if ((c = getc(f1)) == EOF)
899 				return (0);
900 			diff_output(outfile, "%c", c);
901 		}
902 #undef c
903 	}
904 	if (ds->anychange != 0) {
905 		if (args->diff_format == D_CONTEXT)
906 			dump_context_vec(outfile, ds, args, f1, f2, flags);
907 		else if (args->diff_format == D_UNIFIED)
908 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
909 	}
910 
911 	return (0);
912 }
913 
914 static void
915 range(FILE *outfile, int a, int b, char *separator)
916 {
917 	diff_output(outfile, "%d", a > b ? b : a);
918 	if (a < b)
919 		diff_output(outfile, "%s%d", separator, b);
920 }
921 
922 static void
923 uni_range(FILE *outfile, int a, int b)
924 {
925 	if (a < b)
926 		diff_output(outfile, "%d,%d", a, b - a + 1);
927 	else if (a == b)
928 		diff_output(outfile, "%d", b);
929 	else
930 		diff_output(outfile, "%d,0", b);
931 }
932 
933 static char *
934 preadline(int fd, size_t rlen, off_t off)
935 {
936 	char *line;
937 	ssize_t nr;
938 
939 	line = malloc(rlen + 1);
940 	if (line == NULL)
941 		return NULL;
942 	if ((nr = pread(fd, line, rlen, off)) < 0) {
943 		free(line);
944 		return NULL;
945 	}
946 	if (nr > 0 && line[nr-1] == '\n')
947 		nr--;
948 	line[nr] = '\0';
949 	return (line);
950 }
951 
952 static int
953 ignoreline(char *line)
954 {
955 	return 0; /* do not ignore any lines */
956 }
957 
958 /*
959  * Indicate that there is a difference between lines a and b of the from file
960  * to get to lines c to d of the to file.  If a is greater then b then there
961  * are no lines in the from file involved and this means that there were
962  * lines appended (beginning at b).  If c is greater than d then there are
963  * lines missing from the to file.
964  */
965 static int
966 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
967     const char *file1, FILE *f1, const char *file2, FILE *f2,
968     int a, int b, int c, int d, int *pflags)
969 {
970 	int i;
971 
972 restart:
973 	if (args->diff_format != D_IFDEF && a > b && c > d)
974 		return (0);
975 	if (args->ignore_pats != NULL) {
976 		char *line;
977 		/*
978 		 * All lines in the change, insert, or delete must
979 		 * match an ignore pattern for the change to be
980 		 * ignored.
981 		 */
982 		if (a <= b) {		/* Changes and deletes. */
983 			for (i = a; i <= b; i++) {
984 				line = preadline(fileno(f1),
985 				    ds->ixold[i] - ds->ixold[i - 1],
986 				    ds->ixold[i - 1]);
987 				if (line == NULL)
988 					return (-1);
989 				if (!ignoreline(line))
990 					goto proceed;
991 			}
992 		}
993 		if (a > b || c <= d) {	/* Changes and inserts. */
994 			for (i = c; i <= d; i++) {
995 				line = preadline(fileno(f2),
996 				    ds->ixnew[i] - ds->ixnew[i - 1],
997 				    ds->ixnew[i - 1]);
998 				if (line == NULL)
999 					return (-1);
1000 				if (!ignoreline(line))
1001 					goto proceed;
1002 			}
1003 		}
1004 		return (0);
1005 	}
1006 proceed:
1007 	if (*pflags & D_HEADER) {
1008 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
1009 		*pflags &= ~D_HEADER;
1010 	}
1011 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1012 		/*
1013 		 * Allocate change records as needed.
1014 		 */
1015 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1016 			ptrdiff_t offset;
1017 			offset = ds->context_vec_ptr - ds->context_vec_start;
1018 			ds->max_context <<= 1;
1019 			ds->context_vec_start =
1020 			    reallocarray(ds->context_vec_start, ds->max_context,
1021 				sizeof(*ds->context_vec_start));
1022 			if (ds->context_vec_start == NULL)
1023 				return (-1);
1024 			ds->context_vec_end = ds->context_vec_start +
1025 			    ds->max_context;
1026 			ds->context_vec_ptr = ds->context_vec_start + offset;
1027 		}
1028 		if (ds->anychange == 0) {
1029 			/*
1030 			 * Print the context/unidiff header first time through.
1031 			 */
1032 			print_header(outfile, ds, args, file1, file2);
1033 			ds->anychange = 1;
1034 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1035 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1036 			/*
1037 			 * If this change is more than 'diff_context' lines from the
1038 			 * previous change, dump the record and reset it.
1039 			 */
1040 			if (args->diff_format == D_CONTEXT)
1041 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
1042 			else
1043 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
1044 		}
1045 		ds->context_vec_ptr++;
1046 		ds->context_vec_ptr->a = a;
1047 		ds->context_vec_ptr->b = b;
1048 		ds->context_vec_ptr->c = c;
1049 		ds->context_vec_ptr->d = d;
1050 		return (0);
1051 	}
1052 	if (ds->anychange == 0)
1053 		ds->anychange = 1;
1054 	switch (args->diff_format) {
1055 	case D_BRIEF:
1056 		return (0);
1057 	case D_NORMAL:
1058 	case D_EDIT:
1059 		range(outfile, a, b, ",");
1060 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1061 		if (args->diff_format == D_NORMAL)
1062 			range(outfile, c, d, ",");
1063 		diff_output(outfile, "\n");
1064 		break;
1065 	case D_REVERSE:
1066 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1067 		range(outfile, a, b, " ");
1068 		diff_output(outfile, "\n");
1069 		break;
1070 	case D_NREVERSE:
1071 		if (a > b)
1072 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1073 		else {
1074 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1075 			if (!(c > d))
1076 				/* add changed lines */
1077 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1078 		}
1079 		break;
1080 	}
1081 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1082 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1083 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1084 			diff_output(outfile, "---\n");
1085 	}
1086 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1087 	if (i != 0 && args->diff_format == D_EDIT) {
1088 		/*
1089 		 * A non-zero return value for D_EDIT indicates that the
1090 		 * last line printed was a bare dot (".") that has been
1091 		 * escaped as ".." to prevent ed(1) from misinterpreting
1092 		 * it.  We have to add a substitute command to change this
1093 		 * back and restart where we left off.
1094 		 */
1095 		diff_output(outfile, ".\n");
1096 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1097 		b = a + i - 1;
1098 		a = b + 1;
1099 		c += i;
1100 		goto restart;
1101 	}
1102 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1103 		diff_output(outfile, ".\n");
1104 	if (ds->inifdef) {
1105 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1106 		ds->inifdef = 0;
1107 	}
1108 
1109 	return (0);
1110 }
1111 
1112 static int
1113 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1114     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1115 {
1116 	int i, j, c, lastc, col, nc;
1117 
1118 	/*
1119 	 * When doing #ifdef's, copy down to current line
1120 	 * if this is the first file, so that stuff makes it to output.
1121 	 */
1122 	if (args->diff_format == D_IFDEF && oldfile) {
1123 		long curpos = ftell(lb);
1124 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1125 		nc = f[a > b ? b : a - 1] - curpos;
1126 		for (i = 0; i < nc; i++)
1127 			diff_output(outfile, "%c", getc(lb));
1128 	}
1129 	if (a > b)
1130 		return (0);
1131 	if (args->diff_format == D_IFDEF) {
1132 		if (ds->inifdef) {
1133 			diff_output(outfile, "#else /* %s%s */\n",
1134 			    oldfile == 1 ? "!" : "", args->ifdefname);
1135 		} else {
1136 			if (oldfile)
1137 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1138 			else
1139 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1140 		}
1141 		ds->inifdef = 1 + oldfile;
1142 	}
1143 	for (i = a; i <= b; i++) {
1144 		fseek(lb, f[i - 1], SEEK_SET);
1145 		nc = f[i] - f[i - 1];
1146 		if (args->diff_format != D_IFDEF && ch != '\0') {
1147 			diff_output(outfile, "%c", ch);
1148 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1149 			    || args->diff_format == D_UNIFIED))
1150 				diff_output(outfile, "\t");
1151 			else if (args->diff_format != D_UNIFIED)
1152 				diff_output(outfile, " ");
1153 		}
1154 		col = 0;
1155 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1156 			if ((c = getc(lb)) == EOF) {
1157 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1158 				    args->diff_format == D_NREVERSE)
1159 					warnx("No newline at end of file");
1160 				else
1161 					diff_output(outfile, "\n\\ No newline at end of "
1162 					    "file\n");
1163 				return (0);
1164 			}
1165 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1166 				do {
1167 					diff_output(outfile, " ");
1168 				} while (++col & 7);
1169 			} else {
1170 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1171 				    && lastc == '.') {
1172 					/*
1173 					 * Don't print a bare "." line
1174 					 * since that will confuse ed(1).
1175 					 * Print ".." instead and return,
1176 					 * giving the caller an offset
1177 					 * from which to restart.
1178 					 */
1179 					diff_output(outfile, ".\n");
1180 					return (i - a + 1);
1181 				}
1182 				diff_output(outfile, "%c", c);
1183 				col++;
1184 			}
1185 		}
1186 	}
1187 	return (0);
1188 }
1189 
1190 /*
1191  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1192  */
1193 static int
1194 readhash(struct got_diff_state *ds, FILE *f, int flags)
1195 {
1196 	int i, t, space;
1197 	int sum;
1198 
1199 	sum = 1;
1200 	space = 0;
1201 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1202 		if (flags & D_IGNORECASE)
1203 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1204 				if (t == EOF) {
1205 					if (i == 0)
1206 						return (0);
1207 					break;
1208 				}
1209 				sum = sum * 127 + ds->chrtran[t];
1210 			}
1211 		else
1212 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1213 				if (t == EOF) {
1214 					if (i == 0)
1215 						return (0);
1216 					break;
1217 				}
1218 				sum = sum * 127 + t;
1219 			}
1220 	} else {
1221 		for (i = 0;;) {
1222 			switch (t = getc(f)) {
1223 			case '\t':
1224 			case '\r':
1225 			case '\v':
1226 			case '\f':
1227 			case ' ':
1228 				space++;
1229 				continue;
1230 			default:
1231 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1232 					i++;
1233 					space = 0;
1234 				}
1235 				sum = sum * 127 + ds->chrtran[t];
1236 				i++;
1237 				continue;
1238 			case EOF:
1239 				if (i == 0)
1240 					return (0);
1241 				/* FALLTHROUGH */
1242 			case '\n':
1243 				break;
1244 			}
1245 			break;
1246 		}
1247 	}
1248 	/*
1249 	 * There is a remote possibility that we end up with a zero sum.
1250 	 * Zero is used as an EOF marker, so return 1 instead.
1251 	 */
1252 	return (sum == 0 ? 1 : sum);
1253 }
1254 
1255 static int
1256 asciifile(FILE *f)
1257 {
1258 	unsigned char buf[BUFSIZ];
1259 	size_t cnt;
1260 
1261 	if (f == NULL)
1262 		return (1);
1263 
1264 	rewind(f);
1265 	cnt = fread(buf, 1, sizeof(buf), f);
1266 	return (memchr(buf, '\0', cnt) == NULL);
1267 }
1268 
1269 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1270 
1271 static char *
1272 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1273 {
1274 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1275 	size_t nc;
1276 	int last = ds->lastline;
1277 	char *state = NULL;
1278 
1279 	ds->lastline = pos;
1280 	while (pos > last) {
1281 		fseek(fp, f[pos - 1], SEEK_SET);
1282 		nc = f[pos] - f[pos - 1];
1283 		if (nc >= sizeof(buf))
1284 			nc = sizeof(buf) - 1;
1285 		nc = fread(buf, 1, nc, fp);
1286 		if (nc > 0) {
1287 			buf[nc] = '\0';
1288 			buf[strcspn(buf, "\n")] = '\0';
1289 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1290 				if (begins_with(buf, "private:")) {
1291 					if (!state)
1292 						state = " (private)";
1293 				} else if (begins_with(buf, "protected:")) {
1294 					if (!state)
1295 						state = " (protected)";
1296 				} else if (begins_with(buf, "public:")) {
1297 					if (!state)
1298 						state = " (public)";
1299 				} else {
1300 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1301 					if (state)
1302 						strlcat(ds->lastbuf, state,
1303 						    sizeof ds->lastbuf);
1304 					ds->lastmatchline = pos;
1305 					return ds->lastbuf;
1306 				}
1307 			}
1308 		}
1309 		pos--;
1310 	}
1311 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1312 }
1313 
1314 /* dump accumulated "context" diff changes */
1315 static void
1316 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1317     FILE *f1, FILE *f2, int flags)
1318 {
1319 	struct context_vec *cvp = ds->context_vec_start;
1320 	int lowa, upb, lowc, upd, do_output;
1321 	int a, b, c, d;
1322 	char ch, *f;
1323 
1324 	if (ds->context_vec_start > ds->context_vec_ptr)
1325 		return;
1326 
1327 	b = d = 0;		/* gcc */
1328 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1329 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1330 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1331 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1332 
1333 	diff_output(outfile, "***************");
1334 	if ((flags & D_PROTOTYPE)) {
1335 		f = match_function(ds, ds->ixold, lowa-1, f1);
1336 		if (f != NULL)
1337 			diff_output(outfile, " %s", f);
1338 	}
1339 	diff_output(outfile, "\n*** ");
1340 	range(outfile, lowa, upb, ",");
1341 	diff_output(outfile, " ****\n");
1342 
1343 	/*
1344 	 * Output changes to the "old" file.  The first loop suppresses
1345 	 * output if there were no changes to the "old" file (we'll see
1346 	 * the "old" lines as context in the "new" list).
1347 	 */
1348 	do_output = 0;
1349 	for (; cvp <= ds->context_vec_ptr; cvp++)
1350 		if (cvp->a <= cvp->b) {
1351 			cvp = ds->context_vec_start;
1352 			do_output++;
1353 			break;
1354 		}
1355 	if (do_output) {
1356 		while (cvp <= ds->context_vec_ptr) {
1357 			a = cvp->a;
1358 			b = cvp->b;
1359 			c = cvp->c;
1360 			d = cvp->d;
1361 
1362 			if (a <= b && c <= d)
1363 				ch = 'c';
1364 			else
1365 				ch = (a <= b) ? 'd' : 'a';
1366 
1367 			if (ch == 'a')
1368 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1369 			else {
1370 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1371 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1372 				    ch == 'c' ? '!' : '-', 0, flags);
1373 			}
1374 			lowa = b + 1;
1375 			cvp++;
1376 		}
1377 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1378 	}
1379 	/* output changes to the "new" file */
1380 	diff_output(outfile, "--- ");
1381 	range(outfile, lowc, upd, ",");
1382 	diff_output(outfile, " ----\n");
1383 
1384 	do_output = 0;
1385 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1386 		if (cvp->c <= cvp->d) {
1387 			cvp = ds->context_vec_start;
1388 			do_output++;
1389 			break;
1390 		}
1391 	if (do_output) {
1392 		while (cvp <= ds->context_vec_ptr) {
1393 			a = cvp->a;
1394 			b = cvp->b;
1395 			c = cvp->c;
1396 			d = cvp->d;
1397 
1398 			if (a <= b && c <= d)
1399 				ch = 'c';
1400 			else
1401 				ch = (a <= b) ? 'd' : 'a';
1402 
1403 			if (ch == 'd')
1404 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1405 			else {
1406 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1407 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1408 				    ch == 'c' ? '!' : '+', 0, flags);
1409 			}
1410 			lowc = d + 1;
1411 			cvp++;
1412 		}
1413 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1414 	}
1415 	ds->context_vec_ptr = ds->context_vec_start - 1;
1416 }
1417 
1418 /* dump accumulated "unified" diff changes */
1419 static void
1420 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1421     FILE *f1, FILE *f2, int flags)
1422 {
1423 	struct context_vec *cvp = ds->context_vec_start;
1424 	int lowa, upb, lowc, upd;
1425 	int a, b, c, d;
1426 	char ch, *f;
1427 
1428 	if (ds->context_vec_start > ds->context_vec_ptr)
1429 		return;
1430 
1431 	b = d = 0;		/* gcc */
1432 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1433 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1434 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1435 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1436 
1437 	diff_output(outfile, "@@ -");
1438 	uni_range(outfile, lowa, upb);
1439 	diff_output(outfile, " +");
1440 	uni_range(outfile, lowc, upd);
1441 	diff_output(outfile, " @@");
1442 	if ((flags & D_PROTOTYPE)) {
1443 		f = match_function(ds, ds->ixold, lowa-1, f1);
1444 		if (f != NULL)
1445 			diff_output(outfile, " %s", f);
1446 	}
1447 	diff_output(outfile, "\n");
1448 
1449 	/*
1450 	 * Output changes in "unified" diff format--the old and new lines
1451 	 * are printed together.
1452 	 */
1453 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1454 		a = cvp->a;
1455 		b = cvp->b;
1456 		c = cvp->c;
1457 		d = cvp->d;
1458 
1459 		/*
1460 		 * c: both new and old changes
1461 		 * d: only changes in the old file
1462 		 * a: only changes in the new file
1463 		 */
1464 		if (a <= b && c <= d)
1465 			ch = 'c';
1466 		else
1467 			ch = (a <= b) ? 'd' : 'a';
1468 
1469 		switch (ch) {
1470 		case 'c':
1471 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1472 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1473 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1474 			break;
1475 		case 'd':
1476 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1477 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1478 			break;
1479 		case 'a':
1480 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1481 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1482 			break;
1483 		}
1484 		lowa = b + 1;
1485 		lowc = d + 1;
1486 	}
1487 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1488 
1489 	ds->context_vec_ptr = ds->context_vec_start - 1;
1490 }
1491 
1492 static void
1493 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1494     const char *file1, const char *file2)
1495 {
1496 	if (args->label[0] != NULL)
1497 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1498 		    args->label[0]);
1499 	else
1500 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1501 		    file1, ctime(&ds->stb1.st_mtime));
1502 	if (args->label[1] != NULL)
1503 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1504 		    args->label[1]);
1505 	else
1506 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1507 		    file2, ctime(&ds->stb2.st_mtime));
1508 }