Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
38 #include "got_commit_graph.h"
40 struct got_blame_line {
41 int annotated;
42 struct got_object_id id;
43 };
/*
 * Line offset chunks recorded for the diff of one commit.
 * Nodes are linked into the list kept in struct got_blame.
 */
struct got_blame_diff_offsets {
	struct got_diffoffset_chunks *chunks;
	struct got_object_id *commit_id;	/* duplicate owned by this node */
	SLIST_ENTRY(got_blame_diff_offsets) entry;
};

/* Singly-linked list of per-commit diff offsets. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
53 struct got_blame {
54 FILE *f;
55 int nlines;
56 int nannotated;
57 struct got_blame_line *lines; /* one per line */
58 int ncommits;
59 struct got_blame_diff_offsets_list diff_offsets_list;
60 };
62 static void
63 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
64 {
65 if (diff_offsets->chunks)
66 got_diffoffset_free(diff_offsets->chunks);
67 free(diff_offsets->commit_id);
68 free(diff_offsets);
69 }
71 static const struct got_error *
72 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
73 struct got_object_id *commit_id)
74 {
75 const struct got_error *err = NULL;
77 *diff_offsets = calloc(1, sizeof(**diff_offsets));
78 if (*diff_offsets == NULL)
79 return got_error_from_errno();
81 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
82 if ((*diff_offsets)->commit_id == NULL) {
83 err = got_error_from_errno();
84 free_diff_offsets(*diff_offsets);
85 *diff_offsets = NULL;
86 return err;
87 }
89 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
90 if (err) {
91 free_diff_offsets(*diff_offsets);
92 return err;
93 }
95 return NULL;
96 }
98 static const struct got_error *
99 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
100 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
101 void *arg)
103 const struct got_error *err = NULL;
104 struct got_blame_line *line;
106 if (lineno < 1 || lineno > blame->nlines)
107 return NULL;
109 line = &blame->lines[lineno - 1];
110 if (line->annotated)
111 return NULL;
113 memcpy(&line->id, id, sizeof(line->id));
114 line->annotated = 1;
115 blame->nannotated++;
116 if (cb)
117 err = cb(arg, blame->nlines, lineno, id);
118 return err;
121 static int
122 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
123 int lineno)
125 struct got_blame_diff_offsets *diff_offsets;
127 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
128 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
130 return lineno;
133 static const struct got_error *
134 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
135 struct got_object_id *commit_id,
136 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
137 void *arg)
139 const struct got_error *err = NULL;
140 struct got_diff_change *change;
141 struct got_blame_diff_offsets *diff_offsets;
143 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
144 int c = change->cv.c;
145 int d = change->cv.d;
146 int new_lineno = c;
147 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
148 int ln;
150 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
151 err = annotate_line(blame,
152 get_blamed_line(&blame->diff_offsets_list, ln),
153 commit_id, cb, arg);
154 if (err)
155 return err;
156 if (blame->nlines == blame->nannotated)
157 return NULL;
161 err = alloc_diff_offsets(&diff_offsets, commit_id);
162 if (err)
163 return err;
164 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
165 int a = change->cv.a;
166 int b = change->cv.b;
167 int c = change->cv.c;
168 int d = change->cv.d;
169 int old_lineno = a;
170 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
171 int new_lineno = c;
172 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
174 err = got_diffoffset_add(diff_offsets->chunks,
175 old_lineno, old_length, new_lineno, new_length);
176 if (err) {
177 free_diff_offsets(diff_offsets);
178 return err;
181 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
183 return NULL;
186 static const struct got_error *
187 blame_commit(struct got_blame *blame, struct got_object_id *id,
188 struct got_object_id *pid, const char *path, struct got_repository *repo,
189 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
190 void *arg)
192 const struct got_error *err = NULL;
193 struct got_object *obj = NULL, *pobj = NULL;
194 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
195 struct got_blob_object *blob = NULL, *pblob = NULL;
196 struct got_diff_changes *changes = NULL;
198 err = got_object_id_by_path(&obj_id, repo, id, path);
199 if (err)
200 goto done;
202 err = got_object_open(&obj, repo, obj_id);
203 if (err)
204 goto done;
206 if (obj->type != GOT_OBJ_TYPE_BLOB) {
207 err = got_error(GOT_ERR_OBJ_TYPE);
208 goto done;
211 err = got_object_id_by_path(&pobj_id, repo, pid, path);
212 if (err) {
213 if (err->code == GOT_ERR_NO_TREE_ENTRY) {
214 /* Blob's history began in previous commit. */
215 err = got_error(GOT_ERR_ITER_COMPLETED);
217 goto done;
220 /* If IDs match then don't bother with diffing. */
221 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
222 if (cb)
223 err = cb(arg, blame->nlines, -1, id);
224 goto done;
227 err = got_object_open(&pobj, repo, pobj_id);
228 if (err)
229 goto done;
231 if (pobj->type != GOT_OBJ_TYPE_BLOB) {
232 /*
233 * Encountered a non-blob at the path (probably a tree).
234 * Blob's history began in previous commit.
235 */
236 err = got_error(GOT_ERR_ITER_COMPLETED);
237 goto done;
240 err = got_object_blob_open(&blob, repo, obj, 8192);
241 if (err)
242 goto done;
244 err = got_object_blob_open(&pblob, repo, pobj, 8192);
245 if (err)
246 goto done;
248 err = got_diff_blob_lines_changed(&changes, pblob, blob);
249 if (err)
250 goto done;
252 if (changes) {
253 err = blame_changes(blame, changes, id, cb, arg);
254 got_diff_free_changes(changes);
255 } else if (cb)
256 err = cb(arg, blame->nlines, -1, id);
257 done:
258 free(obj_id);
259 free(pobj_id);
260 if (obj)
261 got_object_close(obj);
262 if (pobj)
263 got_object_close(pobj);
264 if (blob)
265 got_object_blob_close(blob);
266 if (pblob)
267 got_object_blob_close(pblob);
268 return err;
271 static void
272 blame_close(struct got_blame *blame)
274 struct got_blame_diff_offsets *diff_offsets;
276 if (blame->f)
277 fclose(blame->f);
278 free(blame->lines);
279 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
280 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
281 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
282 free_diff_offsets(diff_offsets);
284 free(blame);
287 static const struct got_error *
288 blame_open(struct got_blame **blamep, const char *path,
289 struct got_object_id *start_commit_id, struct got_repository *repo,
290 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
291 void *arg)
293 const struct got_error *err = NULL;
294 struct got_object *obj = NULL;
295 struct got_object_id *obj_id = NULL;
296 struct got_blob_object *blob = NULL;
297 struct got_blame *blame = NULL;
298 struct got_object_id *id = NULL;
299 int lineno;
300 struct got_commit_graph *graph = NULL;
302 *blamep = NULL;
304 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
305 if (err)
306 return err;
308 err = got_object_open(&obj, repo, obj_id);
309 if (err)
310 goto done;
312 if (obj->type != GOT_OBJ_TYPE_BLOB) {
313 err = got_error(GOT_ERR_OBJ_TYPE);
314 goto done;
317 err = got_object_blob_open(&blob, repo, obj, 8192);
318 if (err)
319 goto done;
321 blame = calloc(1, sizeof(*blame));
322 if (blame == NULL)
323 return got_error_from_errno();
325 blame->f = got_opentemp();
326 if (blame->f == NULL) {
327 err = got_error_from_errno();
328 goto done;
330 err = got_object_blob_dump_to_file(NULL, &blame->nlines, blame->f,
331 blob);
332 if (err)
333 goto done;
335 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
336 if (blame->lines == NULL) {
337 err = got_error_from_errno();
338 goto done;
341 err = got_commit_graph_open(&graph, start_commit_id, path, 0, repo);
342 if (err)
343 return err;
344 err = got_commit_graph_iter_start(graph, start_commit_id, repo);
345 if (err)
346 goto done;
348 id = NULL;
349 while (1) {
350 struct got_object_id *next_id;
352 err = got_commit_graph_iter_next(&next_id, graph);
353 if (err) {
354 if (err->code == GOT_ERR_ITER_COMPLETED) {
355 err = NULL;
356 break;
358 if (err->code != GOT_ERR_ITER_NEED_MORE)
359 break;
360 err = got_commit_graph_fetch_commits(graph, 1, repo);
361 if (err)
362 break;
363 else
364 continue;
366 if (next_id == NULL)
367 break;
368 if (id) {
369 err = blame_commit(blame, id, next_id, path, repo,
370 cb, arg);
371 if (err) {
372 if (err->code == GOT_ERR_ITER_COMPLETED)
373 err = NULL;
374 break;
376 if (blame->nannotated == blame->nlines)
377 break;
379 id = next_id;
382 if (id && blame->nannotated < blame->nlines) {
383 /* Annotate remaining non-annotated lines with last commit. */
384 for (lineno = 1; lineno <= blame->nlines; lineno++) {
385 err = annotate_line(blame, lineno, id, cb, arg);
386 if (err)
387 goto done;
391 done:
392 if (graph)
393 got_commit_graph_close(graph);
394 free(obj_id);
395 if (obj)
396 got_object_close(obj);
397 if (blob)
398 got_object_blob_close(blob);
399 if (err) {
400 if (blame)
401 blame_close(blame);
402 } else
403 *blamep = blame;
405 return err;
408 static const struct got_error *
409 blame_line(struct got_object_id **id, struct got_blame *blame, int lineno)
411 if (lineno < 1 || lineno > blame->nlines)
412 return got_error(GOT_ERR_RANGE);
413 *id = &blame->lines[lineno - 1].id;
414 return NULL;
/*
 * Read the next line from f using fparseln(3).
 *
 * All three delimiter characters are NUL and no flags are passed.
 * NOTE(review): per fparseln(3) NUL delimiters should disable escape,
 * continuation and comment processing -- confirm against the manual.
 *
 * Returns the line as returned by fparseln(), which the caller frees,
 * or NULL when fparseln() returns NULL. If len is not NULL it receives
 * the length reported by fparseln(), or 0 when no length was stored.
 */
static char *
parse_next_line(FILE *f, size_t *len)
{
	char *line;
	/*
	 * Start from zero so *len is never assigned an indeterminate value
	 * should fparseln() return NULL without storing a length.
	 */
	size_t linelen = 0;
	size_t lineno = 0;
	const char delim[3] = { '\0', '\0', '\0'};

	line = fparseln(f, &linelen, &lineno, delim, 0);
	if (len)
		*len = linelen;
	return line;
}
431 const struct got_error *
432 got_blame(const char *path, struct got_object_id *start_commit_id,
433 struct got_repository *repo, FILE *outfile)
435 const struct got_error *err = NULL;
436 struct got_blame *blame;
437 int lineno;
438 char *abspath;
440 if (asprintf(&abspath, "%s%s", path[0] == '/' ? "" : "/", path) == -1)
441 return got_error_from_errno();
443 err = blame_open(&blame, abspath, start_commit_id, repo, NULL, NULL);
444 if (err) {
445 free(abspath);
446 return err;
449 for (lineno = 1; lineno <= blame->nlines; lineno++) {
450 struct got_object_id *id;
451 char *line, *id_str;
453 line = parse_next_line(blame->f, NULL);
454 if (line == NULL)
455 break;
457 err = blame_line(&id, blame, lineno);
458 if (err) {
459 free(line);
460 break;
463 err = got_object_id_str(&id_str, id);
464 /* Do not free id; It points into blame->lines. */
465 if (err) {
466 free(line);
467 break;
470 fprintf(outfile, "%.8s %s\n", id_str, line);
471 free(line);
472 free(id_str);
475 blame_close(blame);
476 free(abspath);
477 return err;
/*
 * Blame path as of commit_id, reporting results only through cb.
 *
 * A leading '/' is prepended to path when it lacks one. cb and arg are
 * handed to blame_open(); the blame state it produces is closed again
 * before returning, so nothing is retained for the caller.
 */
const struct got_error *
got_blame_incremental(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *err = NULL;
	struct got_blame *blame;
	char *abspath;
	const char *prefix = (path[0] == '/') ? "" : "/";

	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_from_errno();

	err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	/* blame_open() leaves blame NULL whenever it fails. */
	if (blame != NULL)
		blame_close(blame);

	return err;
}