Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
38 #include "got_commit_graph.h"
40 struct got_blame_line {
41 int annotated;
42 struct got_object_id id;
43 };
/*
 * Line offset information recorded for one commit. Instances are linked
 * into a got_blame_diff_offsets_list and released with free_diff_offsets().
 */
struct got_blame_diff_offsets {
	/* Offset chunks; allocated with got_diffoffset_alloc(). */
	struct got_diffoffset_chunks *chunks;
	/* Private duplicate of the commit ID; released with free(). */
	struct got_object_id *commit_id;
	/* Singly-linked list linkage. */
	SLIST_ENTRY(got_blame_diff_offsets) entry;
};

/* Head of a singly-linked list of got_blame_diff_offsets. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
53 struct got_blame {
54 FILE *f;
55 size_t filesize;
56 int nlines;
57 int nannotated;
58 struct got_blame_line *lines; /* one per line */
59 off_t *line_offsets; /* one per line */
60 int ncommits;
61 struct got_blame_diff_offsets_list diff_offsets_list;
62 };
64 static void
65 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
66 {
67 if (diff_offsets->chunks)
68 got_diffoffset_free(diff_offsets->chunks);
69 free(diff_offsets->commit_id);
70 free(diff_offsets);
71 }
73 static const struct got_error *
74 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
75 struct got_object_id *commit_id)
76 {
77 const struct got_error *err = NULL;
79 *diff_offsets = calloc(1, sizeof(**diff_offsets));
80 if (*diff_offsets == NULL)
81 return got_error_from_errno("calloc");
83 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
84 if ((*diff_offsets)->commit_id == NULL) {
85 err = got_error_from_errno("got_object_id_dup");
86 free_diff_offsets(*diff_offsets);
87 *diff_offsets = NULL;
88 return err;
89 }
91 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
92 if (err) {
93 free_diff_offsets(*diff_offsets);
94 return err;
95 }
97 return NULL;
98 }
100 static const struct got_error *
101 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
102 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
103 void *arg)
105 const struct got_error *err = NULL;
106 struct got_blame_line *line;
108 if (lineno < 1 || lineno > blame->nlines)
109 return NULL;
111 line = &blame->lines[lineno - 1];
112 if (line->annotated)
113 return NULL;
115 memcpy(&line->id, id, sizeof(line->id));
116 line->annotated = 1;
117 blame->nannotated++;
118 if (cb)
119 err = cb(arg, blame->nlines, lineno, id);
120 return err;
123 static int
124 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
125 int lineno)
127 struct got_blame_diff_offsets *diff_offsets;
128 int offset = 0;
130 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
131 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
133 return lineno + offset;
136 static const struct got_error *
137 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
138 struct got_object_id *commit_id,
139 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
140 void *arg)
142 const struct got_error *err = NULL;
143 struct got_diff_change *change;
144 struct got_blame_diff_offsets *diff_offsets;
146 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
147 int c = change->cv.c;
148 int d = change->cv.d;
149 int new_lineno = c;
150 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
151 int ln;
153 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
154 err = annotate_line(blame,
155 get_blamed_line(&blame->diff_offsets_list, ln),
156 commit_id, cb, arg);
157 if (err)
158 return err;
159 if (blame->nlines == blame->nannotated)
160 return NULL;
164 err = alloc_diff_offsets(&diff_offsets, commit_id);
165 if (err)
166 return err;
167 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
168 int a = change->cv.a;
169 int b = change->cv.b;
170 int c = change->cv.c;
171 int d = change->cv.d;
172 int old_lineno = a;
173 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
174 int new_lineno = c;
175 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
177 err = got_diffoffset_add(diff_offsets->chunks,
178 old_lineno, old_length, new_lineno, new_length);
179 if (err) {
180 free_diff_offsets(diff_offsets);
181 return err;
184 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
186 return NULL;
189 static const struct got_error *
190 blame_commit(struct got_blame *blame, struct got_object_id *id,
191 const char *path, struct got_repository *repo,
192 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
193 void *arg)
195 const struct got_error *err = NULL;
196 struct got_object *obj = NULL, *pobj = NULL;
197 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
198 struct got_commit_object *commit = NULL;
199 struct got_blob_object *blob = NULL, *pblob = NULL;
200 struct got_diff_changes *changes = NULL;
201 struct got_object_qid *pid = NULL;
203 err = got_object_open_as_commit(&commit, repo, id);
204 if (err)
205 return err;
207 err = got_object_id_by_path(&obj_id, repo, id, path);
208 if (err)
209 goto done;
211 err = got_object_open(&obj, repo, obj_id);
212 if (err)
213 goto done;
215 if (obj->type != GOT_OBJ_TYPE_BLOB) {
216 err = got_error(GOT_ERR_OBJ_TYPE);
217 goto done;
220 pid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit));
221 if (pid) {
222 err = got_object_id_by_path(&pobj_id, repo, pid->id, path);
223 if (err) {
224 if (err->code == GOT_ERR_NO_TREE_ENTRY) {
225 /* Blob's history began in previous commit. */
226 err = got_error(GOT_ERR_ITER_COMPLETED);
228 goto done;
231 /* If IDs match then don't bother with diffing. */
232 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
233 if (cb)
234 err = cb(arg, blame->nlines, -1, id);
235 goto done;
238 err = got_object_open(&pobj, repo, pobj_id);
239 if (err)
240 goto done;
242 if (pobj->type != GOT_OBJ_TYPE_BLOB) {
243 /*
244 * Encountered a non-blob at the path (probably a tree).
245 * Blob's history began in previous commit.
246 */
247 err = got_error(GOT_ERR_ITER_COMPLETED);
248 goto done;
251 err = got_object_blob_open(&pblob, repo, pobj, 8192);
252 if (err)
253 goto done;
256 err = got_object_blob_open(&blob, repo, obj, 8192);
257 if (err)
258 goto done;
260 err = got_diff_blob_lines_changed(&changes, pblob, blob);
261 if (err)
262 goto done;
264 if (changes) {
265 err = blame_changes(blame, changes, id, cb, arg);
266 got_diff_free_changes(changes);
267 } else if (cb)
268 err = cb(arg, blame->nlines, -1, id);
269 done:
270 if (commit)
271 got_object_commit_close(commit);
272 free(obj_id);
273 free(pobj_id);
274 if (obj)
275 got_object_close(obj);
276 if (pobj)
277 got_object_close(pobj);
278 if (blob)
279 got_object_blob_close(blob);
280 if (pblob)
281 got_object_blob_close(pblob);
282 return err;
285 static const struct got_error *
286 blame_close(struct got_blame *blame)
288 const struct got_error *err = NULL;
289 struct got_blame_diff_offsets *diff_offsets;
291 if (blame->f && fclose(blame->f) != 0)
292 err = got_error_from_errno("fclose");
293 free(blame->lines);
294 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
295 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
296 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
297 free_diff_offsets(diff_offsets);
299 free(blame);
300 return err;
303 static const struct got_error *
304 blame_open(struct got_blame **blamep, const char *path,
305 struct got_object_id *start_commit_id, struct got_repository *repo,
306 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
307 void *arg)
309 const struct got_error *err = NULL;
310 struct got_object *obj = NULL;
311 struct got_object_id *obj_id = NULL;
312 struct got_blob_object *blob = NULL;
313 struct got_blame *blame = NULL;
314 struct got_object_id *id = NULL, *next_id = NULL;
315 int lineno;
316 struct got_commit_graph *graph = NULL;
318 *blamep = NULL;
320 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
321 if (err)
322 return err;
324 err = got_object_open(&obj, repo, obj_id);
325 if (err)
326 goto done;
328 if (obj->type != GOT_OBJ_TYPE_BLOB) {
329 err = got_error(GOT_ERR_OBJ_TYPE);
330 goto done;
333 err = got_object_blob_open(&blob, repo, obj, 8192);
334 if (err)
335 goto done;
337 blame = calloc(1, sizeof(*blame));
338 if (blame == NULL)
339 return got_error_from_errno("calloc");
341 blame->f = got_opentemp();
342 if (blame->f == NULL) {
343 err = got_error_from_errno("got_opentemp");
344 goto done;
346 err = got_object_blob_dump_to_file(&blame->filesize, &blame->nlines,
347 &blame->line_offsets, blame->f, blob);
348 if (err || blame->nlines == 0)
349 goto done;
351 /* Don't include \n at EOF in the blame line count. */
352 if (blame->line_offsets[blame->nlines - 1] == blame->filesize)
353 blame->nlines--;
355 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
356 if (blame->lines == NULL) {
357 err = got_error_from_errno("calloc");
358 goto done;
361 err = got_commit_graph_open(&graph, start_commit_id, path, 1, repo);
362 if (err)
363 return err;
364 err = got_commit_graph_iter_start(graph, start_commit_id, repo);
365 if (err)
366 goto done;
368 id = NULL;
369 for (;;) {
370 err = got_commit_graph_iter_next(&next_id, graph);
371 if (err) {
372 if (err->code == GOT_ERR_ITER_COMPLETED) {
373 if (id)
374 err = blame_commit(blame, id,
375 path, repo, cb, arg);
376 else
377 err = NULL;
378 break;
380 if (err->code != GOT_ERR_ITER_NEED_MORE)
381 break;
382 err = got_commit_graph_fetch_commits(graph, 1, repo);
383 if (err)
384 break;
385 continue;
387 if (id) {
388 err = blame_commit(blame, id, path, repo,
389 cb, arg);
390 if (err) {
391 if (err->code == GOT_ERR_ITER_COMPLETED)
392 err = NULL;
393 break;
395 if (blame->nannotated == blame->nlines)
396 break;
398 id = next_id;
401 if (id && blame->nannotated < blame->nlines) {
402 /* Annotate remaining non-annotated lines with last commit. */
403 for (lineno = 1; lineno <= blame->nlines; lineno++) {
404 err = annotate_line(blame, lineno, id, cb, arg);
405 if (err)
406 goto done;
410 done:
411 if (graph)
412 got_commit_graph_close(graph);
413 free(obj_id);
414 if (obj)
415 got_object_close(obj);
416 if (blob)
417 got_object_blob_close(blob);
418 if (err) {
419 if (blame)
420 blame_close(blame);
421 } else
422 *blamep = blame;
424 return err;
/*
 * Blame the file at path as of the commit identified by commit_id.
 *
 * The path is made absolute by prepending a '/' if it lacks one. The
 * optional callback cb is passed through, together with arg, to the blame
 * machinery, which invokes it as lines get annotated. All blame state is
 * released before returning.
 *
 * Returns the error of the blame run if there was one, otherwise any
 * error raised while releasing the blame state, otherwise NULL.
 */
const struct got_error *
got_blame(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *err = NULL, *close_err = NULL;
	struct got_blame *blame;
	char *abspath;
	const char *prefix;

	prefix = (path[0] == '/') ? "" : "/";
	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_from_errno2("asprintf", path);

	err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	/* blame_open() leaves blame set to NULL whenever it fails. */
	if (blame != NULL)
		close_err = blame_close(blame);

	if (err != NULL)
		return err;
	return close_err;
}