Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
38 #include "got_commit_graph.h"
40 struct got_blame_line {
41 int annotated;
42 struct got_object_id id;
43 };
/*
 * Line offset chunks recorded for the changes of one commit.  Entries are
 * linked into a singly-linked list (see struct got_blame) and are released
 * with free_diff_offsets().
 */
struct got_blame_diff_offsets {
	struct got_diffoffset_chunks *chunks;	/* from got_diffoffset_alloc() */
	struct got_object_id *commit_id;	/* private copy, freed with entry */
	SLIST_ENTRY(got_blame_diff_offsets) entry;
};

/* List head type for the per-commit diff offset entries. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
53 struct got_blame {
54 FILE *f;
55 size_t filesize;
56 int nlines;
57 int nannotated;
58 struct got_blame_line *lines; /* one per line */
59 off_t *line_offsets; /* one per line */
60 int ncommits;
61 struct got_blame_diff_offsets_list diff_offsets_list;
62 };
64 static void
65 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
66 {
67 if (diff_offsets->chunks)
68 got_diffoffset_free(diff_offsets->chunks);
69 free(diff_offsets->commit_id);
70 free(diff_offsets);
71 }
73 static const struct got_error *
74 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
75 struct got_object_id *commit_id)
76 {
77 const struct got_error *err = NULL;
79 *diff_offsets = calloc(1, sizeof(**diff_offsets));
80 if (*diff_offsets == NULL)
81 return got_error_from_errno("calloc");
83 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
84 if ((*diff_offsets)->commit_id == NULL) {
85 err = got_error_from_errno("got_object_id_dup");
86 free_diff_offsets(*diff_offsets);
87 *diff_offsets = NULL;
88 return err;
89 }
91 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
92 if (err) {
93 free_diff_offsets(*diff_offsets);
94 return err;
95 }
97 return NULL;
98 }
100 static const struct got_error *
101 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
102 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
103 void *arg)
105 const struct got_error *err = NULL;
106 struct got_blame_line *line;
108 if (lineno < 1 || lineno > blame->nlines)
109 return NULL;
111 line = &blame->lines[lineno - 1];
112 if (line->annotated)
113 return NULL;
115 memcpy(&line->id, id, sizeof(line->id));
116 line->annotated = 1;
117 blame->nannotated++;
118 if (cb)
119 err = cb(arg, blame->nlines, lineno, id);
120 return err;
123 static int
124 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
125 int lineno)
127 struct got_blame_diff_offsets *diff_offsets;
129 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
130 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
132 return lineno;
135 static const struct got_error *
136 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
137 struct got_object_id *commit_id,
138 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
139 void *arg)
141 const struct got_error *err = NULL;
142 struct got_diff_change *change;
143 struct got_blame_diff_offsets *diff_offsets;
145 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
146 int c = change->cv.c;
147 int d = change->cv.d;
148 int new_lineno = c;
149 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
150 int ln;
152 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
153 err = annotate_line(blame,
154 get_blamed_line(&blame->diff_offsets_list, ln),
155 commit_id, cb, arg);
156 if (err)
157 return err;
158 if (blame->nlines == blame->nannotated)
159 return NULL;
163 err = alloc_diff_offsets(&diff_offsets, commit_id);
164 if (err)
165 return err;
166 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
167 int a = change->cv.a;
168 int b = change->cv.b;
169 int c = change->cv.c;
170 int d = change->cv.d;
171 int old_lineno = a;
172 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
173 int new_lineno = c;
174 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
176 err = got_diffoffset_add(diff_offsets->chunks,
177 old_lineno, old_length, new_lineno, new_length);
178 if (err) {
179 free_diff_offsets(diff_offsets);
180 return err;
183 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
185 return NULL;
188 static const struct got_error *
189 blame_commit(struct got_blame *blame, struct got_object_id *id,
190 const char *path, struct got_repository *repo,
191 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
192 void *arg)
194 const struct got_error *err = NULL;
195 struct got_object *obj = NULL, *pobj = NULL;
196 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
197 struct got_commit_object *commit = NULL;
198 struct got_blob_object *blob = NULL, *pblob = NULL;
199 struct got_diff_changes *changes = NULL;
200 struct got_object_qid *pid = NULL;
202 err = got_object_open_as_commit(&commit, repo, id);
203 if (err)
204 return err;
206 err = got_object_id_by_path(&obj_id, repo, id, path);
207 if (err)
208 goto done;
210 err = got_object_open(&obj, repo, obj_id);
211 if (err)
212 goto done;
214 if (obj->type != GOT_OBJ_TYPE_BLOB) {
215 err = got_error(GOT_ERR_OBJ_TYPE);
216 goto done;
219 pid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit));
220 if (pid) {
221 err = got_object_id_by_path(&pobj_id, repo, pid->id, path);
222 if (err) {
223 if (err->code == GOT_ERR_NO_TREE_ENTRY) {
224 /* Blob's history began in previous commit. */
225 err = got_error(GOT_ERR_ITER_COMPLETED);
227 goto done;
230 /* If IDs match then don't bother with diffing. */
231 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
232 if (cb)
233 err = cb(arg, blame->nlines, -1, id);
234 goto done;
237 err = got_object_open(&pobj, repo, pobj_id);
238 if (err)
239 goto done;
241 if (pobj->type != GOT_OBJ_TYPE_BLOB) {
242 /*
243 * Encountered a non-blob at the path (probably a tree).
244 * Blob's history began in previous commit.
245 */
246 err = got_error(GOT_ERR_ITER_COMPLETED);
247 goto done;
250 err = got_object_blob_open(&pblob, repo, pobj, 8192);
251 if (err)
252 goto done;
255 err = got_object_blob_open(&blob, repo, obj, 8192);
256 if (err)
257 goto done;
259 err = got_diff_blob_lines_changed(&changes, pblob, blob);
260 if (err)
261 goto done;
263 if (changes) {
264 err = blame_changes(blame, changes, id, cb, arg);
265 got_diff_free_changes(changes);
266 } else if (cb)
267 err = cb(arg, blame->nlines, -1, id);
268 done:
269 if (commit)
270 got_object_commit_close(commit);
271 free(obj_id);
272 free(pobj_id);
273 if (obj)
274 got_object_close(obj);
275 if (pobj)
276 got_object_close(pobj);
277 if (blob)
278 got_object_blob_close(blob);
279 if (pblob)
280 got_object_blob_close(pblob);
281 return err;
284 static const struct got_error *
285 blame_close(struct got_blame *blame)
287 const struct got_error *err = NULL;
288 struct got_blame_diff_offsets *diff_offsets;
290 if (blame->f && fclose(blame->f) != 0)
291 err = got_error_from_errno("fclose");
292 free(blame->lines);
293 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
294 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
295 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
296 free_diff_offsets(diff_offsets);
298 free(blame);
299 return err;
302 static const struct got_error *
303 blame_open(struct got_blame **blamep, const char *path,
304 struct got_object_id *start_commit_id, struct got_repository *repo,
305 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
306 void *arg)
308 const struct got_error *err = NULL;
309 struct got_object *obj = NULL;
310 struct got_object_id *obj_id = NULL;
311 struct got_blob_object *blob = NULL;
312 struct got_blame *blame = NULL;
313 struct got_object_id *id = NULL, *next_id = NULL;
314 int lineno;
315 struct got_commit_graph *graph = NULL;
317 *blamep = NULL;
319 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
320 if (err)
321 return err;
323 err = got_object_open(&obj, repo, obj_id);
324 if (err)
325 goto done;
327 if (obj->type != GOT_OBJ_TYPE_BLOB) {
328 err = got_error(GOT_ERR_OBJ_TYPE);
329 goto done;
332 err = got_object_blob_open(&blob, repo, obj, 8192);
333 if (err)
334 goto done;
336 blame = calloc(1, sizeof(*blame));
337 if (blame == NULL)
338 return got_error_from_errno("calloc");
340 blame->f = got_opentemp();
341 if (blame->f == NULL) {
342 err = got_error_from_errno("got_opentemp");
343 goto done;
345 err = got_object_blob_dump_to_file(&blame->filesize, &blame->nlines,
346 &blame->line_offsets, blame->f, blob);
347 if (err)
348 goto done;
350 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
351 if (blame->lines == NULL) {
352 err = got_error_from_errno("calloc");
353 goto done;
356 err = got_commit_graph_open(&graph, start_commit_id, path, 1, repo);
357 if (err)
358 return err;
359 err = got_commit_graph_iter_start(graph, start_commit_id, repo);
360 if (err)
361 goto done;
363 id = NULL;
364 for (;;) {
365 err = got_commit_graph_iter_next(&next_id, graph);
366 if (err) {
367 if (err->code == GOT_ERR_ITER_COMPLETED) {
368 if (id)
369 err = blame_commit(blame, id,
370 path, repo, cb, arg);
371 else
372 err = NULL;
373 break;
375 if (err->code != GOT_ERR_ITER_NEED_MORE)
376 break;
377 err = got_commit_graph_fetch_commits(graph, 1, repo);
378 if (err)
379 break;
380 continue;
382 if (id) {
383 err = blame_commit(blame, id, path, repo,
384 cb, arg);
385 if (err) {
386 if (err->code == GOT_ERR_ITER_COMPLETED)
387 err = NULL;
388 break;
390 if (blame->nannotated == blame->nlines)
391 break;
393 id = next_id;
396 if (id && blame->nannotated < blame->nlines) {
397 /* Annotate remaining non-annotated lines with last commit. */
398 for (lineno = 1; lineno <= blame->nlines; lineno++) {
399 err = annotate_line(blame, lineno, id, cb, arg);
400 if (err)
401 goto done;
405 done:
406 if (graph)
407 got_commit_graph_close(graph);
408 free(obj_id);
409 if (obj)
410 got_object_close(obj);
411 if (blob)
412 got_object_blob_close(blob);
413 if (err) {
414 if (blame)
415 blame_close(blame);
416 } else
417 *blamep = blame;
419 return err;
/*
 * Blame the file at path as of commit_id in the given repository.
 *
 * A leading "/" is prepended to path if it does not already start with
 * one.  Results are reported through cb (if not NULL), which receives arg
 * as its first argument; see annotate_line() and blame_commit() for the
 * remaining callback arguments.
 *
 * Returns the error from the blame run if there was one, otherwise any
 * error from closing the blame object, otherwise NULL.
 */
const struct got_error *
got_blame(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *err = NULL, *close_err = NULL;
	struct got_blame *blame;
	char *abspath;
	const char *prefix = (path[0] == '/') ? "" : "/";

	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_from_errno2("asprintf", path);

	/* blame_open() always sets blame, to NULL on failure. */
	err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	if (blame != NULL)
		close_err = blame_close(blame);

	if (err != NULL)
		return err;
	return close_err;
}