1 | // Copyright (C) 2005 Emile Snyder <emile@alumni.reed.edu>␊ |
2 | //␊ |
3 | // This program is made available under the GNU GPL version 2.0 or␊ |
4 | // greater. See the accompanying file COPYING for details.␊ |
5 | //␊ |
6 | // This program is distributed WITHOUT ANY WARRANTY; without even the␊ |
7 | // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR␊ |
8 | // PURPOSE.␊ |
9 | ␊ |
10 | #include "base.hh"␊ |
11 | #include <set>␊ |
12 | #include <iostream>␊ |
13 | ␊ |
14 | #include <boost/shared_ptr.hpp>␊ |
15 | #include <boost/multi_index_container.hpp>␊ |
16 | #include <boost/multi_index/ordered_index.hpp>␊ |
17 | #include <boost/multi_index/key_extractors.hpp>␊ |
18 | ␊ |
19 | #include "annotate.hh"␊ |
20 | #include "app_state.hh"␊ |
21 | #include "cert.hh"␊ |
22 | #include "constants.hh"␊ |
23 | #include "cset.hh"␊ |
24 | #include "interner.hh"␊ |
25 | #include "lcs.hh"␊ |
26 | #include "platform.hh"␊ |
27 | #include "revision.hh"␊ |
28 | #include "sanity.hh"␊ |
29 | #include "simplestring_xform.hh"␊ |
30 | #include "transforms.hh"␊ |
31 | #include "ui.hh"␊ |
32 | #include "vocab.hh"␊ |
33 | #include "rev_height.hh"␊ |
34 | ␊ |
35 | using std::back_insert_iterator;␊ |
36 | using std::back_inserter;␊ |
37 | using std::cout;␊ |
38 | using std::map;␊ |
39 | using std::min;␊ |
40 | using std::set;␊ |
41 | using std::string;␊ |
42 | using std::vector;␊ |
43 | using std::pair;␊ |
44 | ␊ |
45 | using boost::shared_ptr;␊ |
46 | ␊ |
47 | using boost::multi_index::multi_index_container;␊ |
48 | using boost::multi_index::indexed_by;␊ |
49 | using boost::multi_index::ordered_unique;␊ |
50 | using boost::multi_index::tag;␊ |
51 | using boost::multi_index::member;␊ |
52 | ␊ |
53 | ␊ |
54 | class annotate_lineage_mapping;␊ |
55 | ␊ |
56 | ␊ |
57 | class annotate_context␊ |
58 | {␊ |
59 | public:␊ |
60 | annotate_context(file_id fid, app_state & app);␊ |
61 | ␊ |
62 | shared_ptr<annotate_lineage_mapping> initial_lineage() const;␊ |
63 | ␊ |
64 | /// credit any uncopied lines (as recorded in copied_lines) to␊ |
65 | /// rev, and reset copied_lines.␊ |
66 | void evaluate(revision_id rev);␊ |
67 | ␊ |
68 | void set_copied(int index);␊ |
69 | void set_touched(int index);␊ |
70 | ␊ |
71 | void set_equivalent(int index, int index2);␊ |
72 | void annotate_equivalent_lines();␊ |
73 | ␊ |
74 | /// return true if we have no more unassigned lines␊ |
75 | bool is_complete() const;␊ |
76 | ␊ |
77 | void dump(app_state & app, bool just_revs) const;␊ |
78 | ␊ |
79 | string get_line(int line_index) const␊ |
80 | {␊ |
81 | return file_lines[line_index];␊ |
82 | }␊ |
83 | ␊ |
84 | private:␊ |
85 | void build_revisions_to_annotations(app_state & app,␊ |
86 | map<revision_id, string> & r2a) const;␊ |
87 | ␊ |
88 | vector<string> file_lines;␊ |
89 | vector<revision_id> annotations;␊ |
90 | ␊ |
91 | /// equivalent_lines[n] = m means that line n should be blamed to the same␊ |
92 | /// revision as line m␊ |
93 | map<int, int> equivalent_lines;␊ |
94 | ␊ |
95 | /// keep a count so we can tell quickly whether we can terminate␊ |
96 | size_t annotated_lines_completed;␊ |
97 | ␊ |
98 | // elements of the set are indexes into the array of lines in the␊ |
99 | // UDOI lineages add entries here when they notice that they copied␊ |
100 | // a line from the UDOI␊ |
101 | set<size_t> copied_lines;␊ |
102 | ␊ |
103 | // similarly, lineages add entries here for all the lines from the␊ |
104 | // UDOI they know about that they didn't copy␊ |
105 | set<size_t> touched_lines;␊ |
106 | };␊ |
107 | ␊ |
108 | ␊ |
109 | /*␊ |
110 | An annotate_lineage_mapping tells you, for each line of a file,␊ |
111 | where in the ultimate descendent of interest (UDOI) the line came␊ |
112 | from (a line not present in the UDOI is represented as -1).␊ |
113 | */␊ |
114 | class annotate_lineage_mapping␊ |
115 | {␊ |
116 | public:␊ |
117 | annotate_lineage_mapping(file_data const & data);␊ |
118 | annotate_lineage_mapping(vector<string> const & lines);␊ |
119 | ␊ |
120 | // debugging␊ |
121 | //bool equal_interned (const annotate_lineage_mapping &rhs) const;␊ |
122 | ␊ |
123 | /// need a better name. does the work of setting copied bits in the␊ |
124 | /// context object.␊ |
125 | shared_ptr<annotate_lineage_mapping>␊ |
126 | build_parent_lineage(shared_ptr<annotate_context> acp,␊ |
127 | revision_id parent_rev,␊ |
128 | file_data const & parent_data) const;␊ |
129 | ␊ |
130 | void merge(annotate_lineage_mapping const & other,␊ |
131 | shared_ptr<annotate_context> const & acp);␊ |
132 | ␊ |
133 | void credit_mapped_lines(shared_ptr<annotate_context> acp) const;␊ |
134 | void set_copied_all_mapped(shared_ptr<annotate_context> acp) const;␊ |
135 | ␊ |
136 | private:␊ |
137 | void init_with_lines(vector<string> const & lines);␊ |
138 | ␊ |
139 | static interner<long> in; // FIX, testing hack␊ |
140 | ␊ |
141 | vector<long, QA(long)> file_interned;␊ |
142 | ␊ |
143 | // maps an index into the vector of lines for our current version of␊ |
144 | // the file into an index into the vector of lines of the UDOI:␊ |
145 | // eg. if the line file_interned[i] will turn into line 4 in the␊ |
146 | // UDOI, mapping[i] = 4␊ |
147 | vector<int> mapping;␊ |
148 | };␊ |
149 | ␊ |
150 | interner<long> annotate_lineage_mapping::in;␊ |
151 | ␊ |
152 | ␊ |
153 | /*␊ |
154 | annotate_node_work encapsulates the input data needed to process␊ |
155 | the annotations for a given childrev, considering all the␊ |
156 | childrev -> parentrevN edges.␊ |
157 | */␊ |
158 | struct annotate_node_work␊ |
159 | {␊ |
160 | annotate_node_work(shared_ptr<annotate_context> annotations_,␊ |
161 | shared_ptr<annotate_lineage_mapping> lineage_,␊ |
162 | revision_id revision_, node_id fid_,␊ |
163 | rev_height height_,␊ |
164 | set<revision_id> interesting_ancestors_,␊ |
165 | file_id content_,␊ |
166 | bool marked_)␊ |
167 | : annotations(annotations_),␊ |
168 | lineage(lineage_),␊ |
169 | revision(revision_),␊ |
170 | fid(fid_),␊ |
171 | height(height_),␊ |
172 | interesting_ancestors(interesting_ancestors_),␊ |
173 | content(content_),␊ |
174 | marked(marked_)␊ |
175 | {}␊ |
176 | ␊ |
177 | shared_ptr<annotate_context> annotations;␊ |
178 | shared_ptr<annotate_lineage_mapping> lineage;␊ |
179 | revision_id revision;␊ |
180 | node_id fid;␊ |
181 | rev_height height;␊ |
182 | set<revision_id> interesting_ancestors;␊ |
183 | file_id content;␊ |
184 | bool marked;␊ |
185 | };␊ |
186 | ␊ |
187 | struct by_rev {};␊ |
188 | ␊ |
189 | // instead of using a priority queue and a set to keep track of the already␊ |
190 | // seen revisions, we use a multi index container. it stores work units␊ |
191 | // indexed by both, their revision and their revision's height, with the␊ |
192 | // latter being used by default. usage of that data structure frees us from␊ |
193 | // the burden of keeping two data structures in sync.␊ |
194 | typedef multi_index_container<␊ |
195 | annotate_node_work,␊ |
196 | indexed_by<␊ |
197 | ordered_unique<␊ |
198 | member<annotate_node_work,rev_height,&annotate_node_work::height>,␊ |
199 | std::greater<rev_height> >,␊ |
200 | ordered_unique<␊ |
201 | tag<by_rev>,␊ |
202 | member<annotate_node_work,revision_id,&annotate_node_work::revision> >␊ |
203 | >␊ |
204 | > work_units;␊ |
205 | ␊ |
206 | ␊ |
207 | annotate_context::annotate_context(file_id fid, app_state & app)␊ |
208 | : annotated_lines_completed(0)␊ |
209 | {␊ |
210 | // initialize file_lines␊ |
211 | file_data fpacked;␊ |
212 | app.db.get_file_version(fid, fpacked);␊ |
213 | string encoding = constants::default_encoding; // FIXME␊ |
214 | split_into_lines(fpacked.inner()(), encoding, file_lines);␊ |
215 | L(FL("annotate_context::annotate_context initialized "␊ |
216 | "with %d file lines\n") % file_lines.size());␊ |
217 | ␊ |
218 | // initialize annotations␊ |
219 | revision_id nullid;␊ |
220 | annotations.clear();␊ |
221 | annotations.reserve(file_lines.size());␊ |
222 | annotations.insert(annotations.begin(), file_lines.size(), nullid);␊ |
223 | L(FL("annotate_context::annotate_context initialized "␊ |
224 | "with %d entries in annotations\n") % annotations.size());␊ |
225 | ␊ |
226 | // initialize copied_lines and touched_lines␊ |
227 | copied_lines.clear();␊ |
228 | touched_lines.clear();␊ |
229 | }␊ |
230 | ␊ |
231 | ␊ |
232 | shared_ptr<annotate_lineage_mapping>␊ |
233 | annotate_context::initial_lineage() const␊ |
234 | {␊ |
235 | shared_ptr<annotate_lineage_mapping>␊ |
236 | res(new annotate_lineage_mapping(file_lines));␊ |
237 | return res;␊ |
238 | }␊ |
239 | ␊ |
240 | ␊ |
241 | void␊ |
242 | annotate_context::evaluate(revision_id rev)␊ |
243 | {␊ |
244 | revision_id nullid;␊ |
245 | I(copied_lines.size() <= annotations.size());␊ |
246 | I(touched_lines.size() <= annotations.size());␊ |
247 | ␊ |
248 | // Find the lines that we touched but that no other parent copied.␊ |
249 | set<size_t> credit_lines;␊ |
250 | set_difference(touched_lines.begin(), touched_lines.end(),␊ |
251 | copied_lines.begin(), copied_lines.end(),␊ |
252 | inserter(credit_lines, credit_lines.begin()));␊ |
253 | ␊ |
254 | set<size_t>::const_iterator i;␊ |
255 | for (i = credit_lines.begin(); i != credit_lines.end(); i++)␊ |
256 | {␊ |
257 | I(*i < annotations.size());␊ |
258 | if (annotations[*i] == nullid)␊ |
259 | {␊ |
260 | // L(FL("evaluate setting annotations[%d] -> %s, since "␊ |
261 | // "touched_lines contained %d, copied_lines didn't and "␊ |
262 | // "annotations[%d] was nullid\n") % *i % rev % *i % *i);␊ |
263 | ␊ |
264 | annotations[*i] = rev;␊ |
265 | annotated_lines_completed++;␊ |
266 | }␊ |
267 | else␊ |
268 | {␊ |
269 | //L(FL("evaluate LEAVING annotations[%d] -> %s")␊ |
270 | // % *i % annotations[*i]);␊ |
271 | }␊ |
272 | }␊ |
273 | ␊ |
274 | copied_lines.clear();␊ |
275 | touched_lines.clear();␊ |
276 | }␊ |
277 | ␊ |
278 | void␊ |
279 | annotate_context::set_copied(int index)␊ |
280 | {␊ |
281 | //L(FL("annotate_context::set_copied %d") % index);␊ |
282 | ␊ |
283 | if (index == -1)␊ |
284 | return;␊ |
285 | ␊ |
286 | I(index >= 0 && index < (int)file_lines.size());␊ |
287 | copied_lines.insert(index);␊ |
288 | }␊ |
289 | ␊ |
290 | void␊ |
291 | annotate_context::set_touched(int index)␊ |
292 | {␊ |
293 | //L(FL("annotate_context::set_touched %d") % index);␊ |
294 | ␊ |
295 | if (index == -1)␊ |
296 | return;␊ |
297 | ␊ |
298 | I(index >= 0 && index <= (int)file_lines.size());␊ |
299 | touched_lines.insert(index);␊ |
300 | }␊ |
301 | ␊ |
302 | void␊ |
303 | annotate_context::set_equivalent(int index, int index2)␊ |
304 | {␊ |
305 | L(FL("annotate_context::set_equivalent "␊ |
306 | "index %d index2 %d\n") % index % index2);␊ |
307 | equivalent_lines[index] = index2;␊ |
308 | }␊ |
309 | ␊ |
310 | void␊ |
311 | annotate_context::annotate_equivalent_lines()␊ |
312 | {␊ |
313 | revision_id null_id;␊ |
314 | ␊ |
315 | for (size_t i=0; i<annotations.size(); i++)␊ |
316 | {␊ |
317 | if (annotations[i] == null_id)␊ |
318 | {␊ |
319 | map<int, int>::const_iterator j = equivalent_lines.find(i);␊ |
320 | if (j == equivalent_lines.end())␊ |
321 | {␊ |
322 | L(FL("annotate_equivalent_lines unable to find "␊ |
323 | "equivalent for line %d\n") % i);␊ |
324 | }␊ |
325 | I(j != equivalent_lines.end());␊ |
326 | annotations[i] = annotations[j->second];␊ |
327 | annotated_lines_completed++;␊ |
328 | }␊ |
329 | }␊ |
330 | }␊ |
331 | ␊ |
332 | bool␊ |
333 | annotate_context::is_complete() const␊ |
334 | {␊ |
335 | if (annotated_lines_completed == annotations.size())␊ |
336 | return true;␊ |
337 | ␊ |
338 | I(annotated_lines_completed < annotations.size());␊ |
339 | return false;␊ |
340 | }␊ |
341 | ␊ |
342 | static string␊ |
343 | cert_string_value(vector< revision<cert> > const & certs,␊ |
344 | cert_name const & name,␊ |
345 | bool from_start, bool from_end,␊ |
346 | string const & sep)␊ |
347 | {␊ |
348 | for (vector< revision<cert> >::const_iterator i = certs.begin();␊ |
349 | i != certs.end(); ++i)␊ |
350 | {␊ |
351 | if (i->inner().name == name)␊ |
352 | {␊ |
353 | cert_value tv;␊ |
354 | decode_base64 (i->inner().value, tv);␊ |
355 | string::size_type f = 0;␊ |
356 | string::size_type l = string::npos;␊ |
357 | if (from_start)␊ |
358 | l = tv ().find_first_of(sep);␊ |
359 | if (from_end)␊ |
360 | {␊ |
361 | f = tv ().find_last_of(sep);␊ |
362 | if (f == string::npos)␊ |
363 | f = 0;␊ |
364 | }␊ |
365 | return tv().substr(f, l);␊ |
366 | }␊ |
367 | }␊ |
368 | ␊ |
369 | return "";␊ |
370 | }␊ |
371 | ␊ |
372 | void␊ |
373 | annotate_context::build_revisions_to_annotations␊ |
374 | (app_state & app,␊ |
375 | map<revision_id, string> & revs_to_notations) const␊ |
376 | {␊ |
377 | I(annotations.size() == file_lines.size());␊ |
378 | ␊ |
379 | // build set of unique revisions present in annotations␊ |
380 | set<revision_id> seen;␊ |
381 | for (vector<revision_id>::const_iterator i = annotations.begin();␊ |
382 | i != annotations.end(); i++)␊ |
383 | {␊ |
384 | seen.insert(*i);␊ |
385 | }␊ |
386 | ␊ |
387 | size_t max_note_length = 0;␊ |
388 | ␊ |
389 | // build revision -> annotation string mapping␊ |
390 | for (set<revision_id>::const_iterator i = seen.begin();␊ |
391 | i != seen.end(); i++)␊ |
392 | {␊ |
393 | vector< revision<cert> > certs;␊ |
394 | app.get_project().get_revision_certs(*i, certs);␊ |
395 | erase_bogus_certs(certs, app);␊ |
396 | ␊ |
397 | string author(cert_string_value(certs, author_cert_name,␊ |
398 | true, false, "@< "));␊ |
399 | ␊ |
400 | string date(cert_string_value(certs, date_cert_name,␊ |
401 | true, false, "T"));␊ |
402 | ␊ |
403 | string result;␊ |
404 | result.append((*i).inner ()().substr(0, 8));␊ |
405 | result.append(".. by ");␊ |
406 | result.append(author);␊ |
407 | result.append(" ");␊ |
408 | result.append(date);␊ |
409 | result.append(": ");␊ |
410 | ␊ |
411 | max_note_length = ((result.size() > max_note_length)␊ |
412 | ? result.size()␊ |
413 | : max_note_length);␊ |
414 | revs_to_notations[*i] = result;␊ |
415 | }␊ |
416 | ␊ |
417 | // justify annotation strings␊ |
418 | for (map<revision_id, string>::iterator i = revs_to_notations.begin();␊ |
419 | i != revs_to_notations.end(); i++)␊ |
420 | {␊ |
421 | size_t l = i->second.size();␊ |
422 | i->second.insert(string::size_type(0), max_note_length - l, ' ');␊ |
423 | }␊ |
424 | }␊ |
425 | ␊ |
426 | void␊ |
427 | annotate_context::dump(app_state & app, bool just_revs) const␊ |
428 | {␊ |
429 | revision_id nullid;␊ |
430 | I(annotations.size() == file_lines.size());␊ |
431 | ␊ |
432 | map<revision_id, string> revs_to_notations;␊ |
433 | string empty_note;␊ |
434 | if (!just_revs)␊ |
435 | {␊ |
436 | build_revisions_to_annotations(app, revs_to_notations);␊ |
437 | size_t max_note_length = revs_to_notations.begin()->second.size();␊ |
438 | empty_note.insert(string::size_type(0), max_note_length - 2, ' ');␊ |
439 | }␊ |
440 | ␊ |
441 | revision_id lastid = nullid;␊ |
442 | for (size_t i = 0; i < file_lines.size(); i++)␊ |
443 | {␊ |
444 | //I(! (annotations[i] == nullid) );␊ |
445 | if (!just_revs)␊ |
446 | {␊ |
447 | if (lastid == annotations[i])␊ |
448 | cout << empty_note << ": "␊ |
449 | << file_lines[i] << '\n';␊ |
450 | else␊ |
451 | cout << revs_to_notations[annotations[i]]␊ |
452 | << file_lines[i] << '\n';␊ |
453 | lastid = annotations[i];␊ |
454 | }␊ |
455 | else␊ |
456 | cout << annotations[i] << ": "␊ |
457 | << file_lines[i] << '\n';␊ |
458 | }␊ |
459 | }␊ |
460 | ␊ |
461 | annotate_lineage_mapping::annotate_lineage_mapping(file_data const & data)␊ |
462 | {␊ |
463 | // split into lines␊ |
464 | vector<string> lines;␊ |
465 | string encoding = constants::default_encoding; // FIXME␊ |
466 | split_into_lines (data.inner()().data(), encoding, lines);␊ |
467 | ␊ |
468 | init_with_lines(lines);␊ |
469 | }␊ |
470 | ␊ |
471 | annotate_lineage_mapping::annotate_lineage_mapping␊ |
472 | (vector<string> const & lines)␊ |
473 | {␊ |
474 | init_with_lines(lines);␊ |
475 | }␊ |
476 | ␊ |
477 | /*␊ |
478 | bool␊ |
479 | annotate_lineage_mapping::equal_interned␊ |
480 | (annotate_lineage_mapping const & rhs) const␊ |
481 | {␊ |
482 | bool result = true;␊ |
483 | ␊ |
484 | if (file_interned.size() != rhs.file_interned.size()) {␊ |
485 | L(FL("annotate_lineage_mapping::equal_interned "␊ |
486 | "lhs size %d != rhs size %d\n")␊ |
487 | % file_interned.size() % rhs.file_interned.size());␊ |
488 | result = false;␊ |
489 | }␊ |
490 | ␊ |
491 | size_t limit = min(file_interned.size(), rhs.file_interned.size());␊ |
492 | for (size_t i=0; i<limit; i++)␊ |
493 | {␊ |
494 | if (file_interned[i] != rhs.file_interned[i])␊ |
495 | {␊ |
496 | L(FL("annotate_lineage_mapping::equal_interned "␊ |
497 | "lhs[%d]:%ld != rhs[%d]:%ld\n")␊ |
498 | % i % file_interned[i] % i % rhs.file_interned[i]);␊ |
499 | result = false;␊ |
500 | }␊ |
501 | }␊ |
502 | ␊ |
503 | return result;␊ |
504 | }␊ |
505 | */␊ |
506 | ␊ |
507 | void␊ |
508 | annotate_lineage_mapping::init_with_lines(vector<string> const & lines)␊ |
509 | {␊ |
510 | file_interned.clear();␊ |
511 | file_interned.reserve(lines.size());␊ |
512 | mapping.clear();␊ |
513 | mapping.reserve(lines.size());␊ |
514 | ␊ |
515 | int count;␊ |
516 | vector<string>::const_iterator i;␊ |
517 | for (count=0, i = lines.begin(); i != lines.end(); i++, count++)␊ |
518 | {␊ |
519 | file_interned.push_back(in.intern(*i));␊ |
520 | mapping.push_back(count);␊ |
521 | }␊ |
522 | L(FL("annotate_lineage_mapping::init_with_lines "␊ |
523 | "ending with %d entries in mapping\n") % mapping.size());␊ |
524 | }␊ |
525 | ␊ |
526 | shared_ptr<annotate_lineage_mapping>␊ |
527 | annotate_lineage_mapping::build_parent_lineage␊ |
528 | (shared_ptr<annotate_context> acp,␊ |
529 | revision_id parent_rev,␊ |
530 | file_data const & parent_data) const␊ |
531 | {␊ |
532 | bool verbose = false;␊ |
533 | shared_ptr<annotate_lineage_mapping>␊ |
534 | parent_lineage(new annotate_lineage_mapping(parent_data));␊ |
535 | ␊ |
536 | vector<long, QA(long)> lcs;␊ |
537 | back_insert_iterator< vector<long, QA(long)> > bii(lcs);␊ |
538 | longest_common_subsequence(file_interned.begin(),␊ |
539 | file_interned.end(),␊ |
540 | parent_lineage->file_interned.begin(),␊ |
541 | parent_lineage->file_interned.end(),␊ |
542 | min(file_interned.size(),␊ |
543 | parent_lineage->file_interned.size()),␊ |
544 | back_inserter(lcs));␊ |
545 | ␊ |
546 | if (verbose)␊ |
547 | L(FL("build_parent_lineage: "␊ |
548 | "file_lines.size() == %d, "␊ |
549 | "parent.file_lines.size() == %d, "␊ |
550 | "lcs.size() == %d\n")␊ |
551 | % file_interned.size()␊ |
552 | % parent_lineage->file_interned.size()␊ |
553 | % lcs.size());␊ |
554 | ␊ |
555 | // do the copied lines thing for our annotate_context␊ |
556 | vector<long> lcs_src_lines;␊ |
557 | lcs_src_lines.resize(lcs.size());␊ |
558 | size_t i, j;␊ |
559 | i = j = 0;␊ |
560 | while (i < file_interned.size() && j < lcs.size())␊ |
561 | {␊ |
562 | //if (verbose)␊ |
563 | if (file_interned[i] == 14)␊ |
564 | L(FL("%s file_interned[%d]: %ld\tlcs[%d]: %ld\tmapping[%d]: %ld")␊ |
565 | % parent_rev % i % file_interned[i] % j % lcs[j] % i % mapping[i]);␊ |
566 | ␊ |
567 | if (file_interned[i] == lcs[j])␊ |
568 | {␊ |
569 | acp->set_copied(mapping[i]);␊ |
570 | lcs_src_lines[j] = mapping[i];␊ |
571 | j++;␊ |
572 | }␊ |
573 | else␊ |
574 | {␊ |
575 | acp->set_touched(mapping[i]);␊ |
576 | }␊ |
577 | ␊ |
578 | i++;␊ |
579 | }␊ |
580 | if (verbose)␊ |
581 | L(FL("loop ended with i: %d, j: %d, lcs.size(): %d")␊ |
582 | % i % j % lcs.size());␊ |
583 | I(j == lcs.size());␊ |
584 | ␊ |
585 | // set touched for the rest of the lines in the file␊ |
586 | while (i < file_interned.size())␊ |
587 | {␊ |
588 | acp->set_touched(mapping[i]);␊ |
589 | i++;␊ |
590 | }␊ |
591 | ␊ |
592 | // determine the mapping for parent lineage␊ |
593 | if (verbose)␊ |
594 | L(FL("build_parent_lineage: building mapping now "␊ |
595 | "for parent_rev %s\n") % parent_rev);␊ |
596 | ␊ |
597 | i = j = 0;␊ |
598 | ␊ |
599 | while (i < parent_lineage->file_interned.size() && j < lcs.size())␊ |
600 | {␊ |
601 | if (parent_lineage->file_interned[i] == lcs[j])␊ |
602 | {␊ |
603 | parent_lineage->mapping[i] = lcs_src_lines[j];␊ |
604 | j++;␊ |
605 | }␊ |
606 | else␊ |
607 | {␊ |
608 | parent_lineage->mapping[i] = -1;␊ |
609 | }␊ |
610 | if (verbose)␊ |
611 | L(FL("mapping[%d] -> %d") % i % parent_lineage->mapping[i]);␊ |
612 | ␊ |
613 | i++;␊ |
614 | }␊ |
615 | I(j == lcs.size());␊ |
616 | // set mapping for the rest of the lines in the file␊ |
617 | while (i < parent_lineage->file_interned.size())␊ |
618 | {␊ |
619 | parent_lineage->mapping[i] = -1;␊ |
620 | if (verbose)␊ |
621 | L(FL("mapping[%d] -> %d") % i % parent_lineage->mapping[i]);␊ |
622 | i++;␊ |
623 | }␊ |
624 | ␊ |
625 | return parent_lineage;␊ |
626 | }␊ |
627 | ␊ |
628 | void␊ |
629 | annotate_lineage_mapping::merge(annotate_lineage_mapping const & other,␊ |
630 | shared_ptr<annotate_context> const & acp)␊ |
631 | {␊ |
632 | I(file_interned.size() == other.file_interned.size());␊ |
633 | I(mapping.size() == other.mapping.size());␊ |
634 | //I(equal_interned(other)); // expensive check␊ |
635 | ␊ |
636 | for (size_t i=0; i<mapping.size(); i++)␊ |
637 | {␊ |
638 | if (mapping[i] == -1 && other.mapping[i] >= 0)␊ |
639 | mapping[i] = other.mapping[i];␊ |
640 | ␊ |
641 | if (mapping[i] >= 0 && other.mapping[i] >= 0)␊ |
642 | {␊ |
643 | //I(mapping[i] == other.mapping[i]);␊ |
644 | if (mapping[i] != other.mapping[i])␊ |
645 | {␊ |
646 | // a given line in the current merged mapping will split␊ |
647 | // and become multiple lines in the UDOI. so we have to␊ |
648 | // remember that whenever we ultimately assign blame for␊ |
649 | // mapping[i] we blame the same revision on␊ |
650 | // other.mapping[i].␊ |
651 | acp->set_equivalent(other.mapping[i], mapping[i]);␊ |
652 | }␊ |
653 | }␊ |
654 | }␊ |
655 | }␊ |
656 | ␊ |
657 | void␊ |
658 | annotate_lineage_mapping::credit_mapped_lines␊ |
659 | (shared_ptr<annotate_context> acp) const␊ |
660 | {␊ |
661 | vector<int>::const_iterator i;␊ |
662 | for (i=mapping.begin(); i != mapping.end(); i++)␊ |
663 | {␊ |
664 | acp->set_touched(*i);␊ |
665 | }␊ |
666 | }␊ |
667 | ␊ |
668 | void␊ |
669 | annotate_lineage_mapping::set_copied_all_mapped␊ |
670 | (shared_ptr<annotate_context> acp) const␊ |
671 | {␊ |
672 | vector<int>::const_iterator i;␊ |
673 | for (i=mapping.begin(); i != mapping.end(); i++)␊ |
674 | {␊ |
675 | acp->set_copied(*i);␊ |
676 | }␊ |
677 | }␊ |
678 | ␊ |
679 | // fetches the list of file_content markings for the given revision_id and␊ |
680 | // node_id␊ |
681 | static void get_file_content_marks(app_state & app,␊ |
682 | revision_id const & rev,␊ |
683 | node_id const & fid,␊ |
684 | set<revision_id> & content_marks)␊ |
685 | {␊ |
686 | marking_t markings;␊ |
687 | app.db.get_markings(rev, fid, markings);␊ |
688 | ␊ |
689 | I(!markings.file_content.empty());␊ |
690 | ␊ |
691 | content_marks.clear();␊ |
692 | content_marks.insert(markings.file_content.begin(),␊ |
693 | markings.file_content.end());␊ |
694 | }␊ |
695 | ␊ |
696 | static void␊ |
697 | do_annotate_node(annotate_node_work const & work_unit,␊ |
698 | app_state & app,␊ |
699 | work_units & work_units)␊ |
700 | {␊ |
701 | L(FL("do_annotate_node for node %s") % work_unit.revision);␊ |
702 | ␊ |
703 | size_t added_in_parent_count = 0;␊ |
704 | ␊ |
705 | for (set<revision_id>::const_iterator i = work_unit.interesting_ancestors.begin();␊ |
706 | i != work_unit.interesting_ancestors.end(); i++)␊ |
707 | {␊ |
708 | // here, 'parent' means either a real parent or one of the marked␊ |
709 | // ancestors, depending on whether work_unit.marked is true.␊ |
710 | revision_id parent_revision = *i;␊ |
711 | ␊ |
712 | L(FL("do_annotate_node processing edge from parent %s to child %s")␊ |
713 | % parent_revision % work_unit.revision);␊ |
714 | ␊ |
715 | I(!(work_unit.revision == parent_revision));␊ |
716 | ␊ |
717 | file_id file_in_parent;␊ |
718 | ␊ |
719 | work_units::index<by_rev>::type::iterator lmn =␊ |
720 | work_units.get<by_rev>().find(parent_revision);␊ |
721 | ␊ |
722 | // find out the content hash of the file in parent.␊ |
723 | if (lmn != work_units.get<by_rev>().end())␊ |
724 | // we already got the content hash.␊ |
725 | file_in_parent = lmn->content;␊ |
726 | else␊ |
727 | {␊ |
728 | if (work_unit.marked)␊ |
729 | app.db.get_file_content(parent_revision, work_unit.fid, file_in_parent);␊ |
730 | else␊ |
731 | // we are not marked, so parent is marked.␊ |
732 | file_in_parent = work_unit.content;␊ |
733 | }␊ |
734 | ␊ |
735 | // stop if file is not present in the parent.␊ |
736 | if (null_id(file_in_parent))␊ |
737 | {␊ |
738 | L(FL("file added in %s, continuing") % work_unit.revision);␊ |
739 | added_in_parent_count++;␊ |
740 | continue;␊ |
741 | }␊ |
742 | ␊ |
743 | // the node was live in the parent, so this represents a delta.␊ |
744 | shared_ptr<annotate_lineage_mapping> parent_lineage;␊ |
745 | ␊ |
746 | if (file_in_parent == work_unit.content)␊ |
747 | {␊ |
748 | L(FL("parent file identical, "␊ |
749 | "set copied all mapped and copy lineage\n"));␊ |
750 | parent_lineage = work_unit.lineage;␊ |
751 | parent_lineage->set_copied_all_mapped(work_unit.annotations);␊ |
752 | }␊ |
753 | else␊ |
754 | {␊ |
755 | file_data data;␊ |
756 | app.db.get_file_version(file_in_parent, data);␊ |
757 | L(FL("building parent lineage for parent file %s")␊ |
758 | % file_in_parent);␊ |
759 | parent_lineage␊ |
760 | = work_unit.lineage->build_parent_lineage(work_unit.annotations,␊ |
761 | parent_revision,␊ |
762 | data);␊ |
763 | }␊ |
764 | ␊ |
765 | // If this parent has not yet been queued for processing, create the␊ |
766 | // work unit for it.␊ |
767 | if (lmn == work_units.get<by_rev>().end())␊ |
768 | {␊ |
769 | set<revision_id> parents_interesting_ancestors;␊ |
770 | bool parent_marked;␊ |
771 | ␊ |
772 | if (work_unit.marked)␊ |
773 | {␊ |
774 | // we are marked, thus we don't know a priori whether parent␊ |
775 | // is marked or not.␊ |
776 | get_file_content_marks(app, parent_revision, work_unit.fid, parents_interesting_ancestors);␊ |
777 | parent_marked = (parents_interesting_ancestors.size() == 1␊ |
778 | && *(parents_interesting_ancestors.begin()) == parent_revision);␊ |
779 | }␊ |
780 | else␊ |
781 | parent_marked = true;␊ |
782 | ␊ |
783 | // if it's marked, we need to look at its parents instead.␊ |
784 | if (parent_marked)␊ |
785 | app.db.get_revision_parents(parent_revision, parents_interesting_ancestors);␊ |
786 | ␊ |
787 | rev_height parent_height;␊ |
788 | app.db.get_rev_height(parent_revision, parent_height);␊ |
789 | annotate_node_work newunit(work_unit.annotations,␊ |
790 | parent_lineage,␊ |
791 | parent_revision,␊ |
792 | work_unit.fid,␊ |
793 | parent_height,␊ |
794 | parents_interesting_ancestors,␊ |
795 | file_in_parent,␊ |
796 | parent_marked);␊ |
797 | work_units.insert(newunit);␊ |
798 | }␊ |
799 | else␊ |
800 | {␊ |
801 | // already a pending node, so we just have to merge the lineage.␊ |
802 | L(FL("merging lineage from node %s to parent %s")␊ |
803 | % work_unit.revision % parent_revision);␊ |
804 | ␊ |
805 | lmn->lineage->merge(*parent_lineage, work_unit.annotations);␊ |
806 | }␊ |
807 | }␊ |
808 | ␊ |
809 | if (added_in_parent_count == work_unit.interesting_ancestors.size())␊ |
810 | {␊ |
811 | work_unit.lineage->credit_mapped_lines(work_unit.annotations);␊ |
812 | }␊ |
813 | ␊ |
814 | work_unit.annotations->evaluate(work_unit.revision);␊ |
815 | }␊ |
816 | ␊ |
817 | void␊ |
818 | do_annotate (app_state &app, file_t file_node, revision_id rid, bool just_revs)␊ |
819 | {␊ |
820 | L(FL("annotating file %s with content %s in revision %s")␊ |
821 | % file_node->self % file_node->content % rid);␊ |
822 | ␊ |
823 | shared_ptr<annotate_context>␊ |
824 | acp(new annotate_context(file_node->content, app));␊ |
825 | ␊ |
826 | shared_ptr<annotate_lineage_mapping> lineage␊ |
827 | = acp->initial_lineage();␊ |
828 | ␊ |
829 | work_units work_units;␊ |
830 | {␊ |
831 | // prepare the first work_unit␊ |
832 | rev_height height;␊ |
833 | app.db.get_rev_height(rid, height);␊ |
834 | set<revision_id> rids_interesting_ancestors;␊ |
835 | get_file_content_marks(app, rid, file_node->self, rids_interesting_ancestors);␊ |
836 | bool rid_marked = (rids_interesting_ancestors.size() == 1␊ |
837 | && *(rids_interesting_ancestors.begin()) == rid);␊ |
838 | if (rid_marked)␊ |
839 | app.db.get_revision_parents(rid, rids_interesting_ancestors);␊ |
840 | ␊ |
841 | annotate_node_work workunit(acp, lineage, rid, file_node->self, height,␊ |
842 | rids_interesting_ancestors, file_node->content, rid_marked);␊ |
843 | work_units.insert(workunit);␊ |
844 | }␊ |
845 | ␊ |
846 | while (!(work_units.empty() || acp->is_complete()))␊ |
847 | {␊ |
848 | // get the work unit for the revision with the greatest height␊ |
849 | work_units::iterator w = work_units.begin();␊ |
850 | I(w != work_units.end());␊ |
851 | ␊ |
852 | // do_annotate_node() might insert new work units into work_units, and␊ |
853 | // thus might invalidate the iterator␊ |
854 | annotate_node_work work = *w;␊ |
855 | work_units.erase(w);␊ |
856 | ␊ |
857 | do_annotate_node(work, app, work_units);␊ |
858 | }␊ |
859 | ␊ |
860 | acp->annotate_equivalent_lines();␊ |
861 | I(acp->is_complete());␊ |
862 | ␊ |
863 | acp->dump(app, just_revs);␊ |
864 | }␊ |
865 | ␊ |
866 | // Local Variables:␊ |
867 | // mode: C++␊ |
868 | // fill-column: 76␊ |
869 | // c-file-style: "gnu"␊ |
870 | // indent-tabs-mode: nil␊ |
871 | // End:␊ |
872 | // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:␊ |