monotone

monotone Mtn Source Tree

Root/annotate.cc

1// copyright (C) 2005 emile snyder <emile@alumni.reed.edu>
2// all rights reserved.
3// licensed to the public under the terms of the GNU GPL (>= 2)
4// see the file COPYING for details
5
6#include <string>
7#include <sstream>
8#include <deque>
9#include <set>
10
11#include <boost/shared_ptr.hpp>
12
13#include "platform.hh"
14#include "vocab.hh"
15#include "sanity.hh"
16#include "revision.hh"
17#include "change_set.hh"
18#include "app_state.hh"
19#include "manifest.hh"
20#include "transforms.hh"
21#include "lcs.hh"
22#include "annotate.hh"
23
24
25
26class annotate_lineage_mapping;
27
28
29class annotate_context {
30public:
31 annotate_context(file_id fid, app_state &app);
32
33 boost::shared_ptr<annotate_lineage_mapping> initial_lineage() const;
34
35 /// credit any uncopied lines (as recorded in copied_lines) to
36 /// rev, and reset copied_lines.
37 void evaluate(revision_id rev);
38
39 void set_copied(int index);
40 void set_touched(int index);
41
42 void set_equivalent(int index, int index2);
43 void annotate_equivalent_lines();
44
45 /// return true if we have no more unassigned lines
46 bool is_complete() const;
47
48 void dump() const;
49
50 std::string get_line(int line_index) const { return file_lines[line_index]; }
51
52private:
53 std::vector<std::string> file_lines;
54 std::vector<revision_id> annotations;
55
56 /// equivalent_lines[n] = m means that line n should be blamed to the same
57 /// revision as line m
58 std::map<int, int> equivalent_lines;
59
60 /// keep a count so we can tell quickly whether we can terminate
61 size_t annotated_lines_completed;
62
63 // elements of the set are indexes into the array of lines in the UDOI
64 // lineages add entries here when they notice that they copied a line from the UDOI
65 std::set<size_t> copied_lines;
66
67 // similarly, lineages add entries here for all the lines from the UDOI they know about that they didn't copy
68 std::set<size_t> touched_lines;
69};
70
71
72
73/*
74 An annotate_lineage_mapping tells you, for each line of a file, where in the
75 ultimate descendent of interest (UDOI) the line came from (a line not
76 present in the UDOI is represented as -1).
77*/
78class annotate_lineage_mapping {
79public:
80 annotate_lineage_mapping(const file_data &data);
81 annotate_lineage_mapping(const std::vector<std::string> &lines);
82
83 // debugging
84 //bool equal_interned (const annotate_lineage_mapping &rhs) const;
85
86 /// need a better name. does the work of setting copied bits in the context object.
87 boost::shared_ptr<annotate_lineage_mapping>
88 build_parent_lineage(boost::shared_ptr<annotate_context> acp, revision_id parent_rev, const file_data &parent_data) const;
89
90 void merge(const annotate_lineage_mapping &other, const boost::shared_ptr<annotate_context> &acp);
91
92 void credit_mapped_lines (boost::shared_ptr<annotate_context> acp) const;
93 void set_copied_all_mapped (boost::shared_ptr<annotate_context> acp) const;
94
95private:
96 void init_with_lines(const std::vector<std::string> &lines);
97
98 static interner<long> in; // FIX, testing hack
99
100 std::vector<long, QA(long)> file_interned;
101
102 // maps an index into the vector of lines for our current version of the file
103 // into an index into the vector of lines of the UDOI:
104 // eg. if the line file_interned[i] will turn into line 4 in the UDOI, mapping[i] = 4
105 std::vector<int> mapping;
106};
107
108
109interner<long> annotate_lineage_mapping::in;
110
111
112/*
113 annotate_node_work encapsulates the input data needed to process
114 the annotations for a given childrev, considering all the
115 childrev -> parentrevN edges.
116*/
117struct annotate_node_work {
118 annotate_node_work (boost::shared_ptr<annotate_context> annotations_,
119 boost::shared_ptr<annotate_lineage_mapping> lineage_,
120 revision_id node_revision_, file_id node_fid_, file_path node_fpath_)
121 : annotations(annotations_),
122 lineage(lineage_),
123 node_revision(node_revision_),
124 node_fid(node_fid_),
125 node_fpath(node_fpath_)
126 {}
127 annotate_node_work (const annotate_node_work &w)
128 : annotations(w.annotations),
129 lineage(w.lineage),
130 node_revision(w.node_revision),
131 node_fid(w.node_fid),
132 node_fpath(w.node_fpath)
133 {}
134
135 boost::shared_ptr<annotate_context> annotations;
136 boost::shared_ptr<annotate_lineage_mapping> lineage;
137 revision_id node_revision;
138 file_id node_fid;
139 file_path node_fpath;
140};
141
142
143class lineage_merge_node {
144public:
145 typedef boost::shared_ptr<annotate_lineage_mapping> splm;
146
147 lineage_merge_node(const lineage_merge_node &m)
148 : work(m.work), incoming_edges(m.incoming_edges), completed_edges(m.completed_edges)
149 {}
150
151 lineage_merge_node(annotate_node_work wu, size_t incoming)
152 : work(wu), incoming_edges(incoming), completed_edges(1)
153 {}
154
155 void merge(splm incoming, const boost::shared_ptr<annotate_context> &acp)
156 {
157 work.lineage->merge(*incoming, acp); completed_edges++;
158 }
159
160 bool iscomplete () const { I(completed_edges <= incoming_edges); return incoming_edges == completed_edges; }
161
162 annotate_node_work get_work() const { I(iscomplete()); return work; }
163
164private:
165 annotate_node_work work;
166 size_t incoming_edges;
167 size_t completed_edges;
168};
169
170
171
172
173
174annotate_context::annotate_context(file_id fid, app_state &app)
175 : annotated_lines_completed(0)
176{
177 // initialize file_lines
178 file_data fpacked;
179 app.db.get_file_version(fid, fpacked);
180 std::string encoding = default_encoding; // FIXME
181 split_into_lines(fpacked.inner()(), encoding, file_lines);
182 L(F("annotate_context::annotate_context initialized with %d file lines\n") % file_lines.size());
183
184 // initialize annotations
185 revision_id nullid;
186 annotations.clear();
187 annotations.reserve(file_lines.size());
188 annotations.insert(annotations.begin(), file_lines.size(), nullid);
189 L(F("annotate_context::annotate_context initialized with %d entries in annotations\n") % annotations.size());
190
191 // initialize copied_lines and touched_lines
192 copied_lines.clear();
193 touched_lines.clear();
194}
195
196
197boost::shared_ptr<annotate_lineage_mapping>
198annotate_context::initial_lineage() const
199{
200 boost::shared_ptr<annotate_lineage_mapping> res(new annotate_lineage_mapping(file_lines));
201 return res;
202}
203
204
205void
206annotate_context::evaluate(revision_id rev)
207{
208 revision_id nullid;
209 I(copied_lines.size() <= annotations.size());
210 I(touched_lines.size() <= annotations.size());
211
212 // Find the lines that we touched but that no other parent copied.
213 std::set<size_t> credit_lines;
214 std::set_difference(touched_lines.begin(), touched_lines.end(),
215 copied_lines.begin(), copied_lines.end(),
216 inserter(credit_lines, credit_lines.begin()));
217
218 std::set<size_t>::const_iterator i;
219 for (i = credit_lines.begin(); i != credit_lines.end(); i++) {
220 I(*i >= 0 && *i < annotations.size());
221 if (annotations[*i] == nullid) {
222 //L(F("evaluate setting annotations[%d] -> %s, since touched_lines contained %d, copied_lines didn't and annotations[%d] was nullid\n")
223 // % *i % rev % *i % *i);
224 annotations[*i] = rev;
225 annotated_lines_completed++;
226 } else {
227 //L(F("evaluate LEAVING annotations[%d] -> %s\n") % *i % annotations[*i]);
228 }
229 }
230
231 copied_lines.clear();
232 touched_lines.clear();
233}
234
235void
236annotate_context::set_copied(int index)
237{
238 //L(F("annotate_context::set_copied %d\n") % index);
239 if (index == -1)
240 return;
241
242 I(index >= 0 && index < (int)file_lines.size());
243 copied_lines.insert(index);
244}
245
246void
247annotate_context::set_touched(int index)
248{
249 //L(F("annotate_context::set_touched %d\n") % index);
250 if (index == -1)
251 return;
252
253 I(index >= 0 && index <= (int)file_lines.size());
254 touched_lines.insert(index);
255}
256
257void
258annotate_context::set_equivalent(int index, int index2)
259{
260 L(F("annotate_context::set_equivalent index %d index2 %d\n") % index % index2);
261 equivalent_lines[index] = index2;
262}
263
264void
265annotate_context::annotate_equivalent_lines()
266{
267 revision_id null_id;
268
269 for (size_t i=0; i<annotations.size(); i++) {
270 if (annotations[i] == null_id) {
271 std::map<int, int>::const_iterator j = equivalent_lines.find(i);
272 if (j == equivalent_lines.end()) {
273 L(F("annotate_equivalent_lines unable to find equivalent for line %d\n") % i);
274 }
275 I(j != equivalent_lines.end());
276 annotations[i] = annotations[j->second];
277 annotated_lines_completed++;
278 }
279 }
280}
281
282bool
283annotate_context::is_complete() const
284{
285 if (annotated_lines_completed == annotations.size())
286 return true;
287
288 I(annotated_lines_completed < annotations.size());
289 return false;
290}
291
292
293void
294annotate_context::dump() const
295{
296 revision_id nullid;
297 I(annotations.size() == file_lines.size());
298
299 revision_id lastid = nullid;
300 for (size_t i=0; i<file_lines.size(); i++) {
301 //I(! (annotations[i] == nullid) );
302 if (false) //(lastid == annotations[i])
303 std::cout << " : " << file_lines[i] << std::endl;
304 else
305 std::cout << annotations[i] << ": " << file_lines[i] << std::endl;
306
307 lastid = annotations[i];
308 }
309}
310
311
312annotate_lineage_mapping::annotate_lineage_mapping(const file_data &data)
313{
314 // split into lines
315 std::vector<std::string> lines;
316 std::string encoding = default_encoding; // FIXME
317 split_into_lines (data.inner()().data(), encoding, lines);
318
319 init_with_lines(lines);
320}
321
322annotate_lineage_mapping::annotate_lineage_mapping(const std::vector<std::string> &lines)
323{
324 init_with_lines(lines);
325}
326
327/*
328bool
329annotate_lineage_mapping::equal_interned (const annotate_lineage_mapping &rhs) const
330{
331 bool result = true;
332
333 if (file_interned.size() != rhs.file_interned.size()) {
334 L(F("annotate_lineage_mapping::equal_interned lhs size %d != rhs size %d\n")
335 % file_interned.size() % rhs.file_interned.size());
336 result = false;
337 }
338
339 size_t limit = std::min(file_interned.size(), rhs.file_interned.size());
340 for (size_t i=0; i<limit; i++) {
341 if (file_interned[i] != rhs.file_interned[i]) {
342 L(F("annotate_lineage_mapping::equal_interned lhs[%d]:%ld != rhs[%d]:%ld\n")
343 % i % file_interned[i] % i % rhs.file_interned[i]);
344 result = false;
345 }
346 }
347
348 return result;
349}
350*/
351
352void
353annotate_lineage_mapping::init_with_lines(const std::vector<std::string> &lines)
354{
355 file_interned.clear();
356 file_interned.reserve(lines.size());
357 mapping.clear();
358 mapping.reserve(lines.size());
359
360 int count;
361 std::vector<std::string>::const_iterator i;
362 for (count=0, i = lines.begin(); i != lines.end(); i++, count++) {
363 file_interned.push_back(in.intern(*i));
364 mapping.push_back(count);
365 }
366 L(F("annotate_lineage_mapping::init_with_lines ending with %d entries in mapping\n") % mapping.size());
367}
368
369
370boost::shared_ptr<annotate_lineage_mapping>
371annotate_lineage_mapping::build_parent_lineage (boost::shared_ptr<annotate_context> acp,
372 revision_id parent_rev,
373 const file_data &parent_data) const
374{
375 bool verbose = false;
376 boost::shared_ptr<annotate_lineage_mapping> parent_lineage(new annotate_lineage_mapping(parent_data));
377
378 std::vector<long, QA(long)> lcs;
379 std::back_insert_iterator< std::vector<long, QA(long)> > bii(lcs);
380 longest_common_subsequence(file_interned.begin(),
381 file_interned.end(),
382 parent_lineage->file_interned.begin(),
383 parent_lineage->file_interned.end(),
384 std::min(file_interned.size(), parent_lineage->file_interned.size()),
385 std::back_inserter(lcs));
386
387 if (verbose)
388 L(F("build_parent_lineage: file_lines.size() == %d, parent.file_lines.size() == %d, lcs.size() == %d\n")
389 % file_interned.size() % parent_lineage->file_interned.size() % lcs.size());
390
391 // do the copied lines thing for our annotate_context
392 std::vector<long> lcs_src_lines;
393 lcs_src_lines.resize(lcs.size());
394 size_t i, j;
395 i = j = 0;
396 while (i < file_interned.size() && j < lcs.size()) {
397 //if (verbose)
398 if (file_interned[i] == 14)
399 L(F("%s file_interned[%d]: %ld\tlcs[%d]: %ld\tmapping[%d]: %ld\n")
400 % parent_rev % i % file_interned[i] % j % lcs[j] % i % mapping[i]);
401
402 if (file_interned[i] == lcs[j]) {
403 acp->set_copied(mapping[i]);
404 lcs_src_lines[j] = mapping[i];
405 j++;
406 } else {
407 acp->set_touched(mapping[i]);
408 }
409
410 i++;
411 }
412 if (verbose)
413 L(F("loop ended with i: %d, j: %d, lcs.size(): %d\n") % i % j % lcs.size());
414 I(j == lcs.size());
415
416 // set touched for the rest of the lines in the file
417 while (i < file_interned.size()) {
418 acp->set_touched(mapping[i]);
419 i++;
420 }
421
422 // determine the mapping for parent lineage
423 if (verbose)
424 L(F("build_parent_lineage: building mapping now for parent_rev %s\n") % parent_rev);
425 i = j = 0;
426 while (i < parent_lineage->file_interned.size() && j < lcs.size()) {
427 if (parent_lineage->file_interned[i] == lcs[j]) {
428 parent_lineage->mapping[i] = lcs_src_lines[j];
429 j++;
430 } else {
431 parent_lineage->mapping[i] = -1;
432 }
433 if (verbose)
434 L(F("mapping[%d] -> %d\n") % i % parent_lineage->mapping[i]);
435
436 i++;
437 }
438 I(j == lcs.size());
439 // set mapping for the rest of the lines in the file
440 while (i < parent_lineage->file_interned.size()) {
441 parent_lineage->mapping[i] = -1;
442 if (verbose)
443 L(F("mapping[%d] -> %d\n") % i % parent_lineage->mapping[i]);
444 i++;
445 }
446
447 return parent_lineage;
448}
449
450
451void
452annotate_lineage_mapping::merge (const annotate_lineage_mapping &other,
453 const boost::shared_ptr<annotate_context> &acp)
454{
455 I(file_interned.size() == other.file_interned.size());
456 I(mapping.size() == other.mapping.size());
457 //I(equal_interned(other)); // expensive check
458
459 for (size_t i=0; i<mapping.size(); i++) {
460 if (mapping[i] == -1 && other.mapping[i] >= 0)
461 mapping[i] = other.mapping[i];
462
463 if (mapping[i] >= 0 && other.mapping[i] >= 0) {
464 //I(mapping[i] == other.mapping[i]);
465 if (mapping[i] != other.mapping[i]) {
466 // a given line in the current merged mapping will split and become
467 // multiple lines in the UDOI. so we have to remember that whenever we
468 // ultimately assign blame for mapping[i] we blame the same revision
469 // on other.mapping[i].
470 acp->set_equivalent(other.mapping[i], mapping[i]);
471 }
472 }
473 }
474}
475
476void
477annotate_lineage_mapping::credit_mapped_lines (boost::shared_ptr<annotate_context> acp) const
478{
479 std::vector<int>::const_iterator i;
480 for (i=mapping.begin(); i != mapping.end(); i++) {
481 acp->set_touched(*i);
482 }
483}
484
485
486void
487annotate_lineage_mapping::set_copied_all_mapped (boost::shared_ptr<annotate_context> acp) const
488{
489 std::vector<int>::const_iterator i;
490 for (i=mapping.begin(); i != mapping.end(); i++) {
491 acp->set_copied(*i);
492 }
493}
494
495
496static void
497do_annotate_node (const annotate_node_work &work_unit,
498 app_state &app,
499 std::deque<annotate_node_work> &nodes_to_process,
500 std::set<revision_id> &nodes_complete,
501 const std::map<revision_id, size_t> &paths_to_nodes,
502 std::map<revision_id, lineage_merge_node> &pending_merge_nodes)
503{
504 L(F("do_annotate_node for node %s\n") % work_unit.node_revision);
505 I(nodes_complete.find(work_unit.node_revision) == nodes_complete.end());
506 //nodes_seen.insert(std::make_pair(work_unit.node_revision, work_unit.lineage));
507
508 revision_set rev;
509 app.db.get_revision(work_unit.node_revision, rev);
510
511 if (rev.edges.size() == 0) {
512 L(F("do_annotate_node credit_mapped_lines to revision %s\n") % work_unit.node_revision);
513 work_unit.lineage->credit_mapped_lines(work_unit.annotations);
514 work_unit.annotations->evaluate(work_unit.node_revision);
515 nodes_complete.insert(work_unit.node_revision);
516 return;
517 }
518
519 // if all deltas backwards have to add the file, then we credit any mapped but
520 // unassigned lines in our lineage to this revision. gotta count adds to compare to number
521 // of parent revs.
522 size_t added_in_parent_count = 0;
523
524 // edges are from parent -> child where child is our work_unit node
525 for (edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); i++)
526 {
527 revision_id parent_revision = edge_old_revision(i);
528 L(F("do_annotate_node processing edge from parent %s to child %s\n") % parent_revision % work_unit.node_revision);
529
530 change_set cs = edge_changes(i);
531 if (cs.rearrangement.added_files.find(work_unit.node_fpath) != cs.rearrangement.added_files.end())
532 {
533 L(F("file %s added in %s, continuing\n") % work_unit.node_fpath % work_unit.node_revision);
534 added_in_parent_count++;
535 continue;
536 }
537
538 file_path parent_fpath = apply_change_set_inverse(cs, work_unit.node_fpath);
539 L(F("file %s in parent revision %s is %s\n") % work_unit.node_fpath % parent_revision % parent_fpath);
540
541 I(!parent_fpath.empty());
542
543 change_set::delta_map::const_iterator fdelta_iter = cs.deltas.find(parent_fpath);
544 file_id parent_fid = work_unit.node_fid;
545
546 boost::shared_ptr<annotate_lineage_mapping> parent_lineage;
547
548 if (fdelta_iter != cs.deltas.end()) // then the file changed
549 {
550 I(delta_entry_dst(fdelta_iter) == work_unit.node_fid);
551 parent_fid = delta_entry_src(fdelta_iter);
552 file_data data;
553 app.db.get_file_version(parent_fid, data);
554
555 L(F("building parent lineage for parent file %s\n") % parent_fid);
556 parent_lineage = work_unit.lineage->build_parent_lineage(work_unit.annotations,
557 parent_revision,
558 data);
559 }
560 else
561 {
562 L(F("parent file identical, set copied all mapped and copy lineage\n"));
563 parent_lineage = work_unit.lineage;
564 parent_lineage->set_copied_all_mapped(work_unit.annotations);
565 }
566
567 // if this parent has not yet been queued for processing, create the work unit for it.
568 std::map<revision_id, lineage_merge_node>::iterator lmn = pending_merge_nodes.find(parent_revision);
569 if (lmn == pending_merge_nodes.end())
570 {
571 annotate_node_work newunit(work_unit.annotations, parent_lineage, parent_revision, parent_fid, parent_fpath);
572
573 std::map<revision_id, size_t>::const_iterator ptn = paths_to_nodes.find(parent_revision);
574 if (ptn->second > 1) {
575 lineage_merge_node nmn(newunit, ptn->second);
576 pending_merge_nodes.insert(std::make_pair(parent_revision, nmn));
577 // just checking...
578 //(pending_merge_nodes.find(parent_revision))->second.dump();
579 }
580 else {
581 //L(F("single path to node, just stick work on the queue\n"));
582 nodes_to_process.push_back(newunit);
583 }
584 }
585 else
586 {
587 // already a pending node, so we just have to merge the lineage and decide whether to move it
588 // over to the nodes_to_process queue
589 L(F("merging lineage from node %s to parent %s\n") % work_unit.node_revision % parent_revision);
590 lmn->second.merge(parent_lineage, work_unit.annotations);
591 //L(F("after merging from work revision %s to parent %s lineage_merge_node is:\n") % work_unit.node_revision % parent_revision);
592 //lmn->second.dump();
593 if (lmn->second.iscomplete()) {
594 nodes_to_process.push_back(lmn->second.get_work());
595 pending_merge_nodes.erase(lmn);
596 }
597 }
598 }
599
600 I(added_in_parent_count <= rev.edges.size());
601 if (added_in_parent_count == rev.edges.size())
602 {
603 //L(F("added_in_parent_count == rev.edges.size(), credit_mapped_lines to %s\n") % work_unit.node_revision);
604 work_unit.lineage->credit_mapped_lines(work_unit.annotations);
605 }
606
607 work_unit.annotations->evaluate(work_unit.node_revision);
608 nodes_complete.insert(work_unit.node_revision);
609}
610
611
612void
613find_ancestors(app_state &app, revision_id rid, std::map<revision_id, size_t> &paths_to_nodes)
614{
615 std::vector<revision_id> frontier;
616 frontier.push_back(rid);
617
618 while (!frontier.empty())
619 {
620 revision_id rid = frontier.back();
621 frontier.pop_back();
622 if(!null_id(rid)) {
623 std::set<revision_id> parents;
624 app.db.get_revision_parents(rid, parents);
625 for (std::set<revision_id>::const_iterator i = parents.begin();
626 i != parents.end(); ++i)
627 {
628 std::map<revision_id, size_t>::iterator found = paths_to_nodes.find(*i);
629 if (found == paths_to_nodes.end())
630 {
631 frontier.push_back(*i);
632 paths_to_nodes.insert(std::make_pair(*i, 1));
633 }
634 else
635 {
636 (found->second)++;
637 }
638 }
639 }
640 }
641}
642
643void
644do_annotate (app_state &app, file_path fpath, file_id fid, revision_id rid)
645{
646 L(F("annotating file %s with id %s in revision %s\n") % fpath % fid % rid);
647
648 boost::shared_ptr<annotate_context> acp(new annotate_context(fid, app));
649 boost::shared_ptr<annotate_lineage_mapping> lineage = acp->initial_lineage();
650
651 std::set<revision_id> nodes_complete;
652 std::map<revision_id, size_t> paths_to_nodes;
653 std::map<revision_id, lineage_merge_node> pending_merge_nodes;
654 find_ancestors(app, rid, paths_to_nodes);
655
656 // build node work unit
657 std::deque<annotate_node_work> nodes_to_process;
658 annotate_node_work workunit(acp, lineage, rid, fid, fpath);
659 nodes_to_process.push_back(workunit);
660
661 while (nodes_to_process.size() && !acp->is_complete()) {
662 annotate_node_work work = nodes_to_process.front();
663 nodes_to_process.pop_front();
664 do_annotate_node(work, app, nodes_to_process, nodes_complete, paths_to_nodes, pending_merge_nodes);
665 }
666 I(pending_merge_nodes.size() == 0);
667 acp->annotate_equivalent_lines();
668 I(acp->is_complete());
669
670 acp->dump();
671}

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status