monotone

monotone Mtn Source Tree

Root/src/ancestry.cc

1// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "sanity.hh"
12#include "revision.hh"
13#include "rev_height.hh"
14#include "roster.hh"
15
16#include "database.hh"
17#include "interner.hh"
18
19#include "safe_map.hh"
20#include <stack>
21#include <boost/shared_ptr.hpp>
22#include <boost/dynamic_bitset.hpp>
23
24using std::make_pair;
25using std::map;
26using std::max;
27using std::multimap;
28using std::pair;
29using std::set;
30using std::stack;
31using std::vector;
32
33using boost::shared_ptr;
34using boost::dynamic_bitset;
35
36// For a surprisingly long time, we have been using an algorithm which
37// is nonsense, based on a misunderstanding of what "LCA" means. The
38// LCA of two nodes is *not* the first common ancestor which you find
39// when iteratively expanding their ancestor sets. Instead, the LCA is
40// the common ancestor which is a descendent of all other common
41// ancestors.
42//
43// In general, a set of nodes in a DAG doesn't always have an
44// LCA. There might be multiple common ancestors which are not parents
45// of one another. So we implement something which is "functionally
46// useful" for finding a merge point (and moreover, which always
47// terminates): we find an LCA of the input set if it exists,
48// otherwise we replace the input set with the nodes we did find and
49// repeat.
50//
51// All previous discussions in monotone-land, before say August 2005,
52// of LCA (and LCAD) are essentially wrong due to our silly
53// misunderstanding. It's unfortunate, but our half-baked
54// approximations worked almost well enough to take us through 3 years
55// of deployed use. Hopefully this more accurate new use will serve us
56// even longer.
57
58typedef unsigned long ctx;
59typedef dynamic_bitset<> bitmap;
60typedef shared_ptr<bitmap> shared_bitmap;
61
62static void
63calculate_ancestors_from_graph(interner<ctx> & intern,
64 revision_id const & init,
65 multimap<revision_id, revision_id> const & graph,
66 map< ctx, shared_bitmap > & ancestors,
67 shared_bitmap & total_union);
68
69void
70find_common_ancestor_for_merge(database & db,
71 revision_id const & left,
72 revision_id const & right,
73 revision_id & anc)
74{
75 interner<ctx> intern;
76 set<ctx> leaves;
77 map<ctx, shared_bitmap> ancestors;
78
79 shared_bitmap isect = shared_bitmap(new bitmap());
80 shared_bitmap isect_ancs = shared_bitmap(new bitmap());
81
82 leaves.insert(intern.intern(left.inner()()));
83 leaves.insert(intern.intern(right.inner()()));
84
85
86 multimap<revision_id, revision_id> inverse_graph;
87 db.get_reverse_ancestry(inverse_graph);
88
89 while (leaves.size() != 1)
90 {
91 isect->clear();
92 isect_ancs->clear();
93
94 // First intersect all ancestors of current leaf set
95 for (set<ctx>::const_iterator i = leaves.begin(); i != leaves.end(); ++i)
96 {
97 ctx curr_leaf = *i;
98 shared_bitmap curr_leaf_ancestors;
99 map<ctx, shared_bitmap >::const_iterator j = ancestors.find(*i);
100 if (j != ancestors.end())
101 curr_leaf_ancestors = j->second;
102 else
103 {
104 curr_leaf_ancestors = shared_bitmap(new bitmap());
105 calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf),
106 origin::internal),
107 inverse_graph, ancestors,
108 curr_leaf_ancestors);
109 }
110 if (isect->size() > curr_leaf_ancestors->size())
111 curr_leaf_ancestors->resize(isect->size());
112
113 if (curr_leaf_ancestors->size() > isect->size())
114 isect->resize(curr_leaf_ancestors->size());
115
116 if (i == leaves.begin())
117 *isect = *curr_leaf_ancestors;
118 else
119 (*isect) &= (*curr_leaf_ancestors);
120 }
121
122 // isect is now the set of common ancestors of leaves, but that is not enough.
123 // We need the set of leaves of isect; to do that we calculate the set of
124 // ancestors of isect, in order to subtract it from isect (below).
125 set<ctx> new_leaves;
126 for (ctx i = 0; i < isect->size(); ++i)
127 {
128 if (isect->test(i))
129 {
130 calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i),
131 origin::internal),
132 inverse_graph, ancestors, isect_ancs);
133 }
134 }
135
136 // Finally, the subtraction step: for any element i of isect, if
137 // it's *not* in isect_ancs, it survives as a new leaf.
138 leaves.clear();
139 for (ctx i = 0; i < isect->size(); ++i)
140 {
141 if (!isect->test(i))
142 continue;
143 if (i < isect_ancs->size() && isect_ancs->test(i))
144 continue;
145 safe_insert(leaves, i);
146 }
147 }
148
149 I(leaves.size() == 1);
150 anc = revision_id(intern.lookup(*leaves.begin()), origin::internal);
151}
152
153static void
154add_bitset_to_union(shared_bitmap src,
155 shared_bitmap dst)
156{
157 if (dst->size() > src->size())
158 src->resize(dst->size());
159 if (src->size() > dst->size())
160 dst->resize(src->size());
161 *dst |= *src;
162}
163
164
165static void
166calculate_ancestors_from_graph(interner<ctx> & intern,
167 revision_id const & init,
168 multimap<revision_id, revision_id> const & graph,
169 map< ctx, shared_bitmap > & ancestors,
170 shared_bitmap & total_union)
171{
172 typedef multimap<revision_id, revision_id>::const_iterator gi;
173 stack<ctx> stk;
174
175 stk.push(intern.intern(init.inner()()));
176
177 while (! stk.empty())
178 {
179 ctx us = stk.top();
180 revision_id rev(intern.lookup(us), origin::internal);
181
182 pair<gi,gi> parents = graph.equal_range(rev);
183 bool pushed = false;
184
185 // first make sure all parents are done
186 for (gi i = parents.first; i != parents.second; ++i)
187 {
188 ctx parent = intern.intern(i->second.inner()());
189 if (ancestors.find(parent) == ancestors.end())
190 {
191 stk.push(parent);
192 pushed = true;
193 break;
194 }
195 }
196
197 // if we pushed anything we stop now. we'll come back later when all
198 // the parents are done.
199 if (pushed)
200 continue;
201
202 shared_bitmap b = shared_bitmap(new bitmap());
203
204 for (gi i = parents.first; i != parents.second; ++i)
205 {
206 ctx parent = intern.intern(i->second.inner()());
207
208 // set all parents
209 if (b->size() <= parent)
210 b->resize(parent + 1);
211 b->set(parent);
212
213 // ensure all parents are loaded into the ancestor map
214 I(ancestors.find(parent) != ancestors.end());
215
216 // union them into our map
217 map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
218 I(j != ancestors.end());
219 add_bitset_to_union(j->second, b);
220 }
221
222 add_bitset_to_union(b, total_union);
223 ancestors.insert(make_pair(us, b));
224 stk.pop();
225 }
226}
227
228void
229toposort(database & db,
230 set<revision_id> const & revisions,
231 vector<revision_id> & sorted)
232{
233 map<rev_height, revision_id> work;
234
235 for (set<revision_id>::const_iterator i = revisions.begin();
236 i != revisions.end(); ++i)
237 {
238 rev_height height;
239 db.get_rev_height(*i, height);
240 work.insert(make_pair(height, *i));
241 }
242
243 sorted.clear();
244
245 for (map<rev_height, revision_id>::const_iterator i = work.begin();
246 i != work.end(); ++i)
247 {
248 sorted.push_back(i->second);
249 }
250}
251
252static void
253accumulate_strict_ancestors(database & db,
254 revision_id const & start,
255 set<revision_id> & all_ancestors,
256 multimap<revision_id, revision_id> const & inverse_graph,
257 rev_height const & min_height)
258{
259 typedef multimap<revision_id, revision_id>::const_iterator gi;
260
261 vector<revision_id> frontier;
262 frontier.push_back(start);
263
264 while (!frontier.empty())
265 {
266 revision_id rid = frontier.back();
267 frontier.pop_back();
268 pair<gi, gi> parents = inverse_graph.equal_range(rid);
269 for (gi i = parents.first; i != parents.second; ++i)
270 {
271 revision_id const & parent = i->second;
272 if (all_ancestors.find(parent) == all_ancestors.end())
273 {
274 // prune if we're below min_height
275 rev_height h;
276 db.get_rev_height(parent, h);
277 if (h >= min_height)
278 {
279 all_ancestors.insert(parent);
280 frontier.push_back(parent);
281 }
282 }
283 }
284 }
285}
286
287// this call is equivalent to running:
288// erase(remove_if(candidates.begin(), candidates.end(), p));
289// erase_ancestors(candidates, db);
290// however, by interleaving the two operations, it can in common cases make
291// many fewer calls to the predicate, which can be a significant speed win.
292
293void
294erase_ancestors_and_failures(database & db,
295 std::set<revision_id> & candidates,
296 is_failure & p,
297 multimap<revision_id, revision_id> *inverse_graph_cache_ptr)
298{
299 // Load up the ancestry graph
300 multimap<revision_id, revision_id> inverse_graph;
301
302 if (candidates.empty())
303 return;
304
305 if (inverse_graph_cache_ptr == NULL)
306 inverse_graph_cache_ptr = &inverse_graph;
307 if (inverse_graph_cache_ptr->empty())
308 {
309 db.get_reverse_ancestry(*inverse_graph_cache_ptr);
310 }
311
312 // Keep a set of all ancestors that we've traversed -- to avoid
313 // combinatorial explosion.
314 set<revision_id> all_ancestors;
315
316 rev_height min_height;
317 db.get_rev_height(*candidates.begin(), min_height);
318 for (std::set<revision_id>::const_iterator it = candidates.begin(); it != candidates.end(); it++)
319 {
320 rev_height h;
321 db.get_rev_height(*it, h);
322 if (h < min_height)
323 min_height = h;
324 }
325
326 vector<revision_id> todo(candidates.begin(), candidates.end());
327 std::random_shuffle(todo.begin(), todo.end());
328
329 size_t predicates = 0;
330 while (!todo.empty())
331 {
332 revision_id rid = todo.back();
333 todo.pop_back();
334 // check if this one has already been eliminated
335 if (all_ancestors.find(rid) != all_ancestors.end())
336 continue;
337 // and then whether it actually should stay in the running:
338 ++predicates;
339 if (p(rid))
340 {
341 candidates.erase(rid);
342 continue;
343 }
344 // okay, it is good enough that all its ancestors should be
345 // eliminated
346 accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height);
347 }
348
349 // now go and eliminate the ancestors
350 for (set<revision_id>::const_iterator i = all_ancestors.begin();
351 i != all_ancestors.end(); ++i)
352 candidates.erase(*i);
353
354 L(FL("called predicate %s times") % predicates);
355}
356
357// This function looks at a set of revisions, and for every pair A, B in that
358// set such that A is an ancestor of B, it erases A.
359
360namespace
361{
362 struct no_failures : public is_failure
363 {
364 virtual bool operator()(revision_id const & rid)
365 {
366 return false;
367 }
368 };
369}
370void
371erase_ancestors(database & db, set<revision_id> & revisions)
372{
373 no_failures p;
374 erase_ancestors_and_failures(db, revisions, p);
375}
376
377static void
378accumulate_strict_descendants(database & db,
379 revision_id const & start,
380 set<revision_id> & all_descendants,
381 multimap<revision_id, revision_id> const & graph,
382 rev_height const & max_height)
383{
384 typedef multimap<revision_id, revision_id>::const_iterator gi;
385
386 vector<revision_id> frontier;
387 frontier.push_back(start);
388
389 while (!frontier.empty())
390 {
391 revision_id rid = frontier.back();
392 frontier.pop_back();
393 pair<gi, gi> parents = graph.equal_range(rid);
394 for (gi i = parents.first; i != parents.second; ++i)
395 {
396 revision_id const & parent = i->second;
397 if (all_descendants.find(parent) == all_descendants.end())
398 {
399 // prune if we're above max_height
400 rev_height h;
401 db.get_rev_height(parent, h);
402 if (h <= max_height)
403 {
404 all_descendants.insert(parent);
405 frontier.push_back(parent);
406 }
407 }
408 }
409 }
410}
411
412// this call is equivalent to running:
413// erase(remove_if(candidates.begin(), candidates.end(), p));
414// erase_descendants(candidates, db);
415// however, by interleaving the two operations, it can in common cases make
416// many fewer calls to the predicate, which can be a significant speed win.
417
418void
419erase_descendants_and_failures(database & db,
420 std::set<revision_id> & candidates,
421 is_failure & p,
422 multimap<revision_id, revision_id> *graph_cache_ptr)
423{
424 // Load up the ancestry graph
425 multimap<revision_id, revision_id> graph;
426
427 if (candidates.empty())
428 return;
429
430 if (graph_cache_ptr == NULL)
431 graph_cache_ptr = &graph;
432 if (graph_cache_ptr->empty())
433 {
434 db.get_forward_ancestry(*graph_cache_ptr);
435 }
436
437 // Keep a set of all descendants that we've traversed -- to avoid
438 // combinatorial explosion.
439 set<revision_id> all_descendants;
440
441 rev_height max_height;
442 db.get_rev_height(*candidates.begin(), max_height);
443 for (std::set<revision_id>::const_iterator it = candidates.begin(); it != candidates.end(); it++)
444 {
445 rev_height h;
446 db.get_rev_height(*it, h);
447 if (h > max_height)
448 max_height = h;
449 }
450
451 vector<revision_id> todo(candidates.begin(), candidates.end());
452 std::random_shuffle(todo.begin(), todo.end());
453
454 size_t predicates = 0;
455 while (!todo.empty())
456 {
457 revision_id rid = todo.back();
458 todo.pop_back();
459 // check if this one has already been eliminated
460 if (all_descendants.find(rid) != all_descendants.end())
461 continue;
462 // and then whether it actually should stay in the running:
463 ++predicates;
464 if (p(rid))
465 {
466 candidates.erase(rid);
467 continue;
468 }
469 // okay, it is good enough that all its descendants should be
470 // eliminated
471 accumulate_strict_descendants(db, rid, all_descendants, *graph_cache_ptr, max_height);
472 }
473
474 // now go and eliminate the ancestors
475 for (set<revision_id>::const_iterator i = all_descendants.begin();
476 i != all_descendants.end(); ++i)
477 candidates.erase(*i);
478
479 L(FL("called predicate %s times") % predicates);
480}
481
482// This function looks at a set of revisions, and for every pair A, B in that
483// set such that A is an descendant of B, it erases A.
484
485void
486erase_descendants(database & db, set<revision_id> & revisions)
487{
488 no_failures p;
489 erase_descendants_and_failures(db, revisions, p);
490}
491
492// This function takes a revision A and a set of revision Bs, calculates the
493// ancestry of each, and returns the set of revisions that are in A's ancestry
494// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
495// that's not in the Bs. If the output set if non-empty, then A will
496// certainly be in it; but the output set might be empty.
497void
498ancestry_difference(database & db, revision_id const & a,
499 set<revision_id> const & bs,
500 set<revision_id> & new_stuff)
501{
502 new_stuff.clear();
503 typedef multimap<revision_id, revision_id>::const_iterator gi;
504 multimap<revision_id, revision_id> inverse_graph;
505
506 db.get_reverse_ancestry(inverse_graph);
507
508 interner<ctx> intern;
509 map< ctx, shared_bitmap > ancestors;
510
511 shared_bitmap u = shared_bitmap(new bitmap());
512
513 for (set<revision_id>::const_iterator i = bs.begin();
514 i != bs.end(); ++i)
515 {
516 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
517 ctx c = intern.intern(i->inner()());
518 if (u->size() <= c)
519 u->resize(c + 1);
520 u->set(c);
521 }
522
523 shared_bitmap au = shared_bitmap(new bitmap());
524 calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
525 {
526 ctx c = intern.intern(a.inner()());
527 if (au->size() <= c)
528 au->resize(c + 1);
529 au->set(c);
530 }
531
532 au->resize(max(au->size(), u->size()));
533 u->resize(max(au->size(), u->size()));
534
535 *au -= *u;
536
537 for (unsigned int i = 0; i != au->size(); ++i)
538 {
539 if (au->test(i))
540 {
541 revision_id rid(intern.lookup(i), origin::internal);
542 if (!null_id(rid))
543 new_stuff.insert(rid);
544 }
545 }
546}
547
548void
549select_nodes_modified_by_rev(database & db,
550 revision_t const & rev,
551 roster_t const new_roster,
552 set<node_id> & nodes_modified)
553{
554 nodes_modified.clear();
555
556 for (edge_map::const_iterator i = rev.edges.begin();
557 i != rev.edges.end(); ++i)
558 {
559 set<node_id> edge_nodes_modified;
560 roster_t old_roster;
561 db.get_roster(edge_old_revision(i), old_roster);
562 select_nodes_modified_by_cset(edge_changes(i),
563 old_roster,
564 new_roster,
565 edge_nodes_modified);
566
567 copy(edge_nodes_modified.begin(), edge_nodes_modified.end(),
568 inserter(nodes_modified, nodes_modified.begin()));
569 }
570}
571
572// These functions create new ancestry!
573
574namespace {
575 struct true_node_id_source
576 : public node_id_source
577 {
578 true_node_id_source(database & db) : db(db) {}
579 virtual node_id next()
580 {
581 node_id n = db.next_node_id();
582 I(!temp_node(n));
583 return n;
584 }
585 database & db;
586 };
587}
588
589// WARNING: these functions have no unit tests. All the real work
590// should be done in the alternative overloads in roster.cc, where it
591// can be unit tested. (See comments in that file for further explanation.)
592static void
593make_roster_for_merge(revision_t const & rev, revision_id const & new_rid,
594 roster_t & new_roster, marking_map & new_markings,
595 database & db, node_id_source & nis)
596{
597 edge_map::const_iterator i = rev.edges.begin();
598 revision_id const & left_rid = edge_old_revision(i);
599 cset const & left_cs = edge_changes(i);
600 ++i;
601 revision_id const & right_rid = edge_old_revision(i);
602 cset const & right_cs = edge_changes(i);
603
604 I(!null_id(left_rid) && !null_id(right_rid));
605 cached_roster left_cached, right_cached;
606 db.get_roster(left_rid, left_cached);
607 db.get_roster(right_rid, right_cached);
608
609 set<revision_id> left_uncommon_ancestors, right_uncommon_ancestors;
610 db.get_uncommon_ancestors(left_rid, right_rid,
611 left_uncommon_ancestors,
612 right_uncommon_ancestors);
613
614 make_roster_for_merge(left_rid, *left_cached.first, *left_cached.second,
615 left_cs, left_uncommon_ancestors,
616 right_rid, *right_cached.first, *right_cached.second,
617 right_cs, right_uncommon_ancestors,
618 new_rid,
619 new_roster, new_markings,
620 nis);
621}
622
623static void
624make_roster_for_nonmerge(revision_t const & rev,
625 revision_id const & new_rid,
626 roster_t & new_roster, marking_map & new_markings,
627 database & db, node_id_source & nis)
628{
629 revision_id const & parent_rid = edge_old_revision(rev.edges.begin());
630 cset const & parent_cs = edge_changes(rev.edges.begin());
631 db.get_roster(parent_rid, new_roster, new_markings);
632 make_roster_for_nonmerge(parent_cs, new_rid, new_roster, new_markings, nis);
633}
634
635void
636make_roster_for_revision(database & db, node_id_source & nis,
637 revision_t const & rev, revision_id const & new_rid,
638 roster_t & new_roster, marking_map & new_markings)
639{
640 MM(rev);
641 MM(new_rid);
642 MM(new_roster);
643 MM(new_markings);
644 if (rev.edges.size() == 1)
645 make_roster_for_nonmerge(rev, new_rid, new_roster, new_markings, db, nis);
646 else if (rev.edges.size() == 2)
647 make_roster_for_merge(rev, new_rid, new_roster, new_markings, db, nis);
648 else
649 I(false);
650
651 // If nis is not a true_node_id_source, we have to assume we can get temp
652 // node ids out of it. ??? Provide a predicate method on node_id_sources
653 // instead of doing a typeinfo comparison.
654 new_roster.check_sane_against(new_markings,
655 typeid(nis) != typeid(true_node_id_source));
656}
657
658void
659make_roster_for_revision(database & db,
660 revision_t const & rev, revision_id const & new_rid,
661 roster_t & new_roster, marking_map & new_markings)
662{
663 true_node_id_source nis(db);
664 make_roster_for_revision(db, nis, rev, new_rid, new_roster, new_markings);
665}
666
667// ancestry graph loader
668
669void
670graph_loader::load_parents(revision_id const rid,
671 set<revision_id> & parents)
672{
673 db.get_revision_parents(rid, parents);
674}
675
676void
677graph_loader::load_children(revision_id const rid,
678 set<revision_id> & children)
679{
680 db.get_revision_children(rid, children);
681}
682
683void
684graph_loader::load_ancestors(set<revision_id> & revs)
685{
686 load_revs(ancestors, revs);
687}
688
689void
690graph_loader::load_descendants(set<revision_id> & revs)
691{
692 load_revs(descendants, revs);
693}
694
695void
696graph_loader::load_revs(load_direction const direction,
697 set<revision_id> & revs)
698{
699 std::deque<revision_id> next(revs.begin(), revs.end());
700
701 while (!next.empty())
702 {
703 revision_id const & rid(next.front());
704 MM(rid);
705
706 set<revision_id> relatives;
707 MM(relatives);
708
709 if (direction == ancestors)
710 load_parents(rid, relatives);
711 else if (direction == descendants)
712 load_children(rid, relatives);
713 else
714 I(false);
715
716 for (set<revision_id>::const_iterator i = relatives.begin();
717 i != relatives.end(); ++i)
718 {
719 if (null_id(*i))
720 continue;
721 pair<set<revision_id>::iterator, bool> res = revs.insert(*i);
722 if (res.second)
723 next.push_back(*i);
724 }
725
726 next.pop_front();
727 }
728}
729
730// Local Variables:
731// mode: C++
732// fill-column: 76
733// c-file-style: "gnu"
734// indent-tabs-mode: nil
735// End:
736// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status