monotone

monotone Mtn Source Tree

Root/graph.cc

#include "base.hh"

#include <iterator>
#include <list>
#include <map>
#include <utility>

#include <boost/shared_ptr.hpp>

#include "sanity.hh"
#include "graph.hh"
#include "safe_map.hh"
#include "numeric_vocab.hh"
#include "hash_map.hh"
#include "vocab_hash.hh"
#include "rev_height.hh"
#include "transforms.hh"
15
16using boost::shared_ptr;
17using std::string;
18using std::vector;
19using std::set;
20using std::pair;
21using std::map;
22using std::multimap;
23using std::make_pair;
24using std::list;
25
26using hashmap::hash_set;
27
void
get_reconstruction_path(id const & start,
                        reconstruction_graph const & graph,
                        reconstruction_path & path)
{
  // This function does a breadth-first search from a starting point, until it
  // finds some node that matches an arbitrary condition. The intended usage
  // is for finding reconstruction paths in a database of deltas -- we start
  // from the node we want to reconstruct, and follow existing deltas outward
  // until we reach a full-text base. We return the shortest path from
  // 'start' to a base version.
  //
  // The algorithm involves keeping a set of parallel linear paths, starting
  // from 'start', that move forward through the DAG until we hit a base.
  //
  // On each iteration, we extend every active path by one step. If our
  // extension involves a fork, we duplicate the path. If any path
  // contains a cycle, we fault.
  //
  // If, by extending a path C, we enter a node which another path
  // D has already seen, we kill path C. This avoids the possibility of
  // exponential growth in the number of paths due to extensive forking
  // and merging.

  // Long ago, we used to do this with the boost graph library, but it
  // involved loading too much of the storage graph into memory at any
  // moment. This imperative version only loads the descendants of the
  // reconstruction node, so it is much cheaper in terms of memory.

  vector< shared_ptr<reconstruction_path> > live_paths;

  // Seed the search with a single path containing only 'start'.
  {
    shared_ptr<reconstruction_path> pth0 = shared_ptr<reconstruction_path>(new reconstruction_path());
    pth0->push_back(start);
    live_paths.push_back(pth0);
  }

  shared_ptr<reconstruction_path> selected_path;
  set<id> seen_nodes;

  while (!selected_path)
    {
      vector< shared_ptr<reconstruction_path> > next_paths;

      I(!live_paths.empty());
      for (vector<shared_ptr<reconstruction_path> >::const_iterator i = live_paths.begin();
           i != live_paths.end(); ++i)
        {
          shared_ptr<reconstruction_path> pth = *i;
          id tip = pth->back();

          if (graph.is_base(tip))
            {
              // Found a full-text base; the BFS guarantees this is a
              // shortest path, so stop the whole search.
              selected_path = pth;
              break;
            }
          else
            {
              // This tip is not a root, so extend the path.
              set<id> next;
              graph.get_next(tip, next);
              I(!next.empty());

              // Replicate the path if there's a fork.
              bool first = true;
              for (set<id>::const_iterator j = next.begin();
                   j != next.end(); ++j)
                {
                  if (global_sanity.debug_p())
                    L(FL("considering %s -> %s") % tip % *j);
                  if (seen_nodes.find(*j) == seen_nodes.end())
                    {
                      shared_ptr<reconstruction_path> pthN;
                      if (first)
                        {
                          // Reuse (and mutate) the existing path object for
                          // the first successor; copies are only made for
                          // additional forks below.
                          pthN = pth;
                          first = false;
                        }
                      else
                        {
                          // NOTE: this is not the first iteration of the loop, and
                          // the first iteration appended one item to pth. So, we
                          // want to remove one before we use it. (Why not just
                          // copy every time? Because that makes this into an
                          // O(n^2) algorithm, in the common case where there is
                          // only one direction to go at each stop.)
                          pthN = shared_ptr<reconstruction_path>(new reconstruction_path(*pth));
                          I(!pthN->empty());
                          pthN->pop_back();
                        }
                      // check for a cycle... not that anything would break if
                      // there were one, but it's nice to let us know we have a bug
                      for (reconstruction_path::const_iterator k = pthN->begin(); k != pthN->end(); ++k)
                        I(*k != *j);
                      pthN->push_back(*j);
                      next_paths.push_back(pthN);
                      seen_nodes.insert(*j);
                    }
                }
            }
        }

      // Either we already selected a path, or at least one path must have
      // survived this round -- otherwise the graph has no reachable base.
      I(selected_path || !next_paths.empty());
      live_paths = next_paths;
    }

  path = *selected_path;
}
136
137
138#ifdef BUILD_UNIT_TESTS
139
140#include <map>
141#include "unit_tests.hh"
142#include "randomizer.hh"
143
144#include "transforms.hh"
145#include "lexical_cast.hh"
146
147using boost::lexical_cast;
148using std::pair;
149
150typedef std::multimap<id, id> rg_map;
151struct mock_reconstruction_graph : public reconstruction_graph
152{
153 rg_map ancestry;
154 set<id> bases;
155 mock_reconstruction_graph(rg_map const & ancestry, set<id> const & bases)
156 : ancestry(ancestry), bases(bases)
157 {}
158 virtual bool is_base(id const & node) const
159 {
160 return bases.find(node) != bases.end();
161 }
162 virtual void get_next(id const & from, set<id> & next) const
163 {
164 typedef rg_map::const_iterator ci;
165 pair<ci, ci> range = ancestry.equal_range(from);
166 for (ci i = range.first; i != range.second; ++i)
167 next.insert(i->second);
168 }
169};
170
171static void
172make_random_reconstruction_graph(size_t num_nodes, size_t num_random_edges,
173 size_t num_random_bases,
174 vector<id> & all_nodes, rg_map & ancestry,
175 set<id> & bases,
176 randomizer & rng)
177{
178 for (size_t i = 0; i != num_nodes; ++i)
179 {
180 id hash;
181 string s(lexical_cast<string>(i));
182 calculate_ident(data(s), hash);
183 all_nodes.push_back(hash);
184 }
185 // We put a single long chain of edges in, to make sure that everything is
186 // reconstructable somehow.
187 for (size_t i = 1; i != num_nodes; ++i)
188 ancestry.insert(make_pair(idx(all_nodes, i - 1), idx(all_nodes, i)));
189 bases.insert(all_nodes.back());
190 // Then we insert a bunch of random edges too. These edges always go
191 // forwards, to avoid creating cycles (which make get_reconstruction_path
192 // unhappy).
193 for (size_t i = 0; i != num_random_edges; ++i)
194 {
195 size_t from_idx = rng.uniform(all_nodes.size() - 1);
196 size_t to_idx = from_idx + 1 + rng.uniform(all_nodes.size() - 1 - from_idx);
197 ancestry.insert(make_pair(idx(all_nodes, from_idx),
198 idx(all_nodes, to_idx)));
199 }
200 // And a bunch of random bases.
201 for (size_t i = 0; i != num_random_bases; ++i)
202 bases.insert(idx(all_nodes, rng.uniform(all_nodes.size())));
203}
204
205static void
206check_reconstruction_path(id const & start, reconstruction_graph const & graph,
207 reconstruction_path const & path)
208{
209 I(!path.empty());
210 I(*path.begin() == start);
211 reconstruction_path::const_iterator last = path.end();
212 --last;
213 I(graph.is_base(*last));
214 for (reconstruction_path::const_iterator i = path.begin(); i != last; ++i)
215 {
216 set<id> children;
217 graph.get_next(*i, children);
218 reconstruction_path::const_iterator next = i;
219 ++next;
220 I(children.find(*next) != children.end());
221 }
222}
223
224static void
225run_get_reconstruction_path_tests_on_random_graph(size_t num_nodes,
226 size_t num_random_edges,
227 size_t num_random_bases,
228 randomizer & rng)
229{
230 vector<id> all_nodes;
231 rg_map ancestry;
232 set<id> bases;
233 make_random_reconstruction_graph(num_nodes, num_random_edges, num_random_bases,
234 all_nodes, ancestry, bases,
235 rng);
236 mock_reconstruction_graph graph(ancestry, bases);
237 for (vector<id>::const_iterator i = all_nodes.begin();
238 i != all_nodes.end(); ++i)
239 {
240 reconstruction_path path;
241 get_reconstruction_path(*i, graph, path);
242 check_reconstruction_path(*i, graph, path);
243 }
244}
245
// Exercise get_reconstruction_path on random graphs of increasing size.
// Arguments are (num_nodes, num_random_edges, num_random_bases).
UNIT_TEST(graph, random_get_reconstruction_path)
{
  randomizer rng;
  // Some arbitrary numbers.
  run_get_reconstruction_path_tests_on_random_graph(100, 100, 10, rng);
  run_get_reconstruction_path_tests_on_random_graph(100, 200, 5, rng);
  run_get_reconstruction_path_tests_on_random_graph(1000, 1000, 50, rng);
  run_get_reconstruction_path_tests_on_random_graph(1000, 2000, 100, rng);
}
255
256#endif // BUILD_UNIT_TESTS
257
258
259// graph is a parent->child map
260void toposort_rev_ancestry(rev_ancestry_map const & graph,
261 vector<revision_id> & revisions)
262{
263 typedef multimap<revision_id, revision_id>::const_iterator gi;
264 typedef map<revision_id, int>::iterator pi;
265
266 revisions.clear();
267 // determine the number of parents for each rev
268 map<revision_id, int> pcount;
269 for (gi i = graph.begin(); i != graph.end(); ++i)
270 pcount.insert(make_pair(i->first, 0));
271 for (gi i = graph.begin(); i != graph.end(); ++i)
272 ++(pcount[i->second]);
273
274 // find the set of graph roots
275 list<revision_id> roots;
276 for (pi i = pcount.begin(); i != pcount.end(); ++i)
277 if(i->second==0)
278 roots.push_back(i->first);
279
280 while (!roots.empty())
281 {
282 revision_id cur = roots.front();
283 roots.pop_front();
284 if (!null_id(cur))
285 revisions.push_back(cur);
286
287 for(gi i = graph.lower_bound(cur);
288 i != graph.upper_bound(cur); i++)
289 if(--(pcount[i->second]) == 0)
290 roots.push_back(i->second);
291 }
292}
293
294
295// get_uncommon_ancestors
296typedef std::pair<rev_height, revision_id> height_rev_pair;
297
298static void
299advance_frontier(set<height_rev_pair> & frontier,
300 hash_set<revision_id> & seen,
301 rev_graph const & rg)
302{
303 const height_rev_pair h_node = *frontier.rbegin();
304 const revision_id & node(h_node.second);
305 frontier.erase(h_node);
306 set<revision_id> parents;
307 rg.get_parents(node, parents);
308 for (set<revision_id>::const_iterator r = parents.begin();
309 r != parents.end(); r++)
310 {
311 if (seen.find(*r) == seen.end())
312 {
313 rev_height h;
314 rg.get_height(*r, h);
315 frontier.insert(make_pair(h, *r));
316 seen.insert(*r);
317 }
318 }
319}
320
void
get_uncommon_ancestors(revision_id const & a,
                       revision_id const & b,
                       rev_graph const & rg,
                       set<revision_id> & a_uncommon_ancs,
                       set<revision_id> & b_uncommon_ancs)
{
  a_uncommon_ancs.clear();
  b_uncommon_ancs.clear();

  // We extend a frontier from each revision until it reaches
  // a revision that has been seen by the other frontier. By
  // traversing in ascending height order we can ensure that
  // any common ancestor will have been 'seen' by both sides
  // before it is traversed.

  set<height_rev_pair> a_frontier, b_frontier, common_frontier;
  {
    rev_height h;
    rg.get_height(a, h);
    a_frontier.insert(make_pair(h, a));
    rg.get_height(b, h);
    b_frontier.insert(make_pair(h, b));
  }

  hash_set<revision_id> a_seen, b_seen, common_seen;
  a_seen.insert(a);
  b_seen.insert(b);

  while (!a_frontier.empty() || !b_frontier.empty())
    {
      // We take the leaf-most (ie highest) height entry from any frontier.
      // Note: the default height is the lowest possible, so an empty
      // frontier compares below every real entry.
      rev_height a_height, b_height, common_height;
      if (!a_frontier.empty())
        a_height = a_frontier.rbegin()->first;
      if (!b_frontier.empty())
        b_height = b_frontier.rbegin()->first;
      if (!common_frontier.empty())
        common_height = common_frontier.rbegin()->first;

      if (a_height > b_height && a_height > common_height)
        {
          // a's frontier is strictly leaf-most: this node cannot have been
          // reached by b or the common frontier, so it is uncommon to a.
          a_uncommon_ancs.insert(a_frontier.rbegin()->second);
          advance_frontier(a_frontier, a_seen, rg);
        }
      else if (b_height > a_height && b_height > common_height)
        {
          // Symmetric case: strictly leaf-most on b's side only.
          b_uncommon_ancs.insert(b_frontier.rbegin()->second);
          advance_frontier(b_frontier, b_seen, rg);
        }
      else if (common_height > a_height && common_height > b_height)
        {
          // The common frontier is strictly ahead; advance it so it can
          // swallow ancestors before either side claims them as uncommon.
          advance_frontier(common_frontier, common_seen, rg);
        }
      else if (a_height == b_height) // may or may not also == common_height
        {
          // if both frontiers are the same, then we can safely say that
          // we've found all uncommon ancestors. This stopping condition
          // can result in traversing more nodes than required, but is simple.
          if (a_frontier == b_frontier)
            break;

          // Both sides sit at the same height; this relies on heights
          // identifying a single revision (i.e. both tops are the same
          // rev) -- NOTE(review): presumed from rev_height semantics,
          // confirm against rev_height.hh. The shared node is a common
          // ancestor: move it to the common frontier, drop it from both.
          common_frontier.insert(*a_frontier.rbegin());
          a_frontier.erase(*a_frontier.rbegin());
          b_frontier.erase(*b_frontier.rbegin());
        }
      else if (a_height == common_height)
        {
          // a's top is already tracked by the common frontier, so it is
          // not uncommon; just discard it from a.
          a_frontier.erase(*a_frontier.rbegin());
        }
      else if (b_height == common_height)
        {
          // Symmetric: b's top is already common.
          b_frontier.erase(*b_frontier.rbegin());
        }
      else
        I(false); // all height orderings should be covered above
    }
}
400
401#ifdef BUILD_UNIT_TESTS
402
403#include <map>
404#include "unit_tests.hh"
405#include "randomizer.hh"
406#include "roster.hh"
407
408
409static void
410get_all_ancestors(revision_id const & start, rev_ancestry_map const & child_to_parent_map,
411 set<revision_id> & ancestors)
412{
413 ancestors.clear();
414 vector<revision_id> frontier;
415 frontier.push_back(start);
416 while (!frontier.empty())
417 {
418 revision_id rid = frontier.back();
419 frontier.pop_back();
420 if (ancestors.find(rid) != ancestors.end())
421 continue;
422 safe_insert(ancestors, rid);
423 typedef rev_ancestry_map::const_iterator ci;
424 pair<ci,ci> range = child_to_parent_map.equal_range(rid);
425 for (ci i = range.first; i != range.second; ++i)
426 frontier.push_back(i->second);
427 }
428}
429
430struct mock_rev_graph : rev_graph
431{
432 mock_rev_graph(rev_ancestry_map const & child_to_parent_map)
433 : child_to_parent_map(child_to_parent_map)
434 {
435 // assign sensible heights
436 height_map.clear();
437
438 // toposort expects parent->child
439 rev_ancestry_map parent_to_child;
440 for (rev_ancestry_map::const_iterator i = child_to_parent_map.begin();
441 i != child_to_parent_map.end(); i++)
442 {
443 parent_to_child.insert(make_pair(i->second, i->first));
444 }
445 vector<revision_id> topo_revs;
446 toposort_rev_ancestry(parent_to_child, topo_revs);
447
448 // this is ugly but works. just give each one a sequential number.
449 rev_height top = rev_height::root_height();
450 u32 num = 1;
451 for (vector<revision_id>::const_iterator r = topo_revs.begin();
452 r != topo_revs.end(); r++, num++)
453 {
454 height_map.insert(make_pair(*r, top.child_height(num)));
455 }
456 }
457
458 virtual void get_parents(revision_id const & node, set<revision_id> & parents) const
459 {
460 parents.clear();
461 for (rev_ancestry_map::const_iterator i = child_to_parent_map.lower_bound(node);
462 i != child_to_parent_map.upper_bound(node); i++)
463 {
464 if (!null_id(i->second))
465 safe_insert(parents, i->second);
466 }
467 }
468
469 virtual void get_children(revision_id const & node, set<revision_id> & parents) const
470 {
471 // not required
472 I(false);
473 }
474
475 virtual void get_height(revision_id const & rev, rev_height & h) const
476 {
477 MM(rev);
478 h = safe_get(height_map, rev);
479 }
480
481
482 rev_ancestry_map const & child_to_parent_map;
483 map<revision_id, rev_height> height_map;
484};
485
486
487static void
488run_a_get_uncommon_ancestors_test(rev_ancestry_map const & child_to_parent_map,
489 revision_id const & left, revision_id const & right)
490{
491 set<revision_id> true_left_ancestors, true_right_ancestors;
492 get_all_ancestors(left, child_to_parent_map, true_left_ancestors);
493 get_all_ancestors(right, child_to_parent_map, true_right_ancestors);
494 set<revision_id> true_left_uncommon_ancestors, true_right_uncommon_ancestors;
495 MM(true_left_uncommon_ancestors);
496 MM(true_right_uncommon_ancestors);
497 set_difference(true_left_ancestors.begin(), true_left_ancestors.end(),
498 true_right_ancestors.begin(), true_right_ancestors.end(),
499 inserter(true_left_uncommon_ancestors, true_left_uncommon_ancestors.begin()));
500 set_difference(true_right_ancestors.begin(), true_right_ancestors.end(),
501 true_left_ancestors.begin(), true_left_ancestors.end(),
502 inserter(true_right_uncommon_ancestors, true_right_uncommon_ancestors.begin()));
503
504 set<revision_id> calculated_left_uncommon_ancestors, calculated_right_uncommon_ancestors;
505 MM(calculated_left_uncommon_ancestors);
506 MM(calculated_right_uncommon_ancestors);
507 mock_rev_graph rg(child_to_parent_map);
508 get_uncommon_ancestors(left, right, rg,
509 calculated_left_uncommon_ancestors,
510 calculated_right_uncommon_ancestors);
511 I(calculated_left_uncommon_ancestors == true_left_uncommon_ancestors);
512 I(calculated_right_uncommon_ancestors == true_right_uncommon_ancestors);
513 get_uncommon_ancestors(right, left, rg,
514 calculated_right_uncommon_ancestors,
515 calculated_left_uncommon_ancestors);
516 I(calculated_left_uncommon_ancestors == true_left_uncommon_ancestors);
517 I(calculated_right_uncommon_ancestors == true_right_uncommon_ancestors);
518}
519
520UNIT_TEST(graph, get_uncommon_ancestors_nasty_convexity_case)
521{
522 // This tests the nasty case described in the giant comment above
523 // get_uncommon_ancestors:
524 //
525 // 9
526 // |\ . Extraneous dots brought to you by the
527 // 8 \ . Committee to Shut Up the C Preprocessor
528 // /| \ . (CSUCPP), and viewers like you and me.
529 // / | |
530 // / 7 |
531 // | | |
532 // | 6 |
533 // | | |
534 // | 5 |
535 // | | |
536 // | 4 |
537 // | | |
538 // | : | <-- insert arbitrarily many revisions at the ellipsis
539 // | : |
540 // | | |
541 // 1 2 3
542 // \ / \ /
543 // L R
544
545 rev_ancestry_map child_to_parent_map;
546 revision_id left(fake_id()), right(fake_id());
547 revision_id one(fake_id()), two(fake_id()), eight(fake_id()), three(fake_id()), nine(fake_id());
548 MM(left);
549 MM(right);
550 MM(one);
551 MM(two);
552 MM(three);
553 MM(eight);
554 MM(nine);
555 child_to_parent_map.insert(make_pair(left, one));
556 child_to_parent_map.insert(make_pair(one, eight));
557 child_to_parent_map.insert(make_pair(eight, nine));
558 child_to_parent_map.insert(make_pair(right, three));
559 child_to_parent_map.insert(make_pair(three, nine));
560
561 revision_id middle(fake_id());
562 child_to_parent_map.insert(make_pair(left, two));
563 child_to_parent_map.insert(make_pair(right, two));
564 // We insert a _lot_ of revisions at the ellipsis, to make sure that
565 // whatever sort of step-size is used on the expansion, we can't take the
566 // entire middle portion in one big gulp and make the test pointless.
567 for (int i = 0; i != 1000; ++i)
568 {
569 revision_id next(fake_id());
570 child_to_parent_map.insert(make_pair(middle, next));
571 middle = next;
572 }
573 child_to_parent_map.insert(make_pair(middle, eight));
574
575 run_a_get_uncommon_ancestors_test(child_to_parent_map, left, right);
576}
577
// Tuning probabilities for make_random_graph / pick_node_or_ancestor:
// the chance that a new node starts a fresh root, that it merges two
// existing heads, and that an ancestor walk climbs one more level.
double const new_root_freq = 0.05;
double const merge_node_freq = 0.2;
double const skip_up_freq = 0.5;
581
582static revision_id
583pick_node_from_set(set<revision_id> const & heads,
584 randomizer & rng)
585{
586 I(!heads.empty());
587 size_t which_start = rng.uniform(heads.size());
588 set<revision_id>::const_iterator i = heads.begin();
589 for (size_t j = 0; j != which_start; ++j)
590 ++i;
591 return *i;
592}
593
594static revision_id
595pick_node_or_ancestor(set<revision_id> const & heads,
596 rev_ancestry_map const & child_to_parent_map,
597 randomizer & rng)
598{
599 revision_id rev = pick_node_from_set(heads, rng);
600 // now we recurse up from this starting point
601 while (rng.bernoulli(skip_up_freq))
602 {
603 typedef rev_ancestry_map::const_iterator ci;
604 pair<ci,ci> range = child_to_parent_map.equal_range(rev);
605 if (range.first == range.second)
606 break;
607 ci second = range.first;
608 ++second;
609 if (second == range.second)
610 // there was only one parent
611 rev = range.first->second;
612 else
613 {
614 // there are two parents, pick one randomly
615 if (rng.flip())
616 rev = range.first->second;
617 else
618 rev = second->second;
619 }
620 }
621 return rev;
622}
623
624static void
625make_random_graph(size_t num_nodes,
626 rev_ancestry_map & child_to_parent_map,
627 vector<revision_id> & nodes,
628 randomizer & rng)
629{
630 set<revision_id> heads;
631
632 for (size_t i = 0; i != num_nodes; ++i)
633 {
634 revision_id new_rid = revision_id(fake_id());
635 nodes.push_back(new_rid);
636 set<revision_id> parents;
637 if (heads.empty() || rng.bernoulli(new_root_freq))
638 parents.insert(revision_id());
639 else if (rng.bernoulli(merge_node_freq) && heads.size() > 1)
640 {
641 // maybe we'll pick the same node twice and end up not doing a
642 // merge, oh well...
643 parents.insert(pick_node_from_set(heads, rng));
644 parents.insert(pick_node_from_set(heads, rng));
645 }
646 else
647 {
648 parents.insert(pick_node_or_ancestor(heads, child_to_parent_map, rng));
649 }
650 for (set<revision_id>::const_iterator j = parents.begin();
651 j != parents.end(); ++j)
652 {
653 heads.erase(*j);
654 child_to_parent_map.insert(std::make_pair(new_rid, *j));
655 }
656 safe_insert(heads, new_rid);
657 }
658}
659
660static void
661run_a_get_uncommon_ancestors_random_test(size_t num_nodes,
662 size_t iterations,
663 randomizer & rng)
664{
665 rev_ancestry_map child_to_parent_map;
666 vector<revision_id> nodes;
667 make_random_graph(num_nodes, child_to_parent_map, nodes, rng);
668 for (size_t i = 0; i != iterations; ++i)
669 {
670 L(FL("get_uncommon_ancestors: random test %s-%s") % num_nodes % i);
671 revision_id left = idx(nodes, rng.uniform(nodes.size()));
672 revision_id right = idx(nodes, rng.uniform(nodes.size()));
673 run_a_get_uncommon_ancestors_test(child_to_parent_map, left, right);
674 }
675}
676
// Cross-check get_uncommon_ancestors against the brute-force reference on
// random graphs of several sizes; arguments are (num_nodes, iterations).
UNIT_TEST(graph, get_uncommon_ancestors_randomly)
{
  randomizer rng;
  run_a_get_uncommon_ancestors_random_test(100, 100, rng);
  run_a_get_uncommon_ancestors_random_test(1000, 100, rng);
  run_a_get_uncommon_ancestors_random_test(10000, 1000, rng);
}
684
685
686#endif // BUILD_UNIT_TESTS
687
688// Local Variables:
689// mode: C++
690// fill-column: 76
691// c-file-style: "gnu"
692// indent-tabs-mode: nil
693// End:
694// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status