monotone

monotone Mtn Source Tree

Root/graph.cc

1#include <boost/shared_ptr.hpp>
2
3#include "sanity.hh"
4#include "graph.hh"
5
6using boost::shared_ptr;
7using std::string;
8using std::vector;
9using std::set;
10
11void
12get_reconstruction_path(std::string const & start,
13 reconstruction_graph const & graph,
14 reconstruction_path & path)
15{
16 // This function does a breadth-first search from a starting point, until it
17 // finds some node that matches an arbitrary condition. The intended usage
18 // is for finding reconstruction paths in a database of deltas -- we start
19 // from the node we want to reconstruct, and follow existing deltas outward
20 // until we reach a full-text base. We return the shortest path from
21 // 'start' to a base version.
22 //
23 // The algorithm involves keeping a set of parallel linear paths, starting
24 // from 'start', that move forward through the DAG until we hit a base.
25 //
26 // On each iteration, we extend every active path by one step. If our
27 // extension involves a fork, we duplicate the path. If any path
28 // contains a cycle, we fault.
29 //
30 // If, by extending a path C, we enter a node which another path
31 // D has already seen, we kill path C. This avoids the possibility of
32 // exponential growth in the number of paths due to extensive forking
33 // and merging.
34
35 // Long ago, we used to do this with the boost graph library, but it
36 // involved loading too much of the storage graph into memory at any
37 // moment. this imperative version only loads the descendents of the
38 // reconstruction node, so it much cheaper in terms of memory.
39
40 vector< shared_ptr<reconstruction_path> > live_paths;
41
42 {
43 shared_ptr<reconstruction_path> pth0 = shared_ptr<reconstruction_path>(new reconstruction_path());
44 pth0->push_back(start);
45 live_paths.push_back(pth0);
46 }
47
48 shared_ptr<reconstruction_path> selected_path;
49 set<string> seen_nodes;
50
51 while (!selected_path)
52 {
53 vector< shared_ptr<reconstruction_path> > next_paths;
54
55 I(!live_paths.empty());
56 for (vector<shared_ptr<reconstruction_path> >::const_iterator i = live_paths.begin();
57 i != live_paths.end(); ++i)
58 {
59 shared_ptr<reconstruction_path> pth = *i;
60 string tip = pth->back();
61
62 if (graph.is_base(tip))
63 {
64 selected_path = pth;
65 break;
66 }
67 else
68 {
69 // This tip is not a root, so extend the path.
70 set<string> next;
71 graph.get_next(tip, next);
72 I(!next.empty());
73
74 // Replicate the path if there's a fork.
75 bool first = true;
76 for (set<string>::const_iterator j = next.begin(); j != next.end(); ++j)
77 {
78 L(FL("considering %s -> %s") % tip % *j);
79 if (seen_nodes.find(*j) == seen_nodes.end())
80 {
81 shared_ptr<reconstruction_path> pthN;
82 if (first)
83 {
84 pthN = pth;
85 first = false;
86 }
87 else
88 {
89 // NOTE: this is not the first iteration of the loop, and
90 // the first iteration appended one item to pth. So, we
91 // want to remove one before we use it. (Why not just
92 // copy every time? Because that makes this into an
93 // O(n^2) algorithm, in the common case where there is
94 // only one direction to go at each stop.)
95 pthN = shared_ptr<reconstruction_path>(new reconstruction_path(*pth));
96 I(!pthN->empty());
97 pthN->pop_back();
98 }
99 // check for a cycle... not that anything would break if
100 // there were one, but it's nice to let us know we have a bug
101 for (reconstruction_path::const_iterator k = pthN->begin(); k != pthN->end(); ++k)
102 I(*k != *j);
103 pthN->push_back(*j);
104 next_paths.push_back(pthN);
105 seen_nodes.insert(*j);
106 }
107 }
108 }
109 }
110
111 I(selected_path || !next_paths.empty());
112 live_paths = next_paths;
113 }
114
115 path = *selected_path;
116}
117
118#ifdef BUILD_UNIT_TESTS
119
120#include <map>
121#include "unit_tests.hh"
122#include "randomizer.hh"
123
124#include <boost/lexical_cast.hpp>
125
126using boost::lexical_cast;
127using std::pair;
128
129typedef std::multimap<string, string> rg_map;
130struct mock_reconstruction_graph : public reconstruction_graph
131{
132 rg_map ancestry;
133 set<string> bases;
134 mock_reconstruction_graph(rg_map const & ancestry, set<string> const & bases)
135 : ancestry(ancestry), bases(bases)
136 {}
137 virtual bool is_base(string const & node) const
138 {
139 return bases.find(node) != bases.end();
140 }
141 virtual void get_next(string const & from, set<string> & next) const
142 {
143 typedef rg_map::const_iterator ci;
144 pair<ci, ci> range = ancestry.equal_range(from);
145 for (ci i = range.first; i != range.second; ++i)
146 next.insert(i->second);
147 }
148};
149
150static void
151make_random_reconstruction_graph(size_t num_nodes, size_t num_random_edges,
152 size_t num_random_bases,
153 vector<string> & all_nodes, rg_map & ancestry,
154 set<string> & bases,
155 randomizer & rng)
156{
157 for (size_t i = 0; i != num_nodes; ++i)
158 all_nodes.push_back(lexical_cast<string>(i));
159 // We put a single long chain of edges in, to make sure that everything is
160 // reconstructable somehow.
161 for (size_t i = 1; i != num_nodes; ++i)
162 ancestry.insert(make_pair(idx(all_nodes, i - 1), idx(all_nodes, i)));
163 bases.insert(all_nodes.back());
164 // Then we insert a bunch of random edges too. These edges always go
165 // forwards, to avoid creating cycles (which make get_reconstruction_path
166 // unhappy).
167 for (size_t i = 0; i != num_random_edges; ++i)
168 {
169 size_t from_idx = rng.uniform(all_nodes.size() - 1);
170 size_t to_idx = from_idx + 1 + rng.uniform(all_nodes.size() - 1 - from_idx);
171 ancestry.insert(make_pair(idx(all_nodes, from_idx),
172 idx(all_nodes, to_idx)));
173 }
174 // And a bunch of random bases.
175 for (size_t i = 0; i != num_random_bases; ++i)
176 bases.insert(idx(all_nodes, rng.uniform(all_nodes.size())));
177}
178
179static void
180check_reconstruction_path(string const & start, reconstruction_graph const & graph,
181 reconstruction_path const & path)
182{
183 I(!path.empty());
184 I(*path.begin() == start);
185 reconstruction_path::const_iterator last = path.end();
186 --last;
187 I(graph.is_base(*last));
188 for (reconstruction_path::const_iterator i = path.begin(); i != last; ++i)
189 {
190 set<string> children;
191 graph.get_next(*i, children);
192 reconstruction_path::const_iterator next = i;
193 ++next;
194 I(children.find(*next) != children.end());
195 }
196}
197
198static void
199run_get_reconstruction_path_tests_on_random_graph(size_t num_nodes,
200 size_t num_random_edges,
201 size_t num_random_bases,
202 randomizer & rng)
203{
204 vector<string> all_nodes;
205 rg_map ancestry;
206 set<string> bases;
207 make_random_reconstruction_graph(num_nodes, num_random_edges, num_random_bases,
208 all_nodes, ancestry, bases,
209 rng);
210 mock_reconstruction_graph graph(ancestry, bases);
211 for (vector<string>::const_iterator i = all_nodes.begin();
212 i != all_nodes.end(); ++i)
213 {
214 reconstruction_path path;
215 get_reconstruction_path(*i, graph, path);
216 check_reconstruction_path(*i, graph, path);
217 }
218}
219
220UNIT_TEST(graph, random_get_reconstruction_path)
221{
222 randomizer rng;
223 // Some arbitrary numbers.
224 run_get_reconstruction_path_tests_on_random_graph(100, 100, 10, rng);
225 run_get_reconstruction_path_tests_on_random_graph(100, 200, 5, rng);
226 run_get_reconstruction_path_tests_on_random_graph(1000, 1000, 50, rng);
227 run_get_reconstruction_path_tests_on_random_graph(1000, 2000, 100, rng);
228}
229
230#endif // BUILD_UNIT_TESTS
231
232// Local Variables:
233// mode: C++
234// fill-column: 76
235// c-file-style: "gnu"
236// indent-tabs-mode: nil
237// End:
238// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status