monotone

monotone Mtn Source Tree

Root/revision.cc

1// -*- mode: C++; c-file-style: "gnu"; indent-tabs-mode: nil -*-
2// copyright (C) 2004 graydon hoare <graydon@pobox.com>
3// all rights reserved.
4// licensed to the public under the terms of the GNU GPL (>= 2)
5// see the file COPYING for details
6
7#include <cctype>
8#include <cstdlib>
9#include <iostream>
10#include <map>
11#include <queue>
12#include <set>
13#include <sstream>
14#include <stack>
15#include <string>
16#include <iterator>
17#include <functional>
18#include <list>
19
20#include <boost/lexical_cast.hpp>
21#include <boost/dynamic_bitset.hpp>
22#include <boost/shared_ptr.hpp>
23
24#include "botan/botan.h"
25
26#include "basic_io.hh"
27#include "change_set.hh"
28#include "constants.hh"
29#include "numeric_vocab.hh"
30#include "revision.hh"
31#include "sanity.hh"
32#include "transforms.hh"
33#include "ui.hh"
34#include "vocab.hh"
35#include "keys.hh"
36
// Verify the internal consistency of this revision: the new manifest id
// must be non-null, every edge's change_set must itself be sane, and the
// deltas of all edges must agree with each other (a path touched by more
// than one edge must map to the same destination id in every edge).
// Violations abort via I(); some checks are skipped when global sanity
// checking is in "relaxed" mode.
37void revision_set::check_sane() const
38{
39 I(!null_id(new_manifest));
40
// 'fragment' accumulates path -> destination-id pairs seen in earlier
// edges, so later edges can be cross-checked against them.
41 manifest_map fragment;
42 for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
43 {
44 change_set const & cs = edge_changes(i);
45 cs.check_sane();
46 if (!global_sanity.relaxed)
47 {
48 // null old revisions come with null old manifests
49 I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i)));
50 }
51 for (change_set::delta_map::const_iterator j = cs.deltas.begin(); j != cs.deltas.end(); ++j)
52 {
53 manifest_map::const_iterator k = fragment.find(delta_entry_path(j));
54 if (k == fragment.end())
55 fragment.insert(std::make_pair(delta_entry_path(j),
56 delta_entry_dst(j)));
57 else
58 {
59 if (!global_sanity.relaxed)
60 {
// every edge must produce the same resulting id for a shared path
61 I(delta_entry_dst(j) == manifest_entry_id(k));
62 }
63 }
64 }
65 }
66}
67
68bool
69revision_set::is_merge_node() const
70{
71 return edges.size() > 1;
72}
73
74revision_set::revision_set(revision_set const & other)
75{
76 /* behave like normal constructor if other is empty */
77 if (null_id(other.new_manifest) && other.edges.empty()) return;
78 other.check_sane();
79 new_manifest = other.new_manifest;
80 edges = other.edges;
81}
82
83revision_set const &
84revision_set::operator=(revision_set const & other)
85{
86 other.check_sane();
87 new_manifest = other.new_manifest;
88 edges = other.edges;
89 return *this;
90}
91
92
93// Traces history back 'depth' levels from 'child_id', ensuring that
94// historical information is consistent within this subgraph.
95// The child must be in the database already.
96//
97// "Consistent" currently means that we compose manifests along every path (of
98// any length) that terminates at the child, and for each one check that paths
99// that should be the same in fact are the same, and that the calculated
100// change sets can be applied to the old manifests to create the new
101// manifest.
102//
103// We also make a special check for merge nodes, where if the previous
104// paragraph wasn't enough to get us back to a common ancestor, we also search
105// all the way up to a common ancestor and make the same check, because that's
106// the invariant that merge is actually required to preserve.
107//
108// NB: While this function has some invariants in it itself, a lot of its
109// purpose is just to exercise all the invariants inside change_set.cc. So
110// don't remove those invariants. (As if you needed another reason...)
// child_id : revision to verify; must already exist in the database.
// depth    : number of generations to walk back from child_id.
// app      : provides database access and sanity settings.
111void
112check_sane_history(revision_id const & child_id,
113 int depth,
114 app_state & app)
115{
116 // We are, unfortunately, still quite slow. So we want to give at least a
117 // little feedback. Let's print exactly one warning, on the _second_ time
118 // we are called within one run -- just checking one revision isn't too
119 // slow, so no need to print anything on "commit", but usually if we're
120 // checking 2 revisions we're checking a lot more.
121 // FIXME: make sanity checking faster, so we can remove this kluge
122 // altogether...
123 static int num_checked = 0;
124 ++num_checked;
125 if (num_checked == 2)
126 P(F("verifying new revisions (this may take a while)\n"));
127
128 L(F("Verifying revision %s has sane history (to depth %i)\n")
129 % child_id % depth);
130
131 typedef boost::shared_ptr<change_set> shared_cs;
132 // (ancestor, change_set from ancestor to child)
133 std::map<revision_id, shared_cs> changesets;
134
135 manifest_id m_child_id;
136 app.db.get_revision_manifest(child_id, m_child_id);
137 manifest_map m_child;
138 app.db.get_manifest(m_child_id, m_child);
139
140 std::set<revision_id> frontier;
141 frontier.insert(child_id);
142
// Breadth-first walk back through the ancestry, one generation per
// iteration, up to 'depth' generations.
143 while (depth-- > 0)
144 {
145 std::set<revision_id> next_frontier;
146
147 for (std::set<revision_id>::const_iterator i = frontier.begin();
148 i != frontier.end();
149 ++i)
150 {
151 revision_id current_id = *i;
152 revision_set current;
153 app.db.get_revision(current_id, current);
154 // and the parents's manifests to the manifests
155 // and the change_set's to the parents to the changesets
156 for (edge_map::const_iterator j = current.edges.begin();
157 j != current.edges.end();
158 ++j)
159 {
160 revision_id old_id = edge_old_revision(j);
161 manifest_id m_old_id = edge_old_manifest(j);
162 change_set old_to_current_changes = edge_changes(j);
163 if (!null_id(old_id))
164 next_frontier.insert(old_id);
165
166 L(F("Examining %s -> %s\n") % old_id % child_id);
167
168 // build the change_set
169 // if
// In the first generation the edge change_set is already the full
// old -> child path; deeper down we compose the edge with the
// already-computed current -> child change_set.
170 shared_cs old_to_child_changes_p = shared_cs(new change_set);
171 if (current_id == child_id)
172 *old_to_child_changes_p = old_to_current_changes;
173 else
174 {
175 shared_cs current_to_child_changes_p;
176 I(changesets.find(current_id) != changesets.end());
177 current_to_child_changes_p = changesets.find(current_id)->second;
178 concatenate_change_sets(old_to_current_changes,
179 *current_to_child_changes_p,
180 *old_to_child_changes_p);
181 }
182 MM(*old_to_child_changes_p);
183
184 // we have the change_set; now, is it one we've seen before?
185 if (changesets.find(old_id) != changesets.end())
186 {
187 // If it is, then make sure the paths agree on the
188 // changeset.
189 MM(*changesets.find(old_id)->second);
190 I(*changesets.find(old_id)->second == *old_to_child_changes_p);
191 }
192 else
193 {
194 // If not, this is the first time we've seen this.
195 // So store it in the map for later reference:
196 changesets.insert(std::make_pair(old_id, old_to_child_changes_p));
197 // and check that it works:
198
199 manifest_map purported_m_child;
200 // The null revision has empty manifest, which is the
201 // default.
202 if (!null_id(old_id))
203 app.db.get_manifest(m_old_id, purported_m_child);
204 apply_change_set(*old_to_child_changes_p, purported_m_child);
205 MM(purported_m_child);
206 MM(m_child);
207 I(purported_m_child == m_child);
208 }
209 }
210 }
211 frontier = next_frontier;
212 }
213
214 // Finally, there's a danger that if we have a long divergence, then after a
215 // merge, the common ancestor will be far back enough that the above
216 // depth-limited search won't have any idea whether the ancestry invariants
217 // are actually preserved. So do an additional check on merge revisions, to
218 // make sure that the paths to both ways going back to their parents's
219 // common ancestor give the same change_set (i.e., this is a valid merge at
220 // all).
221 if (!global_sanity.relaxed)
222 {
223 revision_set child_rev;
224 app.db.get_revision(child_id, child_rev);
225 // Nothing inherently impossible about having more than 2 parents, but if
226 // you come up with some case where it should be possible then you'll
227 // have to also adjust the code below to figure out what "common
228 // ancestor" means.
229 I(child_rev.edges.size() <= 2);
230 if (child_rev.edges.size() != 2)
231 return;
232 edge_map::const_iterator i = child_rev.edges.begin();
233 revision_id parent_left = edge_old_revision(i);
234 change_set left_edge = edge_changes(i);
235 ++i;
236 revision_id parent_right = edge_old_revision(i);
237 change_set right_edge = edge_changes(i);
238 ++i;
239 I(i == child_rev.edges.end());
240 revision_id lca;
241 if (!find_least_common_ancestor(parent_left, parent_right, lca, app))
242 {
243 L(F("%s and %s have no common ancestor, so done\n")
244 % parent_left % parent_right);
245 return;
246 }
247 if (changesets.find(lca) != changesets.end())
248 {
249 L(F("already checked common ancestor, so done\n"));
250 return;
251 }
252 L(F("%s is a merge; verifying paths to common ancestor %s are sane\n")
253 % child_id % lca);
254 // we have a merge node, with an lca sufficiently far back in history
255 // that we haven't yet figured out whether this is a valid merge or
256 // not. so find out.
257 change_set cs_parent_left, cs_parent_right, cs_left, cs_right;
258 MM(cs_parent_left);
259 MM(cs_parent_right);
260 MM(cs_left);
261 MM(cs_right);
262 calculate_composite_change_set(lca, parent_left, app, cs_parent_left);
263 calculate_composite_change_set(lca, parent_right, app, cs_parent_right);
264 concatenate_change_sets(cs_parent_left, left_edge, cs_left);
265 concatenate_change_sets(cs_parent_right, right_edge, cs_right);
266 I(cs_left == cs_right);
267 }
268}
269
270// calculating least common ancestors is a delicate thing.
271//
272// it turns out that we cannot choose the simple "least common ancestor"
273// for purposes of a merge, because it is possible that there are two
274// equally reachable common ancestors, and this produces ambiguity in the
275// merge. the result -- in a pathological case -- is silently accepting one
276// set of edits while discarding another; not exactly what you want a
277// version control tool to do.
278//
279// a conservative approximation is what we'll call a "subgraph recurring"
280// LCA algorithm. this is somewhat like locating the least common dominator
281// node, but not quite. it is actually just a vanilla LCA search, except
282// that any time there's a fork (a historical merge looks like a fork from
283// our perspective, working backwards from children to parents) it reduces
284// the fork to a common parent via a sequence of pairwise recursive calls
285// to itself before proceeding. this will always resolve to a common parent
286// with no ambiguity, unless it falls off the root of the graph.
287//
288// unfortunately the subgraph recurring algorithm sometimes goes too far
289// back in history -- for example if there is an unambiguous propagate from
290// one branch to another, the entire subgraph preceeding the propagate on
291// the recipient branch is elided, since it is a merge.
292//
293// our current hypothesis is that the *exact* condition we're looking for,
294// when doing a merge, is the least node which dominates one side of the
295// merge and is an ancestor of the other.
296
297typedef unsigned long ctx;
298typedef boost::dynamic_bitset<> bitmap;
299typedef boost::shared_ptr<bitmap> shared_bitmap;
300
// Populate parents[child] -- a bitmap of interned parent node indices --
// from the database, unless it is already cached.  The null revision is
// excluded, so a root node ends up with an all-clear parent bitmap.
301static void
302ensure_parents_loaded(ctx child,
303 std::map<ctx, shared_bitmap> & parents,
304 interner<ctx> & intern,
305 app_state & app)
306{
307 if (parents.find(child) != parents.end())
308 return;
309
310 L(F("loading parents for node %d\n") % child);
311
312 std::set<revision_id> imm_parents;
313 app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents);
314
315 // The null revision is not a parent for purposes of finding common
316 // ancestors.
317 for (std::set<revision_id>::iterator p = imm_parents.begin();
318 p != imm_parents.end(); )
319 {
320 if (null_id(*p))
321 imm_parents.erase(p++);
322 else
323 ++p;
324 }
325
// The initial size is only a starting hint; the bitmap is grown below
// as needed to cover each interned parent index.
326 shared_bitmap bits = shared_bitmap(new bitmap(parents.size()));
327
328 for (std::set<revision_id>::const_iterator p = imm_parents.begin();
329 p != imm_parents.end(); ++p)
330 {
331 ctx pn = intern.intern(p->inner()());
332 L(F("parent %s -> node %d\n") % *p % pn);
333 if (pn >= bits->size())
334 bits->resize(pn+1);
335 bits->set(pn);
336 }
337
338 parents.insert(std::make_pair(child, bits));
339}
340
// One relaxation pass of the dominator-set fixpoint: for every node
// currently present in 'dominators', set the node's own bit and fold in
// the intersection of its parents' dominator bitmaps.  Newly discovered
// parents are entered into 'dominators' with empty bitmaps so that the
// next pass expands them; parent bitmaps are lazily loaded from the
// database via ensure_parents_loaded.  Returns true if any bitmap
// changed, i.e. the caller must iterate again before reading results.
341static bool
342expand_dominators(std::map<ctx, shared_bitmap> & parents,
343 std::map<ctx, shared_bitmap> & dominators,
344 interner<ctx> & intern,
345 app_state & app)
346{
347 bool something_changed = false;
348 std::vector<ctx> nodes;
349
350 nodes.reserve(dominators.size());
351
352 // pass 1, pull out all the node numbers we're going to scan this time around
// (a snapshot is needed because the loop below inserts into 'dominators')
353 for (std::map<ctx, shared_bitmap>::reverse_iterator e = dominators.rbegin();
354 e != dominators.rend(); ++e)
355 nodes.push_back(e->first);
356
357 // pass 2, update any of the dominator entries we can
358 for (std::vector<ctx>::const_iterator n = nodes.begin();
359 n != nodes.end(); ++n)
360 {
361 shared_bitmap bits = dominators[*n];
// 'saved' lets us detect whether this node's set actually changed
362 bitmap saved(*bits);
363 if (bits->size() <= *n)
364 bits->resize(*n + 1);
365 bits->set(*n);
366
367 ensure_parents_loaded(*n, parents, intern, app);
368 shared_bitmap n_parents = parents[*n];
369
370 bitmap intersection(bits->size());
371
372 bool first = true;
373 for (unsigned long parent = 0;
374 parent != n_parents->size(); ++parent)
375 {
376 if (! n_parents->test(parent))
377 continue;
378
379 if (dominators.find(parent) == dominators.end())
380 dominators.insert(std::make_pair(parent,
381 shared_bitmap(new bitmap())));
382 shared_bitmap pbits = dominators[parent];
383
// dynamic_bitset set operations require operands of equal size,
// so grow whichever side is shorter before combining
384 if (intersection.size() > pbits->size())
385 pbits->resize(intersection.size());
386
387 if (pbits->size() > intersection.size())
388 intersection.resize(pbits->size());
389
390 if (first)
391 {
392 intersection = (*pbits);
393 first = false;
394 }
395 else
396 intersection &= (*pbits);
397 }
398
399 if (intersection.size() > bits->size())
400 bits->resize(intersection.size());
401
402 if (bits->size() > intersection.size())
403 intersection.resize(bits->size());
404 (*bits) |= intersection;
405 if (*bits != saved)
406 something_changed = true;
407 }
408 return something_changed;
409}
410
411
// One relaxation pass of the ancestor-set fixpoint: for every node
// currently in 'ancestors', set the node's own bit (a node counts as its
// own ancestor here), set the bits of its known parents, and union in
// each parent's ancestor bitmap.  Newly discovered parents are entered
// into 'ancestors' with empty bitmaps so the next pass expands them.
// Returns true if any bitmap changed (caller should keep iterating).
412static bool
413expand_ancestors(std::map<ctx, shared_bitmap> & parents,
414 std::map<ctx, shared_bitmap> & ancestors,
415 interner<ctx> & intern,
416 app_state & app)
417{
418 bool something_changed = false;
419 std::vector<ctx> nodes;
420
421 nodes.reserve(ancestors.size());
422
423 // pass 1, pull out all the node numbers we're going to scan this time around
// (a snapshot is needed because the loop below inserts into 'ancestors')
424 for (std::map<ctx, shared_bitmap>::reverse_iterator e = ancestors.rbegin();
425 e != ancestors.rend(); ++e)
426 nodes.push_back(e->first);
427
428 // pass 2, update any of the ancestor entries we can
429 for (std::vector<ctx>::const_iterator n = nodes.begin(); n != nodes.end(); ++n)
430 {
431 shared_bitmap bits = ancestors[*n];
// 'saved' lets us detect whether this node's set actually changed
432 bitmap saved(*bits);
433 if (bits->size() <= *n)
434 bits->resize(*n + 1);
435 bits->set(*n);
436
437 ensure_parents_loaded(*n, parents, intern, app);
438 shared_bitmap n_parents = parents[*n];
439 for (ctx parent = 0; parent != n_parents->size(); ++parent)
440 {
441 if (! n_parents->test(parent))
442 continue;
443
444 if (bits->size() <= parent)
445 bits->resize(parent + 1);
446 bits->set(parent);
447
448 if (ancestors.find(parent) == ancestors.end())
449 ancestors.insert(make_pair(parent,
450 shared_bitmap(new bitmap())));
451 shared_bitmap pbits = ancestors[parent];
452
// equalize sizes before |= (required by dynamic_bitset)
453 if (bits->size() > pbits->size())
454 pbits->resize(bits->size());
455
456 if (pbits->size() > bits->size())
457 bits->resize(pbits->size());
458
459 (*bits) |= (*pbits);
460 }
461 if (*bits != saved)
462 something_changed = true;
463 }
464 return something_changed;
465}
466
467static bool
468find_intersecting_node(bitmap & fst,
469 bitmap & snd,
470 interner<ctx> const & intern,
471 revision_id & anc)
472{
473
474 if (fst.size() > snd.size())
475 snd.resize(fst.size());
476 else if (snd.size() > fst.size())
477 fst.resize(snd.size());
478
479 bitmap intersection = fst & snd;
480 if (intersection.any())
481 {
482 L(F("found %d intersecting nodes\n") % intersection.count());
483 for (ctx i = 0; i < intersection.size(); ++i)
484 {
485 if (intersection.test(i))
486 {
487 anc = revision_id(intern.lookup(i));
488 return true;
489 }
490 }
491 }
492 return false;
493}
494
495// static void
496// dump_bitset_map(std::string const & hdr,
497// std::map< ctx, shared_bitmap > const & mm)
498// {
499// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size());
500// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin();
501// i != mm.end(); ++i)
502// {
503// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second)));
504// }
505// }
506
// Find a suitable common ancestor for merging 'left' and 'right': a node
// which is an ancestor of one side and dominates the other (see the long
// comment above for why a plain LCA is not sufficient).  Sets 'anc' and
// returns true on success; returns false if no such node exists (e.g.
// entirely disjoint histories).
507bool
508find_common_ancestor_for_merge(revision_id const & left,
509 revision_id const & right,
510 revision_id & anc,
511 app_state & app)
512{
513 // Temporary workaround until we figure out how to clean up the whole
514 // ancestor selection mess:
515 if (app.use_lca)
516 return find_least_common_ancestor(left, right, anc, app);
517
518 interner<ctx> intern;
519 std::map< ctx, shared_bitmap >
520 parents, ancestors, dominators;
521
522 ctx ln = intern.intern(left.inner()());
523 ctx rn = intern.intern(right.inner()());
524
525 shared_bitmap lanc = shared_bitmap(new bitmap());
526 shared_bitmap ranc = shared_bitmap(new bitmap());
527 shared_bitmap ldom = shared_bitmap(new bitmap());
528 shared_bitmap rdom = shared_bitmap(new bitmap());
529
530 ancestors.insert(make_pair(ln, lanc));
531 ancestors.insert(make_pair(rn, ranc));
532 dominators.insert(make_pair(ln, ldom));
533 dominators.insert(make_pair(rn, rdom));
534
535 L(F("searching for common ancestor, left=%s right=%s\n") % left % right);
536
// NOTE: bitwise '|' (not '||') is deliberate here -- both fixpoint
// expansions must run on every iteration, with no short-circuiting.
537 while (expand_ancestors(parents, ancestors, intern, app) |
538 expand_dominators(parents, dominators, intern, app))
539 {
540 L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") %
541 parents.size() % ancestors.size() % dominators.size());
542
543 if (find_intersecting_node(*lanc, *rdom, intern, anc))
544 {
545 L(F("found node %d, ancestor of left %s and dominating right %s\n")
546 % anc % left % right);
547 return true;
548 }
549
550 else if (find_intersecting_node(*ranc, *ldom, intern, anc))
551 {
552 L(F("found node %d, ancestor of right %s and dominating left %s\n")
553 % anc % right % left);
554 return true;
555 }
556 }
557// dump_bitset_map("ancestors", ancestors);
558// dump_bitset_map("dominators", dominators);
559// dump_bitset_map("parents", parents);
560 return false;
561}
562
563
// Vanilla LCA search: grow the ancestor sets of 'left' and 'right' one
// fixpoint pass at a time and stop at the first intersection (the member
// with the lowest interned index is the one reported).  Sets 'anc' and
// returns true on success; false if the two have no common ancestor.
564bool
565find_least_common_ancestor(revision_id const & left,
566 revision_id const & right,
567 revision_id & anc,
568 app_state & app)
569{
570 interner<ctx> intern;
571 std::map< ctx, shared_bitmap >
572 parents, ancestors;
573
// trivial case: a node is its own least common ancestor
574 if (left == right)
575 {
576 anc = left;
577 return true;
578 }
579
580 ctx ln = intern.intern(left.inner()());
581 ctx rn = intern.intern(right.inner()());
582
583 shared_bitmap lanc = shared_bitmap(new bitmap());
584 shared_bitmap ranc = shared_bitmap(new bitmap());
585
586 ancestors.insert(make_pair(ln, lanc));
587 ancestors.insert(make_pair(rn, ranc));
588
589 L(F("searching for least common ancestor, left=%s right=%s\n") % left % right);
590
591 while (expand_ancestors(parents, ancestors, intern, app))
592 {
593 L(F("least common ancestor scan [par=%d,anc=%d]\n") %
594 parents.size() % ancestors.size());
595
596 if (find_intersecting_node(*lanc, *ranc, intern, anc))
597 {
598 L(F("found node %d, ancestor of left %s and right %s\n")
599 % anc % left % right);
600 return true;
601 }
602 }
603// dump_bitset_map("ancestors", ancestors);
604// dump_bitset_map("parents", parents);
605 return false;
606}
607
608
609// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the
610// size of the entire revision graph.
611
612static bool
613is_ancestor(revision_id const & ancestor_id,
614 revision_id const & descendent_id,
615 std::multimap<revision_id, revision_id> const & graph)
616{
617
618 std::set<revision_id> visited;
619 std::queue<revision_id> queue;
620
621 queue.push(ancestor_id);
622
623 while (!queue.empty())
624 {
625 revision_id current_id = queue.front();
626 queue.pop();
627
628 if (current_id == descendent_id)
629 return true;
630 else
631 {
632 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
633 std::pair<gi, gi> children = graph.equal_range(current_id);
634 for (gi i = children.first; i != children.second; ++i)
635 {
636 if (visited.find(i->second) == visited.end())
637 {
638 queue.push(i->second);
639 visited.insert(i->second);
640 }
641 }
642 }
643 }
644 return false;
645}
646
647bool
648is_ancestor(revision_id const & ancestor_id,
649 revision_id const & descendent_id,
650 app_state & app)
651{
652 L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id);
653
654 std::multimap<revision_id, revision_id> graph;
655 app.db.get_revision_ancestry(graph);
656 return is_ancestor(ancestor_id, descendent_id, graph);
657}
658
659
660static void
661add_bitset_to_union(shared_bitmap src,
662 shared_bitmap dst)
663{
664 if (dst->size() > src->size())
665 src->resize(dst->size());
666 if (src->size() > dst->size())
667 dst->resize(src->size());
668 *dst |= *src;
669}
670
671
// Compute, for every node reachable from 'init' through 'graph' -- which
// must map child -> parent, i.e. be the *inverse* of the db ancestry
// (callers build this as 'inverse_graph') -- a bitmap of its strict
// ancestors, keyed by interned ctx index, in 'ancestors'; every such
// bitmap is also ORed into 'total_union'.  Entries already present in
// 'ancestors' are reused, so repeated calls share work.  Uses an
// explicit stack (post-order traversal) rather than recursion, so deep
// histories cannot overflow the call stack.
672static void
673calculate_ancestors_from_graph(interner<ctx> & intern,
674 revision_id const & init,
675 std::multimap<revision_id, revision_id> const & graph,
676 std::map< ctx, shared_bitmap > & ancestors,
677 shared_bitmap & total_union)
678{
679 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
680 std::stack<ctx> stk;
681
682 stk.push(intern.intern(init.inner()()));
683
684 while (! stk.empty())
685 {
686 ctx us = stk.top();
687 revision_id rev(hexenc<id>(intern.lookup(us)));
688
689 std::pair<gi,gi> parents = graph.equal_range(rev);
690 bool pushed = false;
691
692 // first make sure all parents are done
693 for (gi i = parents.first; i != parents.second; ++i)
694 {
695 ctx parent = intern.intern(i->second.inner()());
696 if (ancestors.find(parent) == ancestors.end())
697 {
698 stk.push(parent);
699 pushed = true;
700 break;
701 }
702 }
703
704 // if we pushed anything we stop now. we'll come back later when all
705 // the parents are done.
706 if (pushed)
707 continue;
708
// all parents are computed: our ancestor set is the union of each
// parent's bit plus that parent's own ancestor set
709 shared_bitmap b = shared_bitmap(new bitmap());
710
711 for (gi i = parents.first; i != parents.second; ++i)
712 {
713 ctx parent = intern.intern(i->second.inner()());
714
715 // set all parents
716 if (b->size() <= parent)
717 b->resize(parent + 1);
718 b->set(parent);
719
720 // ensure all parents are loaded into the ancestor map
721 I(ancestors.find(parent) != ancestors.end());
722
723 // union them into our map
724 std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
725 I(j != ancestors.end());
726 add_bitset_to_union(j->second, b);
727 }
728
729 add_bitset_to_union(b, total_union);
730 ancestors.insert(std::make_pair(us, b));
731 stk.pop();
732 }
733}
734
735static bool
736topofilter(std::set<revision_id> const &revs, revision_id const &rev,
737 toposort_filter filter)
738{
739 if (filter == topo_all)
740 return true;
741 bool in_revs = (revs.find(rev) != revs.end());
742 return (filter == topo_include) ? in_revs : !in_revs;
743}
744
745// this function actually toposorts the whole graph, and then filters by the
746// passed in set (unless filter is set to topo_all).
// Kahn's algorithm: repeatedly output nodes whose in-degree (number of
// unprocessed parents) has dropped to zero, destroying 'graph' and
// 'pcount' as it goes.  Revisions that never appear in the ancestry
// graph at all are appended afterwards from 'leaves'.
747void
748toposort(std::set<revision_id> const & revisions,
749 std::vector<revision_id> & sorted,
750 app_state & app,
751 toposort_filter filter)
752{
753 sorted.clear();
754 typedef std::multimap<revision_id, revision_id>::iterator gi;
755 typedef std::map<revision_id, int>::iterator pi;
756 std::multimap<revision_id, revision_id> graph;
757 app.db.get_revision_ancestry(graph);
// 'leaves' starts as every revision in the db; each node processed
// through the graph is erased, leaving only graph-isolated revisions.
758 std::set<revision_id> leaves;
759 app.db.get_revision_ids(leaves);
// pcount: node -> in-degree (times it appears as a child in 'graph')
760 std::map<revision_id, int> pcount;
761 for (gi i = graph.begin(); i != graph.end(); ++i)
762 pcount.insert(std::make_pair(i->first, 0));
763 for (gi i = graph.begin(); i != graph.end(); ++i)
764 ++(pcount[i->second]);
765 // first find the set of graph roots
766 std::list<revision_id> roots;
767 for (pi i = pcount.begin(); i != pcount.end(); ++i)
768 if(i->second==0)
769 roots.push_back(i->first);
770 while (!roots.empty())
771 {
772 // now stick them in our ordering (if wanted) and remove them from the
773 // graph, calculating the new roots as we go
774 L(F("new root: %s\n") % (roots.front()));
775 if (topofilter(revisions, roots.front(), filter))
776 sorted.push_back(roots.front());
777 for(gi i = graph.lower_bound(roots.front());
778 i != graph.upper_bound(roots.front()); i++)
779 if(--(pcount[i->second]) == 0)
780 roots.push_back(i->second);
781 graph.erase(roots.front());
782 leaves.erase(roots.front());
783 roots.pop_front();
784 }
// every edge must have been consumed, or the graph had a cycle
785 I(graph.empty());
786 for (std::set<revision_id>::const_iterator i = leaves.begin();
787 i != leaves.end(); ++i)
788 {
789 L(F("new leaf: %s\n") % (*i));
790 if (topofilter(revisions, *i, filter))
791 sorted.push_back(*i);
792 }
793}
794
795// This function looks at a set of revisions, and for every pair A, B in that
796// set such that A is an ancestor of B, it erases A.
797
798void
799erase_ancestors(std::set<revision_id> & revisions, app_state & app)
800{
801 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
802 std::multimap<revision_id, revision_id> graph;
803 std::multimap<revision_id, revision_id> inverse_graph;
804
805 app.db.get_revision_ancestry(graph);
806 for (gi i = graph.begin(); i != graph.end(); ++i)
807 inverse_graph.insert(std::make_pair(i->second, i->first));
808
809 interner<ctx> intern;
810 std::map< ctx, shared_bitmap > ancestors;
811
812 shared_bitmap u = shared_bitmap(new bitmap());
813
814 for (std::set<revision_id>::const_iterator i = revisions.begin();
815 i != revisions.end(); ++i)
816 {
817 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
818 }
819
820 std::set<revision_id> tmp;
821 for (std::set<revision_id>::const_iterator i = revisions.begin();
822 i != revisions.end(); ++i)
823 {
824 ctx id = intern.intern(i->inner()());
825 bool has_ancestor_in_set = id < u->size() && u->test(id);
826 if (!has_ancestor_in_set)
827 tmp.insert(*i);
828 }
829
830 revisions = tmp;
831}
832
833// This function takes a revision A and a set of revision Bs, calculates the
834// ancestry of each, and returns the set of revisions that are in A's ancestry
// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
// that's not in the Bs. If the output set is non-empty, then A will
837// certainly be in it; but the output set might be empty.
838void
839ancestry_difference(revision_id const & a, std::set<revision_id> const & bs,
840 std::set<revision_id> & new_stuff,
841 app_state & app)
842{
843 new_stuff.clear();
844 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
845 std::multimap<revision_id, revision_id> graph;
846 std::multimap<revision_id, revision_id> inverse_graph;
847
// invert to child -> parent, as calculate_ancestors_from_graph expects
848 app.db.get_revision_ancestry(graph);
849 for (gi i = graph.begin(); i != graph.end(); ++i)
850 inverse_graph.insert(std::make_pair(i->second, i->first));
851
852 interner<ctx> intern;
853 std::map< ctx, shared_bitmap > ancestors;
854
// u: union of the Bs' ancestries, including the Bs themselves
855 shared_bitmap u = shared_bitmap(new bitmap());
856
857 for (std::set<revision_id>::const_iterator i = bs.begin();
858 i != bs.end(); ++i)
859 {
860 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
861 ctx c = intern.intern(i->inner()());
862 if (u->size() <= c)
863 u->resize(c + 1);
864 u->set(c);
865 }
866
// au: A's ancestry, including A itself
867 shared_bitmap au = shared_bitmap(new bitmap());
868 calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
869 {
870 ctx c = intern.intern(a.inner()());
871 if (au->size() <= c)
872 au->resize(c + 1);
873 au->set(c);
874 }
875
// pad both to a common width, then take the set difference au \ u
876 au->resize(std::max(au->size(), u->size()));
877 u->resize(std::max(au->size(), u->size()));
878
879 *au -= *u;
880
881 for (unsigned int i = 0; i != au->size(); ++i)
882 {
883 if (au->test(i))
884 {
885 revision_id rid(intern.lookup(i));
886 if (!null_id(rid))
887 new_stuff.insert(rid);
888 }
889 }
890}
891
892//
893// The idea with this algorithm is to walk from child up to ancestor,
894// recursively, accumulating all the change_sets associated with
895// intermediate nodes into *one big change_set*.
896//
897// clever readers will realize this is an overlapping-subproblem type
898// situation and thus needs to keep a dynamic programming map to keep
899// itself in linear complexity.
900//
901// in fact, we keep two: one which maps to computed results (partial_csets)
902// and one which just keeps a set of all nodes we traversed
903// (visited_nodes). in theory it could be one map with an extra bool stuck
904// on each entry, but I think that would make it even less readable. it's
905// already quite ugly.
906//
907
// Build, in 'cumulative_cset', the composite change_set along a path
// from 'ancestor' down to 'child', exploring only nodes contained in
// 'subgraph' (computed by find_subgraph_for_composite_search).  Returns
// true iff a path from ancestor to child was found.  'partial_csets'
// memoizes ancestor -> node composites; 'visited_nodes' records nodes
// already explored (including dead ends) so they are never re-traversed.
908static bool
909calculate_change_sets_recursive(revision_id const & ancestor,
910 revision_id const & child,
911 app_state & app,
912 change_set & cumulative_cset,
913 std::map<revision_id, boost::shared_ptr<change_set> > & partial_csets,
914 std::set<revision_id> & visited_nodes,
915 std::set<revision_id> const & subgraph)
916{
917
// base case: the empty path (and thus an empty cumulative_cset)
918 if (ancestor == child)
919 return true;
920
// outside the precomputed search space: cannot be on a relevant path
921 if (subgraph.find(child) == subgraph.end())
922 return false;
923
924 visited_nodes.insert(child);
925
926 bool relevant_child = false;
927
928 revision_set rev;
929 app.db.get_revision(child, rev);
930
931 L(F("exploring changesets from parents of %s, seeking towards %s\n")
932 % child % ancestor);
933
934 for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
935 {
936 bool relevant_parent = false;
937 revision_id curr_parent = edge_old_revision(i);
938
// skip the null revision (root edge)
939 if (curr_parent.inner()().empty())
940 continue;
941
942 change_set cset_to_curr_parent;
943
944 L(F("considering parent %s of %s\n") % curr_parent % child);
945
946 std::map<revision_id, boost::shared_ptr<change_set> >::const_iterator j =
947 partial_csets.find(curr_parent);
948 if (j != partial_csets.end())
949 {
950 // a recursive call has traversed this parent before and built an
951 // existing cset. just reuse that rather than re-traversing
952 cset_to_curr_parent = *(j->second);
953 relevant_parent = true;
954 }
955 else if (visited_nodes.find(curr_parent) != visited_nodes.end())
956 {
957 // a recursive call has traversed this parent, but there was no
958 // path from it to the root, so the parent is irrelevant. skip.
959 relevant_parent = false;
960 }
961 else
962 relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app,
963 cset_to_curr_parent,
964 partial_csets,
965 visited_nodes,
966 subgraph);
967
// first relevant parent wins: compose ancestor->parent with the
// parent->child edge and stop scanning further edges
968 if (relevant_parent)
969 {
970 L(F("revision %s is relevant, composing with edge to %s\n")
971 % curr_parent % child);
972 concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset);
973 relevant_child = true;
974 break;
975 }
976 else
977 L(F("parent %s of %s is not relevant\n") % curr_parent % child);
978 }
979
980 // store the partial edge from ancestor -> child, so that if anyone
981 // re-traverses this edge they'll just fetch from the partial_edges
982 // cache.
983 if (relevant_child)
984 partial_csets.insert(std::make_pair(child,
985 boost::shared_ptr<change_set>
986 (new change_set(cumulative_cset))));
987
988 return relevant_child;
989}
990
991// this finds (by breadth-first search) the set of nodes you'll have to
992// walk over in calculate_change_sets_recursive, to build the composite
993// changeset. this is to prevent the recursive algorithm from going way
994// back in history on an unlucky guess of parent.
995
// Breadth-first collection (into 'subgraph') of every revision reachable
// parent-wards from 'child', stopping as soon as 'ancestor' is inserted.
// The result bounds the search space of calculate_change_sets_recursive
// so its depth-first guesses cannot wander arbitrarily far back in
// history.
996static void
997find_subgraph_for_composite_search(revision_id const & ancestor,
998 revision_id const & child,
999 app_state & app,
1000 std::set<revision_id> & subgraph)
1001{
1002 std::set<revision_id> frontier;
1003 frontier.insert(child);
1004 subgraph.insert(child);
1005 while (!frontier.empty())
1006 {
1007 std::set<revision_id> next_frontier;
1008 for (std::set<revision_id>::const_iterator i = frontier.begin();
1009 i != frontier.end(); ++i)
1010 {
1011 revision_set rev;
1012 app.db.get_revision(*i, rev);
1013 L(F("adding parents of %s to subgraph\n") % *i);
1014
1015 for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j)
1016 {
1017 revision_id curr_parent = edge_old_revision(j);
1018 if (null_id(curr_parent))
1019 continue;
1020 subgraph.insert(curr_parent);
// NOTE: returns the moment ancestor enters the subgraph; the rest
// of the current frontier is deliberately left unexpanded.
1021 if (curr_parent == ancestor)
1022 {
1023 L(F("found parent %s of %s\n") % curr_parent % *i);
1024 return;
1025 }
1026 else
1027 L(F("adding parent %s to next frontier\n") % curr_parent);
1028 next_frontier.insert(curr_parent);
1029 }
1030 }
1031 frontier = next_frontier;
1032 }
1033}
1034
1035void
1036calculate_composite_change_set(revision_id const & ancestor,
1037 revision_id const & child,
1038 app_state & app,
1039 change_set & composed)
1040{
1041 I(composed.empty());
1042 L(F("calculating composite changeset between %s and %s\n")
1043 % ancestor % child);
1044 if (ancestor == child)
1045 return;
1046 std::set<revision_id> visited;
1047 std::set<revision_id> subgraph;
1048 std::map<revision_id, boost::shared_ptr<change_set> > partial;
1049 find_subgraph_for_composite_search(ancestor, child, app, subgraph);
1050 calculate_change_sets_recursive(ancestor, child, app, composed, partial,
1051 visited, subgraph);
1052}
1053
1054void
1055calculate_arbitrary_change_set(revision_id const & start,
1056 revision_id const & end,
1057 app_state & app,
1058 change_set & composed)
1059{
1060 L(F("calculating changeset from %s to %s\n") % start % end);
1061 revision_id r_ca_id;
1062 change_set ca_to_start, ca_to_end, start_to_ca;
1063 N(find_least_common_ancestor(start, end, r_ca_id, app),
1064 F("no common ancestor for %s and %s\n") % start % end);
1065 L(F("common ancestor is %s\n") % r_ca_id);
1066 calculate_composite_change_set(r_ca_id, start, app, ca_to_start);
1067 calculate_composite_change_set(r_ca_id, end, app, ca_to_end);
1068 manifest_id m_ca_id;
1069 manifest_map m_ca;
1070 app.db.get_revision_manifest(r_ca_id, m_ca_id);
1071 app.db.get_manifest(m_ca_id, m_ca);
1072 invert_change_set(ca_to_start, m_ca, start_to_ca);
1073 concatenate_change_sets(start_to_ca, ca_to_end, composed);
1074}
1075
1076
1077// Stuff related to rebuilding the revision graph. Unfortunately this is a
1078// real enough error case that we need support code for it.
1079
1080
1081static void
1082analyze_manifest_changes(app_state & app,
1083 manifest_id const & parent,
1084 manifest_id const & child,
1085 std::set<file_path> const & need_history_splitting,
1086 change_set & cs)
1087{
1088 manifest_map m_parent, m_child;
1089
1090 if (!null_id(parent))
1091 app.db.get_manifest(parent, m_parent);
1092
1093 I(!null_id(child));
1094 app.db.get_manifest(child, m_child);
1095
1096 L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child);
1097
1098 for (manifest_map::const_iterator i = m_parent.begin();
1099 i != m_parent.end(); ++i)
1100 {
1101 file_path f = manifest_entry_path(i);
1102 manifest_map::const_iterator j = m_child.find(f);
1103 if (j == m_child.end())
1104 {
1105 cs.delete_file(f);
1106 }
1107 else if (need_history_splitting.find(f) != need_history_splitting.end())
1108 {
1109 P(F("splitting ancestry for file %s\n") % f);
1110 cs.delete_file(f);
1111 cs.add_file(f, manifest_entry_id(j));
1112 }
1113 else if (! (manifest_entry_id(i) == manifest_entry_id(j)))
1114 {
1115 cs.apply_delta(manifest_entry_path(i),
1116 manifest_entry_id(i),
1117 manifest_entry_id(j));
1118 }
1119 }
1120 for (manifest_map::const_iterator i = m_child.begin();
1121 i != m_child.end(); ++i)
1122 {
1123 manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i));
1124 if (j == m_parent.end())
1125 cs.add_file(manifest_entry_path(i),
1126 manifest_entry_id(i));
1127 }
1128}
1129
1130void
1131change_set_for_merge(app_state &app, revision_id const &parent_rid, revision_id const &other_parent_rid,
1132 manifest_id const &parent_man, manifest_id const &child_man, change_set &cs)
1133{
1134 // this is stupidly inefficient, in that we do this whole expensive
1135 // changeset finding thing twice in a row. oh well.
1136 revision_id lca;
1137 std::set<file_path> need_killing_files;
1138 if (find_least_common_ancestor(parent_rid, other_parent_rid, lca, app))
1139 {
1140 change_set parent_cs, other_parent_cs;
1141 calculate_composite_change_set(lca, other_parent_rid, app, other_parent_cs);
1142 calculate_composite_change_set(lca, parent_rid, app, parent_cs);
1143 std::set_difference(other_parent_cs.rearrangement.deleted_files.begin(),
1144 other_parent_cs.rearrangement.deleted_files.end(),
1145 parent_cs.rearrangement.deleted_files.begin(),
1146 parent_cs.rearrangement.deleted_files.end(),
1147 std::inserter(need_killing_files,
1148need_killing_files.begin()));
1149 }
1150
1151 analyze_manifest_changes(app, parent_man, child_man, need_killing_files, cs);
1152}
1153
1154
// Support machinery for rebuilding the revision graph from an older
// history representation.  Nodes are numbered u64s; the maps below relate
// node numbers to old manifests/revisions and to the newly constructed
// revisions.
struct anc_graph
{
  // 'existing' selects the source of history: true = rebuild from an
  // existing revision graph, false = rebuild from bare manifest certs.
  anc_graph(bool existing, app_state & a) :
    existing_graph(existing),
    app(a),
    max_node(0),
    n_nodes("nodes", "n", 1),
    n_certs_in("certs in", "c", 1),
    n_revs_out("revs out", "r", 1),
    n_certs_out("certs out", "C", 1)
  {}

  bool existing_graph;
  app_state & app;
  u64 max_node;   // next unused node number

  // progress tickers shown to the user during the rebuild
  ticker n_nodes;
  ticker n_certs_in;
  ticker n_revs_out;
  ticker n_certs_out;

  // node number <-> old manifest id (both directions)
  std::map<u64,manifest_id> node_to_old_man;
  std::map<manifest_id,u64> old_man_to_node;

  // node number <-> old revision id (both directions)
  std::map<u64,revision_id> node_to_old_rev;
  std::map<revision_id,u64> old_rev_to_node;

  // node number -> newly constructed revision id
  std::map<u64,revision_id> node_to_new_rev;
  // certs collected from the old history, keyed by node, re-issued later
  std::multimap<u64, std::pair<cert_name, cert_value> > certs;
  // ancestry edges as (child, parent) pairs
  std::multimap<u64, u64> ancestry;
  // branch names seen, used to regenerate epochs in write_certs()
  std::set<std::string> branches;

  void add_node_ancestry(u64 child, u64 parent);
  void write_certs();
  void kluge_for_3_ancestor_nodes();
  void rebuild_ancestry();
  void get_node_manifest(u64 node, manifest_id & man);
  u64 add_node_for_old_manifest(manifest_id const & man);
  u64 add_node_for_old_revision(revision_id const & rev);
  revision_id construct_revision_from_ancestry(u64 child);
};
1196
1197
1198void anc_graph::add_node_ancestry(u64 child, u64 parent)
1199{
1200 L(F("noting ancestry from child %d -> parent %d\n") % child % parent);
1201 ancestry.insert(std::make_pair(child, parent));
1202}
1203
1204void anc_graph::get_node_manifest(u64 node, manifest_id & man)
1205{
1206 std::map<u64,manifest_id>::const_iterator i = node_to_old_man.find(node);
1207 I(i != node_to_old_man.end());
1208 man = i->second;
1209}
1210
// Re-issue, against the rebuilt revisions, the certs collected from the
// old history -- but only certs whose names we recognize.  Also
// regenerates branch epochs, since the rebuild invalidates old ones.
void anc_graph::write_certs()
{
  // whitelist of cert names we know how to carry across the rebuild
  std::set<cert_name> cnames;
  cnames.insert(cert_name(branch_cert_name));
  cnames.insert(cert_name(date_cert_name));
  cnames.insert(cert_name(author_cert_name));
  cnames.insert(cert_name(tag_cert_name));
  cnames.insert(cert_name(changelog_cert_name));
  cnames.insert(cert_name(comment_cert_name));
  cnames.insert(cert_name(testresult_cert_name));


  {
    // regenerate epochs on all branches to random states

    for (std::set<std::string>::const_iterator i = branches.begin(); i != branches.end(); ++i)
      {
        char buf[constants::epochlen_bytes];
        Botan::Global_RNG::randomize(reinterpret_cast<Botan::byte *>(buf), constants::epochlen_bytes);
        hexenc<data> hexdata;
        encode_hexenc(data(std::string(buf, buf + constants::epochlen_bytes)), hexdata);
        epoch_data new_epoch(hexdata);
        L(F("setting epoch for %s to %s\n") % *i % new_epoch);
        app.db.set_epoch(cert_value(*i), new_epoch);
      }
  }


  typedef std::multimap<u64, std::pair<cert_name, cert_value> >::const_iterator ci;

  for (std::map<u64,revision_id>::const_iterator i = node_to_new_rev.begin();
       i != node_to_new_rev.end(); ++i)
    {
      revision_id rev(i->second);

      // all certs collected for this node
      std::pair<ci,ci> range = certs.equal_range(i->first);

      for (ci j = range.first; j != range.second; ++j)
        {
          cert_name name(j->second.first);
          cert_value val(j->second.second);

          // skip cert names we don't know how to regenerate
          if (cnames.find(name) == cnames.end())
            continue;

          cert new_cert;
          make_simple_cert(rev.inner(), name, val, app, new_cert);
          revision<cert> rcert(new_cert);
          // only write (and count) certs not already in the database
          if (! app.db.revision_cert_exists(rcert))
            {
              ++n_certs_out;
              app.db.put_revision_cert(rcert);
            }
        }
    }
}
1267
void
anc_graph::kluge_for_3_ancestor_nodes()
{
  // This method is, as the name suggests, a kluge.  It exists because in the
  // 0.17 timeframe, monotone's ancestry graph has several nodes with 3
  // parents.  This isn't, in principle, necessarily a bad thing; having 3
  // parents is reasonably well defined, I don't know of much code that is
  // dependent on revisions having only 2 parents, etc.  But it is a very
  // weird thing, that we would never under any circumstances create today,
  // and it only exists as a side-effect of the pre-changeset days.  In the
  // future we may decide to allow 3+-parent revisions; we may decide to
  // disallow it.  Right now, I'd rather keep options open.
  // We remove only edges that are "redundant" (i.e., already weird...).
  // These are also something that we currently refuse to produce -- when a
  // node has more than one parent, and one parent is an ancestor of another.
  // These edges, again, only exist because of weirdnesses in the
  // pre-changeset days, and are not particularly meaningful.  Again, we may
  // decide in the future to use them for some things, but...
  // FIXME: remove this method eventually, since it is (mildly) destructive on
  // history, and isn't really doing anything that necessarily needs to happen
  // anyway.
  P(F("scanning for nodes with 3+ parents\n"));
  // pass 1: find nodes with more than two parent edges
  std::set<u64> manyparents;
  for (std::multimap<u64, u64>::const_iterator i = ancestry.begin();
       i != ancestry.end(); ++i)
    {
      if (ancestry.count(i->first) > 2)
        manyparents.insert(i->first);
    }
  // pass 2: for each such node, drop direct-parent edges that are
  // redundant (the parent is also reachable through another parent)
  for (std::set<u64>::const_iterator i = manyparents.begin();
       i != manyparents.end(); ++i)
    {
      std::set<u64> indirect_ancestors;
      std::set<u64> parents;
      std::vector<u64> to_examine;
      // collect the node's direct parents
      for (std::multimap<u64, u64>::const_iterator j = ancestry.lower_bound(*i);
           j != ancestry.upper_bound(*i); ++j)
        {
          to_examine.push_back(j->second);
          parents.insert(j->second);
        }
      I(!to_examine.empty());
      // depth-first walk upward, collecting everything reachable above
      // the direct parents
      while (!to_examine.empty())
        {
          u64 current = to_examine.back();
          to_examine.pop_back();
          for (std::multimap<u64, u64>::const_iterator j = ancestry.lower_bound(current);
               j != ancestry.upper_bound(current); ++j)
            {
              if (indirect_ancestors.find(j->second) == indirect_ancestors.end())
                {
                  to_examine.push_back(j->second);
                  indirect_ancestors.insert(j->second);
                }
            }
        }
      size_t killed = 0;
      // any direct parent that is also an indirect ancestor is redundant
      for (std::set<u64>::const_iterator p = parents.begin();
           p != parents.end(); ++p)
        {
          if (indirect_ancestors.find(*p) != indirect_ancestors.end())
            {
              P(F("optimizing out redundant edge %i -> %i\n") % (*p) % (*i));
              // sometimes I hate STL. or at least my incompetence at using it.
              size_t old_size = ancestry.size();
              // erase exactly one matching (child, parent) entry; the
              // break keeps us from touching the invalidated iterator
              for (std::multimap<u64, u64>::iterator e = ancestry.lower_bound(*i);
                   e != ancestry.upper_bound(*i); ++e)
                {
                  I(e->first == *i);
                  if (e->second == *p)
                    {
                      ancestry.erase(e);
                      break;
                    }
                }
              I(old_size - 1 == ancestry.size());
              ++killed;
            }
        }
      // sanity: the node must still have at least one parent edge left
      I(killed < parents.size());
      I(ancestry.find(*i) != ancestry.end());
    }
}
1351
1352void
1353anc_graph::rebuild_ancestry()
1354{
1355 kluge_for_3_ancestor_nodes();
1356
1357 P(F("rebuilding %d nodes\n") % max_node);
1358 {
1359 transaction_guard guard(app.db);
1360 if (existing_graph)
1361 app.db.delete_existing_revs_and_certs();
1362
1363 std::set<u64> parents, children, heads;
1364 for (std::multimap<u64, u64>::const_iterator i = ancestry.begin();
1365 i != ancestry.end(); ++i)
1366 {
1367 children.insert(i->first);
1368 parents.insert(i->second);
1369 }
1370 set_difference(children.begin(), children.end(),
1371 parents.begin(), parents.end(),
1372 std::inserter(heads, heads.begin()));
1373
1374 // FIXME: should do a depth-first traversal here, or something like,
1375 // instead of being recursive.
1376 for (std::set<u64>::const_iterator i = heads.begin();
1377 i != heads.end(); ++i)
1378 {
1379 construct_revision_from_ancestry(*i);
1380 }
1381 write_certs();
1382 guard.commit();
1383 }
1384}
1385
1386u64 anc_graph::add_node_for_old_manifest(manifest_id const & man)
1387{
1388 I(!existing_graph);
1389 u64 node = 0;
1390 if (old_man_to_node.find(man) == old_man_to_node.end())
1391 {
1392 node = max_node++;
1393 ++n_nodes;
1394 L(F("node %d = manifest %s\n") % node % man);
1395 old_man_to_node.insert(std::make_pair(man, node));
1396 node_to_old_man.insert(std::make_pair(node, man));
1397
1398 // load certs
1399 std::vector< manifest<cert> > mcerts;
1400 app.db.get_manifest_certs(man, mcerts);
1401 erase_bogus_certs(mcerts, app);
1402 for(std::vector< manifest<cert> >::const_iterator i = mcerts.begin();
1403 i != mcerts.end(); ++i)
1404 {
1405 L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node);
1406 cert_value tv;
1407 decode_base64(i->inner().value, tv);
1408 ++n_certs_in;
1409 certs.insert(std::make_pair(node,
1410 std::make_pair(i->inner().name, tv)));
1411 }
1412 }
1413 else
1414 {
1415 node = old_man_to_node[man];
1416 }
1417 return node;
1418}
1419
1420u64 anc_graph::add_node_for_old_revision(revision_id const & rev)
1421{
1422 I(existing_graph);
1423 I(!null_id(rev));
1424 u64 node = 0;
1425 if (old_rev_to_node.find(rev) == old_rev_to_node.end())
1426 {
1427 node = max_node++;
1428 ++n_nodes;
1429
1430 manifest_id man;
1431 app.db.get_revision_manifest(rev, man);
1432
1433 L(F("node %d = revision %s = manifest %s\n") % node % rev % man);
1434 old_rev_to_node.insert(std::make_pair(rev, node));
1435 node_to_old_rev.insert(std::make_pair(node, rev));
1436 node_to_old_man.insert(std::make_pair(node, man));
1437
1438 // load certs
1439 std::vector< revision<cert> > rcerts;
1440 app.db.get_revision_certs(rev, rcerts);
1441 erase_bogus_certs(rcerts, app);
1442 for(std::vector< revision<cert> >::const_iterator i = rcerts.begin();
1443 i != rcerts.end(); ++i)
1444 {
1445 L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node);
1446 cert_value tv;
1447 decode_base64(i->inner().value, tv);
1448 ++n_certs_in;
1449 certs.insert(std::make_pair(node,
1450 std::make_pair(i->inner().name, tv)));
1451
1452 if (i->inner().name == branch_cert_name)
1453 branches.insert(tv());
1454 }
1455 }
1456 else
1457 {
1458 node = old_rev_to_node[rev];
1459 }
1460
1461 return node;
1462}
1463
// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will
// overflow the stack on large histories.
//
// Build (and store in the database) the new revision for node 'child',
// recursively ensuring its parents are built first.  Returns the new
// revision id, memoized in node_to_new_rev.
revision_id
anc_graph::construct_revision_from_ancestry(u64 child)
{
  L(F("processing node %d\n") % child);

  // already built: return the memoized revision id
  if (node_to_new_rev.find(child) != node_to_new_rev.end())
    {
      L(F("node %d already processed, skipping\n") % child);
      return node_to_new_rev.find(child)->second;
    }

  manifest_id child_man;
  get_node_manifest(child, child_man);

  revision_set rev;
  rev.new_manifest = child_man;

  // all (child -> parent) edges for this node
  typedef std::multimap<u64, u64>::const_iterator ci;
  std::pair<ci,ci> range = ancestry.equal_range(child);
  if (range.first == range.second)
    {
      // no parents: a root node; synthesize one edge from the null
      // revision/manifest
      L(F("node %d is a root node\n") % child);
      revision_id null_rid;
      manifest_id null_mid;
      boost::shared_ptr<change_set> cs(new change_set());
      std::set<file_path> blah;
      analyze_manifest_changes(app, null_mid, child_man, blah, *cs);
      rev.edges.insert(std::make_pair(null_rid,
                                      std::make_pair(null_mid, cs)));
    }
  else if (std::distance(range.first, range.second) == 1)
    {
      // exactly one parent: build it first, then diff its manifest
      // against the child's
      I(child == range.first->first);
      u64 parent = range.first->second;
      if (node_to_new_rev.find(parent) == node_to_new_rev.end())
        construct_revision_from_ancestry(parent);
      revision_id parent_rid = node_to_new_rev.find(parent)->second;
      L(F("parent node %d = revision %s\n") % parent % parent_rid);
      manifest_id parent_man;
      get_node_manifest(parent, parent_man);
      boost::shared_ptr<change_set> cs(new change_set());
      std::set<file_path> need_killing_files;
      analyze_manifest_changes(app, parent_man, child_man, need_killing_files,
                               *cs);
      rev.edges.insert(std::make_pair(parent_rid,
                                      std::make_pair(parent_man, cs)));
    }
  else
    {
      // this section has lots and lots of rigmorale, in order to handle the
      // following case: A -> B -> D, A -> C -> D.  File "foo" exists in
      // manifests A, C, and D only.  (I.e., it was deleted and then re-added
      // along the B path, and left alone along the C path.)  The problem here
      // is that we have to synthesize a delete/add pair on the C -> D edge,
      // to make sure that the A -> D changeset is path invariant -- we have
      // to make sure that no matter how you go from A to D, we will learn
      // that A's "foo" and D's "foo" are actually logically distinct files.
      //
      // In order to do this, before we generate the changeset for any merged
      // revision, we calculate the changeset through from the parents's
      // common ancestor to the _other_ parent, which will tell us which files
      // have been deleted since then.  We also calculate the changeset
      // through from the parents's common ancestor to the current parent,
      // which tells us which files have already been deleted on our side, and
      // so don't need to be deleted again.
      //
      // Finally, we feed the list of deleted files to analyze_manifest, which
      // uses that information to break file ancestries when necessary.
      //
      // we only know how to preserve file ids when there are exactly two
      // parents, so assert that there are.
      std::vector<u64> parents, others;
      {
        // pin down exactly two parents; record them in both
        // (parent, other) orderings for the loop below
        u64 left_p, right_p;
        ci i = range.first;
        I(child == i->first);
        left_p = i->second;
        ++i;
        I(child == i->first);
        right_p = i->second;
        ++i;
        I(i == range.second);
        parents.push_back(left_p);
        others.push_back(right_p);
        parents.push_back(right_p);
        others.push_back(left_p);
      }
      // make sure our parents are all saved.
      for (std::vector<u64>::const_iterator i = parents.begin(); i != parents.end(); ++i)
        {
          if (node_to_new_rev.find(*i) == node_to_new_rev.end())
            construct_revision_from_ancestry(*i);
          I(node_to_new_rev.find(*i) != node_to_new_rev.end());
        }
      // actually process the two nodes
      for (int i = 0; i != 2; ++i)
        {
          u64 parent = idx(parents, i);
          u64 other_parent = idx(others, i);
          L(F("processing edge from child %d -> parent %d\n") % child % parent);

          manifest_id parent_man;
          get_node_manifest(parent, parent_man);
          boost::shared_ptr<change_set> cs(new change_set());
          change_set_for_merge(app, node_to_new_rev.find(parent)->second,
                               node_to_new_rev.find(other_parent)->second,
                               parent_man, child_man, *cs);
          rev.edges.insert(std::make_pair(node_to_new_rev.find(parent)->second,
                                          std::make_pair(parent_man, cs)));
        }
    }

  // hash the finished revision, memoize it, and write it out if new
  revision_id rid;
  calculate_ident(rev, rid);
  node_to_new_rev.insert(std::make_pair(child, rid));

  if (!app.db.revision_exists (rid))
    {
      L(F("mapped node %d to revision %s\n") % child % rid);
      app.db.put_revision(rid, rev);
      ++n_revs_out;
    }
  else
    {
      L(F("skipping already existing revision %s\n") % rid);
    }

  return rid;
}
1595
1596void
1597build_changesets_from_existing_revs(app_state & app)
1598{
1599 global_sanity.set_relaxed(true);
1600 anc_graph graph(true, app);
1601
1602 P(F("rebuilding revision graph from existing graph\n"));
1603 std::multimap<revision_id, revision_id> existing_graph;
1604
1605 {
1606 // early short-circuit to avoid failure after lots of work
1607 rsa_keypair_id key;
1608 N(guess_default_key(key,app),
1609 F("no unique private key for cert construction"));
1610 require_password(key, app);
1611 }
1612
1613 app.db.get_revision_ancestry(existing_graph);
1614 for (std::multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
1615 i != existing_graph.end(); ++i)
1616 {
1617 if (!null_id(i->first))
1618 {
1619 u64 parent_node = graph.add_node_for_old_revision(i->first);
1620 u64 child_node = graph.add_node_for_old_revision(i->second);
1621 graph.add_node_ancestry(child_node, parent_node);
1622 }
1623 }
1624
1625 global_sanity.set_relaxed(false);
1626 graph.rebuild_ancestry();
1627}
1628
1629
1630void
1631build_changesets_from_manifest_ancestry(app_state & app)
1632{
1633 anc_graph graph(false, app);
1634
1635 P(F("rebuilding revision graph from manifest certs\n"));
1636
1637 {
1638 // early short-circuit to avoid failure after lots of work
1639 rsa_keypair_id key;
1640 N(guess_default_key(key,app),
1641 F("no unique private key for cert construction"));
1642 require_password(key, app);
1643 }
1644
1645 std::vector< manifest<cert> > tmp;
1646 app.db.get_manifest_certs(cert_name("ancestor"), tmp);
1647 erase_bogus_certs(tmp, app);
1648
1649 for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
1650 i != tmp.end(); ++i)
1651 {
1652 cert_value tv;
1653 decode_base64(i->inner().value, tv);
1654 manifest_id child, parent;
1655 child = i->inner().ident;
1656 parent = hexenc<id>(tv());
1657
1658 u64 parent_node = graph.add_node_for_old_manifest(parent);
1659 u64 child_node = graph.add_node_for_old_manifest(child);
1660 graph.add_node_ancestry(child_node, parent_node);
1661 }
1662
1663 graph.rebuild_ancestry();
1664}
1665
1666
1667// i/o stuff
1668
namespace
{
  namespace syms
  {
    // symbolic key names used by the basic_io revision file format
    std::string const old_revision("old_revision");
    std::string const new_manifest("new_manifest");
    std::string const old_manifest("old_manifest");
  }
}
1678
1679
1680void
1681print_edge(basic_io::printer & printer,
1682 edge_entry const & e)
1683{
1684 basic_io::stanza st;
1685 st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
1686 st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
1687 printer.print_stanza(st);
1688 print_change_set(printer, edge_changes(e));
1689}
1690
1691
1692void
1693print_revision(basic_io::printer & printer,
1694 revision_set const & rev)
1695{
1696 rev.check_sane();
1697 basic_io::stanza st;
1698 st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
1699 printer.print_stanza(st);
1700 for (edge_map::const_iterator edge = rev.edges.begin();
1701 edge != rev.edges.end(); ++edge)
1702 print_edge(printer, *edge);
1703}
1704
1705
1706void
1707parse_edge(basic_io::parser & parser,
1708 edge_map & es)
1709{
1710 boost::shared_ptr<change_set> cs(new change_set());
1711 manifest_id old_man;
1712 revision_id old_rev;
1713 std::string tmp;
1714
1715 parser.esym(syms::old_revision);
1716 parser.hex(tmp);
1717 old_rev = revision_id(tmp);
1718
1719 parser.esym(syms::old_manifest);
1720 parser.hex(tmp);
1721 old_man = manifest_id(tmp);
1722
1723 parse_change_set(parser, *cs);
1724
1725 es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
1726}
1727
1728
1729void
1730parse_revision(basic_io::parser & parser,
1731 revision_set & rev)
1732{
1733 rev.edges.clear();
1734 std::string tmp;
1735 parser.esym(syms::new_manifest);
1736 parser.hex(tmp);
1737 rev.new_manifest = manifest_id(tmp);
1738 while (parser.symp(syms::old_revision))
1739 parse_edge(parser, rev.edges);
1740 rev.check_sane();
1741}
1742
1743void
1744read_revision_set(data const & dat,
1745 revision_set & rev)
1746{
1747 std::istringstream iss(dat());
1748 basic_io::input_source src(iss, "revision");
1749 basic_io::tokenizer tok(src);
1750 basic_io::parser pars(tok);
1751 parse_revision(pars, rev);
1752 I(src.lookahead == EOF);
1753 rev.check_sane();
1754}
1755
// Convenience overload: unwrap revision_data and parse its contents.
void
read_revision_set(revision_data const & dat,
                  revision_set & rev)
{
  read_revision_set(dat.inner(), rev);
  rev.check_sane();
}
1763
1764void
1765write_revision_set(revision_set const & rev,
1766 data & dat)
1767{
1768 rev.check_sane();
1769 std::ostringstream oss;
1770 basic_io::printer pr(oss);
1771 print_revision(pr, rev);
1772 dat = data(oss.str());
1773}
1774
// Convenience overload: serialize and wrap the result as revision_data.
void
write_revision_set(revision_set const & rev,
                   revision_data & dat)
{
  rev.check_sane();
  data d;
  write_revision_set(rev, d);
  dat = revision_data(d);
}
1784
1785#ifdef BUILD_UNIT_TESTS
1786#include "unit_tests.hh"
1787#include "sanity.hh"
1788
// Placeholder: no revision-specific unit tests exist yet.
static void
revision_test()
{
}
1793
// Register this file's unit tests with the caller's boost test suite.
void
add_revision_tests(test_suite * suite)
{
  I(suite);
  suite->add(BOOST_TEST_CASE(&revision_test));
}
1800
1801
1802#endif // BUILD_UNIT_TESTS

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status