monotone

monotone Mtn Source Tree

Root/revision.cc

1// copyright (C) 2004 graydon hoare <graydon@pobox.com>
2// all rights reserved.
3// licensed to the public under the terms of the GNU GPL (>= 2)
4// see the file COPYING for details
5
6#include <cctype>
7#include <cstdlib>
8#include <iostream>
9#include <map>
10#include <queue>
11#include <set>
12#include <sstream>
13#include <stack>
14#include <string>
15#include <iterator>
16#include <functional>
17
18#include <boost/lexical_cast.hpp>
19#include <boost/dynamic_bitset.hpp>
20#include <boost/shared_ptr.hpp>
21
22#include "cryptopp/osrng.h"
23
24#include "basic_io.hh"
25#include "change_set.hh"
26#include "constants.hh"
27#include "numeric_vocab.hh"
28#include "revision.hh"
29#include "sanity.hh"
30#include "transforms.hh"
31#include "ui.hh"
32#include "vocab.hh"
33#include "keys.hh"
34
35void revision_set::check_sane() const
36{
37 I(!null_id(new_manifest));
38
39 manifest_map fragment;
40 for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
41 {
42 change_set const & cs = edge_changes(i);
43 cs.check_sane();
44 if (!global_sanity.relaxed)
45 {
46 // null old revisions come with null old manifests
47 I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i)));
48 }
49 for (change_set::delta_map::const_iterator j = cs.deltas.begin(); j != cs.deltas.end(); ++j)
50 {
51 manifest_map::const_iterator k = fragment.find(delta_entry_path(j));
52 if (k == fragment.end())
53 fragment.insert(std::make_pair(delta_entry_path(j),
54 delta_entry_dst(j)));
55 else
56 {
57 if (!global_sanity.relaxed)
58 {
59 I(delta_entry_dst(j) == manifest_entry_id(k));
60 }
61 }
62 }
63 }
64}
65
66revision_set::revision_set(revision_set const & other)
67{
68 other.check_sane();
69 new_manifest = other.new_manifest;
70 edges = other.edges;
71}
72
73revision_set const &
74revision_set::operator=(revision_set const & other)
75{
76 other.check_sane();
77 new_manifest = other.new_manifest;
78 edges = other.edges;
79 return *this;
80}
81
82
83// Traces history back 'depth' levels from 'child_id', ensuring that
84// historical information is consistent within this subgraph.
85// The child must be in the database already.
86//
87// "Consistent" currently means that we compose manifests along every path (of
88// any length) that terminates at the child, and for each one check that paths
89// that should be the same in fact are the same, and that the calculated
90// change sets can be applied to the old manifests to create the new
91// manifest.
92//
93// NB: While this function has some invariants in it itself, a lot of its
94// purpose is just to exercise all the invariants inside change_set.cc. So
95// don't remove those invariants. (As if you needed another reason...)
void
check_sane_history(revision_id const & child_id,
                   int depth,
                   app_state & app)
{
  // We are, unfortunately, still quite slow. So we want to give at least a
  // little feedback. Let's print exactly one warning, on the _second_ time
  // we are called within one run -- just checking one revision isn't too
  // slow, so no need to print anything on "commit", but usually if we're
  // checking 2 revisions we're checking a lot more.
  // FIXME: make sanity checking faster, so we can remove this kluge
  // altogether...
  static int num_checked = 0;
  ++num_checked;
  if (num_checked == 2)
    P(F("verifying new revisions (this may take a while)\n"));

  L(F("Verifying revision %s has sane history (to depth %i)\n")
    % child_id % depth);

  typedef boost::shared_ptr<change_set> shared_cs;
  // (ancestor, change_set from ancestor to child)
  std::map<revision_id, shared_cs> changesets;

  // The manifest the child claims to produce; every composed
  // ancestor-to-child change_set below must reproduce exactly this.
  manifest_id m_child_id;
  app.db.get_revision_manifest(child_id, m_child_id);
  manifest_map m_child;
  app.db.get_manifest(m_child_id, m_child);

  std::set<revision_id> frontier;
  frontier.insert(child_id);

  // FIXME: if we don't check the LCA already, check it.

  // Breadth-first walk backwards through the ancestry, at most 'depth'
  // generations from the child.
  while (depth-- > 0)
    {
      std::set<revision_id> next_frontier;

      for (std::set<revision_id>::const_iterator i = frontier.begin();
           i != frontier.end();
           ++i)
        {
          revision_id current_id = *i;
          revision_set current;
          app.db.get_revision(current_id, current);
          // and the parents's manifests to the manifests
          // and the change_set's to the parents to the changesets
          for (edge_map::const_iterator j = current.edges.begin();
               j != current.edges.end();
               ++j)
            {
              revision_id old_id = edge_old_revision(j);
              manifest_id m_old_id = edge_old_manifest(j);
              change_set old_to_current_changes = edge_changes(j);
              if (!null_id(old_id))
                next_frontier.insert(old_id);

              // NOTE(review): this logs child_id rather than current_id;
              // presumably intentional, since every composed cset here
              // targets the child -- confirm.
              L(F("Examining %s -> %s\n") % old_id % child_id);

              // build the change_set
              // if
              shared_cs old_to_child_changes_p = shared_cs(new change_set);
              if (current_id == child_id)
                *old_to_child_changes_p = old_to_current_changes;
              else
                {
                  // Compose (old -> current) with the memoized
                  // (current -> child) path; the invariant guarantees
                  // current was reached (and stored) in an earlier pass.
                  shared_cs current_to_child_changes_p;
                  I(changesets.find(current_id) != changesets.end());
                  current_to_child_changes_p = changesets.find(current_id)->second;
                  concatenate_change_sets(old_to_current_changes,
                                          *current_to_child_changes_p,
                                          *old_to_child_changes_p);
                }

              // we have the change_set; now, is it one we've seen before?
              if (changesets.find(old_id) != changesets.end())
                {
                  // If it is, then make sure the paths agree on the
                  // changeset.
                  I(*changesets.find(old_id)->second == *old_to_child_changes_p);
                }
              else
                {
                  // If not, this is the first time we've seen this.
                  // So store it in the map for later reference:
                  changesets.insert(std::make_pair(old_id, old_to_child_changes_p));
                  // and check that it works: applying the composed
                  // change_set to the old manifest must yield the
                  // child's manifest.

                  manifest_map purported_m_child;
                  // The null revision has empty manifest, which is the
                  // default.
                  if (!null_id(old_id))
                    app.db.get_manifest(m_old_id, purported_m_child);
                  apply_change_set(*old_to_child_changes_p, purported_m_child);
                  I(purported_m_child == m_child);
                }
            }
        }
      frontier = next_frontier;
    }

  // Finally, there's a danger that if we have a long divergence, then after a
  // merge, the common ancestor will be far back enough that the above
  // depth-limited search won't have any idea whether the ancestry invariants
  // are actually preserved. So do an additional check on merge revisions, to
  // make sure that the paths to both ways going back to their parents's
  // common ancestor give the same change_set (i.e., this is a valid merge at
  // all).
  if (!global_sanity.relaxed)
    {
      revision_set child_rev;
      app.db.get_revision(child_id, child_rev);
      // Nothing inherently impossible about having more than 2 parents, but if
      // you come up with some case where it should be possible then you'll
      // have to also adjust the code below to figure out what "common
      // ancestor" means.
      I(child_rev.edges.size() <= 2);
      if (child_rev.edges.size() != 2)
        return;
      edge_map::const_iterator i = child_rev.edges.begin();
      revision_id parent_left = edge_old_revision(i);
      change_set left_edge = edge_changes(i);
      ++i;
      revision_id parent_right = edge_old_revision(i);
      change_set right_edge = edge_changes(i);
      ++i;
      I(i == child_rev.edges.end());
      revision_id lca;
      if (!find_least_common_ancestor(parent_left, parent_right, lca, app))
        {
          L(F("%s and %s have no common ancestor, so done\n")
            % parent_left % parent_right);
          return;
        }
      if (changesets.find(lca) != changesets.end())
        {
          L(F("already checked common ancestor, so done\n"));
          return;
        }
      L(F("%s is a merge; verifying paths to common ancestor %s are sane\n")
        % child_id % lca);
      // we have a merge node, with an lca sufficiently far back in history
      // that we haven't yet figured out whether this is a valid merge or
      // not. so find out.
      change_set cs_parent_left, cs_parent_right, cs_left, cs_right;
      calculate_composite_change_set(lca, parent_left, app, cs_parent_left);
      calculate_composite_change_set(lca, parent_right, app, cs_parent_right);
      concatenate_change_sets(cs_parent_left, left_edge, cs_left);
      concatenate_change_sets(cs_parent_right, right_edge, cs_right);
      // Both routes from the LCA down to the child must describe the
      // same overall change, or the merge is invalid.
      I(cs_left == cs_right);
    }
}
248
249// calculating least common ancestors is a delicate thing.
250//
251// it turns out that we cannot choose the simple "least common ancestor"
252// for purposes of a merge, because it is possible that there are two
253// equally reachable common ancestors, and this produces ambiguity in the
254// merge. the result -- in a pathological case -- is silently accepting one
255// set of edits while discarding another; not exactly what you want a
256// version control tool to do.
257//
258// a conservative approximation is what we'll call a "subgraph recurring"
259// LCA algorithm. this is somewhat like locating the least common dominator
260// node, but not quite. it is actually just a vanilla LCA search, except
261// that any time there's a fork (a historical merge looks like a fork from
262// our perspective, working backwards from children to parents) it reduces
263// the fork to a common parent via a sequence of pairwise recursive calls
264// to itself before proceeding. this will always resolve to a common parent
265// with no ambiguity, unless it falls off the root of the graph.
266//
267// unfortunately the subgraph recurring algorithm sometimes goes too far
268// back in history -- for example if there is an unambiguous propagate from
269// one branch to another, the entire subgraph preceeding the propagate on
270// the recipient branch is elided, since it is a merge.
271//
272// our current hypothesis is that the *exact* condition we're looking for,
273// when doing a merge, is the least node which dominates one side of the
274// merge and is an ancestor of the other.
275
276typedef unsigned long ctx;
277typedef boost::dynamic_bitset<> bitmap;
278typedef boost::shared_ptr<bitmap> shared_bitmap;
279
280static void
281ensure_parents_loaded(ctx child,
282 std::map<ctx, shared_bitmap> & parents,
283 interner<ctx> & intern,
284 app_state & app)
285{
286 if (parents.find(child) != parents.end())
287 return;
288
289 L(F("loading parents for node %d\n") % child);
290
291 std::set<revision_id> imm_parents;
292 app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents);
293
294 // The null revision is not a parent for purposes of finding common
295 // ancestors.
296 for (std::set<revision_id>::iterator p = imm_parents.begin();
297 p != imm_parents.end(); ++p)
298 {
299 if (null_id(*p))
300 imm_parents.erase(p);
301 }
302
303 shared_bitmap bits = shared_bitmap(new bitmap(parents.size()));
304
305 for (std::set<revision_id>::const_iterator p = imm_parents.begin();
306 p != imm_parents.end(); ++p)
307 {
308 ctx pn = intern.intern(p->inner()());
309 L(F("parent %s -> node %d\n") % *p % pn);
310 if (pn >= bits->size())
311 bits->resize(pn+1);
312 bits->set(pn);
313 }
314
315 parents.insert(std::make_pair(child, bits));
316}
317
// One pass of an iterative dataflow computation of dominator sets.
// For each node already present in 'dominators', recompute its set as
// {itself} union (intersection of its parents' dominator sets).
// Returns true if any set changed, so the caller can iterate this to a
// fixed point.
static bool
expand_dominators(std::map<ctx, shared_bitmap> & parents,
                  std::map<ctx, shared_bitmap> & dominators,
                  interner<ctx> & intern,
                  app_state & app)
{
  bool something_changed = false;
  std::vector<ctx> nodes;

  nodes.reserve(dominators.size());

  // pass 1, pull out all the node numbers we're going to scan this time around
  // (we cannot iterate 'dominators' directly, since pass 2 inserts new
  // entries into it)
  for (std::map<ctx, shared_bitmap>::const_iterator e = dominators.begin();
       e != dominators.end(); ++e)
    nodes.push_back(e->first);

  // pass 2, update any of the dominator entries we can
  for (std::vector<ctx>::const_iterator n = nodes.begin();
       n != nodes.end(); ++n)
    {
      shared_bitmap bits = dominators[*n];
      // snapshot the old value so we can detect a change at the end
      bitmap saved(*bits);
      // every node dominates itself
      if (bits->size() <= *n)
        bits->resize(*n + 1);
      bits->set(*n);

      ensure_parents_loaded(*n, parents, intern, app);
      shared_bitmap n_parents = parents[*n];

      bitmap intersection(bits->size());

      // intersect the dominator sets of all parents; 'first' seeds the
      // intersection from the first parent encountered
      bool first = true;
      for (unsigned long parent = 0;
           parent != n_parents->size(); ++parent)
        {
          if (! n_parents->test(parent))
            continue;

          if (dominators.find(parent) == dominators.end())
            dominators.insert(std::make_pair(parent,
                                             shared_bitmap(new bitmap())));
          shared_bitmap pbits = dominators[parent];

          // dynamic_bitset requires equal sizes for bitwise operations,
          // so grow whichever side is shorter
          if (bits->size() > pbits->size())
            pbits->resize(bits->size());

          if (pbits->size() > bits->size())
            bits->resize(pbits->size());

          if (first)
            {
              intersection = (*pbits);
              first = false;
            }
          else
            intersection &= (*pbits);
        }

      (*bits) |= intersection;
      if (*bits != saved)
        something_changed = true;
    }
  return something_changed;
}
382
383
// One pass of the iterative expansion of ancestor sets: for each node
// already present in 'ancestors', add its immediate parents plus
// everything currently known about those parents' ancestors.  Returns
// true if any set grew, so the caller can iterate this to a fixed
// point.
static bool
expand_ancestors(std::map<ctx, shared_bitmap> & parents,
                 std::map<ctx, shared_bitmap> & ancestors,
                 interner<ctx> & intern,
                 app_state & app)
{
  bool something_changed = false;
  std::vector<ctx> nodes;

  nodes.reserve(ancestors.size());

  // pass 1, pull out all the node numbers we're going to scan this time around
  // (we cannot iterate 'ancestors' directly, since pass 2 inserts new
  // entries into it)
  for (std::map<ctx, shared_bitmap>::const_iterator e = ancestors.begin();
       e != ancestors.end(); ++e)
    nodes.push_back(e->first);

  // pass 2, update any of the ancestor entries we can
  for (std::vector<ctx>::const_iterator n = nodes.begin(); n != nodes.end(); ++n)
    {
      shared_bitmap bits = ancestors[*n];
      // snapshot the old value so we can detect a change at the end
      bitmap saved(*bits);
      // each node's bit is set in its own "ancestor" bitmap; the
      // intersection tests in the callers rely on this
      if (bits->size() <= *n)
        bits->resize(*n + 1);
      bits->set(*n);

      ensure_parents_loaded(*n, parents, intern, app);
      shared_bitmap n_parents = parents[*n];
      for (ctx parent = 0; parent != n_parents->size(); ++parent)
        {
          if (! n_parents->test(parent))
            continue;

          if (bits->size() <= parent)
            bits->resize(parent + 1);
          bits->set(parent);

          if (ancestors.find(parent) == ancestors.end())
            ancestors.insert(make_pair(parent,
                                       shared_bitmap(new bitmap())));
          shared_bitmap pbits = ancestors[parent];

          // dynamic_bitset requires equal sizes for bitwise operations,
          // so grow whichever side is shorter
          if (bits->size() > pbits->size())
            pbits->resize(bits->size());

          if (pbits->size() > bits->size())
            bits->resize(pbits->size());

          (*bits) |= (*pbits);
        }
      if (*bits != saved)
        something_changed = true;
    }
  return something_changed;
}
438
439static bool
440find_intersecting_node(bitmap & fst,
441 bitmap & snd,
442 interner<ctx> const & intern,
443 revision_id & anc)
444{
445
446 if (fst.size() > snd.size())
447 snd.resize(fst.size());
448 else if (snd.size() > fst.size())
449 fst.resize(snd.size());
450
451 bitmap intersection = fst & snd;
452 if (intersection.any())
453 {
454 L(F("found %d intersecting nodes\n") % intersection.count());
455 for (ctx i = 0; i < intersection.size(); ++i)
456 {
457 if (intersection.test(i))
458 {
459 anc = revision_id(intern.lookup(i));
460 return true;
461 }
462 }
463 }
464 return false;
465}
466
467// static void
468// dump_bitset_map(std::string const & hdr,
469// std::map< ctx, shared_bitmap > const & mm)
470// {
471// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size());
472// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin();
473// i != mm.end(); ++i)
474// {
475// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second)));
476// }
477// }
478
479bool
480find_common_ancestor_for_merge(revision_id const & left,
481 revision_id const & right,
482 revision_id & anc,
483 app_state & app)
484{
485 interner<ctx> intern;
486 std::map< ctx, shared_bitmap >
487 parents, ancestors, dominators;
488
489 ctx ln = intern.intern(left.inner()());
490 ctx rn = intern.intern(right.inner()());
491
492 shared_bitmap lanc = shared_bitmap(new bitmap());
493 shared_bitmap ranc = shared_bitmap(new bitmap());
494 shared_bitmap ldom = shared_bitmap(new bitmap());
495 shared_bitmap rdom = shared_bitmap(new bitmap());
496
497 ancestors.insert(make_pair(ln, lanc));
498 ancestors.insert(make_pair(rn, ranc));
499 dominators.insert(make_pair(ln, ldom));
500 dominators.insert(make_pair(rn, rdom));
501
502 L(F("searching for common ancestor, left=%s right=%s\n") % left % right);
503
504 while (expand_ancestors(parents, ancestors, intern, app) ||
505 expand_dominators(parents, dominators, intern, app))
506 {
507 L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") %
508 parents.size() % ancestors.size() % dominators.size());
509
510 if (find_intersecting_node(*lanc, *rdom, intern, anc))
511 {
512 L(F("found node %d, ancestor of left %s and dominating right %s\n")
513 % anc % left % right);
514 return true;
515 }
516
517 else if (find_intersecting_node(*ranc, *ldom, intern, anc))
518 {
519 L(F("found node %d, ancestor of right %s and dominating left %s\n")
520 % anc % right % left);
521 return true;
522 }
523 }
524// dump_bitset_map("ancestors", ancestors);
525// dump_bitset_map("dominators", dominators);
526// dump_bitset_map("parents", parents);
527 return false;
528}
529
530
531bool
532find_least_common_ancestor(revision_id const & left,
533 revision_id const & right,
534 revision_id & anc,
535 app_state & app)
536{
537 interner<ctx> intern;
538 std::map< ctx, shared_bitmap >
539 parents, ancestors;
540
541 ctx ln = intern.intern(left.inner()());
542 ctx rn = intern.intern(right.inner()());
543
544 shared_bitmap lanc = shared_bitmap(new bitmap());
545 shared_bitmap ranc = shared_bitmap(new bitmap());
546
547 ancestors.insert(make_pair(ln, lanc));
548 ancestors.insert(make_pair(rn, ranc));
549
550 L(F("searching for least common ancestor, left=%s right=%s\n") % left % right);
551
552 while (expand_ancestors(parents, ancestors, intern, app))
553 {
554 L(F("least common ancestor scan [par=%d,anc=%d]\n") %
555 parents.size() % ancestors.size());
556
557 if (find_intersecting_node(*lanc, *ranc, intern, anc))
558 {
559 L(F("found node %d, ancestor of left %s and right %s\n")
560 % anc % left % right);
561 return true;
562 }
563 }
564// dump_bitset_map("ancestors", ancestors);
565// dump_bitset_map("parents", parents);
566 return false;
567}
568
569
570// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the
571// size of the entire revision graph.
572
573static bool
574is_ancestor(revision_id const & ancestor_id,
575 revision_id const & descendent_id,
576 std::multimap<revision_id, revision_id> const & graph)
577{
578
579 std::set<revision_id> visited;
580 std::queue<revision_id> queue;
581
582 queue.push(ancestor_id);
583
584 while (!queue.empty())
585 {
586 revision_id current_id = queue.front();
587 queue.pop();
588
589 if (current_id == descendent_id)
590 return true;
591 else
592 {
593 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
594 std::pair<gi, gi> children = graph.equal_range(current_id);
595 for (gi i = children.first; i != children.second; ++i)
596 {
597 if (visited.find(i->second) == visited.end())
598 {
599 queue.push(i->second);
600 visited.insert(i->second);
601 }
602 }
603 }
604 }
605 return false;
606}
607
608bool
609is_ancestor(revision_id const & ancestor_id,
610 revision_id const & descendent_id,
611 app_state & app)
612{
613 L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id);
614
615 std::multimap<revision_id, revision_id> graph;
616 app.db.get_revision_ancestry(graph);
617 return is_ancestor(ancestor_id, descendent_id, graph);
618}
619
620
621static void
622add_bitset_to_union(shared_bitmap src,
623 shared_bitmap dst)
624{
625 if (dst->size() > src->size())
626 src->resize(dst->size());
627 if (src->size() > dst->size())
628 dst->resize(src->size());
629 *dst |= *src;
630}
631
632
// Compute the full ancestor set (as a bitmap of interned node ids) of
// 'init' and of every node reachable from it through 'graph', which
// here maps child -> parent.  Results are memoized in 'ancestors', and
// every computed set is also or-ed into 'total_union'.
static void
calculate_ancestors_from_graph(interner<ctx> & intern,
                               revision_id const & init,
                               std::multimap<revision_id, revision_id> const & graph,
                               std::map< ctx, shared_bitmap > & ancestors,
                               shared_bitmap & total_union)
{
  typedef std::multimap<revision_id, revision_id>::const_iterator gi;
  // explicit stack in place of recursion; a node stays on the stack
  // until all of its parents have computed ancestor sets
  std::stack<ctx> stk;

  stk.push(intern.intern(init.inner()()));

  while (! stk.empty())
    {
      ctx us = stk.top();
      revision_id rev(hexenc<id>(intern.lookup(us)));

      std::pair<gi,gi> parents = graph.equal_range(rev);
      bool pushed = false;

      // first make sure all parents are done
      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());
          if (ancestors.find(parent) == ancestors.end())
            {
              stk.push(parent);
              pushed = true;
              break;
            }
        }

      // if we pushed anything we stop now. we'll come back later when all
      // the parents are done.
      if (pushed)
        continue;

      shared_bitmap b = shared_bitmap(new bitmap());

      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());

          // set all parents
          if (b->size() <= parent)
            b->resize(parent + 1);
          b->set(parent);

          // ensure all parents are loaded into the ancestor map
          I(ancestors.find(parent) != ancestors.end());

          // union them into our map
          std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
          I(j != ancestors.end());
          add_bitset_to_union(j->second, b);
        }

      add_bitset_to_union(b, total_union);
      ancestors.insert(std::make_pair(us, b));
      stk.pop();
    }
}
695
696// this function actually toposorts the whole graph, and then filters by the
697// passed in set. if anyone ever needs to toposort the whole graph, then,
698// this function would be a good thing to generalize...
699void
700toposort(std::set<revision_id> const & revisions,
701 std::vector<revision_id> & sorted,
702 app_state & app)
703{
704 sorted.clear();
705 typedef std::multimap<revision_id, revision_id>::iterator gi;
706 std::multimap<revision_id, revision_id> graph;
707 app.db.get_revision_ancestry(graph);
708 std::set<revision_id> leaves;
709 app.db.get_revision_ids(leaves);
710 while (!graph.empty())
711 {
712 // first find the set of current graph roots
713 std::set<revision_id> roots;
714 for (gi i = graph.begin(); i != graph.end(); ++i)
715 roots.insert(i->first);
716 for (gi i = graph.begin(); i != graph.end(); ++i)
717 roots.erase(i->second);
718 // now stick them in our ordering (if wanted), and remove them from the
719 // graph
720 for (std::set<revision_id>::const_iterator i = roots.begin();
721 i != roots.end(); ++i)
722 {
723 L(F("new root: %s\n") % (*i));
724 if (revisions.find(*i) != revisions.end())
725 sorted.push_back(*i);
726 graph.erase(*i);
727 leaves.erase(*i);
728 }
729 }
730 for (std::set<revision_id>::const_iterator i = leaves.begin();
731 i != leaves.end(); ++i)
732 {
733 L(F("new leaf: %s\n") % (*i));
734 if (revisions.find(*i) != revisions.end())
735 sorted.push_back(*i);
736 }
737}
738
739// This function looks at a set of revisions, and for every pair A, B in that
740// set such that A is an ancestor of B, it erases A.
741
742void
743erase_ancestors(std::set<revision_id> & revisions, app_state & app)
744{
745 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
746 std::multimap<revision_id, revision_id> graph;
747 std::multimap<revision_id, revision_id> inverse_graph;
748
749 app.db.get_revision_ancestry(graph);
750 for (gi i = graph.begin(); i != graph.end(); ++i)
751 inverse_graph.insert(std::make_pair(i->second, i->first));
752
753 interner<ctx> intern;
754 std::map< ctx, shared_bitmap > ancestors;
755
756 shared_bitmap u = shared_bitmap(new bitmap());
757
758 for (std::set<revision_id>::const_iterator i = revisions.begin();
759 i != revisions.end(); ++i)
760 {
761 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
762 }
763
764 std::set<revision_id> tmp;
765 for (std::set<revision_id>::const_iterator i = revisions.begin();
766 i != revisions.end(); ++i)
767 {
768 ctx id = intern.intern(i->inner()());
769 bool has_ancestor_in_set = id < u->size() && u->test(id);
770 if (!has_ancestor_in_set)
771 tmp.insert(*i);
772 }
773
774 revisions = tmp;
775}
776
777// This function takes a revision A and a set of revision Bs, calculates the
778// ancestry of each, and returns the set of revisions that are in A's ancestry
779// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
780// that's not in the Bs. If the output set if non-empty, then A will
781// certainly be in it; but the output set might be empty.
782void
783ancestry_difference(revision_id const & a, std::set<revision_id> const & bs,
784 std::set<revision_id> & new_stuff,
785 app_state & app)
786{
787 new_stuff.clear();
788 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
789 std::multimap<revision_id, revision_id> graph;
790 std::multimap<revision_id, revision_id> inverse_graph;
791
792 app.db.get_revision_ancestry(graph);
793 for (gi i = graph.begin(); i != graph.end(); ++i)
794 inverse_graph.insert(std::make_pair(i->second, i->first));
795
796 interner<ctx> intern;
797 std::map< ctx, shared_bitmap > ancestors;
798
799 shared_bitmap u = shared_bitmap(new bitmap());
800
801 for (std::set<revision_id>::const_iterator i = bs.begin();
802 i != bs.end(); ++i)
803 {
804 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
805 ctx c = intern.intern(i->inner()());
806 if (u->size() <= c)
807 u->resize(c + 1);
808 u->set(c);
809 }
810
811 shared_bitmap au = shared_bitmap(new bitmap());
812 calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
813 {
814 ctx c = intern.intern(a.inner()());
815 if (au->size() <= c)
816 au->resize(c + 1);
817 au->set(c);
818 }
819
820 au->resize(std::max(au->size(), u->size()));
821 u->resize(std::max(au->size(), u->size()));
822
823 *au -= *u;
824
825 for (unsigned int i = 0; i != au->size(); ++i)
826 {
827 if (au->test(i))
828 {
829 revision_id rid(intern.lookup(i));
830 if (!null_id(rid))
831 new_stuff.insert(rid);
832 }
833 }
834}
835
836//
837// The idea with this algorithm is to walk from child up to ancestor,
838// recursively, accumulating all the change_sets associated with
839// intermediate nodes into *one big change_set*.
840//
841// clever readers will realize this is an overlapping-subproblem type
842// situation and thus needs to keep a dynamic programming map to keep
843// itself in linear complexity.
844//
845// in fact, we keep two: one which maps to computed results (partial_csets)
846// and one which just keeps a set of all nodes we traversed
847// (visited_nodes). in theory it could be one map with an extra bool stuck
848// on each entry, but I think that would make it even less readable. it's
849// already quite ugly.
850//
851
// Walk backwards from 'child' towards 'ancestor' (restricted to nodes
// in 'subgraph'), composing the edge change_sets along the way into
// 'cumulative_cset'.  Returns true iff a path from 'child' back to
// 'ancestor' exists.  'partial_csets' memoizes composed results for
// reuse; 'visited_nodes' records every node traversed (including dead
// ends) so the overall search stays linear.
static bool
calculate_change_sets_recursive(revision_id const & ancestor,
                                revision_id const & child,
                                app_state & app,
                                change_set & cumulative_cset,
                                std::map<revision_id, boost::shared_ptr<change_set> > & partial_csets,
                                std::set<revision_id> & visited_nodes,
                                std::set<revision_id> const & subgraph)
{

  if (ancestor == child)
    return true;

  if (subgraph.find(child) == subgraph.end())
    return false;

  visited_nodes.insert(child);

  bool relevant_child = false;

  revision_set rev;
  app.db.get_revision(child, rev);

  L(F("exploring changesets from parents of %s, seeking towards %s\n")
    % child % ancestor);

  for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
    {
      bool relevant_parent = false;
      revision_id curr_parent = edge_old_revision(i);

      // an empty id is the null revision (a root edge); it can never
      // lead to the ancestor
      if (curr_parent.inner()().empty())
        continue;

      change_set cset_to_curr_parent;

      L(F("considering parent %s of %s\n") % curr_parent % child);

      std::map<revision_id, boost::shared_ptr<change_set> >::const_iterator j =
        partial_csets.find(curr_parent);
      if (j != partial_csets.end())
        {
          // a recursive call has traversed this parent before and built an
          // existing cset. just reuse that rather than re-traversing
          cset_to_curr_parent = *(j->second);
          relevant_parent = true;
        }
      else if (visited_nodes.find(curr_parent) != visited_nodes.end())
        {
          // a recursive call has traversed this parent, but there was no
          // path from it to the root, so the parent is irrelevant. skip.
          relevant_parent = false;
        }
      else
        relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app,
                                                          cset_to_curr_parent,
                                                          partial_csets,
                                                          visited_nodes,
                                                          subgraph);

      if (relevant_parent)
        {
          L(F("revision %s is relevant, composing with edge to %s\n")
            % curr_parent % child);
          // compose (ancestor -> parent) with the (parent -> child)
          // edge; the first relevant parent wins, hence the break
          concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset);
          relevant_child = true;
          break;
        }
      else
        L(F("parent %s of %s is not relevant\n") % curr_parent % child);
    }

  // store the partial edge from ancestor -> child, so that if anyone
  // re-traverses this edge they'll just fetch from the partial_edges
  // cache.
  if (relevant_child)
    partial_csets.insert(std::make_pair(child,
                                        boost::shared_ptr<change_set>
                                        (new change_set(cumulative_cset))));

  return relevant_child;
}
934
935// this finds (by breadth-first search) the set of nodes you'll have to
936// walk over in calculate_change_sets_recursive, to build the composite
937// changeset. this is to prevent the recursive algorithm from going way
938// back in history on an unlucky guess of parent.
939
940static void
941find_subgraph_for_composite_search(revision_id const & ancestor,
942 revision_id const & child,
943 app_state & app,
944 std::set<revision_id> & subgraph)
945{
946 std::set<revision_id> frontier;
947 frontier.insert(child);
948 subgraph.insert(child);
949 while (!frontier.empty())
950 {
951 std::set<revision_id> next_frontier;
952 for (std::set<revision_id>::const_iterator i = frontier.begin();
953 i != frontier.end(); ++i)
954 {
955 revision_set rev;
956 app.db.get_revision(*i, rev);
957 L(F("adding parents of %s to subgraph\n") % *i);
958
959 for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j)
960 {
961 revision_id curr_parent = edge_old_revision(j);
962 if (null_id(curr_parent))
963 continue;
964 subgraph.insert(curr_parent);
965 if (curr_parent == ancestor)
966 {
967 L(F("found parent %s of %s\n") % curr_parent % *i);
968 return;
969 }
970 else
971 L(F("adding parent %s to next frontier\n") % curr_parent);
972 next_frontier.insert(curr_parent);
973 }
974 }
975 frontier = next_frontier;
976 }
977}
978
979void
980calculate_composite_change_set(revision_id const & ancestor,
981 revision_id const & child,
982 app_state & app,
983 change_set & composed)
984{
985 I(composed.empty());
986 L(F("calculating composite changeset between %s and %s\n")
987 % ancestor % child);
988 if (ancestor == child)
989 return;
990 std::set<revision_id> visited;
991 std::set<revision_id> subgraph;
992 std::map<revision_id, boost::shared_ptr<change_set> > partial;
993 find_subgraph_for_composite_search(ancestor, child, app, subgraph);
994 calculate_change_sets_recursive(ancestor, child, app, composed, partial,
995 visited, subgraph);
996}
997
// Compute the changeset from 'start' to 'end' even when neither revision is
// an ancestor of the other: walk up from 'start' to their least common
// ancestor, then down to 'end'.
void
calculate_arbitrary_change_set(revision_id const & start,
                               revision_id const & end,
                               app_state & app,
                               change_set & composed)
{
  L(F("calculating changeset from %s to %s\n") % start % end);
  revision_id r_ca_id;
  change_set ca_to_start, ca_to_end, start_to_ca;
  // With no common ancestor there is no path between the two revisions.
  N(find_least_common_ancestor(start, end, r_ca_id, app),
    F("no common ancestor for %s and %s\n") % start % end);
  L(F("common ancestor is %s\n") % r_ca_id);
  calculate_composite_change_set(r_ca_id, start, app, ca_to_start);
  calculate_composite_change_set(r_ca_id, end, app, ca_to_end);
  manifest_id m_ca_id;
  manifest_map m_ca;
  // The ancestor's manifest is needed to invert the ancestor->start
  // changeset into a start->ancestor one.
  app.db.get_revision_manifest(r_ca_id, m_ca_id);
  app.db.get_manifest(m_ca_id, m_ca);
  invert_change_set(ca_to_start, m_ca, start_to_ca);
  // start->ancestor followed by ancestor->end gives start->end.
  concatenate_change_sets(start_to_ca, ca_to_end, composed);
}
1019
1020
1021// Stuff related to rebuilding the revision graph. Unfortunately this is a
1022// real enough error case that we need support code for it.
1023
1024
1025static void
1026analyze_manifest_changes(app_state & app,
1027 manifest_id const & parent,
1028 manifest_id const & child,
1029 std::set<file_path> const & need_history_splitting,
1030 change_set & cs)
1031{
1032 manifest_map m_parent, m_child;
1033
1034 if (!null_id(parent))
1035 app.db.get_manifest(parent, m_parent);
1036
1037 I(!null_id(child));
1038 app.db.get_manifest(child, m_child);
1039
1040 L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child);
1041
1042 for (manifest_map::const_iterator i = m_parent.begin();
1043 i != m_parent.end(); ++i)
1044 {
1045 file_path f = manifest_entry_path(i);
1046 manifest_map::const_iterator j = m_child.find(f);
1047 if (j == m_child.end())
1048 {
1049 cs.delete_file(f);
1050 }
1051 else if (need_history_splitting.find(f) != need_history_splitting.end())
1052 {
1053 P(F("splitting ancestry for file %s\n") % f);
1054 cs.delete_file(f);
1055 cs.add_file(f, manifest_entry_id(j));
1056 }
1057 else if (! (manifest_entry_id(i) == manifest_entry_id(j)))
1058 {
1059 cs.apply_delta(manifest_entry_path(i),
1060 manifest_entry_id(i),
1061 manifest_entry_id(j));
1062 }
1063 }
1064 for (manifest_map::const_iterator i = m_child.begin();
1065 i != m_child.end(); ++i)
1066 {
1067 manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i));
1068 if (j == m_parent.end())
1069 cs.add_file(manifest_entry_path(i),
1070 manifest_entry_id(i));
1071 }
1072}
1073
1074
// Rebuilds a changeset-style revision graph.  Old-world nodes (revisions,
// or bare manifests in the pre-changeset case) are assigned sequential u64
// ids as they are first seen; the maps below carry all per-node bookkeeping
// keyed on those ids.
struct anc_graph
{
  // 'existing' selects the data source: true = rebuild from an existing
  // revision graph, false = rebuild from manifest "ancestor" certs.
  anc_graph(bool existing, app_state & a) :
    existing_graph(existing),
    app(a),
    max_node(0),
    n_nodes("nodes", "n", 1),
    n_certs_in("certs in", "c", 1),
    n_revs_out("revs out", "r", 1),
    n_certs_out("certs out", "C", 1)
  {}

  bool existing_graph;
  app_state & app;
  u64 max_node;    // next unused node id

  // progress tickers
  ticker n_nodes;
  ticker n_certs_in;
  ticker n_revs_out;
  ticker n_certs_out;

  // old-world identity of each node (manifest side)
  std::map<u64,manifest_id> node_to_old_man;
  std::map<manifest_id,u64> old_man_to_node;

  // old-world identity of each node (revision side; only populated when
  // rebuilding from an existing revision graph)
  std::map<u64,revision_id> node_to_old_rev;
  std::map<revision_id,u64> old_rev_to_node;

  // rebuilt revision id per node
  std::map<u64,revision_id> node_to_new_rev;
  // certs harvested from the old world, to be re-issued on the new revs
  std::multimap<u64, std::pair<cert_name, cert_value> > certs;
  // child node -> parent node edges
  std::multimap<u64, u64> ancestry;
  // all branch names seen, used for epoch regeneration in write_certs()
  std::set<std::string> branches;

  void add_node_ancestry(u64 child, u64 parent);
  void write_certs();
  void kluge_for_3_ancestor_nodes();
  void rebuild_ancestry();
  void get_node_manifest(u64 node, manifest_id & man);
  u64 add_node_for_old_manifest(manifest_id const & man);
  u64 add_node_for_old_revision(revision_id const & rev);
  revision_id construct_revision_from_ancestry(u64 child);
};
1116
1117
1118void anc_graph::add_node_ancestry(u64 child, u64 parent)
1119{
1120 L(F("noting ancestry from child %d -> parent %d\n") % child % parent);
1121 ancestry.insert(std::make_pair(child, parent));
1122}
1123
1124void anc_graph::get_node_manifest(u64 node, manifest_id & man)
1125{
1126 std::map<u64,manifest_id>::const_iterator i = node_to_old_man.find(node);
1127 I(i != node_to_old_man.end());
1128 man = i->second;
1129}
1130
// Re-issue the harvested certs against the rebuilt revisions.  Only the
// whitelisted cert names below are carried over; any other cert recorded
// in 'certs' is silently dropped.  Also resets every seen branch's epoch
// to a fresh random value, since the rebuilt history invalidates old
// epochs.
void anc_graph::write_certs()
{
  // Whitelist of cert names that survive the rebuild.
  std::set<cert_name> cnames;
  cnames.insert(cert_name(branch_cert_name));
  cnames.insert(cert_name(date_cert_name));
  cnames.insert(cert_name(author_cert_name));
  cnames.insert(cert_name(tag_cert_name));
  cnames.insert(cert_name(changelog_cert_name));
  cnames.insert(cert_name(comment_cert_name));
  cnames.insert(cert_name(testresult_cert_name));


  {
    // regenerate epochs on all branches to random states
    CryptoPP::AutoSeededRandomPool prng;

    for (std::set<std::string>::const_iterator i = branches.begin(); i != branches.end(); ++i)
      {
        char buf[constants::epochlen_bytes];
        prng.GenerateBlock(reinterpret_cast<byte *>(buf), constants::epochlen_bytes);
        hexenc<data> hexdata;
        encode_hexenc(data(std::string(buf, buf + constants::epochlen_bytes)), hexdata);
        epoch_data new_epoch(hexdata);
        L(F("setting epoch for %s to %s\n") % *i % new_epoch);
        app.db.set_epoch(cert_value(*i), new_epoch);
      }
  }


  typedef std::multimap<u64, std::pair<cert_name, cert_value> >::const_iterator ci;

  for (std::map<u64,revision_id>::const_iterator i = node_to_new_rev.begin();
       i != node_to_new_rev.end(); ++i)
    {
      revision_id rev(i->second);

      // All certs harvested for this node.
      std::pair<ci,ci> range = certs.equal_range(i->first);

      for (ci j = range.first; j != range.second; ++j)
        {
          cert_name name(j->second.first);
          cert_value val(j->second.second);

          // Drop certs whose name is not on the whitelist.
          if (cnames.find(name) == cnames.end())
            continue;

          cert new_cert;
          make_simple_cert(rev.inner(), name, val, app, new_cert);
          revision<cert> rcert(new_cert);
          // Avoid writing duplicates on repeated rebuilds.
          if (! app.db.revision_cert_exists(rcert))
            {
              ++n_certs_out;
              app.db.put_revision_cert(rcert);
            }
        }
    }
}
1188
void
anc_graph::kluge_for_3_ancestor_nodes()
{
  // This method is, as the name suggests, a kluge.  It exists because in the
  // 0.17 timeframe, monotone's ancestry graph has several nodes with 3
  // parents.  This isn't, in principle, necessarily a bad thing; having 3
  // parents is reasonably well defined, I don't know of much code that is
  // dependent on revisions having only 2 parents, etc.  But it is a very
  // weird thing, that we would never under any circumstances create today,
  // and it only exists as a side-effect of the pre-changeset days.  In the
  // future we may decide to allow 3+-parent revisions; we may decide to
  // disallow it.  Right now, I'd rather keep options open.
  // We remove only edges that are "redundant" (i.e., already weird...).
  // These are also something that we currently refuse to produce -- when a
  // node has more than one parent, and one parent is an ancestor of another.
  // These edges, again, only exist because of weirdnesses in the
  // pre-changeset days, and are not particularly meaningful.  Again, we may
  // decide in the future to use them for some things, but...
  // FIXME: remove this method eventually, since it is (mildly) destructive on
  // history, and isn't really doing anything that necessarily needs to happen
  // anyway.
  P(F("scanning for nodes with 3+ parents\n"));
  // Collect every child node that has more than two parent edges.
  std::set<u64> manyparents;
  for (std::multimap<u64, u64>::const_iterator i = ancestry.begin();
       i != ancestry.end(); ++i)
    {
      if (ancestry.count(i->first) > 2)
        manyparents.insert(i->first);
    }
  for (std::set<u64>::const_iterator i = manyparents.begin();
       i != manyparents.end(); ++i)
    {
      // Compute, for node *i, the set of all proper ancestors of its
      // parents ("indirect_ancestors") via an explicit-stack traversal.
      std::set<u64> indirect_ancestors;
      std::set<u64> parents;
      std::vector<u64> to_examine;
      for (std::multimap<u64, u64>::const_iterator j = ancestry.lower_bound(*i);
           j != ancestry.upper_bound(*i); ++j)
        {
          to_examine.push_back(j->second);
          parents.insert(j->second);
        }
      I(!to_examine.empty());
      while (!to_examine.empty())
        {
          u64 current = to_examine.back();
          to_examine.pop_back();
          for (std::multimap<u64, u64>::const_iterator j = ancestry.lower_bound(current);
               j != ancestry.upper_bound(current); ++j)
            {
              if (indirect_ancestors.find(j->second) == indirect_ancestors.end())
                {
                  to_examine.push_back(j->second);
                  indirect_ancestors.insert(j->second);
                }
            }
        }
      // Any parent that is also an (indirect) ancestor of another parent
      // is redundant: drop exactly one matching edge for it.
      size_t killed = 0;
      for (std::set<u64>::const_iterator p = parents.begin();
           p != parents.end(); ++p)
        {
          if (indirect_ancestors.find(*p) != indirect_ancestors.end())
            {
              P(F("optimizing out redundant edge %i -> %i\n") % (*p) % (*i));
              // sometimes I hate STL.  or at least my incompetence at using it.
              size_t old_size = ancestry.size();
              for (std::multimap<u64, u64>::iterator e = ancestry.lower_bound(*i);
                   e != ancestry.upper_bound(*i); ++e)
                {
                  I(e->first == *i);
                  if (e->second == *p)
                    {
                      // erase() invalidates 'e'; the immediate break makes
                      // this safe -- do not continue iterating afterwards.
                      ancestry.erase(e);
                      break;
                    }
                }
              I(old_size - 1 == ancestry.size());
              ++killed;
            }
        }
      // Sanity: the node must keep at least one parent edge.
      I(killed < parents.size());
      I(ancestry.find(*i) != ancestry.end());
    }
}
1272
1273void
1274anc_graph::rebuild_ancestry()
1275{
1276 kluge_for_3_ancestor_nodes();
1277
1278 P(F("rebuilding %d nodes\n") % max_node);
1279 {
1280 transaction_guard guard(app.db);
1281 if (existing_graph)
1282 app.db.delete_existing_revs_and_certs();
1283
1284 std::set<u64> parents, children, heads;
1285 for (std::multimap<u64, u64>::const_iterator i = ancestry.begin();
1286 i != ancestry.end(); ++i)
1287 {
1288 children.insert(i->first);
1289 parents.insert(i->second);
1290 }
1291 set_difference(children.begin(), children.end(),
1292 parents.begin(), parents.end(),
1293 std::inserter(heads, heads.begin()));
1294
1295 // FIXME: should do a depth-first traversal here, or something like,
1296 // instead of being recursive.
1297 for (std::set<u64>::const_iterator i = heads.begin();
1298 i != heads.end(); ++i)
1299 {
1300 construct_revision_from_ancestry(*i);
1301 }
1302 write_certs();
1303 guard.commit();
1304 }
1305}
1306
1307u64 anc_graph::add_node_for_old_manifest(manifest_id const & man)
1308{
1309 I(!existing_graph);
1310 u64 node = 0;
1311 if (old_man_to_node.find(man) == old_man_to_node.end())
1312 {
1313 node = max_node++;
1314 ++n_nodes;
1315 L(F("node %d = manifest %s\n") % node % man);
1316 old_man_to_node.insert(std::make_pair(man, node));
1317 node_to_old_man.insert(std::make_pair(node, man));
1318
1319 // load certs
1320 std::vector< manifest<cert> > mcerts;
1321 app.db.get_manifest_certs(man, mcerts);
1322 erase_bogus_certs(mcerts, app);
1323 for(std::vector< manifest<cert> >::const_iterator i = mcerts.begin();
1324 i != mcerts.end(); ++i)
1325 {
1326 L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node);
1327 cert_value tv;
1328 decode_base64(i->inner().value, tv);
1329 ++n_certs_in;
1330 certs.insert(std::make_pair(node,
1331 std::make_pair(i->inner().name, tv)));
1332 }
1333 }
1334 else
1335 {
1336 node = old_man_to_node[man];
1337 }
1338 return node;
1339}
1340
1341u64 anc_graph::add_node_for_old_revision(revision_id const & rev)
1342{
1343 I(existing_graph);
1344 I(!null_id(rev));
1345 u64 node = 0;
1346 if (old_rev_to_node.find(rev) == old_rev_to_node.end())
1347 {
1348 node = max_node++;
1349 ++n_nodes;
1350
1351 manifest_id man;
1352 app.db.get_revision_manifest(rev, man);
1353
1354 L(F("node %d = revision %s = manifest %s\n") % node % rev % man);
1355 old_rev_to_node.insert(std::make_pair(rev, node));
1356 node_to_old_rev.insert(std::make_pair(node, rev));
1357 node_to_old_man.insert(std::make_pair(node, man));
1358
1359 // load certs
1360 std::vector< revision<cert> > rcerts;
1361 app.db.get_revision_certs(rev, rcerts);
1362 erase_bogus_certs(rcerts, app);
1363 for(std::vector< revision<cert> >::const_iterator i = rcerts.begin();
1364 i != rcerts.end(); ++i)
1365 {
1366 L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node);
1367 cert_value tv;
1368 decode_base64(i->inner().value, tv);
1369 ++n_certs_in;
1370 certs.insert(std::make_pair(node,
1371 std::make_pair(i->inner().name, tv)));
1372
1373 if (i->inner().name == branch_cert_name)
1374 branches.insert(tv());
1375 }
1376 }
1377 else
1378 {
1379 node = old_rev_to_node[rev];
1380 }
1381
1382 return node;
1383}
1384
// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will
// overflow the stack on large histories.
//
// Build (and store in the db) the new revision for node 'child', recursing
// first into any unprocessed parents.  Returns the new revision id; results
// are memoized in node_to_new_rev so shared ancestry is processed once.
revision_id
anc_graph::construct_revision_from_ancestry(u64 child)
{
  L(F("processing node %d\n") % child);

  // Memoized: this node was already rebuilt on an earlier path.
  if (node_to_new_rev.find(child) != node_to_new_rev.end())
    {
      L(F("node %d already processed, skipping\n") % child);
      return node_to_new_rev.find(child)->second;
    }

  manifest_id child_man;
  get_node_manifest(child, child_man);

  revision_set rev;
  rev.new_manifest = child_man;

  // All parent edges of this node.
  typedef std::multimap<u64, u64>::const_iterator ci;
  std::pair<ci,ci> range = ancestry.equal_range(child);
  if (range.first == range.second)
    {
      // Case 1: no parents.  Synthesize a single edge from the null
      // revision/manifest, whose changeset adds every file in the child.
      L(F("node %d is a root node\n") % child);
      revision_id null_rid;
      manifest_id null_mid;
      boost::shared_ptr<change_set> cs(new change_set());
      std::set<file_path> blah;
      analyze_manifest_changes(app, null_mid, child_man, blah, *cs);
      rev.edges.insert(std::make_pair(null_rid,
                                      std::make_pair(null_mid, cs)));
    }
  else if (std::distance(range.first, range.second) == 1)
    {
      // Case 2: exactly one parent.  Recurse to rebuild it, then diff the
      // two manifests to form the edge changeset.
      I(child == range.first->first);
      u64 parent = range.first->second;
      if (node_to_new_rev.find(parent) == node_to_new_rev.end())
        construct_revision_from_ancestry(parent);
      revision_id parent_rid = node_to_new_rev.find(parent)->second;
      L(F("parent node %d = revision %s\n") % parent % parent_rid);
      manifest_id parent_man;
      get_node_manifest(parent, parent_man);
      boost::shared_ptr<change_set> cs(new change_set());
      std::set<file_path> need_killing_files;
      analyze_manifest_changes(app, parent_man, child_man, need_killing_files,
                               *cs);
      rev.edges.insert(std::make_pair(parent_rid,
                                      std::make_pair(parent_man, cs)));
    }
  else
    {
      // Case 3: a merge node.
      //
      // this section has lots and lots of rigmorale, in order to handle the
      // following case: A -> B -> D, A -> C -> D.  File "foo" exists in
      // manifests A, C, and D only.  (I.e., it was deleted and then re-added
      // along the B path, and left alone along the C path.)  The problem here
      // is that we have to synthesize a delete/add pair on the C -> D edge,
      // to make sure that the A -> D changeset is path invariant -- we have
      // to make sure that no matter how you go from A to D, we will learn
      // that A's "foo" and D's "foo" are actually logically distinct files.
      //
      // In order to do this, before we generate the changeset for any merged
      // revision, we calculate the changeset through from the parents's
      // common ancestor to the _other_ parent, which will tell us which files
      // have been deleted since then.  We also calculate the changeset
      // through from the parents's common ancestor to the current parent,
      // which tells us which files have already been deleted on our side, and
      // so don't need to be deleted again.
      //
      // Finally, we feed the list of deleted files to analyze_manifest, which
      // uses that information to break file ancestries when necessary.
      //
      // we only know how to preserve file ids when there are exactly two
      // parents, so assert that there are.
      std::vector<u64> parents, others;
      {
        // Unpack exactly two parent edges; 'others' holds, for each parent,
        // the opposite parent of the same merge.
        u64 left_p, right_p;
        ci i = range.first;
        I(child == i->first);
        left_p = i->second;
        ++i;
        I(child == i->first);
        right_p = i->second;
        ++i;
        I(i == range.second);
        parents.push_back(left_p);
        others.push_back(right_p);
        parents.push_back(right_p);
        others.push_back(left_p);
      }
      // make sure our parents are all saved.
      for (std::vector<u64>::const_iterator i = parents.begin(); i != parents.end(); ++i)
        {
          if (node_to_new_rev.find(*i) == node_to_new_rev.end())
            construct_revision_from_ancestry(*i);
          I(node_to_new_rev.find(*i) != node_to_new_rev.end());
        }
      // actually process the two nodes
      for (int i = 0; i != 2; ++i)
        {
          u64 parent = idx(parents, i);
          u64 other_parent = idx(others, i);
          L(F("processing edge from child %d -> parent %d\n") % child % parent);

          revision_id parent_rid = node_to_new_rev.find(parent)->second;
          revision_id other_parent_rid = node_to_new_rev.find(other_parent)->second;
          // this is stupidly inefficient, in that we do this whole expensive
          // changeset finding thing twice in a row.  oh well.
          revision_id lca;
          std::set<file_path> need_killing_files;
          if (find_least_common_ancestor(parent_rid, other_parent_rid, lca, app))
            {
              // Files deleted on the other side but not on ours must have
              // their ancestry broken on this edge (see comment above).
              change_set parent_cs, other_parent_cs;
              calculate_composite_change_set(lca, other_parent_rid, app, other_parent_cs);
              calculate_composite_change_set(lca, parent_rid, app, parent_cs);
              std::set_difference(other_parent_cs.rearrangement.deleted_files.begin(),
                                  other_parent_cs.rearrangement.deleted_files.end(),
                                  parent_cs.rearrangement.deleted_files.begin(),
                                  parent_cs.rearrangement.deleted_files.end(),
                                  std::inserter(need_killing_files,
                                                need_killing_files.begin()));
            }

          L(F("parent node %d = revision %s\n") % parent % parent_rid);
          manifest_id parent_man;
          get_node_manifest(parent, parent_man);
          boost::shared_ptr<change_set> cs(new change_set());
          analyze_manifest_changes(app, parent_man, child_man, need_killing_files,
                                   *cs);
          rev.edges.insert(std::make_pair(parent_rid,
                                          std::make_pair(parent_man, cs)));
        }
    }

  // Hash the assembled revision, memoize the mapping, and store it unless
  // an identical revision already exists.
  revision_id rid;
  calculate_ident(rev, rid);
  node_to_new_rev.insert(std::make_pair(child, rid));

  if (!app.db.revision_exists (rid))
    {
      L(F("mapped node %d to revision %s\n") % child % rid);
      app.db.put_revision(rid, rev);
      ++n_revs_out;
    }
  else
    {
      L(F("skipping already existing revision %s\n") % rid);
    }

  return rid;
}
1535
1536void
1537build_changesets_from_existing_revs(app_state & app)
1538{
1539 global_sanity.set_relaxed(true);
1540 anc_graph graph(true, app);
1541
1542 P(F("rebuilding revision graph from existing graph\n"));
1543 std::multimap<revision_id, revision_id> existing_graph;
1544
1545 {
1546 // early short-circuit to avoid failure after lots of work
1547 rsa_keypair_id key;
1548 N(guess_default_key(key,app),
1549 F("no unique private key for cert construction"));
1550 require_password(key, app);
1551 }
1552
1553 app.db.get_revision_ancestry(existing_graph);
1554 for (std::multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
1555 i != existing_graph.end(); ++i)
1556 {
1557 if (!null_id(i->first))
1558 {
1559 u64 parent_node = graph.add_node_for_old_revision(i->first);
1560 u64 child_node = graph.add_node_for_old_revision(i->second);
1561 graph.add_node_ancestry(child_node, parent_node);
1562 }
1563 }
1564
1565 global_sanity.set_relaxed(false);
1566 graph.rebuild_ancestry();
1567}
1568
1569
1570void
1571build_changesets_from_manifest_ancestry(app_state & app)
1572{
1573 anc_graph graph(false, app);
1574
1575 P(F("rebuilding revision graph from manifest certs\n"));
1576
1577 {
1578 // early short-circuit to avoid failure after lots of work
1579 rsa_keypair_id key;
1580 N(guess_default_key(key,app),
1581 F("no unique private key for cert construction"));
1582 require_password(key, app);
1583 }
1584
1585 std::vector< manifest<cert> > tmp;
1586 app.db.get_manifest_certs(cert_name("ancestor"), tmp);
1587 erase_bogus_certs(tmp, app);
1588
1589 for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
1590 i != tmp.end(); ++i)
1591 {
1592 cert_value tv;
1593 decode_base64(i->inner().value, tv);
1594 manifest_id child, parent;
1595 child = i->inner().ident;
1596 parent = hexenc<id>(tv());
1597
1598 u64 parent_node = graph.add_node_for_old_manifest(parent);
1599 u64 child_node = graph.add_node_for_old_manifest(child);
1600 graph.add_node_ancestry(child_node, parent_node);
1601 }
1602
1603 graph.rebuild_ancestry();
1604}
1605
1606
// i/o stuff

// Basename used when a revision is written out as a file.
std::string revision_file_name("revision");

namespace
{
  // Symbol names used in the basic_io text representation of a revision.
  namespace syms
  {
    std::string const old_revision("old_revision");
    std::string const new_manifest("new_manifest");
    std::string const old_manifest("old_manifest");
  }
}
1620
1621
1622void
1623print_edge(basic_io::printer & printer,
1624 edge_entry const & e)
1625{
1626 basic_io::stanza st;
1627 st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
1628 st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
1629 printer.print_stanza(st);
1630 print_change_set(printer, edge_changes(e));
1631}
1632
1633
1634void
1635print_revision(basic_io::printer & printer,
1636 revision_set const & rev)
1637{
1638 rev.check_sane();
1639 basic_io::stanza st;
1640 st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
1641 printer.print_stanza(st);
1642 for (edge_map::const_iterator edge = rev.edges.begin();
1643 edge != rev.edges.end(); ++edge)
1644 print_edge(printer, *edge);
1645}
1646
1647
1648void
1649parse_edge(basic_io::parser & parser,
1650 edge_map & es)
1651{
1652 boost::shared_ptr<change_set> cs(new change_set());
1653 manifest_id old_man;
1654 revision_id old_rev;
1655 std::string tmp;
1656
1657 parser.esym(syms::old_revision);
1658 parser.hex(tmp);
1659 old_rev = revision_id(tmp);
1660
1661 parser.esym(syms::old_manifest);
1662 parser.hex(tmp);
1663 old_man = manifest_id(tmp);
1664
1665 parse_change_set(parser, *cs);
1666
1667 es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
1668}
1669
1670
1671void
1672parse_revision(basic_io::parser & parser,
1673 revision_set & rev)
1674{
1675 rev.edges.clear();
1676 std::string tmp;
1677 parser.esym(syms::new_manifest);
1678 parser.hex(tmp);
1679 rev.new_manifest = manifest_id(tmp);
1680 while (parser.symp(syms::old_revision))
1681 parse_edge(parser, rev.edges);
1682 rev.check_sane();
1683}
1684
1685void
1686read_revision_set(data const & dat,
1687 revision_set & rev)
1688{
1689 std::istringstream iss(dat());
1690 basic_io::input_source src(iss, "revision");
1691 basic_io::tokenizer tok(src);
1692 basic_io::parser pars(tok);
1693 parse_revision(pars, rev);
1694 I(src.lookahead == EOF);
1695 rev.check_sane();
1696}
1697
1698void
1699read_revision_set(revision_data const & dat,
1700 revision_set & rev)
1701{
1702 data unpacked;
1703 unpack(dat.inner(), unpacked);
1704 read_revision_set(unpacked, rev);
1705 rev.check_sane();
1706}
1707
1708void
1709write_revision_set(revision_set const & rev,
1710 data & dat)
1711{
1712 rev.check_sane();
1713 std::ostringstream oss;
1714 basic_io::printer pr(oss);
1715 print_revision(pr, rev);
1716 dat = data(oss.str());
1717}
1718
1719void
1720write_revision_set(revision_set const & rev,
1721 revision_data & dat)
1722{
1723 rev.check_sane();
1724 data d;
1725 write_revision_set(rev, d);
1726 base64< gzip<data> > packed;
1727 pack(d, packed);
1728 dat = revision_data(packed);
1729}
1730
1731#ifdef BUILD_UNIT_TESTS
1732#include "unit_tests.hh"
1733#include "sanity.hh"
1734
// Placeholder: no revision unit tests are implemented yet.
static void
revision_test()
{
}
1739
// Register this file's unit tests with the Boost test suite.
void
add_revision_tests(test_suite * suite)
{
  I(suite);
  suite->add(BOOST_TEST_CASE(&revision_test));
}
1746
1747
1748#endif // BUILD_UNIT_TESTS

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status