monotone

monotone Mtn Source Tree

Root/revision.cc

1// -*- mode: C++; c-file-style: "gnu"; indent-tabs-mode: nil -*-
2// copyright (C) 2004 graydon hoare <graydon@pobox.com>
3// all rights reserved.
4// licensed to the public under the terms of the GNU GPL (>= 2)
5// see the file COPYING for details
6
7#include <cctype>
8#include <cstdlib>
9#include <iostream>
10#include <map>
11#include <queue>
12#include <set>
13#include <sstream>
14#include <stack>
15#include <string>
16#include <iterator>
17#include <functional>
18#include <list>
19
20#include <boost/lexical_cast.hpp>
21#include <boost/dynamic_bitset.hpp>
22#include <boost/shared_ptr.hpp>
23
24#include "botan/botan.h"
25
26#include "basic_io.hh"
27#include "change_set.hh"
28#include "constants.hh"
29#include "numeric_vocab.hh"
30#include "revision.hh"
31#include "sanity.hh"
32#include "transforms.hh"
33#include "ui.hh"
34#include "vocab.hh"
35#include "keys.hh"
36
37void revision_set::check_sane() const
38{
39 I(!null_id(new_manifest));
40
41 manifest_map fragment;
42 for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
43 {
44 change_set const & cs = edge_changes(i);
45 cs.check_sane();
46 if (!global_sanity.relaxed)
47 {
48 // null old revisions come with null old manifests
49 I(!null_id(edge_old_revision(i)) || null_id(edge_old_manifest(i)));
50 }
51 for (change_set::delta_map::const_iterator j = cs.deltas.begin(); j != cs.deltas.end(); ++j)
52 {
53 manifest_map::const_iterator k = fragment.find(delta_entry_path(j));
54 if (k == fragment.end())
55 fragment.insert(std::make_pair(delta_entry_path(j),
56 delta_entry_dst(j)));
57 else
58 {
59 if (!global_sanity.relaxed)
60 {
61 I(delta_entry_dst(j) == manifest_entry_id(k));
62 }
63 }
64 }
65 }
66}
67
68bool
69revision_set::is_merge_node() const
70{
71 return edges.size() > 1;
72}
73
74revision_set::revision_set(revision_set const & other)
75{
76 /* behave like normal constructor if other is empty */
77 if (null_id(other.new_manifest) && other.edges.empty()) return;
78 other.check_sane();
79 new_manifest = other.new_manifest;
80 edges = other.edges;
81}
82
83revision_set const &
84revision_set::operator=(revision_set const & other)
85{
86 other.check_sane();
87 new_manifest = other.new_manifest;
88 edges = other.edges;
89 return *this;
90}
91
92
93// Traces history back 'depth' levels from 'child_id', ensuring that
94// historical information is consistent within this subgraph.
95// The child must be in the database already.
96//
97// "Consistent" currently means that we compose manifests along every path (of
98// any length) that terminates at the child, and for each one check that paths
99// that should be the same in fact are the same, and that the calculated
100// change sets can be applied to the old manifests to create the new
101// manifest.
102//
103// We also make a special check for merge nodes, where if the previous
104// paragraph wasn't enough to get us back to a common ancestor, we also search
105// all the way up to a common ancestor and make the same check, because that's
106// the invariant that merge is actually required to preserve.
107//
108// NB: While this function has some invariants in it itself, a lot of its
109// purpose is just to exercise all the invariants inside change_set.cc. So
110// don't remove those invariants. (As if you needed another reason...)
111void
112check_sane_history(revision_id const & child_id,
113 int depth,
114 app_state & app)
115{
116 // We are, unfortunately, still quite slow. So we want to give at least a
117 // little feedback. Let's print exactly one warning, on the _second_ time
118 // we are called within one run -- just checking one revision isn't too
119 // slow, so no need to print anything on "commit", but usually if we're
120 // checking 2 revisions we're checking a lot more.
121 // FIXME: make sanity checking faster, so we can remove this kluge
122 // altogether...
123 static int num_checked = 0;
124 ++num_checked;
125 if (num_checked == 2)
126 P(F("verifying new revisions (this may take a while)\n"));
127
128 L(F("Verifying revision %s has sane history (to depth %i)\n")
129 % child_id % depth);
130
131 typedef boost::shared_ptr<change_set> shared_cs;
132 // (ancestor, change_set from ancestor to child)
133 std::map<revision_id, shared_cs> changesets;
134
135 manifest_id m_child_id;
136 app.db.get_revision_manifest(child_id, m_child_id);
137 manifest_map m_child;
138 app.db.get_manifest(m_child_id, m_child);
139
140 std::set<revision_id> frontier;
141 frontier.insert(child_id);
142
143 while (depth-- > 0)
144 {
145 std::set<revision_id> next_frontier;
146
147 for (std::set<revision_id>::const_iterator i = frontier.begin();
148 i != frontier.end();
149 ++i)
150 {
151 revision_id current_id = *i;
152 revision_set current;
153 app.db.get_revision(current_id, current);
154 // and the parents's manifests to the manifests
155 // and the change_set's to the parents to the changesets
156 for (edge_map::const_iterator j = current.edges.begin();
157 j != current.edges.end();
158 ++j)
159 {
160 revision_id old_id = edge_old_revision(j);
161 manifest_id m_old_id = edge_old_manifest(j);
162 change_set old_to_current_changes = edge_changes(j);
163 if (!null_id(old_id))
164 next_frontier.insert(old_id);
165
166 L(F("Examining %s -> %s\n") % old_id % child_id);
167
168 // build the change_set
169 // if
170 shared_cs old_to_child_changes_p = shared_cs(new change_set);
171 if (current_id == child_id)
172 *old_to_child_changes_p = old_to_current_changes;
173 else
174 {
175 shared_cs current_to_child_changes_p;
176 I(changesets.find(current_id) != changesets.end());
177 current_to_child_changes_p = changesets.find(current_id)->second;
178 concatenate_change_sets(old_to_current_changes,
179 *current_to_child_changes_p,
180 *old_to_child_changes_p);
181 }
182 MM(*old_to_child_changes_p);
183
184 // we have the change_set; now, is it one we've seen before?
185 if (changesets.find(old_id) != changesets.end())
186 {
187 // If it is, then make sure the paths agree on the
188 // changeset.
189 MM(*changesets.find(old_id)->second);
190 I(*changesets.find(old_id)->second == *old_to_child_changes_p);
191 }
192 else
193 {
194 // If not, this is the first time we've seen this.
195 // So store it in the map for later reference:
196 changesets.insert(std::make_pair(old_id, old_to_child_changes_p));
197 // and check that it works:
198
199 manifest_map purported_m_child;
200 // The null revision has empty manifest, which is the
201 // default.
202 if (!null_id(old_id))
203 app.db.get_manifest(m_old_id, purported_m_child);
204 apply_change_set(*old_to_child_changes_p, purported_m_child);
205 MM(purported_m_child);
206 MM(m_child);
207 I(purported_m_child == m_child);
208 }
209 }
210 }
211 frontier = next_frontier;
212 }
213
214 // Finally, there's a danger that if we have a long divergence, then after a
215 // merge, the common ancestor will be far back enough that the above
216 // depth-limited search won't have any idea whether the ancestry invariants
217 // are actually preserved. So do an additional check on merge revisions, to
218 // make sure that the paths to both ways going back to their parents's
219 // common ancestor give the same change_set (i.e., this is a valid merge at
220 // all).
221 if (!global_sanity.relaxed)
222 {
223 revision_set child_rev;
224 app.db.get_revision(child_id, child_rev);
225 // Nothing inherently impossible about having more than 2 parents, but if
226 // you come up with some case where it should be possible then you'll
227 // have to also adjust the code below to figure out what "common
228 // ancestor" means.
229 I(child_rev.edges.size() <= 2);
230 if (child_rev.edges.size() != 2)
231 return;
232 edge_map::const_iterator i = child_rev.edges.begin();
233 revision_id parent_left = edge_old_revision(i);
234 change_set left_edge = edge_changes(i);
235 ++i;
236 revision_id parent_right = edge_old_revision(i);
237 change_set right_edge = edge_changes(i);
238 ++i;
239 I(i == child_rev.edges.end());
240 revision_id lca;
241 if (!find_least_common_ancestor(parent_left, parent_right, lca, app))
242 {
243 L(F("%s and %s have no common ancestor, so done\n")
244 % parent_left % parent_right);
245 return;
246 }
247 if (changesets.find(lca) != changesets.end())
248 {
249 L(F("already checked common ancestor, so done\n"));
250 return;
251 }
252 L(F("%s is a merge; verifying paths to common ancestor %s are sane\n")
253 % child_id % lca);
254 // we have a merge node, with an lca sufficiently far back in history
255 // that we haven't yet figured out whether this is a valid merge or
256 // not. so find out.
257 change_set cs_parent_left, cs_parent_right, cs_left, cs_right;
258 MM(cs_parent_left);
259 MM(cs_parent_right);
260 MM(cs_left);
261 MM(cs_right);
262 calculate_composite_change_set(lca, parent_left, app, cs_parent_left);
263 calculate_composite_change_set(lca, parent_right, app, cs_parent_right);
264 concatenate_change_sets(cs_parent_left, left_edge, cs_left);
265 concatenate_change_sets(cs_parent_right, right_edge, cs_right);
266 I(cs_left == cs_right);
267 }
268}
269
270// calculating least common ancestors is a delicate thing.
271//
272// it turns out that we cannot choose the simple "least common ancestor"
273// for purposes of a merge, because it is possible that there are two
274// equally reachable common ancestors, and this produces ambiguity in the
275// merge. the result -- in a pathological case -- is silently accepting one
276// set of edits while discarding another; not exactly what you want a
277// version control tool to do.
278//
279// a conservative approximation is what we'll call a "subgraph recurring"
280// LCA algorithm. this is somewhat like locating the least common dominator
281// node, but not quite. it is actually just a vanilla LCA search, except
282// that any time there's a fork (a historical merge looks like a fork from
283// our perspective, working backwards from children to parents) it reduces
284// the fork to a common parent via a sequence of pairwise recursive calls
285// to itself before proceeding. this will always resolve to a common parent
286// with no ambiguity, unless it falls off the root of the graph.
287//
288// unfortunately the subgraph recurring algorithm sometimes goes too far
289// back in history -- for example if there is an unambiguous propagate from
290// one branch to another, the entire subgraph preceeding the propagate on
291// the recipient branch is elided, since it is a merge.
292//
293// our current hypothesis is that the *exact* condition we're looking for,
294// when doing a merge, is the least node which dominates one side of the
295// merge and is an ancestor of the other.
296
297typedef unsigned long ctx;
298typedef boost::dynamic_bitset<> bitmap;
299typedef boost::shared_ptr<bitmap> shared_bitmap;
300
301static void
302ensure_parents_loaded(ctx child,
303 std::map<ctx, shared_bitmap> & parents,
304 interner<ctx> & intern,
305 app_state & app)
306{
307 if (parents.find(child) != parents.end())
308 return;
309
310 L(F("loading parents for node %d\n") % child);
311
312 std::set<revision_id> imm_parents;
313 app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents);
314
315 // The null revision is not a parent for purposes of finding common
316 // ancestors.
317 for (std::set<revision_id>::iterator p = imm_parents.begin();
318 p != imm_parents.end(); )
319 {
320 if (null_id(*p))
321 imm_parents.erase(p++);
322 else
323 ++p;
324 }
325
326 shared_bitmap bits = shared_bitmap(new bitmap(parents.size()));
327
328 for (std::set<revision_id>::const_iterator p = imm_parents.begin();
329 p != imm_parents.end(); ++p)
330 {
331 ctx pn = intern.intern(p->inner()());
332 L(F("parent %s -> node %d\n") % *p % pn);
333 if (pn >= bits->size())
334 bits->resize(pn+1);
335 bits->set(pn);
336 }
337
338 parents.insert(std::make_pair(child, bits));
339}
340
341static bool
342expand_dominators(std::map<ctx, shared_bitmap> & parents,
343 std::map<ctx, shared_bitmap> & dominators,
344 interner<ctx> & intern,
345 app_state & app)
346{
347 bool something_changed = false;
348 std::vector<ctx> nodes;
349
350 nodes.reserve(dominators.size());
351
352 // pass 1, pull out all the node numbers we're going to scan this time around
353 for (std::map<ctx, shared_bitmap>::reverse_iterator e = dominators.rbegin();
354 e != dominators.rend(); ++e)
355 nodes.push_back(e->first);
356
357 // pass 2, update any of the dominator entries we can
358 for (std::vector<ctx>::const_iterator n = nodes.begin();
359 n != nodes.end(); ++n)
360 {
361 shared_bitmap bits = dominators[*n];
362 bitmap saved(*bits);
363 if (bits->size() <= *n)
364 bits->resize(*n + 1);
365 bits->set(*n);
366
367 ensure_parents_loaded(*n, parents, intern, app);
368 shared_bitmap n_parents = parents[*n];
369
370 bitmap intersection(bits->size());
371
372 bool first = true;
373 for (unsigned long parent = 0;
374 parent != n_parents->size(); ++parent)
375 {
376 if (! n_parents->test(parent))
377 continue;
378
379 if (dominators.find(parent) == dominators.end())
380 dominators.insert(std::make_pair(parent,
381 shared_bitmap(new bitmap())));
382 shared_bitmap pbits = dominators[parent];
383
384 if (intersection.size() > pbits->size())
385 pbits->resize(intersection.size());
386
387 if (pbits->size() > intersection.size())
388 intersection.resize(pbits->size());
389
390 if (first)
391 {
392 intersection = (*pbits);
393 first = false;
394 }
395 else
396 intersection &= (*pbits);
397 }
398
399 if (intersection.size() > bits->size())
400 bits->resize(intersection.size());
401
402 if (bits->size() > intersection.size())
403 intersection.resize(bits->size());
404 (*bits) |= intersection;
405 if (*bits != saved)
406 something_changed = true;
407 }
408 return something_changed;
409}
410
411
412static bool
413expand_ancestors(std::map<ctx, shared_bitmap> & parents,
414 std::map<ctx, shared_bitmap> & ancestors,
415 interner<ctx> & intern,
416 app_state & app)
417{
418 bool something_changed = false;
419 std::vector<ctx> nodes;
420
421 nodes.reserve(ancestors.size());
422
423 // pass 1, pull out all the node numbers we're going to scan this time around
424 for (std::map<ctx, shared_bitmap>::reverse_iterator e = ancestors.rbegin();
425 e != ancestors.rend(); ++e)
426 nodes.push_back(e->first);
427
428 // pass 2, update any of the ancestor entries we can
429 for (std::vector<ctx>::const_iterator n = nodes.begin(); n != nodes.end(); ++n)
430 {
431 shared_bitmap bits = ancestors[*n];
432 bitmap saved(*bits);
433 if (bits->size() <= *n)
434 bits->resize(*n + 1);
435 bits->set(*n);
436
437 ensure_parents_loaded(*n, parents, intern, app);
438 shared_bitmap n_parents = parents[*n];
439 for (ctx parent = 0; parent != n_parents->size(); ++parent)
440 {
441 if (! n_parents->test(parent))
442 continue;
443
444 if (bits->size() <= parent)
445 bits->resize(parent + 1);
446 bits->set(parent);
447
448 if (ancestors.find(parent) == ancestors.end())
449 ancestors.insert(make_pair(parent,
450 shared_bitmap(new bitmap())));
451 shared_bitmap pbits = ancestors[parent];
452
453 if (bits->size() > pbits->size())
454 pbits->resize(bits->size());
455
456 if (pbits->size() > bits->size())
457 bits->resize(pbits->size());
458
459 (*bits) |= (*pbits);
460 }
461 if (*bits != saved)
462 something_changed = true;
463 }
464 return something_changed;
465}
466
467static bool
468find_intersecting_node(bitmap & fst,
469 bitmap & snd,
470 interner<ctx> const & intern,
471 revision_id & anc)
472{
473
474 if (fst.size() > snd.size())
475 snd.resize(fst.size());
476 else if (snd.size() > fst.size())
477 fst.resize(snd.size());
478
479 bitmap intersection = fst & snd;
480 if (intersection.any())
481 {
482 L(F("found %d intersecting nodes\n") % intersection.count());
483 for (ctx i = 0; i < intersection.size(); ++i)
484 {
485 if (intersection.test(i))
486 {
487 anc = revision_id(intern.lookup(i));
488 return true;
489 }
490 }
491 }
492 return false;
493}
494
495// static void
496// dump_bitset_map(std::string const & hdr,
497// std::map< ctx, shared_bitmap > const & mm)
498// {
499// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size());
500// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin();
501// i != mm.end(); ++i)
502// {
503// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second)));
504// }
505// }
506
507bool
508find_common_ancestor_for_merge(revision_id const & left,
509 revision_id const & right,
510 revision_id & anc,
511 app_state & app)
512{
513 // Temporary workaround until we figure out how to clean up the whole
514 // ancestor selection mess:
515 if (app.use_lca)
516 return find_least_common_ancestor(left, right, anc, app);
517
518 interner<ctx> intern;
519 std::map< ctx, shared_bitmap >
520 parents, ancestors, dominators;
521
522 ctx ln = intern.intern(left.inner()());
523 ctx rn = intern.intern(right.inner()());
524
525 shared_bitmap lanc = shared_bitmap(new bitmap());
526 shared_bitmap ranc = shared_bitmap(new bitmap());
527 shared_bitmap ldom = shared_bitmap(new bitmap());
528 shared_bitmap rdom = shared_bitmap(new bitmap());
529
530 ancestors.insert(make_pair(ln, lanc));
531 ancestors.insert(make_pair(rn, ranc));
532 dominators.insert(make_pair(ln, ldom));
533 dominators.insert(make_pair(rn, rdom));
534
535 L(F("searching for common ancestor, left=%s right=%s\n") % left % right);
536
537 while (expand_ancestors(parents, ancestors, intern, app) |
538 expand_dominators(parents, dominators, intern, app))
539 {
540 L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") %
541 parents.size() % ancestors.size() % dominators.size());
542
543 if (find_intersecting_node(*lanc, *rdom, intern, anc))
544 {
545 L(F("found node %d, ancestor of left %s and dominating right %s\n")
546 % anc % left % right);
547 return true;
548 }
549
550 else if (find_intersecting_node(*ranc, *ldom, intern, anc))
551 {
552 L(F("found node %d, ancestor of right %s and dominating left %s\n")
553 % anc % right % left);
554 return true;
555 }
556 }
557// dump_bitset_map("ancestors", ancestors);
558// dump_bitset_map("dominators", dominators);
559// dump_bitset_map("parents", parents);
560 return false;
561}
562
563
564bool
565find_least_common_ancestor(revision_id const & left,
566 revision_id const & right,
567 revision_id & anc,
568 app_state & app)
569{
570 interner<ctx> intern;
571 std::map< ctx, shared_bitmap >
572 parents, ancestors;
573
574 if (left == right)
575 {
576 anc = left;
577 return true;
578 }
579
580 ctx ln = intern.intern(left.inner()());
581 ctx rn = intern.intern(right.inner()());
582
583 shared_bitmap lanc = shared_bitmap(new bitmap());
584 shared_bitmap ranc = shared_bitmap(new bitmap());
585
586 ancestors.insert(make_pair(ln, lanc));
587 ancestors.insert(make_pair(rn, ranc));
588
589 L(F("searching for least common ancestor, left=%s right=%s\n") % left % right);
590
591 while (expand_ancestors(parents, ancestors, intern, app))
592 {
593 L(F("least common ancestor scan [par=%d,anc=%d]\n") %
594 parents.size() % ancestors.size());
595
596 if (find_intersecting_node(*lanc, *ranc, intern, anc))
597 {
598 L(F("found node %d, ancestor of left %s and right %s\n")
599 % anc % left % right);
600 return true;
601 }
602 }
603// dump_bitset_map("ancestors", ancestors);
604// dump_bitset_map("parents", parents);
605 return false;
606}
607
608
609// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the
610// size of the entire revision graph.
611
612static bool
613is_ancestor(revision_id const & ancestor_id,
614 revision_id const & descendent_id,
615 std::multimap<revision_id, revision_id> const & graph)
616{
617
618 std::set<revision_id> visited;
619 std::queue<revision_id> queue;
620
621 queue.push(ancestor_id);
622
623 while (!queue.empty())
624 {
625 revision_id current_id = queue.front();
626 queue.pop();
627
628 if (current_id == descendent_id)
629 return true;
630 else
631 {
632 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
633 std::pair<gi, gi> children = graph.equal_range(current_id);
634 for (gi i = children.first; i != children.second; ++i)
635 {
636 if (visited.find(i->second) == visited.end())
637 {
638 queue.push(i->second);
639 visited.insert(i->second);
640 }
641 }
642 }
643 }
644 return false;
645}
646
647bool
648is_ancestor(revision_id const & ancestor_id,
649 revision_id const & descendent_id,
650 app_state & app)
651{
652 L(F("checking whether %s is an ancestor of %s\n") % ancestor_id % descendent_id);
653
654 std::multimap<revision_id, revision_id> graph;
655 app.db.get_revision_ancestry(graph);
656 return is_ancestor(ancestor_id, descendent_id, graph);
657}
658
659
660static void
661add_bitset_to_union(shared_bitmap src,
662 shared_bitmap dst)
663{
664 if (dst->size() > src->size())
665 src->resize(dst->size());
666 if (src->size() > dst->size())
667 dst->resize(src->size());
668 *dst |= *src;
669}
670
671
672static void
673calculate_ancestors_from_graph(interner<ctx> & intern,
674 revision_id const & init,
675 std::multimap<revision_id, revision_id> const & graph,
676 std::map< ctx, shared_bitmap > & ancestors,
677 shared_bitmap & total_union)
678{
679 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
680 std::stack<ctx> stk;
681
682 stk.push(intern.intern(init.inner()()));
683
684 while (! stk.empty())
685 {
686 ctx us = stk.top();
687 revision_id rev(hexenc<id>(intern.lookup(us)));
688
689 std::pair<gi,gi> parents = graph.equal_range(rev);
690 bool pushed = false;
691
692 // first make sure all parents are done
693 for (gi i = parents.first; i != parents.second; ++i)
694 {
695 ctx parent = intern.intern(i->second.inner()());
696 if (ancestors.find(parent) == ancestors.end())
697 {
698 stk.push(parent);
699 pushed = true;
700 break;
701 }
702 }
703
704 // if we pushed anything we stop now. we'll come back later when all
705 // the parents are done.
706 if (pushed)
707 continue;
708
709 shared_bitmap b = shared_bitmap(new bitmap());
710
711 for (gi i = parents.first; i != parents.second; ++i)
712 {
713 ctx parent = intern.intern(i->second.inner()());
714
715 // set all parents
716 if (b->size() <= parent)
717 b->resize(parent + 1);
718 b->set(parent);
719
720 // ensure all parents are loaded into the ancestor map
721 I(ancestors.find(parent) != ancestors.end());
722
723 // union them into our map
724 std::map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
725 I(j != ancestors.end());
726 add_bitset_to_union(j->second, b);
727 }
728
729 add_bitset_to_union(b, total_union);
730 ancestors.insert(std::make_pair(us, b));
731 stk.pop();
732 }
733}
734
735// this function actually toposorts the whole graph, and then filters by the
736// passed in set. if anyone ever needs to toposort the whole graph, then,
737// this function would be a good thing to generalize...
738void
739toposort(std::set<revision_id> const & revisions,
740 std::vector<revision_id> & sorted,
741 app_state & app)
742{
743 sorted.clear();
744 typedef std::multimap<revision_id, revision_id>::iterator gi;
745 typedef std::map<revision_id, int>::iterator pi;
746 std::multimap<revision_id, revision_id> graph;
747 app.db.get_revision_ancestry(graph);
748 std::set<revision_id> leaves;
749 app.db.get_revision_ids(leaves);
750 std::map<revision_id, int> pcount;
751 for (gi i = graph.begin(); i != graph.end(); ++i)
752 pcount.insert(std::make_pair(i->first, 0));
753 for (gi i = graph.begin(); i != graph.end(); ++i)
754 ++(pcount[i->second]);
755 // first find the set of graph roots
756 std::list<revision_id> roots;
757 for (pi i = pcount.begin(); i != pcount.end(); ++i)
758 if(i->second==0)
759 roots.push_back(i->first);
760 while (!roots.empty())
761 {
762 // now stick them in our ordering (if wanted) and remove them from the
763 // graph, calculating the new roots as we go
764 L(F("new root: %s\n") % (roots.front()));
765 if (revisions.find(roots.front()) != revisions.end())
766 sorted.push_back(roots.front());
767 for(gi i = graph.lower_bound(roots.front());
768 i != graph.upper_bound(roots.front()); i++)
769 if(--(pcount[i->second]) == 0)
770 roots.push_back(i->second);
771 graph.erase(roots.front());
772 leaves.erase(roots.front());
773 roots.pop_front();
774 }
775 I(graph.empty());
776 for (std::set<revision_id>::const_iterator i = leaves.begin();
777 i != leaves.end(); ++i)
778 {
779 L(F("new leaf: %s\n") % (*i));
780 if (revisions.find(*i) != revisions.end())
781 sorted.push_back(*i);
782 }
783}
784
785// This function looks at a set of revisions, and for every pair A, B in that
786// set such that A is an ancestor of B, it erases A.
787
788void
789erase_ancestors(std::set<revision_id> & revisions, app_state & app)
790{
791 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
792 std::multimap<revision_id, revision_id> graph;
793 std::multimap<revision_id, revision_id> inverse_graph;
794
795 app.db.get_revision_ancestry(graph);
796 for (gi i = graph.begin(); i != graph.end(); ++i)
797 inverse_graph.insert(std::make_pair(i->second, i->first));
798
799 interner<ctx> intern;
800 std::map< ctx, shared_bitmap > ancestors;
801
802 shared_bitmap u = shared_bitmap(new bitmap());
803
804 for (std::set<revision_id>::const_iterator i = revisions.begin();
805 i != revisions.end(); ++i)
806 {
807 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
808 }
809
810 std::set<revision_id> tmp;
811 for (std::set<revision_id>::const_iterator i = revisions.begin();
812 i != revisions.end(); ++i)
813 {
814 ctx id = intern.intern(i->inner()());
815 bool has_ancestor_in_set = id < u->size() && u->test(id);
816 if (!has_ancestor_in_set)
817 tmp.insert(*i);
818 }
819
820 revisions = tmp;
821}
822
823// This function takes a revision A and a set of revision Bs, calculates the
824// ancestry of each, and returns the set of revisions that are in A's ancestry
825// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
826// that's not in the Bs. If the output set if non-empty, then A will
827// certainly be in it; but the output set might be empty.
828void
829ancestry_difference(revision_id const & a, std::set<revision_id> const & bs,
830 std::set<revision_id> & new_stuff,
831 app_state & app)
832{
833 new_stuff.clear();
834 typedef std::multimap<revision_id, revision_id>::const_iterator gi;
835 std::multimap<revision_id, revision_id> graph;
836 std::multimap<revision_id, revision_id> inverse_graph;
837
838 app.db.get_revision_ancestry(graph);
839 for (gi i = graph.begin(); i != graph.end(); ++i)
840 inverse_graph.insert(std::make_pair(i->second, i->first));
841
842 interner<ctx> intern;
843 std::map< ctx, shared_bitmap > ancestors;
844
845 shared_bitmap u = shared_bitmap(new bitmap());
846
847 for (std::set<revision_id>::const_iterator i = bs.begin();
848 i != bs.end(); ++i)
849 {
850 calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
851 ctx c = intern.intern(i->inner()());
852 if (u->size() <= c)
853 u->resize(c + 1);
854 u->set(c);
855 }
856
857 shared_bitmap au = shared_bitmap(new bitmap());
858 calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
859 {
860 ctx c = intern.intern(a.inner()());
861 if (au->size() <= c)
862 au->resize(c + 1);
863 au->set(c);
864 }
865
866 au->resize(std::max(au->size(), u->size()));
867 u->resize(std::max(au->size(), u->size()));
868
869 *au -= *u;
870
871 for (unsigned int i = 0; i != au->size(); ++i)
872 {
873 if (au->test(i))
874 {
875 revision_id rid(intern.lookup(i));
876 if (!null_id(rid))
877 new_stuff.insert(rid);
878 }
879 }
880}
881
882//
883// The idea with this algorithm is to walk from child up to ancestor,
884// recursively, accumulating all the change_sets associated with
885// intermediate nodes into *one big change_set*.
886//
887// clever readers will realize this is an overlapping-subproblem type
888// situation and thus needs to keep a dynamic programming map to keep
889// itself in linear complexity.
890//
891// in fact, we keep two: one which maps to computed results (partial_csets)
892// and one which just keeps a set of all nodes we traversed
893// (visited_nodes). in theory it could be one map with an extra bool stuck
894// on each entry, but I think that would make it even less readable. it's
895// already quite ugly.
896//
897
898static bool
899calculate_change_sets_recursive(revision_id const & ancestor,
900 revision_id const & child,
901 app_state & app,
902 change_set & cumulative_cset,
903 std::map<revision_id, boost::shared_ptr<change_set> > & partial_csets,
904 std::set<revision_id> & visited_nodes,
905 std::set<revision_id> const & subgraph)
906{
907
908 if (ancestor == child)
909 return true;
910
911 if (subgraph.find(child) == subgraph.end())
912 return false;
913
914 visited_nodes.insert(child);
915
916 bool relevant_child = false;
917
918 revision_set rev;
919 app.db.get_revision(child, rev);
920
921 L(F("exploring changesets from parents of %s, seeking towards %s\n")
922 % child % ancestor);
923
924 for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
925 {
926 bool relevant_parent = false;
927 revision_id curr_parent = edge_old_revision(i);
928
929 if (curr_parent.inner()().empty())
930 continue;
931
932 change_set cset_to_curr_parent;
933
934 L(F("considering parent %s of %s\n") % curr_parent % child);
935
936 std::map<revision_id, boost::shared_ptr<change_set> >::const_iterator j =
937 partial_csets.find(curr_parent);
938 if (j != partial_csets.end())
939 {
940 // a recursive call has traversed this parent before and built an
941 // existing cset. just reuse that rather than re-traversing
942 cset_to_curr_parent = *(j->second);
943 relevant_parent = true;
944 }
945 else if (visited_nodes.find(curr_parent) != visited_nodes.end())
946 {
947 // a recursive call has traversed this parent, but there was no
948 // path from it to the root, so the parent is irrelevant. skip.
949 relevant_parent = false;
950 }
951 else
952 relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app,
953 cset_to_curr_parent,
954 partial_csets,
955 visited_nodes,
956 subgraph);
957
958 if (relevant_parent)
959 {
960 L(F("revision %s is relevant, composing with edge to %s\n")
961 % curr_parent % child);
962 concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset);
963 relevant_child = true;
964 break;
965 }
966 else
967 L(F("parent %s of %s is not relevant\n") % curr_parent % child);
968 }
969
970 // store the partial edge from ancestor -> child, so that if anyone
971 // re-traverses this edge they'll just fetch from the partial_edges
972 // cache.
973 if (relevant_child)
974 partial_csets.insert(std::make_pair(child,
975 boost::shared_ptr<change_set>
976 (new change_set(cumulative_cset))));
977
978 return relevant_child;
979}
980
981// this finds (by breadth-first search) the set of nodes you'll have to
982// walk over in calculate_change_sets_recursive, to build the composite
983// changeset. this is to prevent the recursive algorithm from going way
984// back in history on an unlucky guess of parent.
985
986static void
987find_subgraph_for_composite_search(revision_id const & ancestor,
988 revision_id const & child,
989 app_state & app,
990 std::set<revision_id> & subgraph)
991{
992 std::set<revision_id> frontier;
993 frontier.insert(child);
994 subgraph.insert(child);
995 while (!frontier.empty())
996 {
997 std::set<revision_id> next_frontier;
998 for (std::set<revision_id>::const_iterator i = frontier.begin();
999 i != frontier.end(); ++i)
1000 {
1001 revision_set rev;
1002 app.db.get_revision(*i, rev);
1003 L(F("adding parents of %s to subgraph\n") % *i);
1004
1005 for(edge_map::const_iterator j = rev.edges.begin(); j != rev.edges.end(); ++j)
1006 {
1007 revision_id curr_parent = edge_old_revision(j);
1008 if (null_id(curr_parent))
1009 continue;
1010 subgraph.insert(curr_parent);
1011 if (curr_parent == ancestor)
1012 {
1013 L(F("found parent %s of %s\n") % curr_parent % *i);
1014 return;
1015 }
1016 else
1017 L(F("adding parent %s to next frontier\n") % curr_parent);
1018 next_frontier.insert(curr_parent);
1019 }
1020 }
1021 frontier = next_frontier;
1022 }
1023}
1024
1025void
1026calculate_composite_change_set(revision_id const & ancestor,
1027 revision_id const & child,
1028 app_state & app,
1029 change_set & composed)
1030{
1031 I(composed.empty());
1032 L(F("calculating composite changeset between %s and %s\n")
1033 % ancestor % child);
1034 if (ancestor == child)
1035 return;
1036 std::set<revision_id> visited;
1037 std::set<revision_id> subgraph;
1038 std::map<revision_id, boost::shared_ptr<change_set> > partial;
1039 find_subgraph_for_composite_search(ancestor, child, app, subgraph);
1040 calculate_change_sets_recursive(ancestor, child, app, composed, partial,
1041 visited, subgraph);
1042}
1043
1044void
1045calculate_arbitrary_change_set(revision_id const & start,
1046 revision_id const & end,
1047 app_state & app,
1048 change_set & composed)
1049{
1050 L(F("calculating changeset from %s to %s\n") % start % end);
1051 revision_id r_ca_id;
1052 change_set ca_to_start, ca_to_end, start_to_ca;
1053 N(find_least_common_ancestor(start, end, r_ca_id, app),
1054 F("no common ancestor for %s and %s\n") % start % end);
1055 L(F("common ancestor is %s\n") % r_ca_id);
1056 calculate_composite_change_set(r_ca_id, start, app, ca_to_start);
1057 calculate_composite_change_set(r_ca_id, end, app, ca_to_end);
1058 manifest_id m_ca_id;
1059 manifest_map m_ca;
1060 app.db.get_revision_manifest(r_ca_id, m_ca_id);
1061 app.db.get_manifest(m_ca_id, m_ca);
1062 invert_change_set(ca_to_start, m_ca, start_to_ca);
1063 concatenate_change_sets(start_to_ca, ca_to_end, composed);
1064}
1065
1066
1067// Stuff related to rebuilding the revision graph. Unfortunately this is a
1068// real enough error case that we need support code for it.
1069
1070
1071static void
1072analyze_manifest_changes(app_state & app,
1073 manifest_id const & parent,
1074 manifest_id const & child,
1075 std::set<file_path> const & need_history_splitting,
1076 change_set & cs)
1077{
1078 manifest_map m_parent, m_child;
1079
1080 if (!null_id(parent))
1081 app.db.get_manifest(parent, m_parent);
1082
1083 I(!null_id(child));
1084 app.db.get_manifest(child, m_child);
1085
1086 L(F("analyzing manifest changes from '%s' -> '%s'\n") % parent % child);
1087
1088 for (manifest_map::const_iterator i = m_parent.begin();
1089 i != m_parent.end(); ++i)
1090 {
1091 file_path f = manifest_entry_path(i);
1092 manifest_map::const_iterator j = m_child.find(f);
1093 if (j == m_child.end())
1094 {
1095 cs.delete_file(f);
1096 }
1097 else if (need_history_splitting.find(f) != need_history_splitting.end())
1098 {
1099 P(F("splitting ancestry for file %s\n") % f);
1100 cs.delete_file(f);
1101 cs.add_file(f, manifest_entry_id(j));
1102 }
1103 else if (! (manifest_entry_id(i) == manifest_entry_id(j)))
1104 {
1105 cs.apply_delta(manifest_entry_path(i),
1106 manifest_entry_id(i),
1107 manifest_entry_id(j));
1108 }
1109 }
1110 for (manifest_map::const_iterator i = m_child.begin();
1111 i != m_child.end(); ++i)
1112 {
1113 manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i));
1114 if (j == m_parent.end())
1115 cs.add_file(manifest_entry_path(i),
1116 manifest_entry_id(i));
1117 }
1118}
1119
1120
1121struct anc_graph
1122{
1123 anc_graph(bool existing, app_state & a) :
1124 existing_graph(existing),
1125 app(a),
1126 max_node(0),
1127 n_nodes("nodes", "n", 1),
1128 n_certs_in("certs in", "c", 1),
1129 n_revs_out("revs out", "r", 1),
1130 n_certs_out("certs out", "C", 1)
1131 {}
1132
1133 bool existing_graph;
1134 app_state & app;
1135 u64 max_node;
1136
1137 ticker n_nodes;
1138 ticker n_certs_in;
1139 ticker n_revs_out;
1140 ticker n_certs_out;
1141
1142 std::map<u64,manifest_id> node_to_old_man;
1143 std::map<manifest_id,u64> old_man_to_node;
1144
1145 std::map<u64,revision_id> node_to_old_rev;
1146 std::map<revision_id,u64> old_rev_to_node;
1147
1148 std::map<u64,revision_id> node_to_new_rev;
1149 std::multimap<u64, std::pair<cert_name, cert_value> > certs;
1150 std::multimap<u64, u64> ancestry;
1151 std::set<std::string> branches;
1152
1153 void add_node_ancestry(u64 child, u64 parent);
1154 void write_certs();
1155 void kluge_for_3_ancestor_nodes();
1156 void rebuild_ancestry();
1157 void get_node_manifest(u64 node, manifest_id & man);
1158 u64 add_node_for_old_manifest(manifest_id const & man);
1159 u64 add_node_for_old_revision(revision_id const & rev);
1160 revision_id construct_revision_from_ancestry(u64 child);
1161};
1162
1163
1164void anc_graph::add_node_ancestry(u64 child, u64 parent)
1165{
1166 L(F("noting ancestry from child %d -> parent %d\n") % child % parent);
1167 ancestry.insert(std::make_pair(child, parent));
1168}
1169
1170void anc_graph::get_node_manifest(u64 node, manifest_id & man)
1171{
1172 std::map<u64,manifest_id>::const_iterator i = node_to_old_man.find(node);
1173 I(i != node_to_old_man.end());
1174 man = i->second;
1175}
1176
1177void anc_graph::write_certs()
1178{
1179 std::set<cert_name> cnames;
1180 cnames.insert(cert_name(branch_cert_name));
1181 cnames.insert(cert_name(date_cert_name));
1182 cnames.insert(cert_name(author_cert_name));
1183 cnames.insert(cert_name(tag_cert_name));
1184 cnames.insert(cert_name(changelog_cert_name));
1185 cnames.insert(cert_name(comment_cert_name));
1186 cnames.insert(cert_name(testresult_cert_name));
1187
1188
1189 {
1190 // regenerate epochs on all branches to random states
1191
1192 for (std::set<std::string>::const_iterator i = branches.begin(); i != branches.end(); ++i)
1193 {
1194 char buf[constants::epochlen_bytes];
1195 Botan::Global_RNG::randomize(reinterpret_cast<Botan::byte *>(buf), constants::epochlen_bytes);
1196 hexenc<data> hexdata;
1197 encode_hexenc(data(std::string(buf, buf + constants::epochlen_bytes)), hexdata);
1198 epoch_data new_epoch(hexdata);
1199 L(F("setting epoch for %s to %s\n") % *i % new_epoch);
1200 app.db.set_epoch(cert_value(*i), new_epoch);
1201 }
1202 }
1203
1204
1205 typedef std::multimap<u64, std::pair<cert_name, cert_value> >::const_iterator ci;
1206
1207 for (std::map<u64,revision_id>::const_iterator i = node_to_new_rev.begin();
1208 i != node_to_new_rev.end(); ++i)
1209 {
1210 revision_id rev(i->second);
1211
1212 std::pair<ci,ci> range = certs.equal_range(i->first);
1213
1214 for (ci j = range.first; j != range.second; ++j)
1215 {
1216 cert_name name(j->second.first);
1217 cert_value val(j->second.second);
1218
1219 if (cnames.find(name) == cnames.end())
1220 continue;
1221
1222 cert new_cert;
1223 make_simple_cert(rev.inner(), name, val, app, new_cert);
1224 revision<cert> rcert(new_cert);
1225 if (! app.db.revision_cert_exists(rcert))
1226 {
1227 ++n_certs_out;
1228 app.db.put_revision_cert(rcert);
1229 }
1230 }
1231 }
1232}
1233
1234void
1235anc_graph::kluge_for_3_ancestor_nodes()
1236{
1237 // This method is, as the name suggests, a kluge. It exists because in the
1238 // 0.17 timeframe, monotone's ancestry graph has several nodes with 3
1239 // parents. This isn't, in principle, necessarily a bad thing; having 3
1240 // parents is reasonably well defined, I don't know of much code that is
1241 // dependent on revisions having only 2 parents, etc. But it is a very
1242 // weird thing, that we would never under any circumstances create today,
1243 // and it only exists as a side-effect of the pre-changeset days. In the
1244 // future we may decide to allow 3+-parent revisions; we may decide to
1245 // disallow it. Right now, I'd rather keep options open.
1246 // We remove only edges that are "redundant" (i.e., already weird...).
1247 // These are also something that we currently refuse to produce -- when a
1248 // node has more than one parent, and one parent is an ancestor of another.
1249 // These edges, again, only exist because of weirdnesses in the
1250 // pre-changeset days, and are not particularly meaningful. Again, we may
1251 // decide in the future to use them for some things, but...
1252 // FIXME: remove this method eventually, since it is (mildly) destructive on
1253 // history, and isn't really doing anything that necessarily needs to happen
1254 // anyway.
1255 P(F("scanning for nodes with 3+ parents\n"));
1256 std::set<u64> manyparents;
1257 for (std::multimap<u64, u64>::const_iterator i = ancestry.begin();
1258 i != ancestry.end(); ++i)
1259 {
1260 if (ancestry.count(i->first) > 2)
1261 manyparents.insert(i->first);
1262 }
1263 for (std::set<u64>::const_iterator i = manyparents.begin();
1264 i != manyparents.end(); ++i)
1265 {
1266 std::set<u64> indirect_ancestors;
1267 std::set<u64> parents;
1268 std::vector<u64> to_examine;
1269 for (std::multimap<u64, u64>::const_iterator j = ancestry.lower_bound(*i);
1270 j != ancestry.upper_bound(*i); ++j)
1271 {
1272 to_examine.push_back(j->second);
1273 parents.insert(j->second);
1274 }
1275 I(!to_examine.empty());
1276 while (!to_examine.empty())
1277 {
1278 u64 current = to_examine.back();
1279 to_examine.pop_back();
1280 for (std::multimap<u64, u64>::const_iterator j = ancestry.lower_bound(current);
1281 j != ancestry.upper_bound(current); ++j)
1282 {
1283 if (indirect_ancestors.find(j->second) == indirect_ancestors.end())
1284 {
1285 to_examine.push_back(j->second);
1286 indirect_ancestors.insert(j->second);
1287 }
1288 }
1289 }
1290 size_t killed = 0;
1291 for (std::set<u64>::const_iterator p = parents.begin();
1292 p != parents.end(); ++p)
1293 {
1294 if (indirect_ancestors.find(*p) != indirect_ancestors.end())
1295 {
1296 P(F("optimizing out redundant edge %i -> %i\n") % (*p) % (*i));
1297 // sometimes I hate STL. or at least my incompetence at using it.
1298 size_t old_size = ancestry.size();
1299 for (std::multimap<u64, u64>::iterator e = ancestry.lower_bound(*i);
1300 e != ancestry.upper_bound(*i); ++e)
1301 {
1302 I(e->first == *i);
1303 if (e->second == *p)
1304 {
1305 ancestry.erase(e);
1306 break;
1307 }
1308 }
1309 I(old_size - 1 == ancestry.size());
1310 ++killed;
1311 }
1312 }
1313 I(killed < parents.size());
1314 I(ancestry.find(*i) != ancestry.end());
1315 }
1316}
1317
1318void
1319anc_graph::rebuild_ancestry()
1320{
1321 kluge_for_3_ancestor_nodes();
1322
1323 P(F("rebuilding %d nodes\n") % max_node);
1324 {
1325 transaction_guard guard(app.db);
1326 if (existing_graph)
1327 app.db.delete_existing_revs_and_certs();
1328
1329 std::set<u64> parents, children, heads;
1330 for (std::multimap<u64, u64>::const_iterator i = ancestry.begin();
1331 i != ancestry.end(); ++i)
1332 {
1333 children.insert(i->first);
1334 parents.insert(i->second);
1335 }
1336 set_difference(children.begin(), children.end(),
1337 parents.begin(), parents.end(),
1338 std::inserter(heads, heads.begin()));
1339
1340 // FIXME: should do a depth-first traversal here, or something like,
1341 // instead of being recursive.
1342 for (std::set<u64>::const_iterator i = heads.begin();
1343 i != heads.end(); ++i)
1344 {
1345 construct_revision_from_ancestry(*i);
1346 }
1347 write_certs();
1348 guard.commit();
1349 }
1350}
1351
1352u64 anc_graph::add_node_for_old_manifest(manifest_id const & man)
1353{
1354 I(!existing_graph);
1355 u64 node = 0;
1356 if (old_man_to_node.find(man) == old_man_to_node.end())
1357 {
1358 node = max_node++;
1359 ++n_nodes;
1360 L(F("node %d = manifest %s\n") % node % man);
1361 old_man_to_node.insert(std::make_pair(man, node));
1362 node_to_old_man.insert(std::make_pair(node, man));
1363
1364 // load certs
1365 std::vector< manifest<cert> > mcerts;
1366 app.db.get_manifest_certs(man, mcerts);
1367 erase_bogus_certs(mcerts, app);
1368 for(std::vector< manifest<cert> >::const_iterator i = mcerts.begin();
1369 i != mcerts.end(); ++i)
1370 {
1371 L(F("loaded '%s' manifest cert for node %s\n") % i->inner().name % node);
1372 cert_value tv;
1373 decode_base64(i->inner().value, tv);
1374 ++n_certs_in;
1375 certs.insert(std::make_pair(node,
1376 std::make_pair(i->inner().name, tv)));
1377 }
1378 }
1379 else
1380 {
1381 node = old_man_to_node[man];
1382 }
1383 return node;
1384}
1385
1386u64 anc_graph::add_node_for_old_revision(revision_id const & rev)
1387{
1388 I(existing_graph);
1389 I(!null_id(rev));
1390 u64 node = 0;
1391 if (old_rev_to_node.find(rev) == old_rev_to_node.end())
1392 {
1393 node = max_node++;
1394 ++n_nodes;
1395
1396 manifest_id man;
1397 app.db.get_revision_manifest(rev, man);
1398
1399 L(F("node %d = revision %s = manifest %s\n") % node % rev % man);
1400 old_rev_to_node.insert(std::make_pair(rev, node));
1401 node_to_old_rev.insert(std::make_pair(node, rev));
1402 node_to_old_man.insert(std::make_pair(node, man));
1403
1404 // load certs
1405 std::vector< revision<cert> > rcerts;
1406 app.db.get_revision_certs(rev, rcerts);
1407 erase_bogus_certs(rcerts, app);
1408 for(std::vector< revision<cert> >::const_iterator i = rcerts.begin();
1409 i != rcerts.end(); ++i)
1410 {
1411 L(F("loaded '%s' revision cert for node %s\n") % i->inner().name % node);
1412 cert_value tv;
1413 decode_base64(i->inner().value, tv);
1414 ++n_certs_in;
1415 certs.insert(std::make_pair(node,
1416 std::make_pair(i->inner().name, tv)));
1417
1418 if (i->inner().name == branch_cert_name)
1419 branches.insert(tv());
1420 }
1421 }
1422 else
1423 {
1424 node = old_rev_to_node[rev];
1425 }
1426
1427 return node;
1428}
1429
1430// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will
1431// overflow the stack on large histories.
1432revision_id
1433anc_graph::construct_revision_from_ancestry(u64 child)
1434{
1435 L(F("processing node %d\n") % child);
1436
1437 if (node_to_new_rev.find(child) != node_to_new_rev.end())
1438 {
1439 L(F("node %d already processed, skipping\n") % child);
1440 return node_to_new_rev.find(child)->second;
1441 }
1442
1443 manifest_id child_man;
1444 get_node_manifest(child, child_man);
1445
1446 revision_set rev;
1447 rev.new_manifest = child_man;
1448
1449 typedef std::multimap<u64, u64>::const_iterator ci;
1450 std::pair<ci,ci> range = ancestry.equal_range(child);
1451 if (range.first == range.second)
1452 {
1453 L(F("node %d is a root node\n") % child);
1454 revision_id null_rid;
1455 manifest_id null_mid;
1456 boost::shared_ptr<change_set> cs(new change_set());
1457 std::set<file_path> blah;
1458 analyze_manifest_changes(app, null_mid, child_man, blah, *cs);
1459 rev.edges.insert(std::make_pair(null_rid,
1460 std::make_pair(null_mid, cs)));
1461 }
1462 else if (std::distance(range.first, range.second) == 1)
1463 {
1464 I(child == range.first->first);
1465 u64 parent = range.first->second;
1466 if (node_to_new_rev.find(parent) == node_to_new_rev.end())
1467 construct_revision_from_ancestry(parent);
1468 revision_id parent_rid = node_to_new_rev.find(parent)->second;
1469 L(F("parent node %d = revision %s\n") % parent % parent_rid);
1470 manifest_id parent_man;
1471 get_node_manifest(parent, parent_man);
1472 boost::shared_ptr<change_set> cs(new change_set());
1473 std::set<file_path> need_killing_files;
1474 analyze_manifest_changes(app, parent_man, child_man, need_killing_files,
1475 *cs);
1476 rev.edges.insert(std::make_pair(parent_rid,
1477 std::make_pair(parent_man, cs)));
1478 }
1479 else
1480 {
1481 // this section has lots and lots of rigmorale, in order to handle the
1482 // following case: A -> B -> D, A -> C -> D. File "foo" exists in
1483 // manifests A, C, and D only. (I.e., it was deleted and then re-added
1484 // along the B path, and left alone along the C path.) The problem here
1485 // is that we have to synthesize a delete/add pair on the C -> D edge,
1486 // to make sure that the A -> D changeset is path invariant -- we have
1487 // to make sure that no matter how you go from A to D, we will learn
1488 // that A's "foo" and D's "foo" are actually logically distinct files.
1489 //
1490 // In order to do this, before we generate the changeset for any merged
1491 // revision, we calculate the changeset through from the parents's
1492 // common ancestor to the _other_ parent, which will tell us which files
1493 // have been deleted since then. We also calculate the changeset
1494 // through from the parents's common ancestor to the current parent,
1495 // which tells us which files have already been deleted on our side, and
1496 // so don't need to be deleted again.
1497 //
1498 // Finally, we feed the list of deleted files to analyze_manifest, which
1499 // uses that information to break file ancestries when necessary.
1500 //
1501 // we only know how to preserve file ids when there are exactly two
1502 // parents, so assert that there are.
1503 std::vector<u64> parents, others;
1504 {
1505 u64 left_p, right_p;
1506 ci i = range.first;
1507 I(child == i->first);
1508 left_p = i->second;
1509 ++i;
1510 I(child == i->first);
1511 right_p = i->second;
1512 ++i;
1513 I(i == range.second);
1514 parents.push_back(left_p);
1515 others.push_back(right_p);
1516 parents.push_back(right_p);
1517 others.push_back(left_p);
1518 }
1519 // make sure our parents are all saved.
1520 for (std::vector<u64>::const_iterator i = parents.begin(); i != parents.end(); ++i)
1521 {
1522 if (node_to_new_rev.find(*i) == node_to_new_rev.end())
1523 construct_revision_from_ancestry(*i);
1524 I(node_to_new_rev.find(*i) != node_to_new_rev.end());
1525 }
1526 // actually process the two nodes
1527 for (int i = 0; i != 2; ++i)
1528 {
1529 u64 parent = idx(parents, i);
1530 u64 other_parent = idx(others, i);
1531 L(F("processing edge from child %d -> parent %d\n") % child % parent);
1532
1533 revision_id parent_rid = node_to_new_rev.find(parent)->second;
1534 revision_id other_parent_rid = node_to_new_rev.find(other_parent)->second;
1535 // this is stupidly inefficient, in that we do this whole expensive
1536 // changeset finding thing twice in a row. oh well.
1537 revision_id lca;
1538 std::set<file_path> need_killing_files;
1539 if (find_least_common_ancestor(parent_rid, other_parent_rid, lca, app))
1540 {
1541 change_set parent_cs, other_parent_cs;
1542 calculate_composite_change_set(lca, other_parent_rid, app, other_parent_cs);
1543 calculate_composite_change_set(lca, parent_rid, app, parent_cs);
1544 std::set_difference(other_parent_cs.rearrangement.deleted_files.begin(),
1545 other_parent_cs.rearrangement.deleted_files.end(),
1546 parent_cs.rearrangement.deleted_files.begin(),
1547 parent_cs.rearrangement.deleted_files.end(),
1548 std::inserter(need_killing_files,
1549 need_killing_files.begin()));
1550 }
1551
1552 L(F("parent node %d = revision %s\n") % parent % parent_rid);
1553 manifest_id parent_man;
1554 get_node_manifest(parent, parent_man);
1555 boost::shared_ptr<change_set> cs(new change_set());
1556 analyze_manifest_changes(app, parent_man, child_man, need_killing_files,
1557 *cs);
1558 rev.edges.insert(std::make_pair(parent_rid,
1559 std::make_pair(parent_man, cs)));
1560 }
1561 }
1562
1563 revision_id rid;
1564 calculate_ident(rev, rid);
1565 node_to_new_rev.insert(std::make_pair(child, rid));
1566
1567 if (!app.db.revision_exists (rid))
1568 {
1569 L(F("mapped node %d to revision %s\n") % child % rid);
1570 app.db.put_revision(rid, rev);
1571 ++n_revs_out;
1572 }
1573 else
1574 {
1575 L(F("skipping already existing revision %s\n") % rid);
1576 }
1577
1578 return rid;
1579}
1580
1581void
1582build_changesets_from_existing_revs(app_state & app)
1583{
1584 global_sanity.set_relaxed(true);
1585 anc_graph graph(true, app);
1586
1587 P(F("rebuilding revision graph from existing graph\n"));
1588 std::multimap<revision_id, revision_id> existing_graph;
1589
1590 {
1591 // early short-circuit to avoid failure after lots of work
1592 rsa_keypair_id key;
1593 get_user_key(key,app);
1594 require_password(key, app);
1595 }
1596
1597 app.db.get_revision_ancestry(existing_graph);
1598 for (std::multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
1599 i != existing_graph.end(); ++i)
1600 {
1601 if (!null_id(i->first))
1602 {
1603 u64 parent_node = graph.add_node_for_old_revision(i->first);
1604 u64 child_node = graph.add_node_for_old_revision(i->second);
1605 graph.add_node_ancestry(child_node, parent_node);
1606 }
1607 }
1608
1609 global_sanity.set_relaxed(false);
1610 graph.rebuild_ancestry();
1611}
1612
1613
1614void
1615build_changesets_from_manifest_ancestry(app_state & app)
1616{
1617 anc_graph graph(false, app);
1618
1619 P(F("rebuilding revision graph from manifest certs\n"));
1620
1621 {
1622 // early short-circuit to avoid failure after lots of work
1623 rsa_keypair_id key;
1624 get_user_key(key,app);
1625 require_password(key, app);
1626 }
1627
1628 std::vector< manifest<cert> > tmp;
1629 app.db.get_manifest_certs(cert_name("ancestor"), tmp);
1630 erase_bogus_certs(tmp, app);
1631
1632 for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
1633 i != tmp.end(); ++i)
1634 {
1635 cert_value tv;
1636 decode_base64(i->inner().value, tv);
1637 manifest_id child, parent;
1638 child = i->inner().ident;
1639 parent = hexenc<id>(tv());
1640
1641 u64 parent_node = graph.add_node_for_old_manifest(parent);
1642 u64 child_node = graph.add_node_for_old_manifest(child);
1643 graph.add_node_ancestry(child_node, parent_node);
1644 }
1645
1646 graph.rebuild_ancestry();
1647}
1648
1649
1650// i/o stuff
1651
1652namespace
1653{
1654 namespace syms
1655 {
1656 std::string const old_revision("old_revision");
1657 std::string const new_manifest("new_manifest");
1658 std::string const old_manifest("old_manifest");
1659 }
1660}
1661
1662
1663void
1664print_edge(basic_io::printer & printer,
1665 edge_entry const & e)
1666{
1667 basic_io::stanza st;
1668 st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
1669 st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
1670 printer.print_stanza(st);
1671 print_change_set(printer, edge_changes(e));
1672}
1673
1674
1675void
1676print_revision(basic_io::printer & printer,
1677 revision_set const & rev)
1678{
1679 rev.check_sane();
1680 basic_io::stanza st;
1681 st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
1682 printer.print_stanza(st);
1683 for (edge_map::const_iterator edge = rev.edges.begin();
1684 edge != rev.edges.end(); ++edge)
1685 print_edge(printer, *edge);
1686}
1687
1688
1689void
1690parse_edge(basic_io::parser & parser,
1691 edge_map & es)
1692{
1693 boost::shared_ptr<change_set> cs(new change_set());
1694 manifest_id old_man;
1695 revision_id old_rev;
1696 std::string tmp;
1697
1698 parser.esym(syms::old_revision);
1699 parser.hex(tmp);
1700 old_rev = revision_id(tmp);
1701
1702 parser.esym(syms::old_manifest);
1703 parser.hex(tmp);
1704 old_man = manifest_id(tmp);
1705
1706 parse_change_set(parser, *cs);
1707
1708 es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
1709}
1710
1711
1712void
1713parse_revision(basic_io::parser & parser,
1714 revision_set & rev)
1715{
1716 rev.edges.clear();
1717 std::string tmp;
1718 parser.esym(syms::new_manifest);
1719 parser.hex(tmp);
1720 rev.new_manifest = manifest_id(tmp);
1721 while (parser.symp(syms::old_revision))
1722 parse_edge(parser, rev.edges);
1723 rev.check_sane();
1724}
1725
1726void
1727read_revision_set(data const & dat,
1728 revision_set & rev)
1729{
1730 std::istringstream iss(dat());
1731 basic_io::input_source src(iss, "revision");
1732 basic_io::tokenizer tok(src);
1733 basic_io::parser pars(tok);
1734 parse_revision(pars, rev);
1735 I(src.lookahead == EOF);
1736 rev.check_sane();
1737}
1738
1739void
1740read_revision_set(revision_data const & dat,
1741 revision_set & rev)
1742{
1743 read_revision_set(dat.inner(), rev);
1744 rev.check_sane();
1745}
1746
1747void
1748write_revision_set(revision_set const & rev,
1749 data & dat)
1750{
1751 rev.check_sane();
1752 std::ostringstream oss;
1753 basic_io::printer pr(oss);
1754 print_revision(pr, rev);
1755 dat = data(oss.str());
1756}
1757
1758void
1759write_revision_set(revision_set const & rev,
1760 revision_data & dat)
1761{
1762 rev.check_sane();
1763 data d;
1764 write_revision_set(rev, d);
1765 dat = revision_data(d);
1766}
1767
1768#ifdef BUILD_UNIT_TESTS
1769#include "unit_tests.hh"
1770#include "sanity.hh"
1771
1772static void
1773revision_test()
1774{
1775}
1776
1777void
1778add_revision_tests(test_suite * suite)
1779{
1780 I(suite);
1781 suite->add(BOOST_TEST_CASE(&revision_test));
1782}
1783
1784
1785#endif // BUILD_UNIT_TESTS

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status