monotone

monotone Mtn Source Tree

Root/revision.cc

1// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
#include "base.hh"
#include <algorithm>
#include <functional>
#include <iterator>
#include <list>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>

#include "lexical_cast.hh"
#include <boost/dynamic_bitset.hpp>
#include <boost/shared_ptr.hpp>

#include "botan/botan.h"

#include "basic_io.hh"
#include "cert.hh"
#include "cset.hh"
#include "constants.hh"
#include "interner.hh"
#include "numeric_vocab.hh"
#include "revision.hh"
#include "sanity.hh"
#include "transforms.hh"
#include "simplestring_xform.hh"
#include "ui.hh"
#include "vocab.hh"
#include "safe_map.hh"
#include "legacy.hh"
#include "rev_height.hh"
#include "outdated_indicator.hh"
#include "database.hh"
#include "roster.hh"
#include "graph.hh"
#include "key_store.hh"
46
47using std::back_inserter;
48using std::copy;
49using std::deque;
50using std::list;
51using std::make_pair;
52using std::map;
53using std::max;
54using std::multimap;
55using std::ostringstream;
56using std::pair;
57using std::queue;
58using std::set;
59using std::stack;
60using std::string;
61using std::vector;
62
63using boost::dynamic_bitset;
64using boost::shared_ptr;
65
// Enforce the structural invariants a revision must satisfy: a null
// manifest id is only permitted when every edge's old revision is also
// null, merge (2-edge) revisions may not have null parent revisions,
// and every revision has exactly 1 or 2 edges.  Violations abort via I().
void revision_t::check_sane() const
{
  // null id in current manifest only permitted if previous
  // state was null and no changes
  // FIXME: above comment makes no sense. This should just be
  // I(!null_id(new_manifest)), and the only reason I am not making it so
  // right now is that I don't have time to immediately track down all the
  // fallout.
  if (null_id(new_manifest))
    {
      for (edge_map::const_iterator i = edges.begin();
           i != edges.end(); ++i)
        I(null_id(edge_old_revision(i)));
    }

  if (edges.size() == 1)
    {
      // no particular checks to be done right now
    }
  else if (edges.size() == 2)
    {
      // merge nodes cannot have null revisions
      for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
        I(!null_id(edge_old_revision(i)));
    }
  else
    // revisions must always have either 1 or 2 edges
    I(false);

  // we used to also check that if there were multiple edges that had patches
  // for the same file, then the new hashes on each edge matched each other.
  // this is not ported over to roster-style revisions because it's an
  // inadequate check, and the real check, that the new manifest id is correct
  // (done in put_revision, for instance) covers this case automatically.
}
101
102bool
103revision_t::is_merge_node() const
104{
105 return edges.size() > 1;
106}
107
108bool
109revision_t::is_nontrivial() const
110{
111 check_sane();
112 // merge revisions are never trivial, because even if the resulting node
113 // happens to be identical to both parents, the merge is still recording
114 // that fact.
115 if (is_merge_node())
116 return true;
117 else
118 return !edge_changes(edges.begin()).empty();
119}
120
// Copy constructor.  A default-constructed (empty) source is copied
// without sanity checking, since an empty revision_t deliberately fails
// check_sane() (it has zero edges); any non-empty source must be sane.
revision_t::revision_t(revision_t const & other)
  : origin_aware(other)
{
  /* behave like normal constructor if other is empty */
  made_for = made_for_nobody;
  if (null_id(other.new_manifest) && other.edges.empty()) return;
  other.check_sane();
  new_manifest = other.new_manifest;
  edges = other.edges;
  made_for = other.made_for;
}
132
133revision_t const &
134revision_t::operator=(revision_t const & other)
135{
136 other.check_sane();
137 new_manifest = other.new_manifest;
138 edges = other.edges;
139 made_for = other.made_for;
140 return *this;
141}
142
143
144// For a surprisingly long time, we have been using an algorithm which
145// is nonsense, based on a misunderstanding of what "LCA" means. The
146// LCA of two nodes is *not* the first common ancestor which you find
147// when iteratively expanding their ancestor sets. Instead, the LCA is
148// the common ancestor which is a descendent of all other common
149// ancestors.
150//
151// In general, a set of nodes in a DAG doesn't always have an
152// LCA. There might be multiple common ancestors which are not parents
153// of one another. So we implement something which is "functionally
154// useful" for finding a merge point (and moreover, which always
155// terminates): we find an LCA of the input set if it exists,
156// otherwise we replace the input set with the nodes we did find and
157// repeat.
158//
159// All previous discussions in monotone-land, before say August 2005,
160// of LCA (and LCAD) are essentially wrong due to our silly
161// misunderstanding. It's unfortunate, but our half-baked
162// approximations worked almost well enough to take us through 3 years
163// of deployed use. Hopefully this more accurate new use will serve us
164// even longer.
165
166typedef unsigned long ctx;
167typedef dynamic_bitset<> bitmap;
168typedef shared_ptr<bitmap> shared_bitmap;
169
170static void
171calculate_ancestors_from_graph(interner<ctx> & intern,
172 revision_id const & init,
173 multimap<revision_id, revision_id> const & graph,
174 map< ctx, shared_bitmap > & ancestors,
175 shared_bitmap & total_union);
176
// Find a common ancestor of 'left' and 'right' suitable for use as a
// merge base, writing it to 'anc'.  As the comment above explains, this
// yields the LCA when one exists; otherwise the surviving set of
// maximal common ancestors becomes the new leaf set and the process
// repeats until a single node remains.
void
find_common_ancestor_for_merge(database & db,
                               revision_id const & left,
                               revision_id const & right,
                               revision_id & anc)
{
  interner<ctx> intern;
  set<ctx> leaves;                    // current working set of nodes
  map<ctx, shared_bitmap> ancestors;  // memoized per-node ancestor bitmaps

  // scratch bitmaps, cleared and reused on every pass
  shared_bitmap isect = shared_bitmap(new bitmap());
  shared_bitmap isect_ancs = shared_bitmap(new bitmap());

  leaves.insert(intern.intern(left.inner()()));
  leaves.insert(intern.intern(right.inner()()));


  // Invert the db's parent -> child ancestry into child -> parent form,
  // which is what calculate_ancestors_from_graph walks.
  multimap<revision_id, revision_id> inverse_graph;
  {
    multimap<revision_id, revision_id> graph;
    db.get_revision_ancestry(graph);
    typedef multimap<revision_id, revision_id>::const_iterator gi;
    for (gi i = graph.begin(); i != graph.end(); ++i)
      inverse_graph.insert(make_pair(i->second, i->first));
  }


  while (leaves.size() != 1)
    {
      isect->clear();
      isect_ancs->clear();

      // First intersect all ancestors of current leaf set
      for (set<ctx>::const_iterator i = leaves.begin(); i != leaves.end(); ++i)
        {
          ctx curr_leaf = *i;
          shared_bitmap curr_leaf_ancestors;
          map<ctx, shared_bitmap >::const_iterator j = ancestors.find(*i);
          if (j != ancestors.end())
            curr_leaf_ancestors = j->second;
          else
            {
              curr_leaf_ancestors = shared_bitmap(new bitmap());
              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf)),
                                             inverse_graph, ancestors,
                                             curr_leaf_ancestors);
            }
          // pad both bitmaps to a common length before intersecting
          if (isect->size() > curr_leaf_ancestors->size())
            curr_leaf_ancestors->resize(isect->size());

          if (curr_leaf_ancestors->size() > isect->size())
            isect->resize(curr_leaf_ancestors->size());

          if (i == leaves.begin())
            *isect = *curr_leaf_ancestors;
          else
            (*isect) &= (*curr_leaf_ancestors);
        }

      // isect is now the set of common ancestors of leaves, but that is not enough.
      // We need the set of leaves of isect; to do that we calculate the set of
      // ancestors of isect, in order to subtract it from isect (below).
      // NOTE(review): 'new_leaves' below is never used; the surviving set
      // is rebuilt directly into 'leaves' in the subtraction step.
      set<ctx> new_leaves;
      for (ctx i = 0; i < isect->size(); ++i)
        {
          if (isect->test(i))
            {
              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i)),
                                             inverse_graph, ancestors, isect_ancs);
            }
        }

      // Finally, the subtraction step: for any element i of isect, if
      // it's *not* in isect_ancs, it survives as a new leaf.
      leaves.clear();
      for (ctx i = 0; i < isect->size(); ++i)
        {
          if (!isect->test(i))
            continue;
          if (i < isect_ancs->size() && isect_ancs->test(i))
            continue;
          safe_insert(leaves, i);
        }
    }

  I(leaves.size() == 1);
  anc = revision_id(intern.lookup(*leaves.begin()));
}
265
266// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the
267// size of the entire revision graph.
268
// Breadth-first search downward through 'graph' (a multimap from parent
// to children) starting at ancestor_id, returning true iff
// descendent_id is reachable.  A node counts as its own ancestor.
template<typename T> static bool
is_ancestor(T const & ancestor_id,
            T const & descendent_id,
            multimap<T, T> const & graph)
{
  typedef typename multimap<T, T>::const_iterator gi;

  set<T> seen;
  queue<T> frontier;

  frontier.push(ancestor_id);

  while (!frontier.empty())
    {
      T node = frontier.front();
      frontier.pop();

      if (node == descendent_id)
        return true;

      pair<gi, gi> kids = graph.equal_range(node);
      for (gi child = kids.first; child != kids.second; ++child)
        {
          // enqueue each child exactly once
          if (seen.insert(child->second).second)
            frontier.push(child->second);
        }
    }
  return false;
}
302
// Database-backed wrapper: load the full parent -> child ancestry graph
// from 'db' and search it for a path from ancestor_id to descendent_id.
// O(n) in the size of the whole revision graph (see FIXME above).
bool
is_ancestor(database & db,
            revision_id const & ancestor_id,
            revision_id const & descendent_id)
{
  L(FL("checking whether %s is an ancestor of %s")
    % ancestor_id
    % descendent_id);

  multimap<revision_id, revision_id> graph;
  db.get_revision_ancestry(graph);
  return is_ancestor(ancestor_id, descendent_id, graph);
}
316
317
318static void
319add_bitset_to_union(shared_bitmap src,
320 shared_bitmap dst)
321{
322 if (dst->size() > src->size())
323 src->resize(dst->size());
324 if (src->size() > dst->size())
325 dst->resize(src->size());
326 *dst |= *src;
327}
328
329
// Compute an ancestor bitmap (indexed by interned id) for 'init' and
// for every node reachable from it through 'graph' (a child -> parent
// multimap), memoizing each result in 'ancestors'.  Each computed
// bitmap is also OR-ed into 'total_union'.  Implemented as an explicit
// stack-based traversal that finishes all of a node's parents before
// the node itself, avoiding deep recursion on large graphs.
static void
calculate_ancestors_from_graph(interner<ctx> & intern,
                               revision_id const & init,
                               multimap<revision_id, revision_id> const & graph,
                               map< ctx, shared_bitmap > & ancestors,
                               shared_bitmap & total_union)
{
  typedef multimap<revision_id, revision_id>::const_iterator gi;
  stack<ctx> stk;

  stk.push(intern.intern(init.inner()()));

  while (! stk.empty())
    {
      // peek, don't pop: we only pop once all parents are computed
      ctx us = stk.top();
      revision_id rev(intern.lookup(us));

      pair<gi,gi> parents = graph.equal_range(rev);
      bool pushed = false;

      // first make sure all parents are done
      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());
          if (ancestors.find(parent) == ancestors.end())
            {
              stk.push(parent);
              pushed = true;
              break;
            }
        }

      // if we pushed anything we stop now. we'll come back later when all
      // the parents are done.
      if (pushed)
        continue;

      shared_bitmap b = shared_bitmap(new bitmap());

      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());

          // set all parents
          if (b->size() <= parent)
            b->resize(parent + 1);
          b->set(parent);

          // ensure all parents are loaded into the ancestor map
          I(ancestors.find(parent) != ancestors.end());

          // union them into our map
          map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
          I(j != ancestors.end());
          add_bitset_to_union(j->second, b);
        }

      add_bitset_to_union(b, total_union);
      ancestors.insert(make_pair(us, b));
      stk.pop();
    }
}
392
393void
394toposort(database & db,
395 set<revision_id> const & revisions,
396 vector<revision_id> & sorted)
397{
398 map<rev_height, revision_id> work;
399
400 for (set<revision_id>::const_iterator i = revisions.begin();
401 i != revisions.end(); ++i)
402 {
403 rev_height height;
404 db.get_rev_height(*i, height);
405 work.insert(make_pair(height, *i));
406 }
407
408 sorted.clear();
409
410 for (map<rev_height, revision_id>::const_iterator i = work.begin();
411 i != work.end(); ++i)
412 {
413 sorted.push_back(i->second);
414 }
415}
416
// Walk 'inverse_graph' (child -> parents) upward from 'start', adding
// every strict ancestor of 'start' to 'all_ancestors'.  Ancestors whose
// height falls below 'min_height' are pruned -- neither added nor
// expanded -- which bounds the walk when the caller only cares about
// nodes at or above a known height.  'start' itself is not inserted.
static void
accumulate_strict_ancestors(database & db,
                            revision_id const & start,
                            set<revision_id> & all_ancestors,
                            multimap<revision_id, revision_id> const & inverse_graph,
                            rev_height const & min_height)
{
  typedef multimap<revision_id, revision_id>::const_iterator gi;

  vector<revision_id> frontier;
  frontier.push_back(start);

  while (!frontier.empty())
    {
      revision_id rid = frontier.back();
      frontier.pop_back();
      pair<gi, gi> parents = inverse_graph.equal_range(rid);
      for (gi i = parents.first; i != parents.second; ++i)
        {
          revision_id const & parent = i->second;
          if (all_ancestors.find(parent) == all_ancestors.end())
            {
              // prune if we're below min_height
              rev_height h;
              db.get_rev_height(parent, h);
              if (h >= min_height)
                {
                  all_ancestors.insert(parent);
                  frontier.push_back(parent);
                }
            }
        }
    }
}
451
452// this call is equivalent to running:
453// erase(remove_if(candidates.begin(), candidates.end(), p));
454// erase_ancestors(candidates, db);
455// however, by interleaving the two operations, it can in common cases make
456// many fewer calls to the predicate, which can be a significant speed win.
457
// See the comment above: equivalent to erasing all candidates for which
// predicate 'p' fails, then erasing every candidate that is a strict
// ancestor of another surviving candidate -- but interleaved so that
// ancestors of an already-accepted candidate never get tested at all.
// 'inverse_graph_cache_ptr' may supply a (possibly pre-filled)
// child -> parent graph to reuse across calls; pass NULL to use a
// local one.
void
erase_ancestors_and_failures(database & db,
                             std::set<revision_id> & candidates,
                             is_failure & p,
                             multimap<revision_id, revision_id> *inverse_graph_cache_ptr)
{
  // Load up the ancestry graph
  multimap<revision_id, revision_id> inverse_graph;

  if (candidates.empty())
    return;

  // an empty cache (local or caller-supplied) is filled with the
  // inverted (child -> parent) revision ancestry from the db
  if (inverse_graph_cache_ptr == NULL)
    inverse_graph_cache_ptr = &inverse_graph;
  if (inverse_graph_cache_ptr->empty())
    {
      multimap<revision_id, revision_id> graph;
      db.get_revision_ancestry(graph);
      for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
           i != graph.end(); ++i)
        inverse_graph_cache_ptr->insert(make_pair(i->second, i->first));
    }

  // Keep a set of all ancestors that we've traversed -- to avoid
  // combinatorial explosion.
  set<revision_id> all_ancestors;

  // the minimum height over all candidates bounds how deep the ancestor
  // walks below ever need to go
  rev_height min_height;
  db.get_rev_height(*candidates.begin(), min_height);
  for (std::set<revision_id>::const_iterator it = candidates.begin(); it != candidates.end(); it++)
    {
      rev_height h;
      db.get_rev_height(*it, h);
      if (h < min_height)
        min_height = h;
    }

  // process candidates in random order, presumably to avoid
  // pathological orderings that maximize predicate calls
  vector<revision_id> todo(candidates.begin(), candidates.end());
  std::random_shuffle(todo.begin(), todo.end());

  size_t predicates = 0;
  while (!todo.empty())
    {
      revision_id rid = todo.back();
      todo.pop_back();
      // check if this one has already been eliminated
      if (all_ancestors.find(rid) != all_ancestors.end())
        continue;
      // and then whether it actually should stay in the running:
      ++predicates;
      if (p(rid))
        {
          candidates.erase(rid);
          continue;
        }
      // okay, it is good enough that all its ancestors should be
      // eliminated
      accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height);
    }

  // now go and eliminate the ancestors
  for (set<revision_id>::const_iterator i = all_ancestors.begin();
       i != all_ancestors.end(); ++i)
    candidates.erase(*i);

  L(FL("called predicate %s times") % predicates);
}
525
526// This function looks at a set of revisions, and for every pair A, B in that
527// set such that A is an ancestor of B, it erases A.
528
namespace
{
  // Predicate that rejects nothing; lets erase_ancestors() reuse
  // erase_ancestors_and_failures() without erasing any "failed" nodes.
  struct no_failures : public is_failure
  {
    virtual bool operator()(revision_id const & rid)
    {
      return false;
    }
  };
}
// Erase from 'revisions' every member that is a strict ancestor of
// another member (see the comment above the no_failures namespace);
// implemented as erase_ancestors_and_failures with an always-false
// failure predicate.
void
erase_ancestors(database & db, set<revision_id> & revisions)
{
  no_failures p;
  erase_ancestors_and_failures(db, revisions, p);
}
545
546// This function takes a revision A and a set of revision Bs, calculates the
547// ancestry of each, and returns the set of revisions that are in A's ancestry
548// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
549// that's not in the Bs. If the output set if non-empty, then A will
550// certainly be in it; but the output set might be empty.
void
ancestry_difference(database & db, revision_id const & a,
                    set<revision_id> const & bs,
                    set<revision_id> & new_stuff)
{
  new_stuff.clear();
  typedef multimap<revision_id, revision_id>::const_iterator gi;
  multimap<revision_id, revision_id> graph;
  multimap<revision_id, revision_id> inverse_graph;

  // invert the parent -> child ancestry into child -> parent form
  db.get_revision_ancestry(graph);
  for (gi i = graph.begin(); i != graph.end(); ++i)
    inverse_graph.insert(make_pair(i->second, i->first));

  interner<ctx> intern;
  map< ctx, shared_bitmap > ancestors;

  // u accumulates the ancestry of all the Bs, plus the Bs themselves
  shared_bitmap u = shared_bitmap(new bitmap());

  for (set<revision_id>::const_iterator i = bs.begin();
       i != bs.end(); ++i)
    {
      calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
      ctx c = intern.intern(i->inner()());
      if (u->size() <= c)
        u->resize(c + 1);
      u->set(c);
    }

  // au is the ancestry of a, plus a itself
  shared_bitmap au = shared_bitmap(new bitmap());
  calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
  {
    ctx c = intern.intern(a.inner()());
    if (au->size() <= c)
      au->resize(c + 1);
    au->set(c);
  }

  // pad both to a common length, then subtract the Bs' ancestry from a's
  au->resize(max(au->size(), u->size()));
  u->resize(max(au->size(), u->size()));

  *au -= *u;

  // whatever survives in au is new in a; the null revision is skipped
  for (unsigned int i = 0; i != au->size(); ++i)
    {
      if (au->test(i))
        {
          revision_id rid(intern.lookup(i));
          if (!null_id(rid))
            new_stuff.insert(rid);
        }
    }
}
604
// Compute the set of node_ids touched by any edge of 'rev' by unioning,
// over each parent edge, the nodes modified by that edge's cset.
// NOTE(review): new_roster is passed by value ('roster_t const'), which
// copies the whole roster on every call; 'roster_t const &' looks
// intended, but changing it means touching the declaration in
// revision.hh, so it is only flagged here.
void
select_nodes_modified_by_rev(database & db,
                             revision_t const & rev,
                             roster_t const new_roster,
                             set<node_id> & nodes_modified)
{
  nodes_modified.clear();

  for (edge_map::const_iterator i = rev.edges.begin();
       i != rev.edges.end(); ++i)
    {
      // nodes touched by this edge alone
      set<node_id> edge_nodes_modified;
      roster_t old_roster;
      db.get_roster(edge_old_revision(i), old_roster);
      select_nodes_modified_by_cset(edge_changes(i),
                                    old_roster,
                                    new_roster,
                                    edge_nodes_modified);

      copy(edge_nodes_modified.begin(), edge_nodes_modified.end(),
           inserter(nodes_modified, nodes_modified.begin()));
    }
}
628
629
630void
631make_revision(revision_id const & old_rev_id,
632 roster_t const & old_roster,
633 roster_t const & new_roster,
634 revision_t & rev)
635{
636 shared_ptr<cset> cs(new cset());
637
638 rev.edges.clear();
639 make_cset(old_roster, new_roster, *cs);
640
641 calculate_ident(new_roster, rev.new_manifest);
642
643 if (global_sanity.debug_p())
644 L(FL("new manifest_id is %s")
645 % encode_hexenc(rev.new_manifest.inner()()));
646
647 safe_insert(rev.edges, make_pair(old_rev_id, cs));
648 rev.made_for = made_for_database;
649}
650
651void
652make_revision(revision_id const & old_rev_id,
653 roster_t const & old_roster,
654 cset const & changes,
655 revision_t & rev)
656{
657 roster_t new_roster = old_roster;
658 {
659 temp_node_id_source nis;
660 editable_roster_base er(new_roster, nis);
661 changes.apply_to(er);
662 }
663
664 shared_ptr<cset> cs(new cset(changes));
665 rev.edges.clear();
666
667 calculate_ident(new_roster, rev.new_manifest);
668
669 if (global_sanity.debug_p())
670 L(FL("new manifest_id is %s")
671 % encode_hexenc(rev.new_manifest.inner()()));
672
673 safe_insert(rev.edges, make_pair(old_rev_id, cs));
674 rev.made_for = made_for_database;
675}
676
677void
678make_revision(parent_map const & old_rosters,
679 roster_t const & new_roster,
680 revision_t & rev)
681{
682 edge_map edges;
683 for (parent_map::const_iterator i = old_rosters.begin();
684 i != old_rosters.end();
685 i++)
686 {
687 shared_ptr<cset> cs(new cset());
688 make_cset(parent_roster(i), new_roster, *cs);
689 safe_insert(edges, make_pair(parent_id(i), cs));
690 }
691
692 rev.edges = edges;
693 calculate_ident(new_roster, rev.new_manifest);
694
695 if (global_sanity.debug_p())
696 L(FL("new manifest_id is %s")
697 % encode_hexenc(rev.new_manifest.inner()()));
698}
699
// Recompute rev.new_manifest for a restricted revision whose edges have
// already been built, and install 'edges' into the revision.
static void
recalculate_manifest_id_for_restricted_rev(parent_map const & old_rosters,
                                           edge_map & edges,
                                           revision_t & rev)
{
  // In order to get the correct manifest ID, recalculate the new roster
  // using one of the restricted csets. It doesn't matter which of the
  // parent roster/cset pairs we use for this; by construction, they must
  // all produce the same result.
  revision_id rid = parent_id(old_rosters.begin());
  roster_t restricted_roster = *(safe_get(old_rosters, rid).first);

  temp_node_id_source nis;
  editable_roster_base er(restricted_roster, nis);
  safe_get(edges, rid)->apply_to(er);

  calculate_ident(restricted_roster, rev.new_manifest);
  rev.edges = edges;

  if (global_sanity.debug_p())
    L(FL("new manifest_id is %s")
      % encode_hexenc(rev.new_manifest.inner()()));
}
723
// Build a revision restricted by 'mask': each edge's cset contains only
// the changes, relative to that parent, that survive the restricted
// roster; the manifest id is then recomputed from the restricted
// result.
void
make_restricted_revision(parent_map const & old_rosters,
                         roster_t const & new_roster,
                         node_restriction const & mask,
                         revision_t & rev)
{
  edge_map edges;
  for (parent_map::const_iterator i = old_rosters.begin();
       i != old_rosters.end();
       i++)
    {
      shared_ptr<cset> included(new cset());
      roster_t restricted_roster;

      make_restricted_roster(parent_roster(i), new_roster,
                             restricted_roster, mask);
      make_cset(parent_roster(i), restricted_roster, *included);
      safe_insert(edges, make_pair(parent_id(i), included));
    }

  recalculate_manifest_id_for_restricted_rev(old_rosters, edges, rev);
}
746
// As the overload above, but also report what was left out: 'excluded'
// receives the cset from the restricted roster to the full new_roster.
void
make_restricted_revision(parent_map const & old_rosters,
                         roster_t const & new_roster,
                         node_restriction const & mask,
                         revision_t & rev,
                         cset & excluded,
                         utf8 const & cmd_name)
{
  edge_map edges;
  bool no_excludes = true;
  for (parent_map::const_iterator i = old_rosters.begin();
       i != old_rosters.end();
       i++)
    {
      shared_ptr<cset> included(new cset());
      roster_t restricted_roster;

      make_restricted_roster(parent_roster(i), new_roster,
                             restricted_roster, mask);
      make_cset(parent_roster(i), restricted_roster, *included);
      // NOTE(review): 'excluded' is overwritten on every iteration, so
      // with two parents only the last parent's excluded cset survives;
      // the N() below forbids restriction in a multi-parent workspace
      // unless nothing was excluded at all, which keeps this safe.
      make_cset(restricted_roster, new_roster, excluded);
      safe_insert(edges, make_pair(parent_id(i), included));
      if (!excluded.empty())
        no_excludes = false;
    }

  N(old_rosters.size() == 1 || no_excludes,
    F("the command '%s %s' cannot be restricted in a two-parent workspace")
    % ui.prog_name % cmd_name);

  recalculate_manifest_id_for_restricted_rev(old_rosters, edges, rev);
}
779
780// Workspace-only revisions, with fake rev.new_manifest and content
781// changes suppressed.
// Build a workspace revision from an explicit cset.  Per the comment
// above: content deltas are stripped from the cset, and for a non-null
// parent the manifest id is replaced with a fake id rather than being
// computed.
void
make_revision_for_workspace(revision_id const & old_rev_id,
                            cset const & changes,
                            revision_t & rev)
{
  MM(old_rev_id);
  MM(changes);
  MM(rev);
  shared_ptr<cset> cs(new cset(changes));
  // suppress content changes (see header comment above)
  cs->deltas_applied.clear();

  rev.edges.clear();
  safe_insert(rev.edges, make_pair(old_rev_id, cs));
  if (!null_id(old_rev_id))
    rev.new_manifest = manifest_id(fake_id());
  rev.made_for = made_for_workspace;
}
799
800void
801make_revision_for_workspace(revision_id const & old_rev_id,
802 roster_t const & old_roster,
803 roster_t const & new_roster,
804 revision_t & rev)
805{
806 MM(old_rev_id);
807 MM(old_roster);
808 MM(new_roster);
809 MM(rev);
810 cset changes;
811 make_cset(old_roster, new_roster, changes);
812 make_revision_for_workspace(old_rev_id, changes, rev);
813}
814
// Multi-parent workspace revision: one delta-stripped cset per parent,
// and a fake manifest id (workspace revisions suppress content changes;
// see the comment above the first overload).
void
make_revision_for_workspace(parent_map const & old_rosters,
                            roster_t const & new_roster,
                            revision_t & rev)
{
  edge_map edges;
  for (parent_map::const_iterator i = old_rosters.begin();
       i != old_rosters.end();
       i++)
    {
      shared_ptr<cset> cs(new cset());
      make_cset(parent_roster(i), new_roster, *cs);
      // suppress content changes, as in the other workspace overloads
      cs->deltas_applied.clear();
      safe_insert(edges, make_pair(parent_id(i), cs));
    }

  rev.edges = edges;
  rev.new_manifest = manifest_id(fake_id());
  rev.made_for = made_for_workspace;
}
835
836
837// Stuff related to rebuilding the revision graph. Unfortunately this is a
838// real enough error case that we need support code for it.
839
840typedef map<u64, pair<shared_ptr<roster_t>, shared_ptr<marking_map> > >
841parent_roster_map;
842
843template <> void
844dump(parent_roster_map const & prm, string & out)
845{
846 ostringstream oss;
847 for (parent_roster_map::const_iterator i = prm.begin(); i != prm.end(); ++i)
848 {
849 oss << "roster: " << i->first << '\n';
850 string roster_str, indented_roster_str;
851 dump(*i->second.first, roster_str);
852 prefix_lines_with(" ", roster_str, indented_roster_str);
853 oss << indented_roster_str;
854 oss << "\nroster's marking:\n";
855 string marking_str, indented_marking_str;
856 dump(*i->second.second, marking_str);
857 prefix_lines_with(" ", marking_str, indented_marking_str);
858 oss << indented_marking_str;
859 oss << "\n\n";
860 }
861 out = oss.str();
862}
863
// Builder used when reconstructing the revision graph from old-style
// (pre-roster) data: nodes are numbered with u64s, and the maps below
// track the correspondence between node numbers, old manifests or
// revisions, and the newly constructed revisions.
struct anc_graph
{
  anc_graph(bool existing, database & db, key_store & keys) :
    existing_graph(existing),
    db(db),
    keys(keys),
    max_node(0),
    n_nodes("nodes", "n", 1),
    n_certs_in("certs in", "c", 1),
    n_revs_out("revs out", "r", 1),
    n_certs_out("certs out", "C", 1)
  {}

  bool existing_graph;  // true when rebuilding from existing revisions
  database & db;
  key_store & keys;
  u64 max_node;         // next unallocated node number

  // progress tickers
  ticker n_nodes;
  ticker n_certs_in;
  ticker n_revs_out;
  ticker n_certs_out;

  // node number <-> old manifest id
  map<u64,manifest_id> node_to_old_man;
  map<manifest_id,u64> old_man_to_node;

  // node number <-> old (pre-rebuild) revision id
  map<u64,revision_id> node_to_old_rev;
  map<revision_id,u64> old_rev_to_node;

  // node number <-> newly constructed revision id
  map<u64,revision_id> node_to_new_rev;
  map<revision_id,u64> new_rev_to_node;

  // legacy rename information loaded per node
  map<u64, legacy::renames_map> node_to_renames;

  // certs collected per node, to be re-issued against the new revisions
  multimap<u64, pair<cert_name, cert_value> > certs;
  // child -> parent(s) edges
  multimap<u64, u64> ancestry;
  // every branch cert value seen while loading
  set<string> branches;

  void add_node_ancestry(u64 child, u64 parent);
  void write_certs();
  void kluge_for_bogus_merge_edges();
  void rebuild_ancestry(set<string> const & attrs_to_drop);
  void get_node_manifest(u64 node, manifest_id & man);
  u64 add_node_for_old_manifest(manifest_id const & man);
  u64 add_node_for_oldstyle_revision(revision_id const & rev);
  void construct_revisions_from_ancestry(set<string> const & attrs_to_drop);
  void fixup_node_identities(parent_roster_map const & parent_rosters,
                             roster_t & child_roster,
                             legacy::renames_map const & renames);
};
914
915
// Record one child -> parent edge in the in-memory ancestry multimap.
void anc_graph::add_node_ancestry(u64 child, u64 parent)
{
  L(FL("noting ancestry from child %d -> parent %d") % child % parent);
  ancestry.insert(make_pair(child, parent));
}
921
922void anc_graph::get_node_manifest(u64 node, manifest_id & man)
923{
924 map<u64,manifest_id>::const_iterator i = node_to_old_man.find(node);
925 I(i != node_to_old_man.end());
926 man = i->second;
927}
928
// Write out certs for the rebuilt graph: first regenerate a fresh
// random epoch for every branch seen during loading, then re-issue each
// node's stored certs against its newly constructed revision id.
void anc_graph::write_certs()
{
  {
    // regenerate epochs on all branches to random states

    for (set<string>::const_iterator i = branches.begin(); i != branches.end(); ++i)
      {
        char buf[constants::epochlen_bytes];
        keys.get_rng().randomize(reinterpret_cast<Botan::byte *>(buf), constants::epochlen_bytes);
        epoch_data new_epoch(string(buf, buf + constants::epochlen_bytes));
        L(FL("setting epoch for %s to %s")
          % *i % new_epoch);
        db.set_epoch(branch_name(*i), new_epoch);
      }
  }


  typedef multimap<u64, pair<cert_name, cert_value> >::const_iterator ci;

  for (map<u64,revision_id>::const_iterator i = node_to_new_rev.begin();
       i != node_to_new_rev.end(); ++i)
    {
      revision_id rev(i->second);

      pair<ci,ci> range = certs.equal_range(i->first);

      for (ci j = range.first; j != range.second; ++j)
        {
          cert_name name(j->second.first);
          cert_value val(j->second.second);

          // count only certs actually written (put_simple_revision_cert
          // presumably returns false for duplicates -- TODO confirm)
          if (put_simple_revision_cert(db, keys, rev, name, val))
            ++n_certs_out;
        }
    }
}
965
966void
967anc_graph::kluge_for_bogus_merge_edges()
968{
969 // This kluge exists because in the 0.24-era monotone databases, several
970 // bad merges still existed in which one side of the merge is an ancestor
971 // of the other side of the merge. In other words, graphs which look like
972 // this:
973 //
974 // a ----------------------> e
975 // \ /
976 // \---> b -> c -> d ----/
977 //
978 // Such merges confuse the roster-building algorithm, because they should
979 // never have occurred in the first place: a was not a head at the time
980 // of the merge, e should simply have been considered an extension of d.
981 //
982 // So... we drop the a->e edges entirely.
983 //
984 // Note: this kluge drops edges which are a struct superset of those
985 // dropped by a previous kluge ("3-ancestor") so we have removed that
986 // code.
987
988 P(F("scanning for bogus merge edges"));
989
990 multimap<u64,u64> parent_to_child_map;
991 for (multimap<u64, u64>::const_iterator i = ancestry.begin();
992 i != ancestry.end(); ++i)
993 parent_to_child_map.insert(make_pair(i->second, i->first));
994
995 map<u64, u64> edges_to_kill;
996 for (multimap<u64, u64>::const_iterator i = ancestry.begin();
997 i != ancestry.end(); ++i)
998 {
999 multimap<u64, u64>::const_iterator j = i;
1000 ++j;
1001 u64 child = i->first;
1002 // NB: ancestry is a multimap from child->parent(s)
1003 if (j != ancestry.end())
1004 {
1005 if (j->first == i->first)
1006 {
1007 L(FL("considering old merge edge %s") %
1008 encode_hexenc(safe_get(node_to_old_rev, i->first).inner()()));
1009 u64 parent1 = i->second;
1010 u64 parent2 = j->second;
1011 if (is_ancestor (parent1, parent2, parent_to_child_map))
1012 safe_insert(edges_to_kill, make_pair(child, parent1));
1013 else if (is_ancestor (parent2, parent1, parent_to_child_map))
1014 safe_insert(edges_to_kill, make_pair(child, parent2));
1015 }
1016 }
1017 }
1018
1019 for (map<u64, u64>::const_iterator i = edges_to_kill.begin();
1020 i != edges_to_kill.end(); ++i)
1021 {
1022 u64 child = i->first;
1023 u64 parent = i->second;
1024 bool killed = false;
1025 for (multimap<u64, u64>::iterator j = ancestry.lower_bound(child);
1026 j->first == child; ++j)
1027 {
1028 if (j->second == parent)
1029 {
1030 P(F("optimizing out redundant edge %d -> %d")
1031 % parent % child);
1032 ancestry.erase(j);
1033 killed = true;
1034 break;
1035 }
1036 }
1037
1038 if (!killed)
1039 W(F("failed to eliminate edge %d -> %d")
1040 % parent % child);
1041 }
1042}
1043
1044
// Top-level rebuild driver: drop bogus merge edges, then -- inside one
// transaction -- delete the old revisions and certs (when rebuilding an
// existing graph), reconstruct revisions from the ancestry, write the
// certs, and finally delete the old manifests.
void
anc_graph::rebuild_ancestry(set<string> const & attrs_to_drop)
{
  kluge_for_bogus_merge_edges();

  P(F("rebuilding %d nodes") % max_node);
  {
    transaction_guard guard(db);
    if (existing_graph)
      db.delete_existing_revs_and_certs();
    construct_revisions_from_ancestry(attrs_to_drop);
    write_certs();
    if (existing_graph)
      db.delete_existing_manifests();
    guard.commit();
  }
}
1062
// Return the node number for 'man', allocating a fresh node -- and
// loading the manifest's non-bogus certs -- the first time a given
// manifest is seen.  Only valid when building from scratch
// (I(!existing_graph)).
u64
anc_graph::add_node_for_old_manifest(manifest_id const & man)
{
  I(!existing_graph);
  u64 node = 0;
  if (old_man_to_node.find(man) == old_man_to_node.end())
    {
      node = max_node++;
      ++n_nodes;
      L(FL("node %d = manifest %s")
        % node % man);
      old_man_to_node.insert(make_pair(man, node));
      node_to_old_man.insert(make_pair(node, man));

      // load certs
      vector< manifest<cert> > mcerts;
      db.get_manifest_certs(man, mcerts);
      erase_bogus_certs(db, mcerts);
      for(vector< manifest<cert> >::const_iterator i = mcerts.begin();
          i != mcerts.end(); ++i)
        {
          L(FL("loaded '%s' manifest cert for node %s") % i->inner().name % node);
          ++n_certs_in;
          certs.insert(make_pair(node, make_pair(i->inner().name,
                                                 i->inner().value)));
        }
    }
  else
    {
      node = old_man_to_node[man];
    }
  return node;
}
1096
// Return the node number for old-style revision 'rev', allocating a
// fresh node on first sight: its manifest and legacy rename info are
// loaded, its non-bogus certs collected, and any branch cert values
// added to 'branches'.  Only valid when rebuilding an existing graph.
u64 anc_graph::add_node_for_oldstyle_revision(revision_id const & rev)
{
  I(existing_graph);
  I(!null_id(rev));
  u64 node = 0;
  if (old_rev_to_node.find(rev) == old_rev_to_node.end())
    {
      node = max_node++;
      ++n_nodes;

      manifest_id man;
      legacy::renames_map renames;
      legacy::get_manifest_and_renames_for_rev(db, rev, man, renames);

      L(FL("node %d = revision %s = manifest %s")
        % node
        % rev
        % man);
      old_rev_to_node.insert(make_pair(rev, node));
      node_to_old_rev.insert(make_pair(node, rev));
      node_to_old_man.insert(make_pair(node, man));
      node_to_renames.insert(make_pair(node, renames));

      // load certs
      vector< revision<cert> > rcerts;
      db.get_revision_certs(rev, rcerts);
      erase_bogus_certs(db, rcerts);
      for(vector< revision<cert> >::const_iterator i = rcerts.begin();
          i != rcerts.end(); ++i)
        {
          L(FL("loaded '%s' revision cert for node %s") % i->inner().name % node);
          ++n_certs_in;
          certs.insert(make_pair(node, make_pair(i->inner().name,
                                                 i->inner().value)));

          // remember every branch we encounter, for epoch regeneration
          if (i->inner().name == branch_cert_name)
            branches.insert(i->inner().value());
        }
    }
  else
    {
      node = old_rev_to_node[rev];
    }

  return node;
}
1143
1144static bool
1145not_dead_yet(node_id nid, u64 birth_rev,
1146 parent_roster_map const & parent_rosters,
1147 multimap<u64, u64> const & child_to_parents)
1148{
1149 // Any given node, at each point in the revision graph, is in one of the
1150 // states "alive", "unborn", "dead". The invariant we must maintain in
1151 // constructing our revision graph is that if a node is dead in any parent,
1152 // then it must also be dead in the child. The purpose of this function is
1153 // to take a node, and a list of parents, and determine whether that node is
1154 // allowed to be alive in a child of the given parents.
1155
1156 // "Alive" means, the node currently exists in the revision's tree.
1157 // "Unborn" means, the node does not exist in the revision's tree, and the
1158 // node's birth revision is _not_ an ancestor of the revision.
1159 // "Dead" means, the node does not exist in the revision's tree, and the
1160 // node's birth revision _is_ an ancestor of the revision.
1161
1162 // L(FL("testing liveliness of node %d, born in rev %d") % nid % birth_rev);
1163 for (parent_roster_map::const_iterator r = parent_rosters.begin();
1164 r != parent_rosters.end(); ++r)
1165 {
1166 shared_ptr<roster_t> parent = r->second.first;
1167 // L(FL("node %d %s in parent roster %d")
1168 // % nid
1169 // % (parent->has_node(n->first) ? "exists" : "does not exist" )
1170 // % r->first);
1171
1172 if (!parent->has_node(nid))
1173 {
1174 deque<u64> work;
1175 set<u64> seen;
1176 work.push_back(r->first);
1177 while (!work.empty())
1178 {
1179 u64 curr = work.front();
1180 work.pop_front();
1181 // L(FL("examining ancestor %d of parent roster %d, looking for anc=%d")
1182 // % curr % r->first % birth_rev);
1183
1184 if (seen.find(curr) != seen.end())
1185 continue;
1186 seen.insert(curr);
1187
1188 if (curr == birth_rev)
1189 {
1190 // L(FL("node is dead in %d") % r->first);
1191 return false;
1192 }
1193 typedef multimap<u64, u64>::const_iterator ci;
1194 pair<ci,ci> range = child_to_parents.equal_range(curr);
1195 for (ci i = range.first; i != range.second; ++i)
1196 {
1197 if (i->first != curr)
1198 continue;
1199 work.push_back(i->second);
1200 }
1201 }
1202 }
1203 }
1204 // L(FL("node is alive in all parents, returning true"));
1205 return true;
1206}
1207
1208
1209static file_path
1210find_old_path_for(map<file_path, file_path> const & renames,
1211 file_path const & new_path)
1212{
1213 map<file_path, file_path>::const_iterator i = renames.find(new_path);
1214 if (i != renames.end())
1215 return i->second;
1216
1217 // ??? root directory rename possible in the old schema?
1218 // if not, do this first.
1219 if (new_path.empty())
1220 return new_path;
1221
1222 file_path dir;
1223 path_component base;
1224 new_path.dirname_basename(dir, base);
1225 return find_old_path_for(renames, dir) / base;
1226}
1227
1228static file_path
1229find_new_path_for(map<file_path, file_path> const & renames,
1230 file_path const & old_path)
1231{
1232 map<file_path, file_path> reversed;
1233 for (map<file_path, file_path>::const_iterator i = renames.begin();
1234 i != renames.end(); ++i)
1235 reversed.insert(make_pair(i->second, i->first));
1236 // this is a hackish kluge. seems to work, though.
1237 return find_old_path_for(reversed, old_path);
1238}
1239
1240// Recursive helper function for insert_into_roster.
1241static void
1242insert_parents_into_roster(roster_t & child_roster,
1243 temp_node_id_source & nis,
1244 file_path const & pth,
1245 file_path const & full)
1246{
1247 if (child_roster.has_node(pth))
1248 {
1249 E(is_dir_t(child_roster.get_node(pth)),
1250 F("Directory %s for path %s cannot be added, "
1251 "as there is a file in the way") % pth % full);
1252 return;
1253 }
1254
1255 if (!pth.empty())
1256 insert_parents_into_roster(child_roster, nis, pth.dirname(), full);
1257
1258 child_roster.attach_node(child_roster.create_dir_node(nis), pth);
1259}
1260
1261static void
1262insert_into_roster(roster_t & child_roster,
1263 temp_node_id_source & nis,
1264 file_path const & pth,
1265 file_id const & fid)
1266{
1267 if (child_roster.has_node(pth))
1268 {
1269 node_t n = child_roster.get_node(pth);
1270 E(is_file_t(n),
1271 F("Path %s cannot be added, as there is a directory in the way") % pth);
1272 file_t f = downcast_to_file_t(n);
1273 E(f->content == fid,
1274 F("Path %s added twice with differing content") % pth);
1275 return;
1276 }
1277
1278 insert_parents_into_roster(child_roster, nis, pth.dirname(), pth);
1279 child_roster.attach_node(child_roster.create_file_node(fid, nis), pth);
1280}
1281
// Knit parent node IDs into the (all-temporary) node IDs of a freshly
// built child roster, so that files/dirs keep their identity across
// the converted revision graph.
void
anc_graph::fixup_node_identities(parent_roster_map const & parent_rosters,
                                 roster_t & child_roster,
                                 legacy::renames_map const & renames)
{
  // Our strategy here is to iterate over every node in every parent, and
  // for each parent node P find zero or one tmp nodes in the child which
  // represents the fate of P:
  //
  // - If any of the parents thinks that P has died, we do not search for
  //   it in the child; we leave it as "dropped".
  //
  // - We fetch the name N of the parent node P, and apply the rename map
  //   to N, getting "remapped name" M. If we find a child node C with
  //   name M in the child roster, with the same type as P, we identify P
  //   and C, and swap P for C in the child.


  // Map node_id -> birth rev
  map<node_id, u64> nodes_in_any_parent;

  // Stage 1: collect all nodes (and their birth revs) in any parent.
  for (parent_roster_map::const_iterator i = parent_rosters.begin();
       i != parent_rosters.end(); ++i)
    {
      shared_ptr<roster_t> parent_roster = i->second.first;
      shared_ptr<marking_map> parent_marking = i->second.second;

      node_map const & nodes = parent_roster->all_nodes();
      for (node_map::const_iterator j = nodes.begin(); j != nodes.end(); ++j)
        {
          node_id n = j->first;
          // Translate the birth revision id back into a conversion-graph
          // node number; it must already have been converted.
          revision_id birth_rev = safe_get(*parent_marking, n).birth_revision;
          u64 birth_node = safe_get(new_rev_to_node, birth_rev);
          // NOTE: this inner 'i' shadows the outer roster iterator 'i';
          // only the inner one is used below.
          map<node_id, u64>::const_iterator i = nodes_in_any_parent.find(n);
          if (i != nodes_in_any_parent.end())
            // Same node seen via another parent: birth rev must agree.
            I(i->second == birth_node);
          else
            safe_insert(nodes_in_any_parent,
                        make_pair(n, birth_node));
        }
    }

  // Stage 2: For any node which is actually live, try to locate a mapping
  // from a parent instance of it to a child node.
  for (map<node_id, u64>::const_iterator i = nodes_in_any_parent.begin();
       i != nodes_in_any_parent.end(); ++i)
    {
      node_id n = i->first;
      u64 birth_rev = i->second;

      // Already identified with a child node (possibly via an earlier
      // parent); nothing to do.
      if (child_roster.has_node(n))
        continue;

      // Only nodes not dead in every parent may be knitted in.
      if (not_dead_yet(n, birth_rev, parent_rosters, ancestry))
        {
          for (parent_roster_map::const_iterator j = parent_rosters.begin();
               j != parent_rosters.end(); ++j)
            {
              shared_ptr<roster_t> parent_roster = j->second.first;

              if (!parent_roster->has_node(n))
                continue;

              file_path fp;
              parent_roster->get_name(n, fp);

              // Try remapping the name.
              if (node_to_old_rev.find(j->first) != node_to_old_rev.end())
                {
                  legacy::renames_map::const_iterator rmap;
                  revision_id parent_rid = safe_get(node_to_old_rev, j->first);
                  rmap = renames.find(parent_rid);
                  if (rmap != renames.end())
                    fp = find_new_path_for(rmap->second, fp);
                }

              // See if we can match this node against a child.
              if ((!child_roster.has_node(n))
                  && child_roster.has_node(fp))
                {
                  node_t pn = parent_roster->get_node(n);
                  node_t cn = child_roster.get_node(fp);
                  // Only identify nodes of the same type (file vs dir).
                  if (is_file_t(pn) == is_file_t(cn))
                    {
                      child_roster.replace_node_id(cn->self, n);
                      break;
                    }
                }
            }
        }
    }
}
1375
// Debugging helper registered via MM() while a node is being converted;
// its dump() specialization (below) prints the node's certs so crashes
// in construct_revisions_from_ancestry identify the offending node.
struct
current_rev_debugger
{
  u64 node;                  // conversion-graph node being processed
  anc_graph const & agraph;  // graph the node belongs to
  current_rev_debugger(u64 n, anc_graph const & ag)
    : node(n), agraph(ag)
  {
  }
};
1386
1387template <> void
1388dump(current_rev_debugger const & d, string & out)
1389{
1390 typedef multimap<u64, pair<cert_name, cert_value> >::const_iterator ci;
1391 pair<ci,ci> range = d.agraph.certs.equal_range(d.node);
1392 for(ci i = range.first; i != range.second; ++i)
1393 {
1394 if (i->first == d.node)
1395 {
1396 out += "cert '" + i->second.first() + "'";
1397 out += "= '" + i->second.second() + "'\n";
1398 }
1399 }
1400}
1401
1402
// Core of the conversion: walk the old ancestry graph in frontier
// order, rebuild each node's tree as a roster, knit node identities
// across edges, and write the resulting roster-style revisions to the
// database.  'attrs_to_drop' lists .mt-attrs attribute keys to discard
// rather than migrate.
void
anc_graph::construct_revisions_from_ancestry(set<string> const & attrs_to_drop)
{
  // This is an incredibly cheesy, and also reasonably simple sorting
  // system: we put all the root nodes in the work queue. we take a
  // node out of the work queue and check if its parents are done. if
  // they are, we process it and insert its children. otherwise we put
  // it back on the end of the work queue. This both ensures that we're
  // always processing something *like* a frontier, while avoiding the
  // need to worry about one side of the frontier advancing faster than
  // another.

  typedef multimap<u64,u64>::const_iterator ci;
  multimap<u64,u64> parent_to_child_map;
  deque<u64> work;
  set<u64> done;

  {
    // Set up the parent->child mapping and prime the work queue

    // 'ancestry' maps child -> parent; invert it, and seed the queue
    // with all nodes that are nobody's child (the roots).
    set<u64> children, all;
    for (multimap<u64, u64>::const_iterator i = ancestry.begin();
         i != ancestry.end(); ++i)
      {
        parent_to_child_map.insert(make_pair(i->second, i->first));
        children.insert(i->first);
      }
    // node_to_old_man has an entry for every node, so its key set is
    // the set of all nodes.
    for (map<u64,manifest_id>::const_iterator i = node_to_old_man.begin();
         i != node_to_old_man.end(); ++i)
      {
        all.insert(i->first);
      }

    set_difference(all.begin(), all.end(),
                   children.begin(), children.end(),
                   back_inserter(work));
  }

  while (!work.empty())
    {

      u64 child = work.front();

      // Register debugging context so a crash names this node.
      current_rev_debugger dbg(child, *this);
      MM(dbg);

      work.pop_front();

      if (done.find(child) != done.end())
        continue;

      // Check whether every parent of 'child' has been processed; if
      // not, requeue the child and move on.  The equal_range result is
      // reused below to load parent rosters.
      pair<ci,ci> parent_range = ancestry.equal_range(child);
      // NOTE(review): 'parents' is filled here but appears unused for
      // the rest of the function -- looks like dead state; confirm
      // before removing.
      set<u64> parents;
      bool parents_all_done = true;
      for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i)
        {
          if (i->first != child)
            continue;
          u64 parent = i->second;
          if (done.find(parent) == done.end())
            {
              // Not ready yet; try this child again later.
              work.push_back(child);
              parents_all_done = false;
            }
          else
            parents.insert(parent);
        }

      if (parents_all_done
          && (node_to_new_rev.find(child) == node_to_new_rev.end()))
        {
          L(FL("processing node %d") % child);

          // Fetch the node's old-style manifest contents.
          manifest_id old_child_mid;
          legacy::manifest_map old_child_man;

          get_node_manifest(child, old_child_mid);
          manifest_data mdat;
          db.get_manifest_version(old_child_mid, mdat);
          legacy::read_manifest_map(mdat, old_child_man);

          // Load all the parent rosters into a temporary roster map
          parent_roster_map parent_rosters;
          MM(parent_rosters);

          // (parents_all_done is already known true here, so that part
          // of the loop condition is redundant but harmless.)
          for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i)
            {
              if (i->first != child)
                continue;
              u64 parent = i->second;
              if (parent_rosters.find(parent) == parent_rosters.end())
                {
                  shared_ptr<roster_t> ros = shared_ptr<roster_t>(new roster_t());
                  shared_ptr<marking_map> mm = shared_ptr<marking_map>(new marking_map());
                  db.get_roster(safe_get(node_to_new_rev, parent), *ros, *mm);
                  safe_insert(parent_rosters, make_pair(parent, make_pair(ros, mm)));
                }
            }

          // Special monotone-0.x bookkeeping files that need migration.
          file_path attr_path = file_path_internal(".mt-attrs");
          file_path old_ignore_path = file_path_internal(".mt-ignore");
          file_path new_ignore_path = file_path_internal(".mtn-ignore");

          roster_t child_roster;
          MM(child_roster);
          temp_node_id_source nis;

          // all rosters shall have a root node.
          child_roster.attach_node(child_roster.create_dir_node(nis),
                                   file_path_internal(""));

          // Populate the roster from the old manifest, skipping
          // .mt-attrs (migrated separately below).
          for (legacy::manifest_map::const_iterator i = old_child_man.begin();
               i != old_child_man.end(); ++i)
            {
              if (i->first == attr_path)
                continue;
              // convert .mt-ignore to .mtn-ignore... except if .mtn-ignore
              // already exists, just leave things alone.
              else if (i->first == old_ignore_path
                       && old_child_man.find(new_ignore_path) == old_child_man.end())
                insert_into_roster(child_roster, nis, new_ignore_path, i->second);
              else
                insert_into_roster(child_roster, nis, i->first, i->second);
            }

          // migrate attributes out of .mt-attrs
          {
            legacy::manifest_map::const_iterator i = old_child_man.find(attr_path);
            if (i != old_child_man.end())
              {
                file_data dat;
                db.get_file_version(i->second, dat);
                legacy::dot_mt_attrs_map attrs;
                legacy::read_dot_mt_attrs(dat.inner(), attrs);
                for (legacy::dot_mt_attrs_map::const_iterator j = attrs.begin();
                     j != attrs.end(); ++j)
                  {
                    // Attributes on paths no longer in the tree are
                    // silently skipped.
                    if (child_roster.has_node(j->first))
                      {
                        map<string, string> const &
                          fattrs = j->second;
                        for (map<string, string>::const_iterator
                               k = fattrs.begin();
                             k != fattrs.end(); ++k)
                          {
                            string key = k->first;
                            if (attrs_to_drop.find(key) != attrs_to_drop.end())
                              {
                                // ignore it
                              }
                            else if (key == "execute" || key == "manual_merge")
                              // Known keys migrate under the "mtn:"
                              // namespace.
                              child_roster.set_attr(j->first,
                                                    attr_key("mtn:" + key),
                                                    attr_value(k->second));
                            else
                              E(false, F("unknown attribute '%s' on path '%s'\n"
                                         "please contact %s so we can work out the right way to migrate this\n"
                                         "(if you just want it to go away, see the switch --drop-attr, but\n"
                                         "seriously, if you'd like to keep it, we're happy to figure out how)")
                                % key % j->first % PACKAGE_BUGREPORT);
                          }
                      }
                  }
              }
          }

          // Now knit the parent node IDs into child node IDs (which are currently all
          // tmpids), wherever possible.
          fixup_node_identities(parent_rosters, child_roster, node_to_renames[child]);

          revision_t rev;
          rev.made_for = made_for_database;
          MM(rev);
          calculate_ident(child_roster, rev.new_manifest);

          // For each parent, construct an edge in the revision structure by analyzing the
          // relationship between the parent roster and the child roster (and placing the
          // result in a cset)

          for (parent_roster_map::const_iterator i = parent_rosters.begin();
               i != parent_rosters.end(); ++i)
            {
              u64 parent = i->first;
              revision_id parent_rid = safe_get(node_to_new_rev, parent);
              shared_ptr<roster_t> parent_roster = i->second.first;
              shared_ptr<cset> cs = shared_ptr<cset>(new cset());
              MM(*cs);
              make_cset(*parent_roster, child_roster, *cs);
              safe_insert(rev.edges, make_pair(parent_rid, cs));
            }

          // It is possible that we're at a "root" node here -- a node
          // which had no parent in the old rev graph -- in which case we
          // synthesize an edge from the empty revision to the current,
          // containing a cset which adds all the files in the child.

          if (rev.edges.empty())
            {
              revision_id parent_rid;
              shared_ptr<roster_t> parent_roster = shared_ptr<roster_t>(new roster_t());
              shared_ptr<cset> cs = shared_ptr<cset>(new cset());
              MM(*cs);
              make_cset(*parent_roster, child_roster, *cs);
              safe_insert(rev.edges, make_pair (parent_rid, cs));

            }

          // Finally, put all this excitement into the database and save
          // the new_rid for use in the cert-writing pass.

          revision_id new_rid;
          calculate_ident(rev, new_rid);
          node_to_new_rev.insert(make_pair(child, new_rid));
          new_rev_to_node.insert(make_pair(new_rid, child));

          /*
          P(F("------------------------------------------------"));
          P(F("made revision %s with %d edges, manifest id = %s")
            % new_rid % rev.edges.size() % rev.new_manifest);

          {
            string rtmp;
            data dtmp;
            dump(dbg, rtmp);
            write_revision(rev, dtmp);
            P(F("%s") % rtmp);
            P(F("%s") % dtmp);
          }
          P(F("------------------------------------------------"));
          */

          L(FL("mapped node %d to revision %s") % child % new_rid);
          if (db.put_revision(new_rid, rev))
            ++n_revs_out;

          // Mark this child as done, hooray!
          safe_insert(done, child);

          // Extend the work queue with all the children of this child
          pair<ci,ci> grandchild_range = parent_to_child_map.equal_range(child);
          for (ci i = grandchild_range.first;
               i != grandchild_range.second; ++i)
            {
              if (i->first != child)
                continue;
              if (done.find(i->second) == done.end())
                work.push_back(i->second);
            }
        }
    }
}
1654
1655void
1656build_roster_style_revs_from_manifest_style_revs(database & db, key_store & keys,
1657 set<string> const & attrs_to_drop)
1658{
1659 anc_graph graph(true, db, keys);
1660
1661 P(F("converting existing revision graph to new roster-style revisions"));
1662 multimap<revision_id, revision_id> existing_graph;
1663
1664 // cross-check that we're getting everything
1665 // in fact the code in this function is wrong, because if a revision has no
1666 // parents and no children (it is a root revision, and no children have been
1667 // committed under it), then we will simply drop it!
1668 // This code at least causes this case to throw an assertion; FIXME: make
1669 // this case actually work.
1670 set<revision_id> all_rev_ids;
1671 db.get_revision_ids(all_rev_ids);
1672
1673 db.get_revision_ancestry(existing_graph);
1674 for (multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
1675 i != existing_graph.end(); ++i)
1676 {
1677 // FIXME: insert for the null id as well, and do the same for the
1678 // changesetify code, and then reach rebuild_ancestry how to deal with
1679 // such things. (I guess u64(0) should represent the null parent?)
1680 if (!null_id(i->first))
1681 {
1682 u64 parent_node = graph.add_node_for_oldstyle_revision(i->first);
1683 all_rev_ids.erase(i->first);
1684 u64 child_node = graph.add_node_for_oldstyle_revision(i->second);
1685 all_rev_ids.erase(i->second);
1686 graph.add_node_ancestry(child_node, parent_node);
1687 }
1688 }
1689
1690 for (set<revision_id>::const_iterator i = all_rev_ids.begin();
1691 i != all_rev_ids.end(); ++i)
1692 {
1693 graph.add_node_for_oldstyle_revision(*i);
1694 }
1695
1696 graph.rebuild_ancestry(attrs_to_drop);
1697}
1698
1699
1700void
1701build_changesets_from_manifest_ancestry(database & db, key_store & keys,
1702 set<string> const & attrs_to_drop)
1703{
1704 anc_graph graph(false, db, keys);
1705
1706 P(F("rebuilding revision graph from manifest certs"));
1707
1708 vector< manifest<cert> > tmp;
1709 db.get_manifest_certs(cert_name("ancestor"), tmp);
1710 erase_bogus_certs(db, tmp);
1711
1712 for (vector< manifest<cert> >::const_iterator i = tmp.begin();
1713 i != tmp.end(); ++i)
1714 {
1715 manifest_id child, parent;
1716 child = manifest_id(i->inner().ident.inner());
1717 parent = manifest_id(i->inner().value());
1718
1719 u64 parent_node = graph.add_node_for_old_manifest(parent);
1720 u64 child_node = graph.add_node_for_old_manifest(child);
1721 graph.add_node_ancestry(child_node, parent_node);
1722 }
1723
1724 graph.rebuild_ancestry(attrs_to_drop);
1725}
1726
1727
1728// This is a special function solely for the use of regenerate_caches -- it
1729// must work even when caches (especially, the height cache!) do not exist.
1730// For all other purposes, use toposort above.
1731static void
1732allrevs_toposorted(database & db,
1733 vector<revision_id> & revisions)
1734{
1735 // get the complete ancestry
1736 rev_ancestry_map graph;
1737 db.get_revision_ancestry(graph);
1738 toposort_rev_ancestry(graph, revisions);
1739}
1740
1741void
1742regenerate_caches(database & db)
1743{
1744 P(F("regenerating cached rosters and heights"));
1745
1746 db.ensure_open_for_format_changes();
1747
1748 transaction_guard guard(db);
1749
1750 db.delete_existing_rosters();
1751 db.delete_existing_heights();
1752
1753 vector<revision_id> sorted_ids;
1754 allrevs_toposorted(db, sorted_ids);
1755
1756 ticker done(_("regenerated"), "r", 5);
1757 done.set_total(sorted_ids.size());
1758
1759 for (std::vector<revision_id>::const_iterator i = sorted_ids.begin();
1760 i != sorted_ids.end(); ++i)
1761 {
1762 revision_t rev;
1763 revision_id const & rev_id = *i;
1764 db.get_revision(rev_id, rev);
1765 db.put_roster_for_revision(rev_id, rev);
1766 db.put_height_for_revision(rev_id, rev);
1767 ++done;
1768 }
1769
1770 guard.commit();
1771
1772 P(F("finished regenerating cached rosters and heights"));
1773}
1774
1775// i/o stuff
1776
namespace
{
  // Symbolic names for the keys of the basic_io revision format, used
  // by the printing and parsing code below.
  namespace syms
  {
    symbol const format_version("format_version");
    symbol const old_revision("old_revision");
    symbol const new_manifest("new_manifest");
  }
}
1786
1787void
1788print_edge(basic_io::printer & printer,
1789 edge_entry const & e)
1790{
1791 basic_io::stanza st;
1792 st.push_binary_pair(syms::old_revision, edge_old_revision(e).inner());
1793 printer.print_stanza(st);
1794 print_cset(printer, edge_changes(e));
1795}
1796
1797static void
1798print_insane_revision(basic_io::printer & printer,
1799 revision_t const & rev)
1800{
1801
1802 basic_io::stanza format_stanza;
1803 format_stanza.push_str_pair(syms::format_version, "1");
1804 printer.print_stanza(format_stanza);
1805
1806 basic_io::stanza manifest_stanza;
1807 manifest_stanza.push_binary_pair(syms::new_manifest, rev.new_manifest.inner());
1808 printer.print_stanza(manifest_stanza);
1809
1810 for (edge_map::const_iterator edge = rev.edges.begin();
1811 edge != rev.edges.end(); ++edge)
1812 print_edge(printer, *edge);
1813}
1814
1815void
1816print_revision(basic_io::printer & printer,
1817 revision_t const & rev)
1818{
1819 rev.check_sane();
1820 print_insane_revision(printer, rev);
1821}
1822
1823
1824void
1825parse_edge(basic_io::parser & parser,
1826 revision_t & rev)
1827{
1828 shared_ptr<cset> cs(new cset());
1829 MM(*cs);
1830 manifest_id old_man;
1831 revision_id old_rev;
1832 string tmp;
1833
1834 parser.esym(syms::old_revision);
1835 parser.hex(tmp);
1836 old_rev = revision_id(decode_hexenc(tmp));
1837
1838 parse_cset(parser, *cs);
1839
1840 rev.edges.insert(make_pair(old_rev, cs));
1841}
1842
1843
1844void
1845parse_revision(basic_io::parser & parser,
1846 revision_t & rev)
1847{
1848 MM(rev);
1849 rev.edges.clear();
1850 rev.made_for = made_for_database;
1851 string tmp;
1852 parser.esym(syms::format_version);
1853 parser.str(tmp);
1854 E(tmp == "1",
1855 F("encountered a revision with unknown format, version '%s'\n"
1856 "I only know how to understand the version '1' format\n"
1857 "a newer version of monotone is required to complete this operation")
1858 % tmp);
1859 parser.esym(syms::new_manifest);
1860 parser.hex(tmp);
1861 rev.new_manifest = manifest_id(decode_hexenc(tmp), made_from_network);
1862 while (parser.symp(syms::old_revision))
1863 parse_edge(parser, rev);
1864 rev.check_sane();
1865}
1866
1867void
1868read_revision(data const & dat,
1869 revision_t & rev)
1870{
1871 MM(rev);
1872 basic_io::input_source src(dat(), "revision");
1873 rev.made_from = dat.made_from;
1874 src.made_from = dat.made_from;
1875 made_from_t made_from(rev.made_from);
1876 basic_io::tokenizer tok(src);
1877 basic_io::parser pars(tok);
1878 parse_revision(pars, rev);
1879 I(src.lookahead == EOF);
1880 rev.check_sane();
1881}
1882
1883void
1884read_revision(revision_data const & dat,
1885 revision_t & rev)
1886{
1887 read_revision(dat.inner(), rev);
1888 rev.check_sane();
1889}
1890
1891static void write_insane_revision(revision_t const & rev,
1892 data & dat)
1893{
1894 basic_io::printer pr;
1895 print_insane_revision(pr, rev);
1896 dat = data(pr.buf);
1897}
1898
1899template <> void
1900dump(revision_t const & rev, string & out)
1901{
1902 data dat;
1903 write_insane_revision(rev, dat);
1904 out = dat();
1905}
1906
1907void
1908write_revision(revision_t const & rev,
1909 data & dat)
1910{
1911 rev.check_sane();
1912 write_insane_revision(rev, dat);
1913}
1914
1915void
1916write_revision(revision_t const & rev,
1917 revision_data & dat)
1918{
1919 data d;
1920 write_revision(rev, d);
1921 dat = revision_data(d);
1922}
1923
1924void calculate_ident(revision_t const & cs,
1925 revision_id & ident)
1926{
1927 data tmp;
1928 id tid;
1929 write_revision(cs, tmp);
1930 calculate_ident(tmp, tid);
1931 ident = revision_id(tid);
1932}
1933
1934#ifdef BUILD_UNIT_TESTS
1935#include "unit_tests.hh"
1936#include "sanity.hh"
1937
// Exercise find_old_path_for / find_new_path_for: identity when the
// rename map is empty, then direct and inherited (parent-directory)
// renames in both directions.
UNIT_TEST(revision, find_old_new_path_for)
{
  map<file_path, file_path> renames;
  file_path foo = file_path_internal("foo");
  file_path foo_bar = file_path_internal("foo/bar");
  file_path foo_baz = file_path_internal("foo/baz");
  file_path quux = file_path_internal("quux");
  file_path quux_baz = file_path_internal("quux/baz");
  // Empty map: every path maps to itself in both directions.
  I(foo == find_old_path_for(renames, foo));
  I(foo == find_new_path_for(renames, foo));
  I(foo_bar == find_old_path_for(renames, foo_bar));
  I(foo_bar == find_new_path_for(renames, foo_bar));
  I(quux == find_old_path_for(renames, quux));
  I(quux == find_new_path_for(renames, quux));
  // Rename map: old "quux" -> new "foo", old "foo/baz" -> new "foo/bar".
  // (Keys are new names, values are old names, per find_old_path_for.)
  renames.insert(make_pair(foo, quux));
  renames.insert(make_pair(foo_bar, foo_baz));
  // Direct hits in the map.
  I(quux == find_old_path_for(renames, foo));
  I(foo == find_new_path_for(renames, quux));
  // Inherited via the renamed parent directory: foo/baz was quux/baz.
  I(quux_baz == find_old_path_for(renames, foo_baz));
  I(foo_baz == find_new_path_for(renames, quux_baz));
  // Direct map entry takes precedence over the parent-directory rule.
  I(foo_baz == find_old_path_for(renames, foo_bar));
  I(foo_bar == find_new_path_for(renames, foo_baz));
}
1961
// Feed a series of malformed revision texts through read_revision with
// network provenance, and check that each raises bad_decode (rather
// than asserting or being silently accepted).
UNIT_TEST(revision, from_network)
{
  char const * bad_revisions[] = {
    // Empty input.
    "",

    // Missing new_manifest.
    "format_version \"1\"\n",

    // null manifest id is not sane.
    "format_version \"1\"\n"
    "\n"
    "new_manifest [0000000000000000000000000000000000000000]\n",

    // Truncated manifest id.
    "format_version \"1\"\n"
    "\n"
    "new_manifest [000000000000000]\n",

    // Malformed cset stanza inside the edge.
    "format_version \"1\"\n"
    "\n"
    "new_manifest [0000000000000000000000000000000000000000]\n"
    "\n"
    "old_revision [66ff7f4640593afacdb056fefc069349e7d9ed9e]\n"
    "\n"
    "rename \"some_file\"\n"
    " foo \"x\"\n",

    // Rename of a path onto itself.
    "format_version \"1\"\n"
    "\n"
    "new_manifest [0000000000000000000000000000000000000000]\n"
    "\n"
    "old_revision [66ff7f4640593afacdb056fefc069349e7d9ed9e]\n"
    "\n"
    "rename \"some_file\"\n"
    " foo \"some_file\"\n"
  };
  revision_t rev;
  for (unsigned i = 0; i < sizeof(bad_revisions)/sizeof(char const*); ++i)
    {
      UNIT_TEST_CHECKPOINT((string("iteration ")
                            + boost::lexical_cast<string>(i)).c_str());
      // made_from_network marks the data as untrusted, so parse
      // failures must surface as bad_decode.
      UNIT_TEST_CHECK_THROW(read_revision(data(bad_revisions[i],
                                               made_from_network),
                                          rev),
                            bad_decode);
    }
}
2006
2007#endif // BUILD_UNIT_TESTS
2008
2009// Local Variables:
2010// mode: C++
2011// fill-column: 76
2012// c-file-style: "gnu"
2013// indent-tabs-mode: nil
2014// End:
2015// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status