monotone

monotone Mtn Source Tree

Root/revision.cc

1// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
#include "base.hh"
#include <algorithm>
#include <deque>
#include <functional>
#include <iterator>
#include <list>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <vector>

#include "lexical_cast.hh"
#include <boost/dynamic_bitset.hpp>
#include <boost/shared_ptr.hpp>

#include "botan/botan.h"

#include "basic_io.hh"
#include "cert.hh"
#include "cset.hh"
#include "constants.hh"
#include "interner.hh"
#include "numeric_vocab.hh"
#include "revision.hh"
#include "sanity.hh"
#include "transforms.hh"
#include "simplestring_xform.hh"
#include "ui.hh"
#include "vocab.hh"
#include "safe_map.hh"
#include "legacy.hh"
#include "rev_height.hh"
#include "outdated_indicator.hh"
#include "database.hh"
#include "roster.hh"
#include "graph.hh"
45
46using std::back_inserter;
47using std::copy;
48using std::deque;
49using std::list;
50using std::make_pair;
51using std::map;
52using std::max;
53using std::multimap;
54using std::ostringstream;
55using std::pair;
56using std::queue;
57using std::set;
58using std::stack;
59using std::string;
60using std::vector;
61
62using boost::dynamic_bitset;
63using boost::shared_ptr;
64
65void revision_t::check_sane() const
66{
67 // null id in current manifest only permitted if previous
68 // state was null and no changes
69 // FIXME: above comment makes no sense. This should just be
70 // I(!null_id(new_manifest)), and the only reason I am not making it so
71 // right now is that I don't have time to immediately track down all the
72 // fallout.
73 if (null_id(new_manifest))
74 {
75 for (edge_map::const_iterator i = edges.begin();
76 i != edges.end(); ++i)
77 I(null_id(edge_old_revision(i)));
78 }
79
80 if (edges.size() == 1)
81 {
82 // no particular checks to be done right now
83 }
84 else if (edges.size() == 2)
85 {
86 // merge nodes cannot have null revisions
87 for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
88 I(!null_id(edge_old_revision(i)));
89 }
90 else
91 // revisions must always have either 1 or 2 edges
92 I(false);
93
94 // we used to also check that if there were multiple edges that had patches
95 // for the same file, then the new hashes on each edge matched each other.
96 // this is not ported over to roster-style revisions because it's an
97 // inadequate check, and the real check, that the new manifest id is correct
98 // (done in put_revision, for instance) covers this case automatically.
99}
100
101bool
102revision_t::is_merge_node() const
103{
104 return edges.size() > 1;
105}
106
107bool
108revision_t::is_nontrivial() const
109{
110 check_sane();
111 // merge revisions are never trivial, because even if the resulting node
112 // happens to be identical to both parents, the merge is still recording
113 // that fact.
114 if (is_merge_node())
115 return true;
116 else
117 return !edge_changes(edges.begin()).empty();
118}
119
120revision_t::revision_t(revision_t const & other)
121{
122 /* behave like normal constructor if other is empty */
123 made_for = made_for_nobody;
124 if (null_id(other.new_manifest) && other.edges.empty()) return;
125 other.check_sane();
126 new_manifest = other.new_manifest;
127 edges = other.edges;
128 made_for = other.made_for;
129}
130
131revision_t const &
132revision_t::operator=(revision_t const & other)
133{
134 other.check_sane();
135 new_manifest = other.new_manifest;
136 edges = other.edges;
137 made_for = other.made_for;
138 return *this;
139}
140
141
142// For a surprisingly long time, we have been using an algorithm which
143// is nonsense, based on a misunderstanding of what "LCA" means. The
144// LCA of two nodes is *not* the first common ancestor which you find
145// when iteratively expanding their ancestor sets. Instead, the LCA is
146// the common ancestor which is a descendent of all other common
147// ancestors.
148//
149// In general, a set of nodes in a DAG doesn't always have an
150// LCA. There might be multiple common ancestors which are not parents
151// of one another. So we implement something which is "functionally
152// useful" for finding a merge point (and moreover, which always
153// terminates): we find an LCA of the input set if it exists,
154// otherwise we replace the input set with the nodes we did find and
155// repeat.
156//
157// All previous discussions in monotone-land, before say August 2005,
158// of LCA (and LCAD) are essentially wrong due to our silly
159// misunderstanding. It's unfortunate, but our half-baked
160// approximations worked almost well enough to take us through 3 years
161// of deployed use. Hopefully this more accurate new use will serve us
162// even longer.
163
// A context id: an interned (small-integer) handle for a revision id
// string, suitable for indexing a bitmap.
typedef unsigned long ctx;
// A bitmap indexed by ctx, representing a set of revisions compactly.
typedef dynamic_bitset<> bitmap;
typedef shared_ptr<bitmap> shared_bitmap;

// Defined further below; declared here because
// find_common_ancestor_for_merge needs it first.
static void
calculate_ancestors_from_graph(interner<ctx> & intern,
                               revision_id const & init,
                               multimap<revision_id, revision_id> const & graph,
                               map< ctx, shared_bitmap > & ancestors,
                               shared_bitmap & total_union);
174
// Find a "functionally useful" common ancestor of 'left' and 'right' for
// merging (see the discussion above): the LCA when one exists; otherwise
// the surviving leaves of the common-ancestor set replace the input set
// and the search repeats until a single node remains.  On return 'anc'
// holds that node.
void
find_common_ancestor_for_merge(database & db,
                               revision_id const & left,
                               revision_id const & right,
                               revision_id & anc)
{
  interner<ctx> intern;
  set<ctx> leaves;
  map<ctx, shared_bitmap> ancestors;

  shared_bitmap isect = shared_bitmap(new bitmap());
  shared_bitmap isect_ancs = shared_bitmap(new bitmap());

  leaves.insert(intern.intern(left.inner()()));
  leaves.insert(intern.intern(right.inner()()));

  // Build the child->parent ("inverse") ancestry graph once up front.
  multimap<revision_id, revision_id> inverse_graph;
  {
    multimap<revision_id, revision_id> graph;
    db.get_revision_ancestry(graph);
    typedef multimap<revision_id, revision_id>::const_iterator gi;
    for (gi i = graph.begin(); i != graph.end(); ++i)
      inverse_graph.insert(make_pair(i->second, i->first));
  }

  while (leaves.size() != 1)
    {
      isect->clear();
      isect_ancs->clear();

      // First intersect all ancestors of current leaf set
      for (set<ctx>::const_iterator i = leaves.begin(); i != leaves.end(); ++i)
        {
          ctx curr_leaf = *i;
          shared_bitmap curr_leaf_ancestors;
          map<ctx, shared_bitmap >::const_iterator j = ancestors.find(*i);
          if (j != ancestors.end())
            curr_leaf_ancestors = j->second;  // memoized from an earlier round
          else
            {
              curr_leaf_ancestors = shared_bitmap(new bitmap());
              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf)),
                                             inverse_graph, ancestors,
                                             curr_leaf_ancestors);
            }
          // grow both bitmaps to a common width before combining
          if (isect->size() > curr_leaf_ancestors->size())
            curr_leaf_ancestors->resize(isect->size());

          if (curr_leaf_ancestors->size() > isect->size())
            isect->resize(curr_leaf_ancestors->size());

          if (i == leaves.begin())
            *isect = *curr_leaf_ancestors;
          else
            (*isect) &= (*curr_leaf_ancestors);
        }

      // isect is now the set of common ancestors of leaves, but that is not enough.
      // We need the set of leaves of isect; to do that we calculate the set of
      // ancestors of isect, in order to subtract it from isect (below).
      // NOTE(review): 'new_leaves' appears unused -- 'leaves' is rebuilt in
      // place below instead; confirm it is dead and can be removed.
      set<ctx> new_leaves;
      for (ctx i = 0; i < isect->size(); ++i)
        {
          if (isect->test(i))
            {
              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i)),
                                             inverse_graph, ancestors, isect_ancs);
            }
        }

      // Finally, the subtraction step: for any element i of isect, if
      // it's *not* in isect_ancs, it survives as a new leaf.
      leaves.clear();
      for (ctx i = 0; i < isect->size(); ++i)
        {
          if (!isect->test(i))
            continue;
          if (i < isect_ancs->size() && isect_ancs->test(i))
            continue;
          safe_insert(leaves, i);
        }
    }

  I(leaves.size() == 1);
  anc = revision_id(intern.lookup(*leaves.begin()));
}
263
// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the
// size of the entire revision graph.

// Breadth-first reachability test over 'graph' (a parent->child multimap):
// true iff 'descendent_id' is reachable from 'ancestor_id'.  A node is
// considered reachable from itself.
template<typename T> static bool
is_ancestor(T const & ancestor_id,
            T const & descendent_id,
            multimap<T, T> const & graph)
{
  typedef typename multimap<T, T>::const_iterator gi;

  set<T> seen;
  queue<T> frontier;
  frontier.push(ancestor_id);

  while (!frontier.empty())
    {
      T node = frontier.front();
      frontier.pop();

      if (node == descendent_id)
        return true;

      pair<gi, gi> range = graph.equal_range(node);
      for (gi child = range.first; child != range.second; ++child)
        {
          // enqueue each child exactly once
          if (seen.insert(child->second).second)
            frontier.push(child->second);
        }
    }
  return false;
}
300
301bool
302is_ancestor(database & db,
303 revision_id const & ancestor_id,
304 revision_id const & descendent_id)
305{
306 L(FL("checking whether %s is an ancestor of %s")
307 % ancestor_id
308 % descendent_id);
309
310 multimap<revision_id, revision_id> graph;
311 db.get_revision_ancestry(graph);
312 return is_ancestor(ancestor_id, descendent_id, graph);
313}
314
315
316static void
317add_bitset_to_union(shared_bitmap src,
318 shared_bitmap dst)
319{
320 if (dst->size() > src->size())
321 src->resize(dst->size());
322 if (src->size() > dst->size())
323 dst->resize(src->size());
324 *dst |= *src;
325}
326
327
// Compute the full ancestor set of 'init' over 'graph' (a child->parent
// multimap).  Per-node ancestor bitmaps are memoized in 'ancestors', and
// every bitmap computed is OR-ed into 'total_union'.  Implemented as an
// iterative post-order DFS: a node is finalized only after all of its
// parents have been.
static void
calculate_ancestors_from_graph(interner<ctx> & intern,
                               revision_id const & init,
                               multimap<revision_id, revision_id> const & graph,
                               map< ctx, shared_bitmap > & ancestors,
                               shared_bitmap & total_union)
{
  typedef multimap<revision_id, revision_id>::const_iterator gi;
  stack<ctx> stk;

  stk.push(intern.intern(init.inner()()));

  while (! stk.empty())
    {
      ctx us = stk.top();
      revision_id rev(intern.lookup(us));

      pair<gi,gi> parents = graph.equal_range(rev);
      bool pushed = false;

      // first make sure all parents are done
      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());
          if (ancestors.find(parent) == ancestors.end())
            {
              stk.push(parent);
              pushed = true;
              break;
            }
        }

      // if we pushed anything we stop now. we'll come back later when all
      // the parents are done.
      if (pushed)
        continue;

      // all parents are computed; build our own ancestor bitmap
      shared_bitmap b = shared_bitmap(new bitmap());

      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());

          // set all parents
          if (b->size() <= parent)
            b->resize(parent + 1);
          b->set(parent);

          // ensure all parents are loaded into the ancestor map
          I(ancestors.find(parent) != ancestors.end());

          // union them into our map
          map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
          I(j != ancestors.end());
          add_bitset_to_union(j->second, b);
        }

      add_bitset_to_union(b, total_union);
      ancestors.insert(make_pair(us, b));
      stk.pop();
    }
}
390
391void
392toposort(database & db,
393 set<revision_id> const & revisions,
394 vector<revision_id> & sorted)
395{
396 map<rev_height, revision_id> work;
397
398 for (set<revision_id>::const_iterator i = revisions.begin();
399 i != revisions.end(); ++i)
400 {
401 rev_height height;
402 db.get_rev_height(*i, height);
403 work.insert(make_pair(height, *i));
404 }
405
406 sorted.clear();
407
408 for (map<rev_height, revision_id>::const_iterator i = work.begin();
409 i != work.end(); ++i)
410 {
411 sorted.push_back(i->second);
412 }
413}
414
415static void
416accumulate_strict_ancestors(database & db,
417 revision_id const & start,
418 set<revision_id> & all_ancestors,
419 multimap<revision_id, revision_id> const & inverse_graph,
420 rev_height const & min_height)
421{
422 typedef multimap<revision_id, revision_id>::const_iterator gi;
423
424 vector<revision_id> frontier;
425 frontier.push_back(start);
426
427 while (!frontier.empty())
428 {
429 revision_id rid = frontier.back();
430 frontier.pop_back();
431 pair<gi, gi> parents = inverse_graph.equal_range(rid);
432 for (gi i = parents.first; i != parents.second; ++i)
433 {
434 revision_id const & parent = i->second;
435 if (all_ancestors.find(parent) == all_ancestors.end())
436 {
437 // prune if we're below min_height
438 rev_height h;
439 db.get_rev_height(parent, h);
440 if (h >= min_height)
441 {
442 all_ancestors.insert(parent);
443 frontier.push_back(parent);
444 }
445 }
446 }
447 }
448}
449
// this call is equivalent to running:
//   erase(remove_if(candidates.begin(), candidates.end(), p));
//   erase_ancestors(candidates, db);
// however, by interleaving the two operations, it can in common cases make
// many fewer calls to the predicate, which can be a significant speed win.

void
erase_ancestors_and_failures(database & db,
                             std::set<revision_id> & candidates,
                             is_failure & p,
                             multimap<revision_id, revision_id> *inverse_graph_cache_ptr)
{
  // Load up the ancestry graph
  multimap<revision_id, revision_id> inverse_graph;

  if (candidates.empty())
    return;

  // If the caller supplied no cache, use the local map; if the supplied
  // cache is empty, populate it (child -> parents) from the database.
  if (inverse_graph_cache_ptr == NULL)
    inverse_graph_cache_ptr = &inverse_graph;
  if (inverse_graph_cache_ptr->empty())
  {
    multimap<revision_id, revision_id> graph;
    db.get_revision_ancestry(graph);
    for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
         i != graph.end(); ++i)
      inverse_graph_cache_ptr->insert(make_pair(i->second, i->first));
  }

  // Keep a set of all ancestors that we've traversed -- to avoid
  // combinatorial explosion.
  set<revision_id> all_ancestors;

  // Find the lowest height among the candidates: ancestor traversal can
  // stop below it, since nothing down there can be a candidate.
  rev_height min_height;
  db.get_rev_height(*candidates.begin(), min_height);
  for (std::set<revision_id>::const_iterator it = candidates.begin(); it != candidates.end(); it++)
    {
      rev_height h;
      db.get_rev_height(*it, h);
      if (h < min_height)
        min_height = h;
    }

  // Visit candidates in random order so that, on average, a late
  // descendant is hit early and many ancestors get discarded per
  // predicate call.
  vector<revision_id> todo(candidates.begin(), candidates.end());
  std::random_shuffle(todo.begin(), todo.end());

  size_t predicates = 0;
  while (!todo.empty())
    {
      revision_id rid = todo.back();
      todo.pop_back();
      // check if this one has already been eliminated
      if (all_ancestors.find(rid) != all_ancestors.end())
        continue;
      // and then whether it actually should stay in the running:
      ++predicates;
      if (p(rid))
        {
          candidates.erase(rid);
          continue;
        }
      // okay, it is good enough that all its ancestors should be
      // eliminated
      accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height);
    }

  // now go and eliminate the ancestors
  for (set<revision_id>::const_iterator i = all_ancestors.begin();
       i != all_ancestors.end(); ++i)
    candidates.erase(*i);

  L(FL("called predicate %s times") % predicates);
}
523
524// This function looks at a set of revisions, and for every pair A, B in that
525// set such that A is an ancestor of B, it erases A.
526
527namespace
528{
529 struct no_failures : public is_failure
530 {
531 virtual bool operator()(revision_id const & rid)
532 {
533 return false;
534 }
535 };
536}
537void
538erase_ancestors(database & db, set<revision_id> & revisions)
539{
540 no_failures p;
541 erase_ancestors_and_failures(db, revisions, p);
542}
543
// This function takes a revision A and a set of revision Bs, calculates the
// ancestry of each, and returns the set of revisions that are in A's ancestry
// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
// that's not in the Bs. If the output set is non-empty, then A will
// certainly be in it; but the output set might be empty.
void
ancestry_difference(database & db, revision_id const & a,
                    set<revision_id> const & bs,
                    set<revision_id> & new_stuff)
{
  new_stuff.clear();
  typedef multimap<revision_id, revision_id>::const_iterator gi;
  multimap<revision_id, revision_id> graph;
  multimap<revision_id, revision_id> inverse_graph;

  // invert the parent->child ancestry into child->parent form
  db.get_revision_ancestry(graph);
  for (gi i = graph.begin(); i != graph.end(); ++i)
    inverse_graph.insert(make_pair(i->second, i->first));

  interner<ctx> intern;
  map< ctx, shared_bitmap > ancestors;

  // u accumulates the union of the Bs and all their ancestors
  shared_bitmap u = shared_bitmap(new bitmap());

  for (set<revision_id>::const_iterator i = bs.begin();
       i != bs.end(); ++i)
    {
      calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
      // include each B itself, not just its ancestors
      ctx c = intern.intern(i->inner()());
      if (u->size() <= c)
        u->resize(c + 1);
      u->set(c);
    }

  // au holds A together with all of A's ancestors
  shared_bitmap au = shared_bitmap(new bitmap());
  calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
  {
    ctx c = intern.intern(a.inner()());
    if (au->size() <= c)
      au->resize(c + 1);
    au->set(c);
  }

  // pad to a common width, then subtract: au := au \ u
  au->resize(max(au->size(), u->size()));
  u->resize(max(au->size(), u->size()));

  *au -= *u;

  // translate the surviving bits back into revision ids
  for (unsigned int i = 0; i != au->size(); ++i)
    {
      if (au->test(i))
        {
          revision_id rid(intern.lookup(i));
          if (!null_id(rid))
            new_stuff.insert(rid);
        }
    }
}
602
603void
604select_nodes_modified_by_rev(database & db,
605 revision_t const & rev,
606 roster_t const new_roster,
607 set<node_id> & nodes_modified)
608{
609 nodes_modified.clear();
610
611 for (edge_map::const_iterator i = rev.edges.begin();
612 i != rev.edges.end(); ++i)
613 {
614 set<node_id> edge_nodes_modified;
615 roster_t old_roster;
616 db.get_roster(edge_old_revision(i), old_roster);
617 select_nodes_modified_by_cset(edge_changes(i),
618 old_roster,
619 new_roster,
620 edge_nodes_modified);
621
622 copy(edge_nodes_modified.begin(), edge_nodes_modified.end(),
623 inserter(nodes_modified, nodes_modified.begin()));
624 }
625}
626
627
628void
629make_revision(revision_id const & old_rev_id,
630 roster_t const & old_roster,
631 roster_t const & new_roster,
632 revision_t & rev)
633{
634 shared_ptr<cset> cs(new cset());
635
636 rev.edges.clear();
637 make_cset(old_roster, new_roster, *cs);
638
639 calculate_ident(new_roster, rev.new_manifest);
640
641 if (global_sanity.debug_p())
642 L(FL("new manifest_id is %s")
643 % encode_hexenc(rev.new_manifest.inner()()));
644
645 safe_insert(rev.edges, make_pair(old_rev_id, cs));
646 rev.made_for = made_for_database;
647}
648
649void
650make_revision(revision_id const & old_rev_id,
651 roster_t const & old_roster,
652 cset const & changes,
653 revision_t & rev)
654{
655 roster_t new_roster = old_roster;
656 {
657 temp_node_id_source nis;
658 editable_roster_base er(new_roster, nis);
659 changes.apply_to(er);
660 }
661
662 shared_ptr<cset> cs(new cset(changes));
663 rev.edges.clear();
664
665 calculate_ident(new_roster, rev.new_manifest);
666
667 if (global_sanity.debug_p())
668 L(FL("new manifest_id is %s")
669 % encode_hexenc(rev.new_manifest.inner()()));
670
671 safe_insert(rev.edges, make_pair(old_rev_id, cs));
672 rev.made_for = made_for_database;
673}
674
675void
676make_revision(parent_map const & old_rosters,
677 roster_t const & new_roster,
678 revision_t & rev)
679{
680 edge_map edges;
681 for (parent_map::const_iterator i = old_rosters.begin();
682 i != old_rosters.end();
683 i++)
684 {
685 shared_ptr<cset> cs(new cset());
686 make_cset(parent_roster(i), new_roster, *cs);
687 safe_insert(edges, make_pair(parent_id(i), cs));
688 }
689
690 rev.edges = edges;
691 calculate_ident(new_roster, rev.new_manifest);
692
693 if (global_sanity.debug_p())
694 L(FL("new manifest_id is %s")
695 % encode_hexenc(rev.new_manifest.inner()()));
696}
697
698static void
699recalculate_manifest_id_for_restricted_rev(parent_map const & old_rosters,
700 edge_map & edges,
701 revision_t & rev)
702{
703 // In order to get the correct manifest ID, recalculate the new roster
704 // using one of the restricted csets. It doesn't matter which of the
705 // parent roster/cset pairs we use for this; by construction, they must
706 // all produce the same result.
707 revision_id rid = parent_id(old_rosters.begin());
708 roster_t restricted_roster = *(safe_get(old_rosters, rid).first);
709
710 temp_node_id_source nis;
711 editable_roster_base er(restricted_roster, nis);
712 safe_get(edges, rid)->apply_to(er);
713
714 calculate_ident(restricted_roster, rev.new_manifest);
715 rev.edges = edges;
716
717 if (global_sanity.debug_p())
718 L(FL("new manifest_id is %s")
719 % encode_hexenc(rev.new_manifest.inner()()));
720}
721
722void
723make_restricted_revision(parent_map const & old_rosters,
724 roster_t const & new_roster,
725 node_restriction const & mask,
726 revision_t & rev)
727{
728 edge_map edges;
729 for (parent_map::const_iterator i = old_rosters.begin();
730 i != old_rosters.end();
731 i++)
732 {
733 shared_ptr<cset> included(new cset());
734 roster_t restricted_roster;
735
736 make_restricted_roster(parent_roster(i), new_roster,
737 restricted_roster, mask);
738 make_cset(parent_roster(i), restricted_roster, *included);
739 safe_insert(edges, make_pair(parent_id(i), included));
740 }
741
742 recalculate_manifest_id_for_restricted_rev(old_rosters, edges, rev);
743}
744
// As above, but also reports the work *excluded* by the restriction: on
// return 'excluded' holds the cset from the restricted roster to the full
// new roster (computed per parent; the last parent's result survives).
// Restricting is refused in a two-parent workspace whenever anything
// would be excluded.
void
make_restricted_revision(parent_map const & old_rosters,
                         roster_t const & new_roster,
                         node_restriction const & mask,
                         revision_t & rev,
                         cset & excluded,
                         utf8 const & cmd_name)
{
  edge_map edges;
  bool no_excludes = true;
  for (parent_map::const_iterator i = old_rosters.begin();
       i != old_rosters.end();
       i++)
    {
      shared_ptr<cset> included(new cset());
      roster_t restricted_roster;

      make_restricted_roster(parent_roster(i), new_roster,
                             restricted_roster, mask);
      make_cset(parent_roster(i), restricted_roster, *included);
      // note: 'excluded' is overwritten on each iteration
      make_cset(restricted_roster, new_roster, excluded);
      safe_insert(edges, make_pair(parent_id(i), included));
      if (!excluded.empty())
        no_excludes = false;
    }

  N(old_rosters.size() == 1 || no_excludes,
    F("the command '%s %s' cannot be restricted in a two-parent workspace")
    % ui.prog_name % cmd_name);

  recalculate_manifest_id_for_restricted_rev(old_rosters, edges, rev);
}
777
778// Workspace-only revisions, with fake rev.new_manifest and content
779// changes suppressed.
780void
781make_revision_for_workspace(revision_id const & old_rev_id,
782 cset const & changes,
783 revision_t & rev)
784{
785 MM(old_rev_id);
786 MM(changes);
787 MM(rev);
788 shared_ptr<cset> cs(new cset(changes));
789 cs->deltas_applied.clear();
790
791 rev.edges.clear();
792 safe_insert(rev.edges, make_pair(old_rev_id, cs));
793 if (!null_id(old_rev_id))
794 rev.new_manifest = manifest_id(fake_id());
795 rev.made_for = made_for_workspace;
796}
797
798void
799make_revision_for_workspace(revision_id const & old_rev_id,
800 roster_t const & old_roster,
801 roster_t const & new_roster,
802 revision_t & rev)
803{
804 MM(old_rev_id);
805 MM(old_roster);
806 MM(new_roster);
807 MM(rev);
808 cset changes;
809 make_cset(old_roster, new_roster, changes);
810 make_revision_for_workspace(old_rev_id, changes, rev);
811}
812
813void
814make_revision_for_workspace(parent_map const & old_rosters,
815 roster_t const & new_roster,
816 revision_t & rev)
817{
818 edge_map edges;
819 for (parent_map::const_iterator i = old_rosters.begin();
820 i != old_rosters.end();
821 i++)
822 {
823 shared_ptr<cset> cs(new cset());
824 make_cset(parent_roster(i), new_roster, *cs);
825 cs->deltas_applied.clear();
826 safe_insert(edges, make_pair(parent_id(i), cs));
827 }
828
829 rev.edges = edges;
830 rev.new_manifest = manifest_id(fake_id());
831 rev.made_for = made_for_workspace;
832}
833
834
// Stuff related to rebuilding the revision graph. Unfortunately this is a
// real enough error case that we need support code for it.

// Maps an internal node number to the (roster, marking-map) pair of the
// corresponding parent revision.
typedef map<u64, pair<shared_ptr<roster_t>, shared_ptr<marking_map> > >
parent_roster_map;
840
841template <> void
842dump(parent_roster_map const & prm, string & out)
843{
844 ostringstream oss;
845 for (parent_roster_map::const_iterator i = prm.begin(); i != prm.end(); ++i)
846 {
847 oss << "roster: " << i->first << '\n';
848 string roster_str, indented_roster_str;
849 dump(*i->second.first, roster_str);
850 prefix_lines_with(" ", roster_str, indented_roster_str);
851 oss << indented_roster_str;
852 oss << "\nroster's marking:\n";
853 string marking_str, indented_marking_str;
854 dump(*i->second.second, marking_str);
855 prefix_lines_with(" ", marking_str, indented_marking_str);
856 oss << indented_marking_str;
857 oss << "\n\n";
858 }
859 out = oss.str();
860}
861
// Scaffolding for rebuilding the revision graph from old-style data:
// assigns a small integer node number to every old manifest or revision,
// collects certs and ancestry keyed on those numbers, and reconstructs
// new-style revisions from them.
struct anc_graph
{
  anc_graph(bool existing, database & db, key_store & keys) :
    existing_graph(existing),
    db(db),
    keys(keys),
    max_node(0),
    n_nodes("nodes", "n", 1),
    n_certs_in("certs in", "c", 1),
    n_revs_out("revs out", "r", 1),
    n_certs_out("certs out", "C", 1)
  {}

  bool existing_graph;   // true when rebuilding from existing revisions
  database & db;
  key_store & keys;
  u64 max_node;          // next unassigned node number

  // progress tickers
  ticker n_nodes;
  ticker n_certs_in;
  ticker n_revs_out;
  ticker n_certs_out;

  // bidirectional node-number <-> old manifest mappings
  map<u64,manifest_id> node_to_old_man;
  map<manifest_id,u64> old_man_to_node;

  // bidirectional node-number <-> old revision mappings
  map<u64,revision_id> node_to_old_rev;
  map<revision_id,u64> old_rev_to_node;

  // bidirectional node-number <-> newly constructed revision mappings
  map<u64,revision_id> node_to_new_rev;
  map<revision_id,u64> new_rev_to_node;

  // legacy rename information collected per node
  map<u64, legacy::renames_map> node_to_renames;

  multimap<u64, pair<cert_name, cert_value> > certs;
  multimap<u64, u64> ancestry;   // child -> parent(s)
  set<string> branches;          // branch names seen on input certs

  void add_node_ancestry(u64 child, u64 parent);
  void write_certs();
  void kluge_for_bogus_merge_edges();
  void rebuild_ancestry(set<string> const & attrs_to_drop);
  void get_node_manifest(u64 node, manifest_id & man);
  u64 add_node_for_old_manifest(manifest_id const & man);
  u64 add_node_for_oldstyle_revision(revision_id const & rev);
  void construct_revisions_from_ancestry(set<string> const & attrs_to_drop);
  void fixup_node_identities(parent_roster_map const & parent_rosters,
                             roster_t & child_roster,
                             legacy::renames_map const & renames);
};
912
913
914void anc_graph::add_node_ancestry(u64 child, u64 parent)
915{
916 L(FL("noting ancestry from child %d -> parent %d") % child % parent);
917 ancestry.insert(make_pair(child, parent));
918}
919
920void anc_graph::get_node_manifest(u64 node, manifest_id & man)
921{
922 map<u64,manifest_id>::const_iterator i = node_to_old_man.find(node);
923 I(i != node_to_old_man.end());
924 man = i->second;
925}
926
927void anc_graph::write_certs()
928{
929 {
930 // regenerate epochs on all branches to random states
931
932 for (set<string>::const_iterator i = branches.begin(); i != branches.end(); ++i)
933 {
934 char buf[constants::epochlen_bytes];
935 Botan::Global_RNG::randomize(reinterpret_cast<Botan::byte *>(buf), constants::epochlen_bytes);
936 epoch_data new_epoch(string(buf, buf + constants::epochlen_bytes));
937 L(FL("setting epoch for %s to %s")
938 % *i % new_epoch);
939 db.set_epoch(branch_name(*i), new_epoch);
940 }
941 }
942
943
944 typedef multimap<u64, pair<cert_name, cert_value> >::const_iterator ci;
945
946 for (map<u64,revision_id>::const_iterator i = node_to_new_rev.begin();
947 i != node_to_new_rev.end(); ++i)
948 {
949 revision_id rev(i->second);
950
951 pair<ci,ci> range = certs.equal_range(i->first);
952
953 for (ci j = range.first; j != range.second; ++j)
954 {
955 cert_name name(j->second.first);
956 cert_value val(j->second.second);
957
958 if (put_simple_revision_cert(db, keys, rev, name, val))
959 ++n_certs_out;
960 }
961 }
962}
963
void
anc_graph::kluge_for_bogus_merge_edges()
{
  // This kluge exists because in the 0.24-era monotone databases, several
  // bad merges still existed in which one side of the merge is an ancestor
  // of the other side of the merge. In other words, graphs which look like
  // this:
  //
  // a ----------------------> e
  //  \                       /
  //   \---> b -> c -> d ----/
  //
  // Such merges confuse the roster-building algorithm, because they should
  // never have occurred in the first place: a was not a head at the time
  // of the merge, e should simply have been considered an extension of d.
  //
  // So... we drop the a->e edges entirely.
  //
  // Note: this kluge drops edges which are a struct superset of those
  // dropped by a previous kluge ("3-ancestor") so we have removed that
  // code.

  P(F("scanning for bogus merge edges"));

  // parent -> child map, needed by the is_ancestor tests below
  multimap<u64,u64> parent_to_child_map;
  for (multimap<u64, u64>::const_iterator i = ancestry.begin();
       i != ancestry.end(); ++i)
    parent_to_child_map.insert(make_pair(i->second, i->first));

  // First pass: find merge nodes (two adjacent entries with the same
  // child key) where one parent is an ancestor of the other, and
  // schedule the edge to the older parent for removal.
  map<u64, u64> edges_to_kill;
  for (multimap<u64, u64>::const_iterator i = ancestry.begin();
       i != ancestry.end(); ++i)
    {
      multimap<u64, u64>::const_iterator j = i;
      ++j;
      u64 child = i->first;
      // NB: ancestry is a multimap from child->parent(s)
      if (j != ancestry.end())
        {
          if (j->first == i->first)
            {
              L(FL("considering old merge edge %s") %
                encode_hexenc(safe_get(node_to_old_rev, i->first).inner()()));
              u64 parent1 = i->second;
              u64 parent2 = j->second;
              if (is_ancestor (parent1, parent2, parent_to_child_map))
                safe_insert(edges_to_kill, make_pair(child, parent1));
              else if (is_ancestor (parent2, parent1, parent_to_child_map))
                safe_insert(edges_to_kill, make_pair(child, parent2));
            }
        }
    }

  // Second pass: actually erase the scheduled edges.
  for (map<u64, u64>::const_iterator i = edges_to_kill.begin();
       i != edges_to_kill.end(); ++i)
    {
      u64 child = i->first;
      u64 parent = i->second;
      bool killed = false;
      // NOTE(review): the loop condition dereferences lower_bound(child)
      // without an explicit end() guard; presumed safe because 'child'
      // was taken from 'ancestry' above -- confirm.
      for (multimap<u64, u64>::iterator j = ancestry.lower_bound(child);
           j->first == child; ++j)
        {
          if (j->second == parent)
            {
              P(F("optimizing out redundant edge %d -> %d")
                % parent % child);
              ancestry.erase(j);
              killed = true;
              break;
            }
        }

      if (!killed)
        W(F("failed to eliminate edge %d -> %d")
          % parent % child);
    }
}
1041
1042
1043void
1044anc_graph::rebuild_ancestry(set<string> const & attrs_to_drop)
1045{
1046 kluge_for_bogus_merge_edges();
1047
1048 P(F("rebuilding %d nodes") % max_node);
1049 {
1050 transaction_guard guard(db);
1051 if (existing_graph)
1052 db.delete_existing_revs_and_certs();
1053 construct_revisions_from_ancestry(attrs_to_drop);
1054 write_certs();
1055 if (existing_graph)
1056 db.delete_existing_manifests();
1057 guard.commit();
1058 }
1059}
1060
1061u64
1062anc_graph::add_node_for_old_manifest(manifest_id const & man)
1063{
1064 I(!existing_graph);
1065 u64 node = 0;
1066 if (old_man_to_node.find(man) == old_man_to_node.end())
1067 {
1068 node = max_node++;
1069 ++n_nodes;
1070 L(FL("node %d = manifest %s")
1071 % node % man);
1072 old_man_to_node.insert(make_pair(man, node));
1073 node_to_old_man.insert(make_pair(node, man));
1074
1075 // load certs
1076 vector< manifest<cert> > mcerts;
1077 db.get_manifest_certs(man, mcerts);
1078 erase_bogus_certs(db, mcerts);
1079 for(vector< manifest<cert> >::const_iterator i = mcerts.begin();
1080 i != mcerts.end(); ++i)
1081 {
1082 L(FL("loaded '%s' manifest cert for node %s") % i->inner().name % node);
1083 ++n_certs_in;
1084 certs.insert(make_pair(node, make_pair(i->inner().name,
1085 i->inner().value)));
1086 }
1087 }
1088 else
1089 {
1090 node = old_man_to_node[man];
1091 }
1092 return node;
1093}
1094
1095u64 anc_graph::add_node_for_oldstyle_revision(revision_id const & rev)
1096{
1097 I(existing_graph);
1098 I(!null_id(rev));
1099 u64 node = 0;
1100 if (old_rev_to_node.find(rev) == old_rev_to_node.end())
1101 {
1102 node = max_node++;
1103 ++n_nodes;
1104
1105 manifest_id man;
1106 legacy::renames_map renames;
1107 legacy::get_manifest_and_renames_for_rev(db, rev, man, renames);
1108
1109 L(FL("node %d = revision %s = manifest %s")
1110 % node
1111 % rev
1112 % man);
1113 old_rev_to_node.insert(make_pair(rev, node));
1114 node_to_old_rev.insert(make_pair(node, rev));
1115 node_to_old_man.insert(make_pair(node, man));
1116 node_to_renames.insert(make_pair(node, renames));
1117
1118 // load certs
1119 vector< revision<cert> > rcerts;
1120 db.get_revision_certs(rev, rcerts);
1121 erase_bogus_certs(db, rcerts);
1122 for(vector< revision<cert> >::const_iterator i = rcerts.begin();
1123 i != rcerts.end(); ++i)
1124 {
1125 L(FL("loaded '%s' revision cert for node %s") % i->inner().name % node);
1126 ++n_certs_in;
1127 certs.insert(make_pair(node, make_pair(i->inner().name,
1128 i->inner().value)));
1129
1130 if (i->inner().name == branch_cert_name)
1131 branches.insert(i->inner().value());
1132 }
1133 }
1134 else
1135 {
1136 node = old_rev_to_node[rev];
1137 }
1138
1139 return node;
1140}
1141
1142static bool
1143not_dead_yet(node_id nid, u64 birth_rev,
1144 parent_roster_map const & parent_rosters,
1145 multimap<u64, u64> const & child_to_parents)
1146{
1147 // Any given node, at each point in the revision graph, is in one of the
1148 // states "alive", "unborn", "dead". The invariant we must maintain in
1149 // constructing our revision graph is that if a node is dead in any parent,
1150 // then it must also be dead in the child. The purpose of this function is
1151 // to take a node, and a list of parents, and determine whether that node is
1152 // allowed to be alive in a child of the given parents.
1153
1154 // "Alive" means, the node currently exists in the revision's tree.
1155 // "Unborn" means, the node does not exist in the revision's tree, and the
1156 // node's birth revision is _not_ an ancestor of the revision.
1157 // "Dead" means, the node does not exist in the revision's tree, and the
1158 // node's birth revision _is_ an ancestor of the revision.
1159
1160 // L(FL("testing liveliness of node %d, born in rev %d") % nid % birth_rev);
1161 for (parent_roster_map::const_iterator r = parent_rosters.begin();
1162 r != parent_rosters.end(); ++r)
1163 {
1164 shared_ptr<roster_t> parent = r->second.first;
1165 // L(FL("node %d %s in parent roster %d")
1166 // % nid
1167 // % (parent->has_node(n->first) ? "exists" : "does not exist" )
1168 // % r->first);
1169
1170 if (!parent->has_node(nid))
1171 {
1172 deque<u64> work;
1173 set<u64> seen;
1174 work.push_back(r->first);
1175 while (!work.empty())
1176 {
1177 u64 curr = work.front();
1178 work.pop_front();
1179 // L(FL("examining ancestor %d of parent roster %d, looking for anc=%d")
1180 // % curr % r->first % birth_rev);
1181
1182 if (seen.find(curr) != seen.end())
1183 continue;
1184 seen.insert(curr);
1185
1186 if (curr == birth_rev)
1187 {
1188 // L(FL("node is dead in %d") % r->first);
1189 return false;
1190 }
1191 typedef multimap<u64, u64>::const_iterator ci;
1192 pair<ci,ci> range = child_to_parents.equal_range(curr);
1193 for (ci i = range.first; i != range.second; ++i)
1194 {
1195 if (i->first != curr)
1196 continue;
1197 work.push_back(i->second);
1198 }
1199 }
1200 }
1201 }
1202 // L(FL("node is alive in all parents, returning true"));
1203 return true;
1204}
1205
1206
1207static file_path
1208find_old_path_for(map<file_path, file_path> const & renames,
1209 file_path const & new_path)
1210{
1211 map<file_path, file_path>::const_iterator i = renames.find(new_path);
1212 if (i != renames.end())
1213 return i->second;
1214
1215 // ??? root directory rename possible in the old schema?
1216 // if not, do this first.
1217 if (new_path.empty())
1218 return new_path;
1219
1220 file_path dir;
1221 path_component base;
1222 new_path.dirname_basename(dir, base);
1223 return find_old_path_for(renames, dir) / base;
1224}
1225
1226static file_path
1227find_new_path_for(map<file_path, file_path> const & renames,
1228 file_path const & old_path)
1229{
1230 map<file_path, file_path> reversed;
1231 for (map<file_path, file_path>::const_iterator i = renames.begin();
1232 i != renames.end(); ++i)
1233 reversed.insert(make_pair(i->second, i->first));
1234 // this is a hackish kluge. seems to work, though.
1235 return find_old_path_for(reversed, old_path);
1236}
1237
// Recursive helper function for insert_into_roster.
// Ensures every directory along 'pth' exists in the child roster;
// 'full' is the complete file path being added, used only in the
// error message.
static void
insert_parents_into_roster(roster_t & child_roster,
                           temp_node_id_source & nis,
                           file_path const & pth,
                           file_path const & full)
{
  if (child_roster.has_node(pth))
    {
      // An existing node at this path is acceptable only if it is a
      // directory; a file here means the manifest is inconsistent.
      E(is_dir_t(child_roster.get_node(pth)),
        F("Directory %s for path %s cannot be added, "
          "as there is a file in the way") % pth % full);
      return;
    }

  // Create ancestors first; the empty (root) path has no parent.
  if (!pth.empty())
    insert_parents_into_roster(child_roster, nis, pth.dirname(), full);

  child_roster.attach_node(child_roster.create_dir_node(nis), pth);
}
1258
// Attach a file node for 'pth' with content 'fid' to the child roster,
// creating any missing parent directories first.  A duplicate add is
// tolerated only when the existing node is a file with identical
// content; otherwise the manifest is inconsistent and we bail.
static void
insert_into_roster(roster_t & child_roster,
                   temp_node_id_source & nis,
                   file_path const & pth,
                   file_id const & fid)
{
  if (child_roster.has_node(pth))
    {
      node_t n = child_roster.get_node(pth);
      E(is_file_t(n),
        F("Path %s cannot be added, as there is a directory in the way") % pth);
      file_t f = downcast_to_file_t(n);
      E(f->content == fid,
        F("Path %s added twice with differing content") % pth);
      return;
    }

  insert_parents_into_roster(child_roster, nis, pth.dirname(), pth);
  child_roster.attach_node(child_roster.create_file_node(fid, nis), pth);
}
1279
void
anc_graph::fixup_node_identities(parent_roster_map const & parent_rosters,
                                 roster_t & child_roster,
                                 legacy::renames_map const & renames)
{
  // Our strategy here is to iterate over every node in every parent, and
  // for each parent node P find zero or one tmp nodes in the child which
  // represents the fate of P:
  //
  // - If any of the parents thinks that P has died, we do not search for
  //   it in the child; we leave it as "dropped".
  //
  // - We fetch the name N of the parent node P, and apply the rename map
  //   to N, getting "remapped name" M.  If we find a child node C with
  //   name M in the child roster, with the same type as P, we identify P
  //   and C, and swap P for C in the child.


  // Map node_id -> birth rev
  map<node_id, u64> nodes_in_any_parent;

  // Stage 1: collect all nodes (and their birth revs) in any parent.
  for (parent_roster_map::const_iterator i = parent_rosters.begin();
       i != parent_rosters.end(); ++i)
    {
      shared_ptr<roster_t> parent_roster = i->second.first;
      shared_ptr<marking_map> parent_marking = i->second.second;

      node_map const & nodes = parent_roster->all_nodes();
      for (node_map::const_iterator j = nodes.begin(); j != nodes.end(); ++j)
        {
          node_id n = j->first;
          revision_id birth_rev = safe_get(*parent_marking, n).birth_revision;
          u64 birth_node = safe_get(new_rev_to_node, birth_rev);
          // NOTE: this inner 'i' deliberately shadows the outer loop
          // iterator; it refers to the map entry for node 'n'.
          map<node_id, u64>::const_iterator i = nodes_in_any_parent.find(n);
          if (i != nodes_in_any_parent.end())
            // The same node must have the same birth rev in every parent.
            I(i->second == birth_node);
          else
            safe_insert(nodes_in_any_parent,
                        make_pair(n, birth_node));
        }
    }

  // Stage 2: For any node which is actually live, try to locate a mapping
  // from a parent instance of it to a child node.
  for (map<node_id, u64>::const_iterator i = nodes_in_any_parent.begin();
       i != nodes_in_any_parent.end(); ++i)
    {
      node_id n = i->first;
      u64 birth_rev = i->second;

      // Already identified with a child node; nothing to do.
      if (child_roster.has_node(n))
        continue;

      if (not_dead_yet(n, birth_rev, parent_rosters, ancestry))
        {
          for (parent_roster_map::const_iterator j = parent_rosters.begin();
               j != parent_rosters.end(); ++j)
            {
              shared_ptr<roster_t> parent_roster = j->second.first;

              if (!parent_roster->has_node(n))
                continue;

              file_path fp;
              parent_roster->get_name(n, fp);

              // Try remapping the name.
              if (node_to_old_rev.find(j->first) != node_to_old_rev.end())
                {
                  legacy::renames_map::const_iterator rmap;
                  revision_id parent_rid = safe_get(node_to_old_rev, j->first);
                  rmap = renames.find(parent_rid);
                  if (rmap != renames.end())
                    fp = find_new_path_for(rmap->second, fp);
                }

              // See if we can match this node against a child.
              if ((!child_roster.has_node(n))
                  && child_roster.has_node(fp))
                {
                  node_t pn = parent_roster->get_node(n);
                  node_t cn = child_roster.get_node(fp);
                  // Only identify nodes of the same kind (file vs. dir).
                  if (is_file_t(pn) == is_file_t(cn))
                    {
                      child_roster.replace_node_id(cn->self, n);
                      break;
                    }
                }
            }
        }
    }
}
1373
// Debugging aid bound via MM() while converting a node, so that crash
// dumps include the certs of the node being processed (see the dump
// specialization below).
struct
current_rev_debugger
{
  u64 node;                  // node currently being converted
  anc_graph const & agraph;  // graph that node belongs to
  current_rev_debugger(u64 n, anc_graph const & ag)
    : node(n), agraph(ag)
  {
  }
};
1384
1385template <> void
1386dump(current_rev_debugger const & d, string & out)
1387{
1388 typedef multimap<u64, pair<cert_name, cert_value> >::const_iterator ci;
1389 pair<ci,ci> range = d.agraph.certs.equal_range(d.node);
1390 for(ci i = range.first; i != range.second; ++i)
1391 {
1392 if (i->first == d.node)
1393 {
1394 out += "cert '" + i->second.first() + "'";
1395 out += "= '" + i->second.second() + "'\n";
1396 }
1397 }
1398}
1399
1400
// Walk the collected ancestry graph in roughly-topological order and
// build a new roster-style revision for every node: reconstruct the
// child roster from the old manifest, migrate .mt-attrs/.mt-ignore,
// knit parent node identities into the child, and write the resulting
// revision to the database.
void
anc_graph::construct_revisions_from_ancestry(set<string> const & attrs_to_drop)
{
  // This is an incredibly cheesy, and also reasonably simple sorting
  // system: we put all the root nodes in the work queue. we take a
  // node out of the work queue and check if its parents are done. if
  // they are, we process it and insert its children. otherwise we put
  // it back on the end of the work queue. This both ensures that we're
  // always processing something *like* a frontier, while avoiding the
  // need to worry about one side of the frontier advancing faster than
  // another.

  typedef multimap<u64,u64>::const_iterator ci;
  multimap<u64,u64> parent_to_child_map;
  deque<u64> work;
  set<u64> done;

  {
    // Set up the parent->child mapping and prime the work queue

    set<u64> children, all;
    for (multimap<u64, u64>::const_iterator i = ancestry.begin();
         i != ancestry.end(); ++i)
      {
        parent_to_child_map.insert(make_pair(i->second, i->first));
        children.insert(i->first);
      }
    for (map<u64,manifest_id>::const_iterator i = node_to_old_man.begin();
         i != node_to_old_man.end(); ++i)
      {
        all.insert(i->first);
      }

    // Roots are exactly the nodes that are nobody's child.
    set_difference(all.begin(), all.end(),
                   children.begin(), children.end(),
                   back_inserter(work));
  }

  while (!work.empty())
    {

      u64 child = work.front();

      current_rev_debugger dbg(child, *this);
      MM(dbg);

      work.pop_front();

      if (done.find(child) != done.end())
        continue;

      // Check whether every parent has been processed; if not, defer
      // this node by pushing it back onto the end of the queue.
      pair<ci,ci> parent_range = ancestry.equal_range(child);
      set<u64> parents;
      bool parents_all_done = true;
      for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i)
        {
          if (i->first != child)
            continue;
          u64 parent = i->second;
          if (done.find(parent) == done.end())
            {
              work.push_back(child);
              parents_all_done = false;
            }
          else
            parents.insert(parent);
        }

      if (parents_all_done
          && (node_to_new_rev.find(child) == node_to_new_rev.end()))
        {
          L(FL("processing node %d") % child);

          manifest_id old_child_mid;
          legacy::manifest_map old_child_man;

          // Fetch the old manifest contents for this node.
          get_node_manifest(child, old_child_mid);
          manifest_data mdat;
          db.get_manifest_version(old_child_mid, mdat);
          legacy::read_manifest_map(mdat, old_child_man);

          // Load all the parent rosters into a temporary roster map
          parent_roster_map parent_rosters;
          MM(parent_rosters);

          // (parents_all_done is necessarily true here; the condition
          // is kept for symmetry with the loop above.)
          for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i)
            {
              if (i->first != child)
                continue;
              u64 parent = i->second;
              if (parent_rosters.find(parent) == parent_rosters.end())
                {
                  shared_ptr<roster_t> ros = shared_ptr<roster_t>(new roster_t());
                  shared_ptr<marking_map> mm = shared_ptr<marking_map>(new marking_map());
                  db.get_roster(safe_get(node_to_new_rev, parent), *ros, *mm);
                  safe_insert(parent_rosters, make_pair(parent, make_pair(ros, mm)));
                }
            }

          file_path attr_path = file_path_internal(".mt-attrs");
          file_path old_ignore_path = file_path_internal(".mt-ignore");
          file_path new_ignore_path = file_path_internal(".mtn-ignore");

          roster_t child_roster;
          MM(child_roster);
          temp_node_id_source nis;

          // all rosters shall have a root node.
          child_roster.attach_node(child_roster.create_dir_node(nis),
                                   file_path_internal(""));

          // Populate the child roster from the old manifest entries.
          for (legacy::manifest_map::const_iterator i = old_child_man.begin();
               i != old_child_man.end(); ++i)
            {
              // .mt-attrs is handled separately below, not as a file.
              if (i->first == attr_path)
                continue;
              // convert .mt-ignore to .mtn-ignore... except if .mtn-ignore
              // already exists, just leave things alone.
              else if (i->first == old_ignore_path
                       && old_child_man.find(new_ignore_path) == old_child_man.end())
                insert_into_roster(child_roster, nis, new_ignore_path, i->second);
              else
                insert_into_roster(child_roster, nis, i->first, i->second);
            }

          // migrate attributes out of .mt-attrs
          {
            legacy::manifest_map::const_iterator i = old_child_man.find(attr_path);
            if (i != old_child_man.end())
              {
                file_data dat;
                db.get_file_version(i->second, dat);
                legacy::dot_mt_attrs_map attrs;
                legacy::read_dot_mt_attrs(dat.inner(), attrs);
                for (legacy::dot_mt_attrs_map::const_iterator j = attrs.begin();
                     j != attrs.end(); ++j)
                  {
                    if (child_roster.has_node(j->first))
                      {
                        map<string, string> const &
                          fattrs = j->second;
                        for (map<string, string>::const_iterator
                               k = fattrs.begin();
                             k != fattrs.end(); ++k)
                          {
                            string key = k->first;
                            if (attrs_to_drop.find(key) != attrs_to_drop.end())
                              {
                                // ignore it
                              }
                            // only the two known legacy attrs are
                            // migrated, gaining an "mtn:" prefix.
                            else if (key == "execute" || key == "manual_merge")
                              child_roster.set_attr(j->first,
                                                    attr_key("mtn:" + key),
                                                    attr_value(k->second));
                            else
                              E(false, F("unknown attribute '%s' on path '%s'\n"
                                         "please contact %s so we can work out the right way to migrate this\n"
                                         "(if you just want it to go away, see the switch --drop-attr, but\n"
                                         "seriously, if you'd like to keep it, we're happy to figure out how)")
                                % key % j->first % PACKAGE_BUGREPORT);
                          }
                      }
                  }
              }
          }

          // Now knit the parent node IDs into child node IDs (which are currently all
          // tmpids), wherever possible.
          fixup_node_identities(parent_rosters, child_roster, node_to_renames[child]);

          revision_t rev;
          rev.made_for = made_for_database;
          MM(rev);
          calculate_ident(child_roster, rev.new_manifest);

          // For each parent, construct an edge in the revision structure by analyzing the
          // relationship between the parent roster and the child roster (and placing the
          // result in a cset)

          for (parent_roster_map::const_iterator i = parent_rosters.begin();
               i != parent_rosters.end(); ++i)
            {
              u64 parent = i->first;
              revision_id parent_rid = safe_get(node_to_new_rev, parent);
              shared_ptr<roster_t> parent_roster = i->second.first;
              shared_ptr<cset> cs = shared_ptr<cset>(new cset());
              MM(*cs);
              make_cset(*parent_roster, child_roster, *cs);
              safe_insert(rev.edges, make_pair(parent_rid, cs));
            }

          // It is possible that we're at a "root" node here -- a node
          // which had no parent in the old rev graph -- in which case we
          // synthesize an edge from the empty revision to the current,
          // containing a cset which adds all the files in the child.

          if (rev.edges.empty())
            {
              revision_id parent_rid;
              shared_ptr<roster_t> parent_roster = shared_ptr<roster_t>(new roster_t());
              shared_ptr<cset> cs = shared_ptr<cset>(new cset());
              MM(*cs);
              make_cset(*parent_roster, child_roster, *cs);
              safe_insert(rev.edges, make_pair (parent_rid, cs));

            }

          // Finally, put all this excitement into the database and save
          // the new_rid for use in the cert-writing pass.

          revision_id new_rid;
          calculate_ident(rev, new_rid);
          node_to_new_rev.insert(make_pair(child, new_rid));
          new_rev_to_node.insert(make_pair(new_rid, child));

          /*
          P(F("------------------------------------------------"));
          P(F("made revision %s with %d edges, manifest id = %s")
            % new_rid % rev.edges.size() % rev.new_manifest);

          {
            string rtmp;
            data dtmp;
            dump(dbg, rtmp);
            write_revision(rev, dtmp);
            P(F("%s") % rtmp);
            P(F("%s") % dtmp);
          }
          P(F("------------------------------------------------"));
          */

          L(FL("mapped node %d to revision %s") % child % new_rid);
          if (db.put_revision(new_rid, rev))
            ++n_revs_out;

          // Mark this child as done, hooray!
          safe_insert(done, child);

          // Extend the work queue with all the children of this child
          pair<ci,ci> grandchild_range = parent_to_child_map.equal_range(child);
          for (ci i = grandchild_range.first;
               i != grandchild_range.second; ++i)
            {
              if (i->first != child)
                continue;
              if (done.find(i->second) == done.end())
                work.push_back(i->second);
            }
        }
    }
}
1652
// Migration entry point: convert a pre-roster database's old-style
// revisions into new roster-style revisions via an anc_graph.
void
build_roster_style_revs_from_manifest_style_revs(database & db, key_store & keys,
                                                 set<string> const & attrs_to_drop)
{
  anc_graph graph(true, db, keys);

  P(F("converting existing revision graph to new roster-style revisions"));
  multimap<revision_id, revision_id> existing_graph;

  // cross-check that we're getting everything
  // in fact the code in this function is wrong, because if a revision has no
  // parents and no children (it is a root revision, and no children have been
  // committed under it), then we will simply drop it!
  // This code at least causes this case to throw an assertion; FIXME: make
  // this case actually work.
  set<revision_id> all_rev_ids;
  db.get_revision_ids(all_rev_ids);

  db.get_revision_ancestry(existing_graph);
  for (multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
       i != existing_graph.end(); ++i)
    {
      // FIXME: insert for the null id as well, and do the same for the
      // changesetify code, and then teach rebuild_ancestry how to deal with
      // such things. (I guess u64(0) should represent the null parent?)
      if (!null_id(i->first))
        {
          u64 parent_node = graph.add_node_for_oldstyle_revision(i->first);
          all_rev_ids.erase(i->first);
          u64 child_node = graph.add_node_for_oldstyle_revision(i->second);
          all_rev_ids.erase(i->second);
          graph.add_node_ancestry(child_node, parent_node);
        }
    }

  // Revisions that appeared in no ancestry edge still get nodes of
  // their own (the cross-check described above).
  for (set<revision_id>::const_iterator i = all_rev_ids.begin();
       i != all_rev_ids.end(); ++i)
    {
      graph.add_node_for_oldstyle_revision(*i);
    }

  graph.rebuild_ancestry(attrs_to_drop);
}
1696
1697
1698void
1699build_changesets_from_manifest_ancestry(database & db, key_store & keys,
1700 set<string> const & attrs_to_drop)
1701{
1702 anc_graph graph(false, db, keys);
1703
1704 P(F("rebuilding revision graph from manifest certs"));
1705
1706 vector< manifest<cert> > tmp;
1707 db.get_manifest_certs(cert_name("ancestor"), tmp);
1708 erase_bogus_certs(db, tmp);
1709
1710 for (vector< manifest<cert> >::const_iterator i = tmp.begin();
1711 i != tmp.end(); ++i)
1712 {
1713 manifest_id child, parent;
1714 child = manifest_id(i->inner().ident.inner());
1715 parent = manifest_id(i->inner().value());
1716
1717 u64 parent_node = graph.add_node_for_old_manifest(parent);
1718 u64 child_node = graph.add_node_for_old_manifest(child);
1719 graph.add_node_ancestry(child_node, parent_node);
1720 }
1721
1722 graph.rebuild_ancestry(attrs_to_drop);
1723}
1724
1725
// This is a special function solely for the use of regenerate_caches -- it
// must work even when caches (especially, the height cache!) do not exist.
// For all other purposes, use toposort above.
static void
allrevs_toposorted(database & db,
                   vector<revision_id> & revisions)
{
  // get the complete ancestry
  rev_ancestry_map graph;
  db.get_revision_ancestry(graph);
  // sort parents before children without relying on cached heights
  toposort_rev_ancestry(graph, revisions);
}
1738
1739void
1740regenerate_caches(database & db)
1741{
1742 P(F("regenerating cached rosters and heights"));
1743
1744 db.ensure_open_for_format_changes();
1745
1746 transaction_guard guard(db);
1747
1748 db.delete_existing_rosters();
1749 db.delete_existing_heights();
1750
1751 vector<revision_id> sorted_ids;
1752 allrevs_toposorted(db, sorted_ids);
1753
1754 ticker done(_("regenerated"), "r", 5);
1755 done.set_total(sorted_ids.size());
1756
1757 for (std::vector<revision_id>::const_iterator i = sorted_ids.begin();
1758 i != sorted_ids.end(); ++i)
1759 {
1760 revision_t rev;
1761 revision_id const & rev_id = *i;
1762 db.get_revision(rev_id, rev);
1763 db.put_roster_for_revision(rev_id, rev);
1764 db.put_height_for_revision(rev_id, rev);
1765 ++done;
1766 }
1767
1768 guard.commit();
1769
1770 P(F("finished regenerating cached rosters and heights"));
1771}
1772
1773// i/o stuff
1774
namespace
{
  // Symbol names used in the basic_io serialization of a revision.
  namespace syms
  {
    symbol const format_version("format_version");
    symbol const old_revision("old_revision");
    symbol const new_manifest("new_manifest");
  }
}
1784
// Emit one edge of a revision: an "old_revision" stanza naming the
// parent, followed by the edge's cset.
void
print_edge(basic_io::printer & printer,
           edge_entry const & e)
{
  basic_io::stanza st;
  st.push_binary_pair(syms::old_revision, edge_old_revision(e).inner());
  printer.print_stanza(st);
  print_cset(printer, edge_changes(e));
}
1794
1795static void
1796print_insane_revision(basic_io::printer & printer,
1797 revision_t const & rev)
1798{
1799
1800 basic_io::stanza format_stanza;
1801 format_stanza.push_str_pair(syms::format_version, "1");
1802 printer.print_stanza(format_stanza);
1803
1804 basic_io::stanza manifest_stanza;
1805 manifest_stanza.push_binary_pair(syms::new_manifest, rev.new_manifest.inner());
1806 printer.print_stanza(manifest_stanza);
1807
1808 for (edge_map::const_iterator edge = rev.edges.begin();
1809 edge != rev.edges.end(); ++edge)
1810 print_edge(printer, *edge);
1811}
1812
// Serialize 'rev', insisting that it pass the sanity check first.
void
print_revision(basic_io::printer & printer,
               revision_t const & rev)
{
  rev.check_sane();
  print_insane_revision(printer, rev);
}
1820
1821
1822void
1823parse_edge(basic_io::parser & parser,
1824 edge_map & es)
1825{
1826 shared_ptr<cset> cs(new cset());
1827 MM(*cs);
1828 manifest_id old_man;
1829 revision_id old_rev;
1830 string tmp;
1831
1832 parser.esym(syms::old_revision);
1833 parser.hex(tmp);
1834 old_rev = revision_id(decode_hexenc(tmp));
1835
1836 parse_cset(parser, *cs);
1837
1838 es.insert(make_pair(old_rev, cs));
1839}
1840
1841
// Parse a complete revision: a format_version stanza, a new_manifest
// stanza, then zero or more edges.  The result is sanity-checked.
void
parse_revision(basic_io::parser & parser,
               revision_t & rev)
{
  MM(rev);
  rev.edges.clear();
  rev.made_for = made_for_database;
  string tmp;
  parser.esym(syms::format_version);
  parser.str(tmp);
  // Only format version "1" is understood by this implementation.
  E(tmp == "1",
    F("encountered a revision with unknown format, version '%s'\n"
      "I only know how to understand the version '1' format\n"
      "a newer version of monotone is required to complete this operation")
    % tmp);
  parser.esym(syms::new_manifest);
  parser.hex(tmp);
  rev.new_manifest = manifest_id(decode_hexenc(tmp));
  // Each edge starts with an old_revision symbol.
  while (parser.symp(syms::old_revision))
    parse_edge(parser, rev.edges);
  rev.check_sane();
}
1864
1865void
1866read_revision(data const & dat,
1867 revision_t & rev)
1868{
1869 MM(rev);
1870 basic_io::input_source src(dat(), "revision");
1871 basic_io::tokenizer tok(src);
1872 basic_io::parser pars(tok);
1873 parse_revision(pars, rev);
1874 I(src.lookahead == EOF);
1875 rev.check_sane();
1876}
1877
// Convenience overload: unwrap revision_data and parse its contents.
void
read_revision(revision_data const & dat,
              revision_t & rev)
{
  read_revision(dat.inner(), rev);
  rev.check_sane();
}
1885
1886static void write_insane_revision(revision_t const & rev,
1887 data & dat)
1888{
1889 basic_io::printer pr;
1890 print_insane_revision(pr, rev);
1891 dat = data(pr.buf);
1892}
1893
1894template <> void
1895dump(revision_t const & rev, string & out)
1896{
1897 data dat;
1898 write_insane_revision(rev, dat);
1899 out = dat();
1900}
1901
// Serialize 'rev' into 'dat', refusing to write an insane revision.
void
write_revision(revision_t const & rev,
               data & dat)
{
  rev.check_sane();
  write_insane_revision(rev, dat);
}
1909
1910void
1911write_revision(revision_t const & rev,
1912 revision_data & dat)
1913{
1914 data d;
1915 write_revision(rev, d);
1916 dat = revision_data(d);
1917}
1918
1919void calculate_ident(revision_t const & cs,
1920 revision_id & ident)
1921{
1922 data tmp;
1923 id tid;
1924 write_revision(cs, tmp);
1925 calculate_ident(tmp, tid);
1926 ident = revision_id(tid);
1927}
1928
1929#ifdef BUILD_UNIT_TESTS
1930#include "unit_tests.hh"
1931#include "sanity.hh"
1932
// Exercise find_old_path_for / find_new_path_for: identity behaviour
// on an empty rename map, then a directory rename (foo -> quux) and a
// file rename (foo/bar -> foo/baz), in both directions.
UNIT_TEST(revision, find_old_new_path_for)
{
  map<file_path, file_path> renames;
  file_path foo = file_path_internal("foo");
  file_path foo_bar = file_path_internal("foo/bar");
  file_path foo_baz = file_path_internal("foo/baz");
  file_path quux = file_path_internal("quux");
  file_path quux_baz = file_path_internal("quux/baz");
  // With no renames, every path maps to itself in both directions.
  I(foo == find_old_path_for(renames, foo));
  I(foo == find_new_path_for(renames, foo));
  I(foo_bar == find_old_path_for(renames, foo_bar));
  I(foo_bar == find_new_path_for(renames, foo_bar));
  I(quux == find_old_path_for(renames, quux));
  I(quux == find_new_path_for(renames, quux));
  // Rename map entries are old-name-keyed: new_path -> old_path.
  renames.insert(make_pair(foo, quux));
  renames.insert(make_pair(foo_bar, foo_baz));
  I(quux == find_old_path_for(renames, foo));
  I(foo == find_new_path_for(renames, quux));
  // Directory renames apply recursively to contained paths.
  I(quux_baz == find_old_path_for(renames, foo_baz));
  I(foo_baz == find_new_path_for(renames, quux_baz));
  I(foo_baz == find_old_path_for(renames, foo_bar));
  I(foo_bar == find_new_path_for(renames, foo_baz));
}
1956
1957#endif // BUILD_UNIT_TESTS
1958
1959// Local Variables:
1960// mode: C++
1961// fill-column: 76
1962// c-file-style: "gnu"
1963// indent-tabs-mode: nil
1964// End:
1965// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status