monotone

monotone Mtn Source Tree

Root/revision.cc

1// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <deque>
#include <functional>
#include <iterator>
#include <list>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <vector>
21
22#include <boost/lexical_cast.hpp>
23#include <boost/dynamic_bitset.hpp>
24#include <boost/shared_ptr.hpp>
25
26#include "botan/botan.h"
27
28#include "app_state.hh"
29#include "basic_io.hh"
30#include "cert.hh"
31#include "cset.hh"
32#include "constants.hh"
33#include "interner.hh"
34#include "keys.hh"
35#include "numeric_vocab.hh"
36#include "revision.hh"
37#include "mtn-sanity.hh"
38#include "transforms.hh"
39#include "simplestring_xform.hh"
40#include "ui.hh"
41#include "vocab.hh"
42#include "safe_map.hh"
43#include "legacy.hh"
44#include "rev_height.hh"
45#include "cmd.hh"
46
47using std::back_inserter;
48using std::copy;
49using std::deque;
50using std::list;
51using std::make_pair;
52using std::map;
53using std::max;
54using std::multimap;
55using std::ostringstream;
56using std::pair;
57using std::queue;
58using std::set;
59using std::stack;
60using std::string;
61using std::vector;
62
63using boost::dynamic_bitset;
64using boost::shared_ptr;
65
// Enforce the structural invariants of a revision_t, aborting via I()
// on violation: a null manifest id is only tolerated when every parent
// edge is also null, and a revision must have exactly 1 or 2 edges.
void revision_t::check_sane() const
{
  // null id in current manifest only permitted if previous
  // state was null and no changes
  // FIXME: above comment makes no sense. This should just be
  // I(!null_id(new_manifest)), and the only reason I am not making it so
  // right now is that I don't have time to immediately track down all the
  // fallout.
  if (null_id(new_manifest))
    {
      for (edge_map::const_iterator i = edges.begin();
           i != edges.end(); ++i)
        I(null_id(edge_old_revision(i)));
    }

  if (edges.size() == 1)
    {
      // no particular checks to be done right now
    }
  else if (edges.size() == 2)
    {
      // merge nodes cannot have null revisions
      for (edge_map::const_iterator i = edges.begin(); i != edges.end(); ++i)
        I(!null_id(edge_old_revision(i)));
    }
  else
    // revisions must always have either 1 or 2 edges
    // (note: this also fires on a default-constructed, zero-edge
    // revision, which is why callers avoid check_sane() on empty ones)
    I(false);

  // we used to also check that if there were multiple edges that had patches
  // for the same file, then the new hashes on each edge matched each other.
  // this is not ported over to roster-style revisions because it's an
  // inadequate check, and the real check, that the new manifest id is correct
  // (done in put_revision, for instance) covers this case automatically.
}
101
102bool
103revision_t::is_merge_node() const
104{
105 return edges.size() > 1;
106}
107
108bool
109revision_t::is_nontrivial() const
110{
111 check_sane();
112 // merge revisions are never trivial, because even if the resulting node
113 // happens to be identical to both parents, the merge is still recording
114 // that fact.
115 if (is_merge_node())
116 return true;
117 else
118 return !edge_changes(edges.begin()).empty();
119}
120
121revision_t::revision_t(revision_t const & other)
122{
123 /* behave like normal constructor if other is empty */
124 made_for = made_for_nobody;
125 if (null_id(other.new_manifest) && other.edges.empty()) return;
126 other.check_sane();
127 new_manifest = other.new_manifest;
128 edges = other.edges;
129 made_for = other.made_for;
130}
131
132revision_t const &
133revision_t::operator=(revision_t const & other)
134{
135 other.check_sane();
136 new_manifest = other.new_manifest;
137 edges = other.edges;
138 made_for = other.made_for;
139 return *this;
140}
141
142
143// For a surprisingly long time, we have been using an algorithm which
144// is nonsense, based on a misunderstanding of what "LCA" means. The
145// LCA of two nodes is *not* the first common ancestor which you find
146// when iteratively expanding their ancestor sets. Instead, the LCA is
147// the common ancestor which is a descendent of all other common
148// ancestors.
149//
150// In general, a set of nodes in a DAG doesn't always have an
151// LCA. There might be multiple common ancestors which are not parents
152// of one another. So we implement something which is "functionally
153// useful" for finding a merge point (and moreover, which always
154// terminates): we find an LCA of the input set if it exists,
155// otherwise we replace the input set with the nodes we did find and
156// repeat.
157//
158// All previous discussions in monotone-land, before say August 2005,
159// of LCA (and LCAD) are essentially wrong due to our silly
160// misunderstanding. It's unfortunate, but our half-baked
161// approximations worked almost well enough to take us through 3 years
162// of deployed use. Hopefully this more accurate new use will serve us
163// even longer.
164
165typedef unsigned long ctx;
166typedef dynamic_bitset<> bitmap;
167typedef shared_ptr<bitmap> shared_bitmap;
168
169static void
170calculate_ancestors_from_graph(interner<ctx> & intern,
171 revision_id const & init,
172 multimap<revision_id, revision_id> const & graph,
173 map< ctx, shared_bitmap > & ancestors,
174 shared_bitmap & total_union);
175
176void
177find_common_ancestor_for_merge(revision_id const & left,
178 revision_id const & right,
179 revision_id & anc,
180 app_state & app)
181{
182 interner<ctx> intern;
183 set<ctx> leaves;
184 map<ctx, shared_bitmap> ancestors;
185
186 shared_bitmap isect = shared_bitmap(new bitmap());
187 shared_bitmap isect_ancs = shared_bitmap(new bitmap());
188
189 leaves.insert(intern.intern(left.inner()()));
190 leaves.insert(intern.intern(right.inner()()));
191
192
193 multimap<revision_id, revision_id> inverse_graph;
194 {
195 multimap<revision_id, revision_id> graph;
196 app.db.get_revision_ancestry(graph);
197 typedef multimap<revision_id, revision_id>::const_iterator gi;
198 for (gi i = graph.begin(); i != graph.end(); ++i)
199 inverse_graph.insert(make_pair(i->second, i->first));
200 }
201
202
203 while (leaves.size() != 1)
204 {
205 isect->clear();
206 isect_ancs->clear();
207
208 // First intersect all ancestors of current leaf set
209 for (set<ctx>::const_iterator i = leaves.begin(); i != leaves.end(); ++i)
210 {
211 ctx curr_leaf = *i;
212 shared_bitmap curr_leaf_ancestors;
213 map<ctx, shared_bitmap >::const_iterator j = ancestors.find(*i);
214 if (j != ancestors.end())
215 curr_leaf_ancestors = j->second;
216 else
217 {
218 curr_leaf_ancestors = shared_bitmap(new bitmap());
219 calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf)),
220 inverse_graph, ancestors,
221 curr_leaf_ancestors);
222 }
223 if (isect->size() > curr_leaf_ancestors->size())
224 curr_leaf_ancestors->resize(isect->size());
225
226 if (curr_leaf_ancestors->size() > isect->size())
227 isect->resize(curr_leaf_ancestors->size());
228
229 if (i == leaves.begin())
230 *isect = *curr_leaf_ancestors;
231 else
232 (*isect) &= (*curr_leaf_ancestors);
233 }
234
235 // isect is now the set of common ancestors of leaves, but that is not enough.
236 // We need the set of leaves of isect; to do that we calculate the set of
237 // ancestors of isect, in order to subtract it from isect (below).
238 set<ctx> new_leaves;
239 for (ctx i = 0; i < isect->size(); ++i)
240 {
241 if (isect->test(i))
242 {
243 calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i)),
244 inverse_graph, ancestors, isect_ancs);
245 }
246 }
247
248 // Finally, the subtraction step: for any element i of isect, if
249 // it's *not* in isect_ancs, it survives as a new leaf.
250 leaves.clear();
251 for (ctx i = 0; i < isect->size(); ++i)
252 {
253 if (!isect->test(i))
254 continue;
255 if (i < isect_ancs->size() && isect_ancs->test(i))
256 continue;
257 safe_insert(leaves, i);
258 }
259 }
260
261 I(leaves.size() == 1);
262 anc = revision_id(intern.lookup(*leaves.begin()));
263}
264
265// FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the
266// size of the entire revision graph.
267
// Return true iff 'descendent_id' is reachable from 'ancestor_id' by
// following zero or more edges of 'graph' (a node -> children map).
// A node is considered its own ancestor.  Plain breadth-first search.
//
// Fix over the previous version: the worklist was a variable literally
// named 'queue', shadowing the std::queue type brought in by the
// file-scope using-declaration -- legal but highly error-prone; it is
// renamed 'worklist' here.
template<typename T> static bool
is_ancestor(T const & ancestor_id,
            T const & descendent_id,
            std::multimap<T, T> const & graph)
{
  std::set<T> visited;
  std::queue<T> worklist;

  worklist.push(ancestor_id);

  while (!worklist.empty())
    {
      T current_id = worklist.front();
      worklist.pop();

      if (current_id == descendent_id)
        return true;

      // enqueue unvisited children of the current node
      typedef typename std::multimap<T, T>::const_iterator gi;
      std::pair<gi, gi> children = graph.equal_range(current_id);
      for (gi i = children.first; i != children.second; ++i)
        {
          if (visited.find(i->second) == visited.end())
            {
              worklist.push(i->second);
              visited.insert(i->second);
            }
        }
    }
  return false;
}
302
303bool
304is_ancestor(revision_id const & ancestor_id,
305 revision_id const & descendent_id,
306 app_state & app)
307{
308 L(FL("checking whether %s is an ancestor of %s") % ancestor_id % descendent_id);
309
310 multimap<revision_id, revision_id> graph;
311 app.db.get_revision_ancestry(graph);
312 return is_ancestor(ancestor_id, descendent_id, graph);
313}
314
315
316static void
317add_bitset_to_union(shared_bitmap src,
318 shared_bitmap dst)
319{
320 if (dst->size() > src->size())
321 src->resize(dst->size());
322 if (src->size() > dst->size())
323 dst->resize(src->size());
324 *dst |= *src;
325}
326
327
// Compute the ancestor set of 'init' as a bitmap over interned
// revision ids, memoizing every node's ancestor bitmap in 'ancestors'
// and OR-ing each computed bitmap into 'total_union'.  Uses an
// explicit stack (iterative post-order over 'graph', a child -> parent
// multimap) so deep histories cannot overflow the call stack.
static void
calculate_ancestors_from_graph(interner<ctx> & intern,
                               revision_id const & init,
                               multimap<revision_id, revision_id> const & graph,
                               map< ctx, shared_bitmap > & ancestors,
                               shared_bitmap & total_union)
{
  typedef multimap<revision_id, revision_id>::const_iterator gi;
  stack<ctx> stk;

  stk.push(intern.intern(init.inner()()));

  while (! stk.empty())
    {
      ctx us = stk.top();
      revision_id rev(hexenc<id>(intern.lookup(us)));

      // 'graph' is inverted, so equal_range yields this node's parents
      pair<gi,gi> parents = graph.equal_range(rev);
      bool pushed = false;

      // first make sure all parents are done
      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());
          if (ancestors.find(parent) == ancestors.end())
            {
              stk.push(parent);
              pushed = true;
              break;
            }
        }

      // if we pushed anything we stop now. we'll come back later when all
      // the parents are done.
      if (pushed)
        continue;

      // all parents memoized: our ancestor set is the union of each
      // parent plus each parent's ancestor set
      shared_bitmap b = shared_bitmap(new bitmap());

      for (gi i = parents.first; i != parents.second; ++i)
        {
          ctx parent = intern.intern(i->second.inner()());

          // set all parents
          if (b->size() <= parent)
            b->resize(parent + 1);
          b->set(parent);

          // ensure all parents are loaded into the ancestor map
          I(ancestors.find(parent) != ancestors.end());

          // union them into our map
          map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
          I(j != ancestors.end());
          add_bitset_to_union(j->second, b);
        }

      add_bitset_to_union(b, total_union);
      ancestors.insert(make_pair(us, b));
      stk.pop();
    }
}
390
391void
392toposort(set<revision_id> const & revisions,
393 vector<revision_id> & sorted,
394 app_state & app)
395{
396 map<rev_height, revision_id> work;
397
398 for (set<revision_id>::const_iterator i = revisions.begin();
399 i != revisions.end(); ++i)
400 {
401 rev_height height;
402 app.db.get_rev_height(*i, height);
403 work.insert(make_pair(height, *i));
404 }
405
406 sorted.clear();
407
408 for (map<rev_height, revision_id>::const_iterator i = work.begin();
409 i != work.end(); ++i)
410 {
411 sorted.push_back(i->second);
412 }
413}
414
415static void
416accumulate_strict_ancestors(revision_id const & start,
417 set<revision_id> & new_ancestors,
418 set<revision_id> & all_ancestors,
419 multimap<revision_id, revision_id> const & inverse_graph)
420{
421 typedef multimap<revision_id, revision_id>::const_iterator gi;
422 new_ancestors.clear();
423
424 vector<revision_id> frontier;
425 frontier.push_back(start);
426 while (!frontier.empty())
427 {
428 revision_id rid = frontier.back();
429 frontier.pop_back();
430 pair<gi, gi> parents = inverse_graph.equal_range(rid);
431 for (gi i = parents.first; i != parents.second; ++i)
432 {
433 revision_id const & parent = i->second;
434 if (all_ancestors.find(parent) == all_ancestors.end())
435 {
436 new_ancestors.insert(parent);
437 all_ancestors.insert(parent);
438 frontier.push_back(i->second);
439 }
440 }
441 }
442}
443
444// this call is equivalent to running:
445// remove_if(candidates.begin(), candidates.end(), p);
446// erase_ancestors(candidates, app);
447// however, by interleaving the two operations, it can in common cases make
448// many fewer calls to the predicate, which can be a significant speed win.
449//
450// FIXME: once we have heights, we should use them here. The strategy will
451// be, to calculate the minimum height of all the candidates, and teach
452// accumulate_ancestors to stop at a certain height, and teach the code that
453// removes items from the candidate set to occasionally rescan the candidate
454// set to get a new minimum height (perhaps, whenever we remove the minimum
455// height candidate).
// Remove from 'candidates' every revision that fails predicate 'p',
// and every revision that is a strict ancestor of a surviving
// candidate.  Interleaving the two removals means revisions already
// ruled out as ancestors never have the predicate run on them at all.
void
erase_ancestors_and_failures(std::set<revision_id> & candidates,
                             is_failure & p,
                             app_state & app)
{
  // Load up the ancestry graph
  multimap<revision_id, revision_id> inverse_graph;

  {
    // get_revision_ancestry yields parent -> child; invert to child -> parent
    multimap<revision_id, revision_id> graph;
    app.db.get_revision_ancestry(graph);
    for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
         i != graph.end(); ++i)
      inverse_graph.insert(make_pair(i->second, i->first));
  }

  // Keep a set of all ancestors that we've traversed -- to avoid
  // combinatorial explosion.
  set<revision_id> all_ancestors;

  // Random order: hitting a descendant early lets its ancestor sweep
  // eliminate many other candidates without predicate calls.
  vector<revision_id> todo(candidates.begin(), candidates.end());
  std::random_shuffle(todo.begin(), todo.end());

  size_t predicates = 0;
  while (!todo.empty())
    {
      revision_id rid = todo.back();
      todo.pop_back();
      // check if this one has already been eliminated
      if (all_ancestors.find(rid) != all_ancestors.end())
        continue;
      // and then whether it actually should stay in the running:
      ++predicates;
      if (p(rid))
        {
          candidates.erase(rid);
          continue;
        }
      // okay, it is good enough that all its ancestors should be
      // eliminated; do that now.
      {
        set<revision_id> new_ancestors;
        accumulate_strict_ancestors(rid, new_ancestors, all_ancestors, inverse_graph);
        I(new_ancestors.find(rid) == new_ancestors.end());
        for (set<revision_id>::const_iterator i = new_ancestors.begin();
             i != new_ancestors.end(); ++i)
          candidates.erase(*i);
      }
    }
  L(FL("called predicate %s times") % predicates);
}
507
508// This function looks at a set of revisions, and for every pair A, B in that
509// set such that A is an ancestor of B, it erases A.
510
namespace
{
  // Trivial is_failure predicate rejecting nothing; lets
  // erase_ancestors reuse erase_ancestors_and_failures unchanged.
  struct no_failures : public is_failure
  {
    virtual bool operator()(revision_id const & rid)
    {
      return false;
    }
  };
}
521void
522erase_ancestors(set<revision_id> & revisions, app_state & app)
523{
524 no_failures p;
525 erase_ancestors_and_failures(revisions, p, app);
526}
527
528// This function takes a revision A and a set of revision Bs, calculates the
529// ancestry of each, and returns the set of revisions that are in A's ancestry
530// but not in the ancestry of any of the Bs. It tells you 'what's new' in A
531// that's not in the Bs. If the output set if non-empty, then A will
532// certainly be in it; but the output set might be empty.
void
ancestry_difference(revision_id const & a, set<revision_id> const & bs,
                    set<revision_id> & new_stuff,
                    app_state & app)
{
  new_stuff.clear();
  typedef multimap<revision_id, revision_id>::const_iterator gi;
  multimap<revision_id, revision_id> graph;
  multimap<revision_id, revision_id> inverse_graph;

  // invert the db's parent -> child map into child -> parent
  app.db.get_revision_ancestry(graph);
  for (gi i = graph.begin(); i != graph.end(); ++i)
    inverse_graph.insert(make_pair(i->second, i->first));

  interner<ctx> intern;
  map< ctx, shared_bitmap > ancestors;

  // u: every B plus every ancestor of a B, as one bitmap
  shared_bitmap u = shared_bitmap(new bitmap());

  for (set<revision_id>::const_iterator i = bs.begin();
       i != bs.end(); ++i)
    {
      calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
      ctx c = intern.intern(i->inner()());
      if (u->size() <= c)
        u->resize(c + 1);
      u->set(c);
    }

  // au: A plus all of A's ancestors
  shared_bitmap au = shared_bitmap(new bitmap());
  calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
  {
    ctx c = intern.intern(a.inner()());
    if (au->size() <= c)
      au->resize(c + 1);
    au->set(c);
  }

  // pad both to a common length, then subtract: au becomes
  // "in A's ancestry but in no B's ancestry"
  au->resize(max(au->size(), u->size()));
  u->resize(max(au->size(), u->size()));

  *au -= *u;

  for (unsigned int i = 0; i != au->size(); ++i)
    {
      if (au->test(i))
        {
          revision_id rid(intern.lookup(i));
          if (!null_id(rid))
            new_stuff.insert(rid);
        }
    }
}
586
// Collect into 'nodes_modified' the ids of all nodes touched by 'rev'
// along any of its edges, relative to 'new_roster' (the roster of the
// revision itself).
// NOTE(review): 'new_roster' is taken by value ('roster_t const', no
// '&'), copying the entire roster on every call -- presumably an
// oversight, but the declaration in revision.hh must be changed in
// step before this can become a reference; verify before touching.
void
select_nodes_modified_by_rev(revision_t const & rev,
                             roster_t const new_roster,
                             set<node_id> & nodes_modified,
                             app_state & app)
{
  nodes_modified.clear();

  for (edge_map::const_iterator i = rev.edges.begin();
       i != rev.edges.end(); ++i)
    {
      set<node_id> edge_nodes_modified;
      roster_t old_roster;
      app.db.get_roster(edge_old_revision(i), old_roster);
      // nodes changed along this particular edge
      select_nodes_modified_by_cset(edge_changes(i),
                                    old_roster,
                                    new_roster,
                                    edge_nodes_modified);

      // merge this edge's nodes into the overall result set
      copy(edge_nodes_modified.begin(), edge_nodes_modified.end(),
           inserter(nodes_modified, nodes_modified.begin()));
    }
}
610
611
612void
613make_revision(revision_id const & old_rev_id,
614 roster_t const & old_roster,
615 roster_t const & new_roster,
616 revision_t & rev)
617{
618 shared_ptr<cset> cs(new cset());
619
620 rev.edges.clear();
621 make_cset(old_roster, new_roster, *cs);
622
623 calculate_ident(new_roster, rev.new_manifest);
624 L(FL("new manifest_id is %s") % rev.new_manifest);
625
626 safe_insert(rev.edges, make_pair(old_rev_id, cs));
627 rev.made_for = made_for_database;
628}
629
630void
631make_revision(revision_id const & old_rev_id,
632 roster_t const & old_roster,
633 cset const & changes,
634 revision_t & rev)
635{
636 roster_t new_roster = old_roster;
637 {
638 temp_node_id_source nis;
639 editable_roster_base er(new_roster, nis);
640 changes.apply_to(er);
641 }
642
643 shared_ptr<cset> cs(new cset(changes));
644 rev.edges.clear();
645
646 calculate_ident(new_roster, rev.new_manifest);
647 L(FL("new manifest_id is %s") % rev.new_manifest);
648
649 safe_insert(rev.edges, make_pair(old_rev_id, cs));
650 rev.made_for = made_for_database;
651}
652
653void
654make_revision(parent_map const & old_rosters,
655 roster_t const & new_roster,
656 revision_t & rev)
657{
658 edge_map edges;
659 for (parent_map::const_iterator i = old_rosters.begin();
660 i != old_rosters.end();
661 i++)
662 {
663 shared_ptr<cset> cs(new cset());
664 make_cset(parent_roster(i), new_roster, *cs);
665 safe_insert(edges, make_pair(parent_id(i), cs));
666 }
667
668 rev.edges = edges;
669 calculate_ident(new_roster, rev.new_manifest);
670 L(FL("new manifest_id is %s") % rev.new_manifest);
671}
672
673static void
674recalculate_manifest_id_for_restricted_rev(parent_map const & old_rosters,
675 edge_map & edges,
676 revision_t & rev)
677{
678 // In order to get the correct manifest ID, recalculate the new roster
679 // using one of the restricted csets. It doesn't matter which of the
680 // parent roster/cset pairs we use for this; by construction, they must
681 // all produce the same result.
682 revision_id id = parent_id(old_rosters.begin());
683 roster_t restricted_roster = *(safe_get(old_rosters, id).first);
684
685 temp_node_id_source nis;
686 editable_roster_base er(restricted_roster, nis);
687 safe_get(edges, id)->apply_to(er);
688
689 calculate_ident(restricted_roster, rev.new_manifest);
690 rev.edges = edges;
691 L(FL("new manifest_id is %s") % rev.new_manifest);
692}
693
694void
695make_restricted_revision(parent_map const & old_rosters,
696 roster_t const & new_roster,
697 node_restriction const & mask,
698 revision_t & rev)
699{
700 edge_map edges;
701 cset dummy;
702 for (parent_map::const_iterator i = old_rosters.begin();
703 i != old_rosters.end();
704 i++)
705 {
706 shared_ptr<cset> included(new cset());
707 make_restricted_csets(parent_roster(i), new_roster,
708 *included, dummy, mask);
709 check_restricted_cset(parent_roster(i), *included);
710 safe_insert(edges, make_pair(parent_id(i), included));
711 }
712
713 recalculate_manifest_id_for_restricted_rev(old_rosters, edges, rev);
714}
715
716void
717make_restricted_revision(parent_map const & old_rosters,
718 roster_t const & new_roster,
719 node_restriction const & mask,
720 revision_t & rev,
721 cset & excluded,
722 string const & cmd_name)
723{
724 edge_map edges;
725 bool no_excludes = true;
726 for (parent_map::const_iterator i = old_rosters.begin();
727 i != old_rosters.end();
728 i++)
729 {
730 shared_ptr<cset> included(new cset());
731 make_restricted_csets(parent_roster(i), new_roster,
732 *included, excluded, mask);
733 check_restricted_cset(parent_roster(i), *included);
734 safe_insert(edges, make_pair(parent_id(i), included));
735 if (!excluded.empty())
736 no_excludes = false;
737 }
738
739 N(old_rosters.size() == 1 || no_excludes,
740 F("the command '%s %s' cannot be restricted in a two-parent workspace")
741 % ui.prog_name % cmd_name);
742
743 recalculate_manifest_id_for_restricted_rev(old_rosters, edges, rev);
744}
745
746// Workspace-only revisions, with fake rev.new_manifest and content
747// changes suppressed.
748void
749make_revision_for_workspace(revision_id const & old_rev_id,
750 cset const & changes,
751 revision_t & rev)
752{
753 MM(old_rev_id);
754 MM(changes);
755 MM(rev);
756 shared_ptr<cset> cs(new cset(changes));
757 cs->deltas_applied.clear();
758
759 rev.edges.clear();
760 safe_insert(rev.edges, make_pair(old_rev_id, cs));
761 if (!null_id(old_rev_id))
762 rev.new_manifest = manifest_id(fake_id());
763 rev.made_for = made_for_workspace;
764}
765
766void
767make_revision_for_workspace(revision_id const & old_rev_id,
768 roster_t const & old_roster,
769 roster_t const & new_roster,
770 revision_t & rev)
771{
772 MM(old_rev_id);
773 MM(old_roster);
774 MM(new_roster);
775 MM(rev);
776 cset changes;
777 make_cset(old_roster, new_roster, changes);
778 make_revision_for_workspace(old_rev_id, changes, rev);
779}
780
781void
782make_revision_for_workspace(parent_map const & old_rosters,
783 roster_t const & new_roster,
784 revision_t & rev)
785{
786 edge_map edges;
787 for (parent_map::const_iterator i = old_rosters.begin();
788 i != old_rosters.end();
789 i++)
790 {
791 shared_ptr<cset> cs(new cset());
792 make_cset(parent_roster(i), new_roster, *cs);
793 cs->deltas_applied.clear();
794 safe_insert(edges, make_pair(parent_id(i), cs));
795 }
796
797 rev.edges = edges;
798 rev.new_manifest = manifest_id(fake_id());
799 rev.made_for = made_for_workspace;
800}
801
802
803// Stuff related to rebuilding the revision graph. Unfortunately this is a
804// real enough error case that we need support code for it.
805
806typedef map<u64, pair<shared_ptr<roster_t>, shared_ptr<marking_map> > >
807parent_roster_map;
808
809template <> void
810dump(parent_roster_map const & prm, string & out)
811{
812 ostringstream oss;
813 for (parent_roster_map::const_iterator i = prm.begin(); i != prm.end(); ++i)
814 {
815 oss << "roster: " << i->first << '\n';
816 string roster_str, indented_roster_str;
817 dump(*i->second.first, roster_str);
818 prefix_lines_with(" ", roster_str, indented_roster_str);
819 oss << indented_roster_str;
820 oss << "\nroster's marking:\n";
821 string marking_str, indented_marking_str;
822 dump(*i->second.second, marking_str);
823 prefix_lines_with(" ", marking_str, indented_marking_str);
824 oss << indented_marking_str;
825 oss << "\n\n";
826 }
827 out = oss.str();
828}
829
// Scaffolding for rebuilding/converting the revision graph: collects
// nodes, ancestry edges and certs in memory, then reconstructs
// roster-style revisions and re-issues certs from them.
struct anc_graph
{
  anc_graph(bool existing, app_state & a) :
    existing_graph(existing),
    app(a),
    max_node(0),
    n_nodes("nodes", "n", 1),
    n_certs_in("certs in", "c", 1),
    n_revs_out("revs out", "r", 1),
    n_certs_out("certs out", "C", 1)
  {}

  bool existing_graph;   // true when converting an existing revision graph
  app_state & app;
  u64 max_node;          // next unassigned node number

  // progress tickers
  ticker n_nodes;
  ticker n_certs_in;
  ticker n_revs_out;
  ticker n_certs_out;

  // mappings between internal node numbers and old-world identities
  map<u64,manifest_id> node_to_old_man;
  map<manifest_id,u64> old_man_to_node;

  map<u64,revision_id> node_to_old_rev;
  map<revision_id,u64> old_rev_to_node;

  // mappings between node numbers and the rebuilt revisions
  map<u64,revision_id> node_to_new_rev;
  map<revision_id,u64> new_rev_to_node;

  // per-node rename information recovered from legacy metadata
  map<u64, legacy::renames_map> node_to_renames;

  multimap<u64, pair<cert_name, cert_value> > certs;
  multimap<u64, u64> ancestry;   // child -> parent(s)
  set<string> branches;          // branch names seen in certs

  void add_node_ancestry(u64 child, u64 parent);
  void write_certs();
  void kluge_for_bogus_merge_edges();
  void rebuild_ancestry();
  void get_node_manifest(u64 node, manifest_id & man);
  u64 add_node_for_old_manifest(manifest_id const & man);
  u64 add_node_for_oldstyle_revision(revision_id const & rev);
  void construct_revisions_from_ancestry();
  void fixup_node_identities(parent_roster_map const & parent_rosters,
                             roster_t & child_roster,
                             legacy::renames_map const & renames);
};
878
879
// Record one child -> parent edge in the in-memory ancestry multimap.
void anc_graph::add_node_ancestry(u64 child, u64 parent)
{
  L(FL("noting ancestry from child %d -> parent %d") % child % parent);
  ancestry.insert(make_pair(child, parent));
}
885
886void anc_graph::get_node_manifest(u64 node, manifest_id & man)
887{
888 map<u64,manifest_id>::const_iterator i = node_to_old_man.find(node);
889 I(i != node_to_old_man.end());
890 man = i->second;
891}
892
// Re-issue all collected certs against the rebuilt revision ids, and
// reset every branch epoch to a fresh random value (the old epochs
// described the pre-rebuild graph).
void anc_graph::write_certs()
{
  {
    // regenerate epochs on all branches to random states

    for (set<string>::const_iterator i = branches.begin(); i != branches.end(); ++i)
      {
        char buf[constants::epochlen_bytes];
        Botan::Global_RNG::randomize(reinterpret_cast<Botan::byte *>(buf), constants::epochlen_bytes);
        hexenc<data> hexdata;
        encode_hexenc(data(string(buf, buf + constants::epochlen_bytes)), hexdata);
        epoch_data new_epoch(hexdata);
        L(FL("setting epoch for %s to %s") % *i % new_epoch);
        app.db.set_epoch(branch_name(*i), new_epoch);
      }
  }


  typedef multimap<u64, pair<cert_name, cert_value> >::const_iterator ci;

  for (map<u64,revision_id>::const_iterator i = node_to_new_rev.begin();
       i != node_to_new_rev.end(); ++i)
    {
      revision_id rev(i->second);

      // all certs collected for this node's old identity
      pair<ci,ci> range = certs.equal_range(i->first);

      for (ci j = range.first; j != range.second; ++j)
        {
          cert_name name(j->second.first);
          cert_value val(j->second.second);

          // sign the old cert content against the new revision id,
          // skipping certs that already exist in the database
          cert new_cert;
          make_simple_cert(rev.inner(), name, val, app, new_cert);
          revision<cert> rcert(new_cert);
          if (! app.db.revision_cert_exists(rcert))
            {
              ++n_certs_out;
              app.db.put_revision_cert(rcert);
            }
        }
    }
}
936
937void
938anc_graph::kluge_for_bogus_merge_edges()
939{
940 // This kluge exists because in the 0.24-era monotone databases, several
941 // bad merges still existed in which one side of the merge is an ancestor
942 // of the other side of the merge. In other words, graphs which look like
943 // this:
944 //
945 // a ----------------------> e
946 // \ /
947 // \---> b -> c -> d ----/
948 //
949 // Such merges confuse the roster-building algorithm, because they should
950 // never have occurred in the first place: a was not a head at the time
951 // of the merge, e should simply have been considered an extension of d.
952 //
953 // So... we drop the a->e edges entirely.
954 //
955 // Note: this kluge drops edges which are a struct superset of those
956 // dropped by a previous kluge ("3-ancestor") so we have removed that
957 // code.
958
959 P(F("scanning for bogus merge edges"));
960
961 multimap<u64,u64> parent_to_child_map;
962 for (multimap<u64, u64>::const_iterator i = ancestry.begin();
963 i != ancestry.end(); ++i)
964 parent_to_child_map.insert(make_pair(i->second, i->first));
965
966 map<u64, u64> edges_to_kill;
967 for (multimap<u64, u64>::const_iterator i = ancestry.begin();
968 i != ancestry.end(); ++i)
969 {
970 multimap<u64, u64>::const_iterator j = i;
971 ++j;
972 u64 child = i->first;
973 // NB: ancestry is a multimap from child->parent(s)
974 if (j != ancestry.end())
975 {
976 if (j->first == i->first)
977 {
978 L(FL("considering old merge edge %s") %
979 safe_get(node_to_old_rev, i->first));
980 u64 parent1 = i->second;
981 u64 parent2 = j->second;
982 if (is_ancestor (parent1, parent2, parent_to_child_map))
983 safe_insert(edges_to_kill, make_pair(child, parent1));
984 else if (is_ancestor (parent2, parent1, parent_to_child_map))
985 safe_insert(edges_to_kill, make_pair(child, parent2));
986 }
987 }
988 }
989
990 for (map<u64, u64>::const_iterator i = edges_to_kill.begin();
991 i != edges_to_kill.end(); ++i)
992 {
993 u64 child = i->first;
994 u64 parent = i->second;
995 bool killed = false;
996 for (multimap<u64, u64>::iterator j = ancestry.lower_bound(child);
997 j->first == child; ++j)
998 {
999 if (j->second == parent)
1000 {
1001 P(F("optimizing out redundant edge %d -> %d")
1002 % parent % child);
1003 ancestry.erase(j);
1004 killed = true;
1005 break;
1006 }
1007 }
1008
1009 if (!killed)
1010 W(F("failed to eliminate edge %d -> %d")
1011 % parent % child);
1012 }
1013}
1014
1015
// Top-level driver for graph reconstruction: drop bogus merge edges,
// then rebuild revisions and certs inside a single db transaction.
void
anc_graph::rebuild_ancestry()
{
  kluge_for_bogus_merge_edges();

  P(F("rebuilding %d nodes") % max_node);
  {
    transaction_guard guard(app.db);
    // when converting an existing graph, clear the old revisions and
    // certs before writing the reconstructed ones
    if (existing_graph)
      app.db.delete_existing_revs_and_certs();
    construct_revisions_from_ancestry();
    write_certs();
    if (existing_graph)
      app.db.delete_existing_manifests();
    guard.commit();
  }
}
1033
1034u64
1035anc_graph::add_node_for_old_manifest(manifest_id const & man)
1036{
1037 I(!existing_graph);
1038 u64 node = 0;
1039 if (old_man_to_node.find(man) == old_man_to_node.end())
1040 {
1041 node = max_node++;
1042 ++n_nodes;
1043 L(FL("node %d = manifest %s") % node % man);
1044 old_man_to_node.insert(make_pair(man, node));
1045 node_to_old_man.insert(make_pair(node, man));
1046
1047 // load certs
1048 vector< manifest<cert> > mcerts;
1049 app.db.get_manifest_certs(man, mcerts);
1050 erase_bogus_certs(mcerts, app);
1051 for(vector< manifest<cert> >::const_iterator i = mcerts.begin();
1052 i != mcerts.end(); ++i)
1053 {
1054 L(FL("loaded '%s' manifest cert for node %s") % i->inner().name % node);
1055 cert_value tv;
1056 decode_base64(i->inner().value, tv);
1057 ++n_certs_in;
1058 certs.insert(make_pair(node,
1059 make_pair(i->inner().name, tv)));
1060 }
1061 }
1062 else
1063 {
1064 node = old_man_to_node[man];
1065 }
1066 return node;
1067}
1068
1069u64 anc_graph::add_node_for_oldstyle_revision(revision_id const & rev)
1070{
1071 I(existing_graph);
1072 I(!null_id(rev));
1073 u64 node = 0;
1074 if (old_rev_to_node.find(rev) == old_rev_to_node.end())
1075 {
1076 node = max_node++;
1077 ++n_nodes;
1078
1079 manifest_id man;
1080 legacy::renames_map renames;
1081 legacy::get_manifest_and_renames_for_rev(app, rev, man, renames);
1082
1083 L(FL("node %d = revision %s = manifest %s") % node % rev % man);
1084 old_rev_to_node.insert(make_pair(rev, node));
1085 node_to_old_rev.insert(make_pair(node, rev));
1086 node_to_old_man.insert(make_pair(node, man));
1087 node_to_renames.insert(make_pair(node, renames));
1088
1089 // load certs
1090 vector< revision<cert> > rcerts;
1091 app.db.get_revision_certs(rev, rcerts);
1092 erase_bogus_certs(rcerts, app);
1093 for(vector< revision<cert> >::const_iterator i = rcerts.begin();
1094 i != rcerts.end(); ++i)
1095 {
1096 L(FL("loaded '%s' revision cert for node %s") % i->inner().name % node);
1097 cert_value tv;
1098 decode_base64(i->inner().value, tv);
1099 ++n_certs_in;
1100 certs.insert(make_pair(node,
1101 make_pair(i->inner().name, tv)));
1102
1103 if (i->inner().name == branch_cert_name)
1104 branches.insert(tv());
1105 }
1106 }
1107 else
1108 {
1109 node = old_rev_to_node[rev];
1110 }
1111
1112 return node;
1113}
1114
1115static bool
1116not_dead_yet(node_id nid, u64 birth_rev,
1117 parent_roster_map const & parent_rosters,
1118 multimap<u64, u64> const & child_to_parents)
1119{
1120 // Any given node, at each point in the revision graph, is in one of the
1121 // states "alive", "unborn", "dead". The invariant we must maintain in
1122 // constructing our revision graph is that if a node is dead in any parent,
1123 // then it must also be dead in the child. The purpose of this function is
1124 // to take a node, and a list of parents, and determine whether that node is
1125 // allowed to be alive in a child of the given parents.
1126
1127 // "Alive" means, the node currently exists in the revision's tree.
1128 // "Unborn" means, the node does not exist in the revision's tree, and the
1129 // node's birth revision is _not_ an ancestor of the revision.
1130 // "Dead" means, the node does not exist in the revision's tree, and the
1131 // node's birth revision _is_ an ancestor of the revision.
1132
1133 // L(FL("testing liveliness of node %d, born in rev %d") % nid % birth_rev);
1134 for (parent_roster_map::const_iterator r = parent_rosters.begin();
1135 r != parent_rosters.end(); ++r)
1136 {
1137 shared_ptr<roster_t> parent = r->second.first;
1138 // L(FL("node %d %s in parent roster %d")
1139 // % nid
1140 // % (parent->has_node(n->first) ? "exists" : "does not exist" )
1141 // % r->first);
1142
1143 if (!parent->has_node(nid))
1144 {
1145 deque<u64> work;
1146 set<u64> seen;
1147 work.push_back(r->first);
1148 while (!work.empty())
1149 {
1150 u64 curr = work.front();
1151 work.pop_front();
1152 // L(FL("examining ancestor %d of parent roster %d, looking for anc=%d")
1153 // % curr % r->first % birth_rev);
1154
1155 if (seen.find(curr) != seen.end())
1156 continue;
1157 seen.insert(curr);
1158
1159 if (curr == birth_rev)
1160 {
1161 // L(FL("node is dead in %d") % r->first);
1162 return false;
1163 }
1164 typedef multimap<u64, u64>::const_iterator ci;
1165 pair<ci,ci> range = child_to_parents.equal_range(curr);
1166 for (ci i = range.first; i != range.second; ++i)
1167 {
1168 if (i->first != curr)
1169 continue;
1170 work.push_back(i->second);
1171 }
1172 }
1173 }
1174 }
1175 // L(FL("node is alive in all parents, returning true"));
1176 return true;
1177}
1178
1179
1180static split_path
1181find_old_path_for(map<split_path, split_path> const & renames,
1182 split_path const & new_path)
1183{
1184 split_path leader, trailer;
1185 leader = new_path;
1186 while (!leader.empty())
1187 {
1188 if (renames.find(leader) != renames.end())
1189 {
1190 leader = safe_get(renames, leader);
1191 break;
1192 }
1193 path_component pc = leader.back();
1194 leader.pop_back();
1195 trailer.insert(trailer.begin(), pc);
1196 }
1197 split_path result;
1198 copy(leader.begin(), leader.end(), back_inserter(result));
1199 copy(trailer.begin(), trailer.end(), back_inserter(result));
1200 return result;
1201}
1202
1203static split_path
1204find_new_path_for(map<split_path, split_path> const & renames,
1205 split_path const & old_path)
1206{
1207 map<split_path, split_path> reversed;
1208 for (map<split_path, split_path>::const_iterator i = renames.begin();
1209 i != renames.end(); ++i)
1210 reversed.insert(make_pair(i->second, i->first));
1211 // this is a hackish kluge. seems to work, though.
1212 return find_old_path_for(reversed, old_path);
1213}
1214
// Add the file 'pth' (with content 'fid') to child_roster, creating any
// missing parent directories along the way.  E()'s with a user-facing
// message if the path collides with an existing node of the wrong kind
// or is added twice.
static void
insert_into_roster(roster_t & child_roster,
                   temp_node_id_source & nis,
                   file_path const & pth,
                   file_id const & fid)
{
  split_path sp, dirname;
  path_component basename;
  pth.split(sp);

  // A manifest must not name the same path twice.
  E(!child_roster.has_node(sp),
    F("Path %s added to child roster multiple times") % pth);

  dirname_basename(sp, dirname, basename);

  {
    // Create each missing intermediate directory, verifying that any
    // prefix which already exists really is a directory.
    split_path tmp_pth;
    for (split_path::const_iterator i = dirname.begin(); i != dirname.end();
         ++i)
      {
        tmp_pth.push_back(*i);
        if (child_roster.has_node(tmp_pth))
          {
            E(is_dir_t(child_roster.get_node(tmp_pth)),
              F("Directory for path %s cannot be added, as there is a file in the way") % pth);
          }
        else
          child_roster.attach_node(child_roster.create_dir_node(nis), tmp_pth);
      }
  }

  // NOTE(review): given the E(!has_node(sp)) above and that the loop
  // only creates dirname prefixes (never sp itself), this true-branch
  // looks unreachable -- it appears to guard against a duplicate add
  // the earlier E already rejects.  Confirm before simplifying.
  if (child_roster.has_node(sp))
    {
      node_t n = child_roster.get_node(sp);
      E(is_file_t(n),
        F("Path %s cannot be added, as there is a directory in the way") % sp);
      file_t f = downcast_to_file_t(n);
      E(f->content == fid,
        F("Path %s added twice with differing content") % sp);
    }
  else
    child_roster.attach_node(child_roster.create_file_node(fid, nis), sp);
}
1258
1259void
1260anc_graph::fixup_node_identities(parent_roster_map const & parent_rosters,
1261 roster_t & child_roster,
1262 legacy::renames_map const & renames)
1263{
1264 // Our strategy here is to iterate over every node in every parent, and
1265 // for each parent node P find zero or one tmp nodes in the child which
1266 // represents the fate of P:
1267 //
1268 // - If any of the parents thinks that P has died, we do not search for
1269 // it in the child; we leave it as "dropped".
1270 //
1271 // - We fetch the name N of the parent node P, and apply the rename map
1272 // to N, getting "remapped name" M. If we find a child node C with
1273 // name M in the child roster, with the same type as P, we identify P
1274 // and C, and swap P for C in the child.
1275
1276
1277 // Map node_id -> birth rev
1278 map<node_id, u64> nodes_in_any_parent;
1279
1280 // Stage 1: collect all nodes (and their birth revs) in any parent.
1281 for (parent_roster_map::const_iterator i = parent_rosters.begin();
1282 i != parent_rosters.end(); ++i)
1283 {
1284 shared_ptr<roster_t> parent_roster = i->second.first;
1285 shared_ptr<marking_map> parent_marking = i->second.second;
1286
1287 node_map const & nodes = parent_roster->all_nodes();
1288 for (node_map::const_iterator j = nodes.begin(); j != nodes.end(); ++j)
1289 {
1290 node_id n = j->first;
1291 revision_id birth_rev = safe_get(*parent_marking, n).birth_revision;
1292 u64 birth_node = safe_get(new_rev_to_node, birth_rev);
1293 map<node_id, u64>::const_iterator i = nodes_in_any_parent.find(n);
1294 if (i != nodes_in_any_parent.end())
1295 I(i->second == birth_node);
1296 else
1297 safe_insert(nodes_in_any_parent,
1298 make_pair(n, birth_node));
1299 }
1300 }
1301
1302 // Stage 2: For any node which is actually live, try to locate a mapping
1303 // from a parent instance of it to a child node.
1304 for (map<node_id, u64>::const_iterator i = nodes_in_any_parent.begin();
1305 i != nodes_in_any_parent.end(); ++i)
1306 {
1307 node_id n = i->first;
1308 u64 birth_rev = i->second;
1309
1310 if (child_roster.has_node(n))
1311 continue;
1312
1313 if (not_dead_yet(n, birth_rev, parent_rosters, ancestry))
1314 {
1315 for (parent_roster_map::const_iterator j = parent_rosters.begin();
1316 j != parent_rosters.end(); ++j)
1317 {
1318 shared_ptr<roster_t> parent_roster = j->second.first;
1319
1320 if (!parent_roster->has_node(n))
1321 continue;
1322
1323 split_path sp;
1324 parent_roster->get_name(n, sp);
1325
1326 // Try remapping the name.
1327 if (node_to_old_rev.find(j->first) != node_to_old_rev.end())
1328 {
1329 legacy::renames_map::const_iterator rmap;
1330 revision_id parent_rid = safe_get(node_to_old_rev, j->first);
1331 rmap = renames.find(parent_rid);
1332 if (rmap != renames.end())
1333 sp = find_new_path_for(rmap->second, sp);
1334 }
1335
1336 // See if we can match this node against a child.
1337 if ((!child_roster.has_node(n))
1338 && child_roster.has_node(sp))
1339 {
1340 node_t pn = parent_roster->get_node(n);
1341 node_t cn = child_roster.get_node(sp);
1342 if (is_file_t(pn) == is_file_t(cn))
1343 {
1344 child_roster.replace_node_id(cn->self, n);
1345 break;
1346 }
1347 }
1348 }
1349 }
1350 }
1351}
1352
// Tiny MM()-able helper naming the node currently being rebuilt, so
// that a sanity failure inside construct_revisions_from_ancestry can
// dump that node's certs (see the dump<> specialization below).
struct
current_rev_debugger
{
  u64 node;                  // node number under construction
  anc_graph const & agraph;  // graph holding the node->cert multimap
  current_rev_debugger(u64 n, anc_graph const & ag)
    : node(n), agraph(ag)
  {
  }
};
1363
1364template <> void
1365dump(current_rev_debugger const & d, string & out)
1366{
1367 typedef multimap<u64, pair<cert_name, cert_value> >::const_iterator ci;
1368 pair<ci,ci> range = d.agraph.certs.equal_range(d.node);
1369 for(ci i = range.first; i != range.second; ++i)
1370 {
1371 if (i->first == d.node)
1372 {
1373 out += "cert '" + i->second.first() + "'";
1374 out += "= '" + i->second.second() + "'\n";
1375 }
1376 }
1377}
1378
1379
// Walk the imported node graph in topological order and synthesize a
// roster-style revision (with csets against each parent) for every
// node, writing the results into the database.  Populates
// node_to_new_rev / new_rev_to_node for the later cert-writing pass.
void
anc_graph::construct_revisions_from_ancestry()
{
  // This is an incredibly cheesy, and also reasonably simple sorting
  // system: we put all the root nodes in the work queue. we take a
  // node out of the work queue and check if its parents are done. if
  // they are, we process it and insert its children. otherwise we put
  // it back on the end of the work queue. This both ensures that we're
  // always processing something *like* a frontier, while avoiding the
  // need to worry about one side of the frontier advancing faster than
  // another.

  typedef multimap<u64,u64>::const_iterator ci;
  multimap<u64,u64> parent_to_child_map;
  deque<u64> work;
  set<u64> done;

  {
    // Set up the parent->child mapping and prime the work queue

    set<u64> children, all;
    // 'ancestry' maps child -> parent; invert it for child expansion.
    for (multimap<u64, u64>::const_iterator i = ancestry.begin();
         i != ancestry.end(); ++i)
      {
        parent_to_child_map.insert(make_pair(i->second, i->first));
        children.insert(i->first);
      }
    for (map<u64,manifest_id>::const_iterator i = node_to_old_man.begin();
         i != node_to_old_man.end(); ++i)
      {
        all.insert(i->first);
      }

    // Roots = nodes that never appear as a child.
    set_difference(all.begin(), all.end(),
                   children.begin(), children.end(),
                   back_inserter(work));
  }

  while (!work.empty())
    {

      u64 child = work.front();

      // Registered so a sanity failure below dumps this node's certs.
      current_rev_debugger dbg(child, *this);
      MM(dbg);

      work.pop_front();

      if (done.find(child) != done.end())
        continue;

      // Check whether every parent of 'child' has been processed; if
      // not, requeue 'child' and move on.
      pair<ci,ci> parent_range = ancestry.equal_range(child);
      set<u64> parents;
      bool parents_all_done = true;
      for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i)
        {
          if (i->first != child)
            continue;
          u64 parent = i->second;
          if (done.find(parent) == done.end())
            {
              work.push_back(child);
              parents_all_done = false;
            }
          else
            parents.insert(parent);
        }

      if (parents_all_done
          && (node_to_new_rev.find(child) == node_to_new_rev.end()))
        {
          L(FL("processing node %d") % child);

          manifest_id old_child_mid;
          legacy::manifest_map old_child_man;

          // Fetch the old-style manifest for this node.
          get_node_manifest(child, old_child_mid);
          manifest_data mdat;
          app.db.get_manifest_version(old_child_mid, mdat);
          legacy::read_manifest_map(mdat, old_child_man);

          // Load all the parent rosters into a temporary roster map
          parent_roster_map parent_rosters;
          MM(parent_rosters);

          for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i)
            {
              if (i->first != child)
                continue;
              u64 parent = i->second;
              if (parent_rosters.find(parent) == parent_rosters.end())
                {
                  shared_ptr<roster_t> ros = shared_ptr<roster_t>(new roster_t());
                  shared_ptr<marking_map> mm = shared_ptr<marking_map>(new marking_map());
                  app.db.get_roster(safe_get(node_to_new_rev, parent), *ros, *mm);
                  safe_insert(parent_rosters, make_pair(parent, make_pair(ros, mm)));
                }
            }

          // Special paths handled during conversion.
          file_path attr_path = file_path_internal(".mt-attrs");
          file_path old_ignore_path = file_path_internal(".mt-ignore");
          file_path new_ignore_path = file_path_internal(".mtn-ignore");

          roster_t child_roster;
          MM(child_roster);
          temp_node_id_source nis;

          // all rosters shall have a root node.
          {
            split_path root_pth;
            file_path().split(root_pth);
            child_roster.attach_node(child_roster.create_dir_node(nis), root_pth);
          }

          // Populate the child roster from the old manifest entries.
          for (legacy::manifest_map::const_iterator i = old_child_man.begin();
               i != old_child_man.end(); ++i)
            {
              // .mt-attrs is migrated separately below, not as a file.
              if (i->first == attr_path)
                continue;
              // convert .mt-ignore to .mtn-ignore... except if .mtn-ignore
              // already exists, just leave things alone.
              else if (i->first == old_ignore_path
                       && old_child_man.find(new_ignore_path) == old_child_man.end())
                insert_into_roster(child_roster, nis, new_ignore_path, i->second);
              else
                insert_into_roster(child_roster, nis, i->first, i->second);
            }

          // migrate attributes out of .mt-attrs
          {
            legacy::manifest_map::const_iterator i = old_child_man.find(attr_path);
            if (i != old_child_man.end())
              {
                file_data dat;
                app.db.get_file_version(i->second, dat);
                legacy::dot_mt_attrs_map attrs;
                legacy::read_dot_mt_attrs(dat.inner(), attrs);
                for (legacy::dot_mt_attrs_map::const_iterator j = attrs.begin();
                     j != attrs.end(); ++j)
                  {
                    split_path sp;
                    j->first.split(sp);
                    // Attributes on paths absent from the roster are
                    // silently dropped.
                    if (child_roster.has_node(sp))
                      {
                        map<string, string> const &
                          fattrs = j->second;
                        for (map<string, string>::const_iterator
                               k = fattrs.begin();
                             k != fattrs.end(); ++k)
                          {
                            string key = k->first;
                            if (app.opts.attrs_to_drop.find(key) != app.opts.attrs_to_drop.end())
                              {
                                // ignore it
                              }
                            // Only these two legacy attrs have a known
                            // new-style ("mtn:"-prefixed) equivalent.
                            else if (key == "execute" || key == "manual_merge")
                              child_roster.set_attr(sp,
                                                    attr_key("mtn:" + key),
                                                    attr_value(k->second));
                            else
                              E(false, F("unknown attribute '%s' on path '%s'\n"
                                         "please contact %s so we can work out the right way to migrate this\n"
                                         "(if you just want it to go away, see the switch --drop-attr, but\n"
                                         "seriously, if you'd like to keep it, we're happy to figure out how)")
                                % key % file_path(sp) % PACKAGE_BUGREPORT);
                          }
                      }
                  }
              }
          }

          // Now knit the parent node IDs into child node IDs (which are currently all
          // tmpids), wherever possible.
          fixup_node_identities(parent_rosters, child_roster, node_to_renames[child]);

          revision_t rev;
          rev.made_for = made_for_database;
          MM(rev);
          calculate_ident(child_roster, rev.new_manifest);

          // For each parent, construct an edge in the revision structure by analyzing the
          // relationship between the parent roster and the child roster (and placing the
          // result in a cset)

          for (parent_roster_map::const_iterator i = parent_rosters.begin();
               i != parent_rosters.end(); ++i)
            {
              u64 parent = i->first;
              revision_id parent_rid = safe_get(node_to_new_rev, parent);
              shared_ptr<roster_t> parent_roster = i->second.first;
              shared_ptr<cset> cs = shared_ptr<cset>(new cset());
              MM(*cs);
              make_cset(*parent_roster, child_roster, *cs);
              safe_insert(rev.edges, make_pair(parent_rid, cs));
            }

          // It is possible that we're at a "root" node here -- a node
          // which had no parent in the old rev graph -- in which case we
          // synthesize an edge from the empty revision to the current,
          // containing a cset which adds all the files in the child.

          if (rev.edges.empty())
            {
              revision_id parent_rid;
              shared_ptr<roster_t> parent_roster = shared_ptr<roster_t>(new roster_t());
              shared_ptr<cset> cs = shared_ptr<cset>(new cset());
              MM(*cs);
              make_cset(*parent_roster, child_roster, *cs);
              safe_insert(rev.edges, make_pair (parent_rid, cs));

            }

          // Finally, put all this excitement into the database and save
          // the new_rid for use in the cert-writing pass.

          revision_id new_rid;
          calculate_ident(rev, new_rid);
          node_to_new_rev.insert(make_pair(child, new_rid));
          new_rev_to_node.insert(make_pair(new_rid, child));

          /*
          P(F("------------------------------------------------"));
          P(F("made revision %s with %d edges, manifest id = %s")
            % new_rid % rev.edges.size() % rev.new_manifest);

          {
            string rtmp;
            data dtmp;
            dump(dbg, rtmp);
            write_revision(rev, dtmp);
            P(F("%s") % rtmp);
            P(F("%s") % dtmp);
          }
          P(F("------------------------------------------------"));
          */

          if (!app.db.revision_exists (new_rid))
            {
              L(FL("mapped node %d to revision %s") % child % new_rid);
              app.db.put_revision(new_rid, rev);
              ++n_revs_out;
            }
          else
            {
              L(FL("skipping already existing revision %s") % new_rid);
            }

          // Mark this child as done, hooray!
          safe_insert(done, child);

          // Extend the work queue with all the children of this child
          pair<ci,ci> grandchild_range = parent_to_child_map.equal_range(child);
          for (ci i = grandchild_range.first;
               i != grandchild_range.second; ++i)
            {
              if (i->first != child)
                continue;
              if (done.find(i->second) == done.end())
                work.push_back(i->second);
            }
        }
    }
}
1643
// Database migration ("rosterify"): convert an existing old-style
// revision graph into roster-style revisions via anc_graph.
void
build_roster_style_revs_from_manifest_style_revs(app_state & app)
{
  app.db.ensure_open_for_format_changes();
  app.db.check_is_not_rosterified();

  // Old revisions may violate invariants the current code enforces, so
  // relax sanity checking while they are loaded.
  real_sanity.set_relaxed(true);
  anc_graph graph(true, app);

  P(F("converting existing revision graph to new roster-style revisions"));
  multimap<revision_id, revision_id> existing_graph;

  {
    // early short-circuit to avoid failure after lots of work
    rsa_keypair_id key;
    get_user_key(key,app);
    require_password(key, app);
  }

  // cross-check that we're getting everything
  // in fact the code in this function is wrong, because if a revision has no
  // parents and no children (it is a root revision, and no children have been
  // committed under it), then we will simply drop it!
  // This code at least causes this case to throw an assertion; FIXME: make
  // this case actually work.
  set<revision_id> all_rev_ids;
  app.db.get_revision_ids(all_rev_ids);

  app.db.get_revision_ancestry(existing_graph);
  for (multimap<revision_id, revision_id>::const_iterator i = existing_graph.begin();
       i != existing_graph.end(); ++i)
    {
      // FIXME: insert for the null id as well, and do the same for the
      // changesetify code, and then teach rebuild_ancestry how to deal with
      // such things.  (I guess u64(0) should represent the null parent?)
      if (!null_id(i->first))
        {
          u64 parent_node = graph.add_node_for_oldstyle_revision(i->first);
          all_rev_ids.erase(i->first);
          u64 child_node = graph.add_node_for_oldstyle_revision(i->second);
          all_rev_ids.erase(i->second);
          graph.add_node_ancestry(child_node, parent_node);
        }
    }

  // Pick up any revisions that never appeared in the ancestry map.
  for (set<revision_id>::const_iterator i = all_rev_ids.begin();
       i != all_rev_ids.end(); ++i)
    {
      graph.add_node_for_oldstyle_revision(*i);
    }

  real_sanity.set_relaxed(false);
  graph.rebuild_ancestry();
}
1698
1699
1700void
1701build_changesets_from_manifest_ancestry(app_state & app)
1702{
1703 app.db.ensure_open_for_format_changes();
1704 app.db.check_is_not_rosterified();
1705
1706 anc_graph graph(false, app);
1707
1708 P(F("rebuilding revision graph from manifest certs"));
1709
1710 {
1711 // early short-circuit to avoid failure after lots of work
1712 rsa_keypair_id key;
1713 get_user_key(key,app);
1714 require_password(key, app);
1715 }
1716
1717 vector< manifest<cert> > tmp;
1718 app.db.get_manifest_certs(cert_name("ancestor"), tmp);
1719 erase_bogus_certs(tmp, app);
1720
1721 for (vector< manifest<cert> >::const_iterator i = tmp.begin();
1722 i != tmp.end(); ++i)
1723 {
1724 cert_value tv;
1725 decode_base64(i->inner().value, tv);
1726 manifest_id child, parent;
1727 child = manifest_id(i->inner().ident);
1728 parent = manifest_id(tv());
1729
1730 u64 parent_node = graph.add_node_for_old_manifest(parent);
1731 u64 child_node = graph.add_node_for_old_manifest(child);
1732 graph.add_node_ancestry(child_node, parent_node);
1733 }
1734
1735 graph.rebuild_ancestry();
1736}
1737
1738
1739// This is a special function solely for the use of regenerate_caches -- it
1740// must work even when caches (especially, the height cache!) do not exist.
1741// For all other purposes, use toposort above.
1742static void
1743allrevs_toposorted(vector<revision_id> & revisions,
1744 app_state & app)
1745{
1746
1747 typedef multimap<revision_id, revision_id>::const_iterator gi;
1748 typedef map<revision_id, int>::iterator pi;
1749
1750 revisions.clear();
1751
1752 // get the complete ancestry
1753 multimap<revision_id, revision_id> graph;
1754 app.db.get_revision_ancestry(graph);
1755
1756 // determine the number of parents for each rev
1757 map<revision_id, int> pcount;
1758 for (gi i = graph.begin(); i != graph.end(); ++i)
1759 pcount.insert(make_pair(i->first, 0));
1760 for (gi i = graph.begin(); i != graph.end(); ++i)
1761 ++(pcount[i->second]);
1762
1763 // find the set of graph roots
1764 list<revision_id> roots;
1765 for (pi i = pcount.begin(); i != pcount.end(); ++i)
1766 if(i->second==0)
1767 roots.push_back(i->first);
1768
1769 while (!roots.empty())
1770 {
1771 revision_id cur = roots.front();
1772 roots.pop_front();
1773 if (!null_id(cur))
1774 revisions.push_back(cur);
1775
1776 for(gi i = graph.lower_bound(cur);
1777 i != graph.upper_bound(cur); i++)
1778 if(--(pcount[i->second]) == 0)
1779 roots.push_back(i->second);
1780 }
1781}
1782
1783void
1784regenerate_caches(app_state & app)
1785{
1786 P(F("regenerating cached rosters and heights"));
1787
1788 app.db.ensure_open_for_format_changes();
1789
1790 transaction_guard guard(app.db);
1791
1792 app.db.delete_existing_rosters();
1793 app.db.delete_existing_heights();
1794
1795 vector<revision_id> sorted_ids;
1796 allrevs_toposorted(sorted_ids, app);
1797
1798 ticker done(_("regenerated"), "r", 5);
1799 done.set_total(sorted_ids.size());
1800
1801 for (std::vector<revision_id>::const_iterator i = sorted_ids.begin();
1802 i != sorted_ids.end(); ++i)
1803 {
1804 revision_t rev;
1805 revision_id const & rev_id = *i;
1806 app.db.get_revision(rev_id, rev);
1807 app.db.put_roster_for_revision(rev_id, rev);
1808 app.db.put_height_for_revision(rev_id, rev);
1809 ++done;
1810 }
1811
1812 guard.commit();
1813
1814 P(F("finished regenerating cached rosters and heights"));
1815}
1816
// Hidden debug command: print the cached rev_height of one revision.
// (CMD is a project macro; 'args', 'name' and 'app' come from its
// expansion.)
CMD(rev_height, hidden_group(), N_("REV"), "show the height for REV",
    options::opts::none)
{
  if (args.size() != 1)
    throw usage(name);
  revision_id rid(idx(args, 0)());
  N(app.db.revision_exists(rid), F("No such revision %s") % rid);
  rev_height height;
  app.db.get_rev_height(rid, height);
  P(F("cached height: %s") % height);
}
1828
1829// i/o stuff
1830
namespace
{
  // basic_io symbol names used by the revision reader/writer below.
  namespace syms
  {
    symbol const format_version("format_version");
    symbol const old_revision("old_revision");
    symbol const new_manifest("new_manifest");
  }
}
1840
1841void
1842print_edge(basic_io::printer & printer,
1843 edge_entry const & e)
1844{
1845 basic_io::stanza st;
1846 st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner());
1847 printer.print_stanza(st);
1848 print_cset(printer, edge_changes(e));
1849}
1850
1851static void
1852print_insane_revision(basic_io::printer & printer,
1853 revision_t const & rev)
1854{
1855
1856 basic_io::stanza format_stanza;
1857 format_stanza.push_str_pair(syms::format_version, "1");
1858 printer.print_stanza(format_stanza);
1859
1860 basic_io::stanza manifest_stanza;
1861 manifest_stanza.push_hex_pair(syms::new_manifest, rev.new_manifest.inner());
1862 printer.print_stanza(manifest_stanza);
1863
1864 for (edge_map::const_iterator edge = rev.edges.begin();
1865 edge != rev.edges.end(); ++edge)
1866 print_edge(printer, *edge);
1867}
1868
// Public printing entry point: refuses to serialize insane revisions.
void
print_revision(basic_io::printer & printer,
               revision_t const & rev)
{
  rev.check_sane();
  print_insane_revision(printer, rev);
}
1876
1877
1878void
1879parse_edge(basic_io::parser & parser,
1880 edge_map & es)
1881{
1882 shared_ptr<cset> cs(new cset());
1883 MM(*cs);
1884 manifest_id old_man;
1885 revision_id old_rev;
1886 string tmp;
1887
1888 parser.esym(syms::old_revision);
1889 parser.hex(tmp);
1890 old_rev = revision_id(tmp);
1891
1892 parse_cset(parser, *cs);
1893
1894 es.insert(make_pair(old_rev, cs));
1895}
1896
1897
1898void
1899parse_revision(basic_io::parser & parser,
1900 revision_t & rev)
1901{
1902 MM(rev);
1903 rev.edges.clear();
1904 rev.made_for = made_for_database;
1905 string tmp;
1906 parser.esym(syms::format_version);
1907 parser.str(tmp);
1908 E(tmp == "1",
1909 F("encountered a revision with unknown format, version '%s'\n"
1910 "I only know how to understand the version '1' format\n"
1911 "a newer version of monotone is required to complete this operation")
1912 % tmp);
1913 parser.esym(syms::new_manifest);
1914 parser.hex(tmp);
1915 rev.new_manifest = manifest_id(tmp);
1916 while (parser.symp(syms::old_revision))
1917 parse_edge(parser, rev.edges);
1918 rev.check_sane();
1919}
1920
1921void
1922read_revision(data const & dat,
1923 revision_t & rev)
1924{
1925 MM(rev);
1926 basic_io::input_source src(dat(), "revision");
1927 basic_io::tokenizer tok(src);
1928 basic_io::parser pars(tok);
1929 parse_revision(pars, rev);
1930 I(src.lookahead == EOF);
1931 rev.check_sane();
1932}
1933
// Convenience overload taking the typed revision_data wrapper.
void
read_revision(revision_data const & dat,
              revision_t & rev)
{
  read_revision(dat.inner(), rev);
  rev.check_sane();
}
1941
1942static void write_insane_revision(revision_t const & rev,
1943 data & dat)
1944{
1945 basic_io::printer pr;
1946 print_insane_revision(pr, rev);
1947 dat = data(pr.buf);
1948}
1949
1950template <> void
1951dump(revision_t const & rev, string & out)
1952{
1953 data dat;
1954 write_insane_revision(rev, dat);
1955 out = dat();
1956}
1957
// Public serializer: only sane revisions may be written.
void
write_revision(revision_t const & rev,
               data & dat)
{
  rev.check_sane();
  write_insane_revision(rev, dat);
}
1965
1966void
1967write_revision(revision_t const & rev,
1968 revision_data & dat)
1969{
1970 data d;
1971 write_revision(rev, d);
1972 dat = revision_data(d);
1973}
1974
1975void calculate_ident(revision_t const & cs,
1976 revision_id & ident)
1977{
1978 data tmp;
1979 hexenc<id> tid;
1980 write_revision(cs, tmp);
1981 calculate_ident(tmp, tid);
1982 ident = revision_id(tid);
1983}
1984
1985#ifdef BUILD_UNIT_TESTS
1986#include "unit_tests.hh"
1987#include "sanity.hh"
1988
// Exercise the rename-mapping helpers: with an empty map every path
// maps to itself; after adding foo->quux and foo/bar->foo/baz, both
// exact matches and prefix-based remappings must resolve in each
// direction.
UNIT_TEST(revision, find_old_new_path_for)
{
  map<split_path, split_path> renames;
  split_path foo, foo_bar, foo_baz, quux, quux_baz;
  file_path_internal("foo").split(foo);
  file_path_internal("foo/bar").split(foo_bar);
  file_path_internal("foo/baz").split(foo_baz);
  file_path_internal("quux").split(quux);
  file_path_internal("quux/baz").split(quux_baz);
  // Empty rename map: identity in both directions.
  I(foo == find_old_path_for(renames, foo));
  I(foo == find_new_path_for(renames, foo));
  I(foo_bar == find_old_path_for(renames, foo_bar));
  I(foo_bar == find_new_path_for(renames, foo_bar));
  I(quux == find_old_path_for(renames, quux));
  I(quux == find_new_path_for(renames, quux));
  renames.insert(make_pair(foo, quux));
  renames.insert(make_pair(foo_bar, foo_baz));
  // Exact-match remapping.
  I(quux == find_old_path_for(renames, foo));
  I(foo == find_new_path_for(renames, quux));
  // Prefix remapping: foo/baz -> quux/baz via the foo -> quux entry.
  I(quux_baz == find_old_path_for(renames, foo_baz));
  I(foo_baz == find_new_path_for(renames, quux_baz));
  // foo/bar has its own entry, which wins over the foo prefix.
  I(foo_baz == find_old_path_for(renames, foo_bar));
  I(foo_bar == find_new_path_for(renames, foo_baz));
}
2013
2014#endif // BUILD_UNIT_TESTS
2015
2016// Local Variables:
2017// mode: C++
2018// fill-column: 76
2019// c-file-style: "gnu"
2020// indent-tabs-mode: nil
2021// End:
2022// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status