monotone

monotone Mtn Source Tree

Root/roster.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include <algorithm>
11#include <stack>
12#include <set>
13#include <string>
14#include <vector>
15#include <sstream>
16#include <stdexcept>
17#include <iostream>
18
19#include "app_state.hh"
20#include "basic_io.hh"
21#include "cset.hh"
22#include "inodeprint.hh"
23#include "roster.hh"
24#include "revision.hh"
25#include "vocab.hh"
26#include "transforms.hh"
27#include "simplestring_xform.hh"
28#include "localized_file_io.hh"
29#include "parallel_iter.hh"
30#include "restrictions.hh"
31#include "safe_map.hh"
32#include "string_queue.hh"
33#include "netio.hh"
34
35#include <boost/lexical_cast.hpp>
36
37using std::inserter;
38using std::make_pair;
39using std::map;
40using std::ostringstream;
41using std::pair;
42using std::reverse;
43using std::set;
44using std::set_union;
45using std::stack;
46using std::string;
47using std::vector;
48using std::logic_error;
49using std::cout;
50
51using boost::lexical_cast;
52
53
54// GCC really doesn't seem to be able to figure out how to make constant
55// strings and not keep re-creating them. Bug still seems to be present
56// even in g++-4.1. Grrr.
57static const string node_id_str("node id");
58static const string record_size_str("record size");
59static const string ident_str("ident");
60static const string path_str("path");
61static const string content_str("content");
62static const string nondormant_attr_key_str("non-dormant attr key");
63static const string nondormant_attr_value_str("non-dormant attr value");
64
65// This specialization saves about 600m instructions on annotating xdelta.cc
66template <>
67inline u32
68extract_datum_lsb(std::string const & in,
69 size_t & pos,
70 std::string const & name)
71{
72 I(sizeof(u32) == 4);
73 u32 out = 0;
74
75 require_bytes(in, pos, 4, name);
76
77 out = (widen<u32,u8>(in[pos]) << 0) |
78 (widen<u32,u8>(in[pos+1]) << 8) |
79 (widen<u32,u8>(in[pos+2]) << 16) |
80 (widen<u32,u8>(in[pos+3]) << 24);
81 pos += 4;
82 return out;
83}
84
85///////////////////////////////////////////////////////////////////
86
87template <> void
88dump(full_attr_map_t const & val, string & out)
89{
90 ostringstream oss;
91 for (full_attr_map_t::const_iterator i = val.begin(); i != val.end(); ++i)
92 oss << "attr key: '" << i->first << "'\n"
93 << " status: " << (i->second.first ? "live" : "dead") << "\n"
94 << " value: '" << i->second.second << "'\n";
95 out = oss.str();
96}
97
98template <> void
99dump(set<revision_id> const & revids, string & out)
100{
101 out.clear();
102 bool first = true;
103 for (set<revision_id>::const_iterator i = revids.begin();
104 i != revids.end(); ++i)
105 {
106 if (!first)
107 out += ", ";
108 first = false;
109 out += i->inner()();
110 }
111}
112
113template <> void
114dump(marking_t const & marking, string & out)
115{
116 ostringstream oss;
117 string tmp;
118 oss << "birth_revision: " << marking.birth_revision << "\n";
119 dump(marking.parent_name, tmp);
120 oss << "parent_name: " << tmp << "\n";
121 dump(marking.file_content, tmp);
122 oss << "file_content: " << tmp << "\n";
123 oss << "attrs (number: " << marking.attrs.size() << "):\n";
124 for (map<attr_key, set<revision_id> >::const_iterator
125 i = marking.attrs.begin(); i != marking.attrs.end(); ++i)
126 {
127 dump(i->second, tmp);
128 oss << " " << i->first << ": " << tmp << "\n";
129 }
130 out = oss.str();
131}
132
133template <> void
134dump(marking_map const & markings, string & out)
135{
136 ostringstream oss;
137 for (marking_map::const_iterator i = markings.begin();
138 i != markings.end();
139 ++i)
140 {
141 oss << "Marking for " << i->first << ":\n";
142 string marking_str, indented_marking_str;
143 dump(i->second, marking_str);
144 prefix_lines_with(" ", marking_str, indented_marking_str);
145 oss << indented_marking_str << "\n";
146 }
147 out = oss.str();
148}
149
150namespace
151{
152 //
153 // We have a few concepts of "nullness" here:
154 //
155 // - the_null_node is a node_id. It does not correspond to a real node;
156 // it's an id you use for the parent of the root, or of any node which
157 // is detached.
158 //
159 // - the_null_component is a path_component. It is the *name* of the root
160 // node. Its string representation is "", the empty string.
161 //
162 // - The split_path corresponding to the_null_node is [], the empty vector.
163 //
164 // - The split_path corresponding to the root node is [""], the 1-element
165 // vector containing the_null_component.
166 //
167 // - The split_path corresponding to foo/bar is ["", "foo", "bar"].
168 //
169 // - The only legal one-element split_path is [""], referring to the
170 // root node.
171 //
172 // We do this in order to support the notion of moving the root directory
173 // around, or applying attributes to the root directory (though we will
174 // not support moving the root at this time, since we haven't worked out
175 // all the UI implications yet).
176 //
177
178
179 const node_id first_node = 1;
180 const node_id first_temp_node = widen<node_id, int>(1) << (sizeof(node_id) * 8 - 1);
181 inline bool temp_node(node_id n)
182 {
183 return n & first_temp_node;
184 }
185}
186
187
188node::node(node_id i)
189 : self(i),
190 parent(the_null_node),
191 name(the_null_component)
192{
193}
194
195
196node::node()
197 : self(the_null_node),
198 parent(the_null_node),
199 name(the_null_component)
200{
201}
202
203
204dir_node::dir_node(node_id i)
205 : node(i)
206{
207}
208
209
210dir_node::dir_node()
211 : node()
212{
213}
214
215
216bool
217dir_node::has_child(path_component const & pc) const
218{
219 return children.find(pc) != children.end();
220}
221
222node_t
223dir_node::get_child(path_component const & pc) const
224{
225 return safe_get(children, pc);
226}
227
228
229void
230dir_node::attach_child(path_component const & pc, node_t child)
231{
232 I(null_node(child->parent));
233 I(null_name(child->name));
234 safe_insert(children, make_pair(pc, child));
235 child->parent = this->self;
236 child->name = pc;
237}
238
239
240node_t
241dir_node::detach_child(path_component const & pc)
242{
243 node_t n = get_child(pc);
244 n->parent = the_null_node;
245 n->name = the_null_component;
246 safe_erase(children, pc);
247 return n;
248}
249
250
251node_t
252dir_node::clone()
253{
254 dir_t d = dir_t(new dir_node(self));
255 d->parent = parent;
256 d->name = name;
257 d->attrs = attrs;
258 d->children = children;
259 return d;
260}
261
262
263file_node::file_node(node_id i, file_id const & f)
264 : node(i),
265 content(f)
266{
267}
268
269
270file_node::file_node()
271 : node()
272{
273}
274
275
276node_t
277file_node::clone()
278{
279 file_t f = file_t(new file_node(self, content));
280 f->parent = parent;
281 f->name = name;
282 f->attrs = attrs;
283 return f;
284}
285
286template <> void
287dump(node_t const & n, string & out)
288{
289 ostringstream oss;
290 string name;
291 dump(n->name, name);
292 oss << "address: " << n << " (uses: " << n.use_count() << ")\n"
293 << "self: " << n->self << "\n"
294 << "parent: " << n->parent << "\n"
295 << "name: " << name << "\n";
296 string attr_map_s;
297 dump(n->attrs, attr_map_s);
298 oss << "attrs:\n" << attr_map_s;
299 oss << "type: ";
300 if (is_file_t(n))
301 oss << "file\n"
302 << "content: " << downcast_to_file_t(n)->content << "\n";
303 else
304 {
305 oss << "dir\n";
306 dir_map const & c = downcast_to_dir_t(n)->children;
307 oss << "children: " << c.size() << "\n";
308 for (dir_map::const_iterator i = c.begin(); i != c.end(); ++i)
309 {
310 dump(i->first, name);
311 oss << " " << name << " -> " << i->second << "\n";
312 }
313 }
314 out = oss.str();
315}
316
317// helper
318void
319roster_t::do_deep_copy_from(roster_t const & other)
320{
321 MM(*this);
322 MM(other);
323 I(!root_dir);
324 I(nodes.empty());
325 for (node_map::const_iterator i = other.nodes.begin(); i != other.nodes.end();
326 ++i)
327 safe_insert(nodes, make_pair(i->first, i->second->clone()));
328 for (node_map::iterator i = nodes.begin(); i != nodes.end(); ++i)
329 if (is_dir_t(i->second))
330 {
331 dir_map & children = downcast_to_dir_t(i->second)->children;
332 for (dir_map::iterator j = children.begin(); j != children.end(); ++j)
333 j->second = safe_get(nodes, j->second->self);
334 }
335 if (other.root_dir)
336 root_dir = downcast_to_dir_t(safe_get(nodes, other.root_dir->self));
337}
338
339roster_t::roster_t(roster_t const & other)
340{
341 do_deep_copy_from(other);
342}
343
344roster_t &
345roster_t::operator=(roster_t const & other)
346{
347 root_dir.reset();
348 nodes.clear();
349 do_deep_copy_from(other);
350 return *this;
351}
352
353
354struct
355dfs_iter
356{
357
358 dir_t root;
359 bool return_root;
360 stack< pair<dir_t, dir_map::const_iterator> > stk;
361 split_path dirname;
362
363
364 dfs_iter(dir_t r)
365 : root(r), return_root(root)
366 {
367 if (root && !root->children.empty())
368 stk.push(make_pair(root, root->children.begin()));
369 }
370
371
372 bool finished() const
373 {
374 return (!return_root) && stk.empty();
375 }
376
377
378 node_t operator*() const
379 {
380 I(!finished());
381 if (return_root)
382 return root;
383 else
384 {
385 I(!stk.empty());
386 return stk.top().second->second;
387 }
388 }
389
390
391 void operator++()
392 {
393 I(!finished());
394
395 if (return_root)
396 {
397 return_root = false;
398 return;
399 }
400
401 // we're not finished, so we need to set up so operator* will return the
402 // right thing.
403 node_t ntmp = stk.top().second->second;
404 if (is_dir_t(ntmp))
405 {
406 dirname.push_back(stk.top().second->first);
407 dir_t dtmp = downcast_to_dir_t(ntmp);
408 stk.push(make_pair(dtmp, dtmp->children.begin()));
409 }
410 else
411 ++(stk.top().second);
412
413 while (!stk.empty()
414 && stk.top().second == stk.top().first->children.end())
415 {
416 stk.pop();
417 if (!dirname.empty())
418 dirname.pop_back();
419 if (!stk.empty())
420 ++stk.top().second;
421 }
422 }
423};
424
425
426bool
427roster_t::has_root() const
428{
429 return static_cast<bool>(root_dir);
430}
431
432
433inline bool
434same_type(node_t a, node_t b)
435{
436 return is_file_t(a) == is_file_t(b);
437}
438
439
440inline bool
441shallow_equal(node_t a, node_t b,
442 bool shallow_compare_dir_children)
443{
444 if (a->self != b->self)
445 return false;
446
447 if (a->parent != b->parent)
448 return false;
449
450 if (a->name != b->name)
451 return false;
452
453 if (a->attrs != b->attrs)
454 return false;
455
456 if (! same_type(a,b))
457 return false;
458
459 if (is_file_t(a))
460 {
461 file_t fa = downcast_to_file_t(a);
462 file_t fb = downcast_to_file_t(b);
463 if (!(fa->content == fb->content))
464 return false;
465 }
466 else
467 {
468 dir_t da = downcast_to_dir_t(a);
469 dir_t db = downcast_to_dir_t(b);
470
471 if (shallow_compare_dir_children)
472 {
473 if (da->children.size() != db->children.size())
474 return false;
475
476 dir_map::const_iterator
477 i = da->children.begin(),
478 j = db->children.begin();
479
480 while (i != da->children.end() && j != db->children.end())
481 {
482 if (i->first != j->first)
483 return false;
484 if (i->second->self != j->second->self)
485 return false;
486 ++i;
487 ++j;
488 }
489 I(i == da->children.end() && j == db->children.end());
490 }
491 }
492 return true;
493}
494
495
496// FIXME_ROSTERS: why does this do two loops? why does it pass 'true' to
497// shallow_equal?
498// -- njs
499bool
500roster_t::operator==(roster_t const & other) const
501{
502 node_map::const_iterator i = nodes.begin(), j = other.nodes.begin();
503 while (i != nodes.end() && j != other.nodes.end())
504 {
505 if (i->first != j->first)
506 return false;
507 if (!shallow_equal(i->second, j->second, true))
508 return false;
509 ++i;
510 ++j;
511 }
512
513 if (i != nodes.end() || j != other.nodes.end())
514 return false;
515
516 dfs_iter p(root_dir), q(other.root_dir);
517 while (! (p.finished() || q.finished()))
518 {
519 if (!shallow_equal(*p, *q, true))
520 return false;
521 ++p;
522 ++q;
523 }
524
525 if (!(p.finished() && q.finished()))
526 return false;
527
528 return true;
529}
530
531
532node_t
533roster_t::get_node(split_path const & sp) const
534{
535 split_path dirname;
536 path_component basename;
537 dirname_basename(sp, dirname, basename);
538
539 MM(sp);
540 MM(*this);
541
542 I(has_root());
543
544 if (dirname.empty())
545 {
546 I(null_name(basename));
547 return root_dir;
548 }
549
550 dir_t d = root_dir;
551 for (split_path::const_iterator i = dirname.begin()+1; i != dirname.end(); ++i)
552 d = downcast_to_dir_t(d->get_child(*i));
553 return d->get_child(basename);
554}
555
556bool
557roster_t::has_node(node_id n) const
558{
559 return nodes.find(n) != nodes.end();
560}
561
562bool
563roster_t::is_root(node_id n) const
564{
565 return has_root() && root_dir->self == n;
566}
567
568bool
569roster_t::has_node(split_path const & sp) const
570{
571 split_path dirname;
572 path_component basename;
573 dirname_basename(sp, dirname, basename);
574
575 if (dirname.empty())
576 {
577 I(null_name(basename));
578 return has_root();
579 }
580
581 // If we have no root, we *definitely* don't have a non-root path
582 if (!has_root())
583 return false;
584
585 dir_t d = root_dir;
586 for (split_path::const_iterator i = dirname.begin()+1; i != dirname.end(); ++i)
587 {
588 if (d->children.find(*i) == d->children.end())
589 return false;
590 d = downcast_to_dir_t(d->get_child(*i));
591 }
592 return d->children.find(basename) != d->children.end();
593}
594
595
596
597node_t
598roster_t::get_node(node_id nid) const
599{
600 return safe_get(nodes, nid);
601}
602
603
604void
605roster_t::get_name(node_id nid, split_path & sp) const
606{
607 I(!null_node(nid));
608 sp.clear();
609 while (!null_node(nid))
610 {
611 node_t n = get_node(nid);
612 sp.push_back(n->name);
613 nid = n->parent;
614 }
615 reverse(sp.begin(), sp.end());
616}
617
618
619void
620roster_t::replace_node_id(node_id from, node_id to)
621{
622 I(!null_node(from));
623 I(!null_node(to));
624 node_t n = get_node(from);
625 safe_erase(nodes, from);
626 safe_insert(nodes, make_pair(to, n));
627 n->self = to;
628
629 if (is_dir_t(n))
630 {
631 dir_t d = downcast_to_dir_t(n);
632 for (dir_map::iterator i = d->children.begin(); i != d->children.end(); ++i)
633 {
634 I(i->second->parent == from);
635 i->second->parent = to;
636 }
637 }
638}
639
640
641// this records the old location into the old_locations member, to prevent the
642// same node from being re-attached at the same place.
643node_id
644roster_t::detach_node(split_path const & pth)
645{
646 split_path dirname;
647 path_component basename;
648 dirname_basename(pth, dirname, basename);
649
650 I(has_root());
651 if (dirname.empty())
652 {
653 // detaching the root dir
654 I(null_name(basename));
655 node_id root_id = root_dir->self;
656 safe_insert(old_locations,
657 make_pair(root_id, make_pair(root_dir->parent, root_dir->name)));
658 // clear ("reset") the root_dir shared_pointer
659 root_dir.reset();
660 I(!has_root());
661 return root_id;
662 }
663
664 dir_t parent = downcast_to_dir_t(get_node(dirname));
665 node_id nid = parent->detach_child(basename)->self;
666 safe_insert(old_locations,
667 make_pair(nid, make_pair(parent->self, basename)));
668 I(!null_node(nid));
669 return nid;
670}
671
672
673void
674roster_t::drop_detached_node(node_id nid)
675{
676 // ensure the node is already detached
677 node_t n = get_node(nid);
678 I(null_node(n->parent));
679 I(null_name(n->name));
680 // if it's a dir, make sure it's empty
681 if (is_dir_t(n))
682 I(downcast_to_dir_t(n)->children.empty());
683 // all right, kill it
684 safe_erase(nodes, nid);
685 // can use safe_erase here, because while not every detached node appears in
686 // old_locations, all those that used to be in the tree do. and you should
687 // only ever be dropping nodes that were detached, not nodes that you just
688 // created and that have never been attached.
689 safe_erase(old_locations, nid);
690}
691
692
693// this creates a node in a detached state, but it does _not_ insert an entry
694// for it into the old_locations member, because there is no old_location to
695// forbid
696node_id
697roster_t::create_dir_node(node_id_source & nis)
698{
699 node_id nid = nis.next();
700 create_dir_node(nid);
701 return nid;
702}
703void
704roster_t::create_dir_node(node_id nid)
705{
706 dir_t d = dir_t(new dir_node());
707 d->self = nid;
708 safe_insert(nodes, make_pair(nid, d));
709}
710
711
712// this creates a node in a detached state, but it does _not_ insert an entry
713// for it into the old_locations member, because there is no old_location to
714// forbid
715node_id
716roster_t::create_file_node(file_id const & content, node_id_source & nis)
717{
718 node_id nid = nis.next();
719 create_file_node(content, nid);
720 return nid;
721}
722void
723roster_t::create_file_node(file_id const & content, node_id nid)
724{
725 file_t f = file_t(new file_node());
726 f->self = nid;
727 f->content = content;
728 safe_insert(nodes, make_pair(nid, f));
729}
730
731void
732roster_t::attach_node(node_id nid, split_path const & dst)
733{
734 split_path dirname;
735 path_component basename;
736 dirname_basename(dst, dirname, basename);
737
738 MM(dst);
739
740 if (dirname.empty())
741 // attaching the root node
742 attach_node(nid, the_null_node, basename);
743 else
744 attach_node(nid, get_node(dirname)->self, basename);
745}
746
747void
748roster_t::attach_node(node_id nid, node_id parent, path_component name)
749{
750 node_t n = get_node(nid);
751
752 I(!null_node(n->self));
753 // ensure the node is already detached (as best one can)
754 I(null_node(n->parent));
755 I(null_name(n->name));
756
757 // this iterator might point to old_locations.end(), because old_locations
758 // only includes entries for renames, not new nodes
759 map<node_id, pair<node_id, path_component> >::iterator
760 i = old_locations.find(nid);
761
762 if (null_node(parent) || null_name(name))
763 {
764 I(null_node(parent) && null_name(name));
765 I(null_node(n->parent));
766 I(null_name(n->name));
767 I(!has_root());
768 root_dir = downcast_to_dir_t(n);
769 I(i == old_locations.end() || i->second != make_pair(root_dir->parent,
770 root_dir->name));
771 }
772 else
773 {
774 dir_t parent_n = downcast_to_dir_t(get_node(parent));
775 parent_n->attach_child(name, n);
776 I(i == old_locations.end() || i->second != make_pair(n->parent, n->name));
777 }
778
779 if (i != old_locations.end())
780 old_locations.erase(i);
781}
782
783void
784roster_t::apply_delta(split_path const & pth,
785 file_id const & old_id,
786 file_id const & new_id)
787{
788 file_t f = downcast_to_file_t(get_node(pth));
789 I(f->content == old_id);
790 I(!null_node(f->self));
791 I(!(f->content == new_id));
792 f->content = new_id;
793}
794
795
796void
797roster_t::clear_attr(split_path const & pth,
798 attr_key const & name)
799{
800 set_attr(pth, name, make_pair(false, attr_value()));
801}
802
803
804void
805roster_t::set_attr(split_path const & pth,
806 attr_key const & name,
807 attr_value const & val)
808{
809 set_attr(pth, name, make_pair(true, val));
810}
811
812
813void
814roster_t::set_attr(split_path const & pth,
815 attr_key const & name,
816 pair<bool, attr_value> const & val)
817{
818 I(val.first || val.second().empty());
819 node_t n = get_node(pth);
820 I(!null_node(n->self));
821 full_attr_map_t::iterator i = n->attrs.find(name);
822 if (i == n->attrs.end())
823 i = safe_insert(n->attrs, make_pair(name,
824 make_pair(false, attr_value())));
825 I(i->second != val);
826 i->second = val;
827}
828
829template <> void
830dump(roster_t const & val, string & out)
831{
832 ostringstream oss;
833 if (val.root_dir)
834 oss << "Root node: " << val.root_dir->self << "\n"
835 << " at " << val.root_dir << ", uses: " << val.root_dir.use_count() << "\n";
836 else
837 oss << "root dir is NULL\n";
838 for (node_map::const_iterator i = val.nodes.begin(); i != val.nodes.end(); ++i)
839 {
840 oss << "\nNode " << i->first << "\n";
841 string node_s;
842 dump(i->second, node_s);
843 oss << node_s;
844 }
845 out = oss.str();
846}
847
848void
849roster_t::check_sane(bool temp_nodes_ok) const
850{
851 I(has_root());
852 node_map::const_iterator ri;
853
854 I(old_locations.empty());
855
856 for (ri = nodes.begin();
857 ri != nodes.end();
858 ++ri)
859 {
860 node_id nid = ri->first;
861 I(!null_node(nid));
862 if (!temp_nodes_ok)
863 I(!temp_node(nid));
864 node_t n = ri->second;
865 I(n->self == nid);
866 if (is_dir_t(n))
867 {
868 if (null_name(n->name) || null_node(n->parent))
869 I(null_name(n->name) && null_node(n->parent));
870 else
871 I(!null_name(n->name) && !null_node(n->parent));
872 }
873 else
874 {
875 I(!null_name(n->name) && !null_node(n->parent));
876 I(!null_id(downcast_to_file_t(n)->content));
877 }
878 for (full_attr_map_t::const_iterator i = n->attrs.begin(); i != n->attrs.end(); ++i)
879 I(i->second.first || i->second.second().empty());
880 if (n != root_dir)
881 {
882 I(!null_node(n->parent));
883 I(downcast_to_dir_t(get_node(n->parent))->get_child(n->name) == n);
884 }
885
886 }
887
888 I(has_root());
889 size_t maxdepth = nodes.size();
890 for (dfs_iter i(root_dir); !i.finished(); ++i)
891 {
892 I(*i == get_node((*i)->self));
893 I(maxdepth-- > 0);
894 }
895 I(maxdepth == 0);
896}
897
898void
899roster_t::check_sane_against(marking_map const & markings, bool temp_nodes_ok) const
900{
901
902 check_sane(temp_nodes_ok);
903
904 node_map::const_iterator ri;
905 marking_map::const_iterator mi;
906
907 for (ri = nodes.begin(), mi = markings.begin();
908 ri != nodes.end() && mi != markings.end();
909 ++ri, ++mi)
910 {
911 I(!null_id(mi->second.birth_revision));
912 I(!mi->second.parent_name.empty());
913
914 if (is_file_t(ri->second))
915 I(!mi->second.file_content.empty());
916 else
917 I(mi->second.file_content.empty());
918
919 full_attr_map_t::const_iterator rai;
920 map<attr_key, set<revision_id> >::const_iterator mai;
921 for (rai = ri->second->attrs.begin(), mai = mi->second.attrs.begin();
922 rai != ri->second->attrs.end() && mai != mi->second.attrs.end();
923 ++rai, ++mai)
924 {
925 I(rai->first == mai->first);
926 I(!mai->second.empty());
927 }
928 I(rai == ri->second->attrs.end() && mai == mi->second.attrs.end());
929 // TODO: attrs
930 }
931
932 I(ri == nodes.end() && mi == markings.end());
933}
934
935
936temp_node_id_source::temp_node_id_source()
937 : curr(first_temp_node)
938{}
939
940node_id
941temp_node_id_source::next()
942{
943 node_id n = curr++;
944 I(temp_node(n));
945 return n;
946}
947
948editable_roster_base::editable_roster_base(roster_t & r, node_id_source & nis)
949 : r(r), nis(nis)
950{}
951
952node_id
953editable_roster_base::detach_node(split_path const & src)
954{
955 // L(FL("detach_node('%s')") % file_path(src));
956 return r.detach_node(src);
957}
958
959void
960editable_roster_base::drop_detached_node(node_id nid)
961{
962 // L(FL("drop_detached_node(%d)") % nid);
963 r.drop_detached_node(nid);
964}
965
966node_id
967editable_roster_base::create_dir_node()
968{
969 // L(FL("create_dir_node()"));
970 node_id n = r.create_dir_node(nis);
971 // L(FL("create_dir_node() -> %d") % n);
972 return n;
973}
974
975node_id
976editable_roster_base::create_file_node(file_id const & content)
977{
978 // L(FL("create_file_node('%s')") % content);
979 node_id n = r.create_file_node(content, nis);
980 // L(FL("create_file_node('%s') -> %d") % content % n);
981 return n;
982}
983
984void
985editable_roster_base::attach_node(node_id nid, split_path const & dst)
986{
987 // L(FL("attach_node(%d, '%s')") % nid % file_path(dst));
988 MM(dst);
989 MM(this->r);
990 r.attach_node(nid, dst);
991}
992
993void
994editable_roster_base::apply_delta(split_path const & pth,
995 file_id const & old_id,
996 file_id const & new_id)
997{
998 // L(FL("clear_attr('%s', '%s', '%s')") % file_path(pth) % old_id % new_id);
999 r.apply_delta(pth, old_id, new_id);
1000}
1001
1002void
1003editable_roster_base::clear_attr(split_path const & pth,
1004 attr_key const & name)
1005{
1006 // L(FL("clear_attr('%s', '%s')") % file_path(pth) % name);
1007 r.clear_attr(pth, name);
1008}
1009
1010void
1011editable_roster_base::set_attr(split_path const & pth,
1012 attr_key const & name,
1013 attr_value const & val)
1014{
1015 // L(FL("set_attr('%s', '%s', '%s')") % file_path(pth) % name % val);
1016 r.set_attr(pth, name, val);
1017}
1018
1019void
1020editable_roster_base::commit()
1021{
1022}
1023
1024namespace
1025{
1026 struct true_node_id_source
1027 : public node_id_source
1028 {
1029 true_node_id_source(app_state & app) : app(app) {}
1030 virtual node_id next()
1031 {
1032 node_id n = app.db.next_node_id();
1033 I(!temp_node(n));
1034 return n;
1035 }
1036 app_state & app;
1037 };
1038
1039
1040 class editable_roster_for_merge
1041 : public editable_roster_base
1042 {
1043 public:
1044 set<node_id> new_nodes;
1045 editable_roster_for_merge(roster_t & r, node_id_source & nis)
1046 : editable_roster_base(r, nis)
1047 {}
1048 virtual node_id create_dir_node()
1049 {
1050 node_id nid = this->editable_roster_base::create_dir_node();
1051 new_nodes.insert(nid);
1052 return nid;
1053 }
1054 virtual node_id create_file_node(file_id const & content)
1055 {
1056 node_id nid = this->editable_roster_base::create_file_node(content);
1057 new_nodes.insert(nid);
1058 return nid;
1059 }
1060 };
1061
1062
1063 // This handles all the stuff in a_new.
1064 void unify_roster_oneway(roster_t & a, set<node_id> & a_new,
1065 roster_t & b, set<node_id> & b_new,
1066 node_id_source & nis)
1067 {
1068 for (set<node_id>::const_iterator i = a_new.begin(); i != a_new.end(); ++i)
1069 {
1070 node_id const aid = *i;
1071 split_path sp;
1072 // SPEEDUP?: climb out only so far as is necessary to find a shared
1073 // id? possibly faster (since usually will get a hit immediately),
1074 // but may not be worth the effort (since it doesn't take that long to
1075 // get out in any case)
1076 a.get_name(aid, sp);
1077 node_id bid = b.get_node(sp)->self;
1078 if (temp_node(bid))
1079 {
1080 node_id new_nid = nis.next();
1081 // the node_id_source provided to this function must only generate
1082 // true node ids, because this code was written with the
1083 // assumption that only true nids would come in or go out, and
1084 // temp ids are used as an intermediate stage to indicate nodes
1085 // that need true ids installed.
1086 // FIXME: make everything work correctly when this node_id_source
1087 // returns temp ids. This is needed for workspace merge support.
1088 // FIXME: having done this, add a test in database_check.cc that
1089 // for each revision, generates that rev's roster from scratch,
1090 // and compares it to the one stored in the db. (Do the
1091 // comparison using something like equal_up_to_renumbering, except
1092 // should say if (!temp_node(a) && !temp_node(b)) I(a == b).)
1093 I(!temp_node(new_nid));
1094 a.replace_node_id(aid, new_nid);
1095 b.replace_node_id(bid, new_nid);
1096 b_new.erase(bid);
1097 }
1098 else
1099 {
1100 a.replace_node_id(aid, bid);
1101 }
1102 }
1103 }
1104
1105
1106 void
1107 union_corpses(roster_t & left, roster_t & right)
1108 {
1109 // left and right should be equal, except that each may have some attr
1110 // corpses that the other does not
1111 map<node_id, node_t>::const_iterator left_i = left.all_nodes().begin();
1112 map<node_id, node_t>::const_iterator right_i = right.all_nodes().begin();
1113 while (left_i != left.all_nodes().end() || right_i != right.all_nodes().end())
1114 {
1115 I(left_i->second->self == right_i->second->self);
1116 parallel::iter<full_attr_map_t> j(left_i->second->attrs,
1117 right_i->second->attrs);
1118 // we batch up the modifications until the end, so as not to be
1119 // changing things around under the parallel::iter's feet
1120 set<attr_key> left_missing, right_missing;
1121 while (j.next())
1122 {
1123 MM(j);
1124 switch (j.state())
1125 {
1126 case parallel::invalid:
1127 I(false);
1128
1129 case parallel::in_left:
1130 // this is a corpse
1131 I(!j.left_data().first);
1132 right_missing.insert(j.left_key());
1133 break;
1134
1135 case parallel::in_right:
1136 // this is a corpse
1137 I(!j.right_data().first);
1138 left_missing.insert(j.right_key());
1139 break;
1140
1141 case parallel::in_both:
1142 break;
1143 }
1144 }
1145 for (set<attr_key>::const_iterator j = left_missing.begin();
1146 j != left_missing.end(); ++j)
1147 safe_insert(left_i->second->attrs,
1148 make_pair(*j, make_pair(false, attr_value())));
1149 for (set<attr_key>::const_iterator j = right_missing.begin();
1150 j != right_missing.end(); ++j)
1151 safe_insert(right_i->second->attrs,
1152 make_pair(*j, make_pair(false, attr_value())));
1153 ++left_i;
1154 ++right_i;
1155 }
1156 I(left_i == left.all_nodes().end());
1157 I(right_i == right.all_nodes().end());
1158 }
1159
1160 // After this, left should == right, and there should be no temporary ids.
1161 // Destroys sets, because that's handy (it has to scan over both, but it can
1162 // skip some double-scanning)
1163 void
1164 unify_rosters(roster_t & left, set<node_id> & left_new,
1165 roster_t & right, set<node_id> & right_new,
1166 node_id_source & nis)
1167 {
1168 // Our basic problem is this: when interpreting a revision with multiple
1169 // csets, those csets all give enough information for us to get the same
1170 // manifest, and even a bit more than that. However, there is some
1171 // information that is not exposed at the manifest level, and csets alone
1172 // do not give us all we need. This function is responsible taking the
1173 // two rosters that we get from pure cset application, and fixing them up
1174 // so that they are wholly identical.
1175
1176 // The first thing that is missing is identification of files. If one
1177 // cset says "add_file" and the other says nothing, then the add_file is
1178 // not really an add_file. One of our rosters will have a temp id for
1179 // this file, and the other will not. In this case, we replace the temp
1180 // id with the other side's permanent id. However, if both csets say
1181 // "add_file", then this really is a new id; both rosters will have temp
1182 // ids, and we replace both of them with a newly allocated id. After
1183 // this, the two rosters will have identical node_ids at every path.
1184 unify_roster_oneway(left, left_new, right, right_new, nis);
1185 unify_roster_oneway(right, right_new, left, left_new, nis);
1186
1187 // The other thing we need to fix up is attr corpses. Live attrs are made
1188 // identical by the csets; but if, say, on one side of a fork an attr is
1189 // added and then deleted, then one of our incoming merge rosters will
1190 // have a corpse for that attr, and the other will not. We need to make
1191 // sure at both of them end up with the corpse. This function fixes up
1192 // that.
1193 union_corpses(left, right);
1194 }
1195
1196 template <typename T> void
1197 mark_unmerged_scalar(set<revision_id> const & parent_marks,
1198 T const & parent_val,
1199 revision_id const & new_rid,
1200 T const & new_val,
1201 set<revision_id> & new_marks)
1202 {
1203 I(new_marks.empty());
1204 if (parent_val == new_val)
1205 new_marks = parent_marks;
1206 else
1207 new_marks.insert(new_rid);
1208 }
1209
1210 // This function implements the case.
1211 // a b1
1212 // \ /
1213 // b2
1214 void
1215 mark_won_merge(set<revision_id> const & a_marks,
1216 set<revision_id> const & a_uncommon_ancestors,
1217 set<revision_id> const & b1_marks,
1218 revision_id const & new_rid,
1219 set<revision_id> & new_marks)
1220 {
1221 for (set<revision_id>::const_iterator i = a_marks.begin();
1222 i != a_marks.end(); ++i)
1223 {
1224 if (a_uncommon_ancestors.find(*i) != a_uncommon_ancestors.end())
1225 {
1226 // at least one element of *(a) is not an ancestor of b1
1227 new_marks.clear();
1228 new_marks.insert(new_rid);
1229 return;
1230 }
1231 }
1232 // all elements of *(a) are ancestors of b1; this was a clean merge to b,
1233 // so copy forward the marks.
1234 new_marks = b1_marks;
1235 }
1236
1237 template <typename T> void
1238 mark_merged_scalar(set<revision_id> const & left_marks,
1239 set<revision_id> const & left_uncommon_ancestors,
1240 T const & left_val,
1241 set<revision_id> const & right_marks,
1242 set<revision_id> const & right_uncommon_ancestors,
1243 T const & right_val,
1244 revision_id const & new_rid,
1245 T const & new_val,
1246 set<revision_id> & new_marks)
1247 {
1248 I(new_marks.empty());
1249
1250 // let's not depend on T::operator!= being defined, only on T::operator==
1251 // being defined.
1252 bool diff_from_left = !(new_val == left_val);
1253 bool diff_from_right = !(new_val == right_val);
1254
1255 // some quick sanity checks
1256 for (set<revision_id>::const_iterator i = left_marks.begin();
1257 i != left_marks.end(); ++i)
1258 I(right_uncommon_ancestors.find(*i) == right_uncommon_ancestors.end());
1259 for (set<revision_id>::const_iterator i = right_marks.begin();
1260 i != right_marks.end(); ++i)
1261 I(left_uncommon_ancestors.find(*i) == left_uncommon_ancestors.end());
1262
1263 if (diff_from_left && diff_from_right)
1264 new_marks.insert(new_rid);
1265
1266 else if (diff_from_left && !diff_from_right)
1267 mark_won_merge(left_marks, left_uncommon_ancestors, right_marks,
1268 new_rid, new_marks);
1269
1270 else if (!diff_from_left && diff_from_right)
1271 mark_won_merge(right_marks, right_uncommon_ancestors, left_marks,
1272 new_rid, new_marks);
1273
1274 else
1275 {
1276 // this is the case
1277 // a a
1278 // \ /
1279 // a
1280 // so we simply union the mark sets. This is technically not
1281 // quite the canonical multi-*-merge thing to do; in the case
1282 // a1*
1283 // / \ (blah blah; avoid multi-line-comment warning)
1284 // b a2
1285 // | |
1286 // a3* |
1287 // \ /
1288 // a4
1289 // we will set *(a4) = {a1, a3}, even though the minimal
1290 // common ancestor set is {a3}. we could fix this by running
1291 // erase_ancestors. However, there isn't really any point;
1292 // the only operation performed on *(a4) is to test *(a4) > R
1293 // for some revision R. The truth-value of this test cannot
1294 // be affected by added new revisions to *(a4) that are
1295 // ancestors of revisions that are already in *(a4).
1296 set_union(left_marks.begin(), left_marks.end(),
1297 right_marks.begin(), right_marks.end(),
1298 inserter(new_marks, new_marks.begin()));
1299 }
1300 }
1301
1302 void
1303 mark_new_node(revision_id const & new_rid, node_t n, marking_t & new_marking)
1304 {
1305 new_marking.birth_revision = new_rid;
1306 I(new_marking.parent_name.empty());
1307 new_marking.parent_name.insert(new_rid);
1308 I(new_marking.file_content.empty());
1309 if (is_file_t(n))
1310 new_marking.file_content.insert(new_rid);
1311 I(new_marking.attrs.empty());
1312 set<revision_id> singleton;
1313 singleton.insert(new_rid);
1314 for (full_attr_map_t::const_iterator i = n->attrs.begin();
1315 i != n->attrs.end(); ++i)
1316 new_marking.attrs.insert(make_pair(i->first, singleton));
1317 }
1318
1319 void
1320 mark_unmerged_node(marking_t const & parent_marking, node_t parent_n,
1321 revision_id const & new_rid, node_t n,
1322 marking_t & new_marking)
1323 {
1324 // SPEEDUP?: the common case here is that the parent and child nodes are
1325 // exactly identical, in which case the markings are also exactly
1326 // identical. There might be a win in first doing an overall
1327 // comparison/copy, in case it can be better optimized as a block
1328 // comparison and a block copy...
1329
1330 I(same_type(parent_n, n) && parent_n->self == n->self);
1331
1332 new_marking.birth_revision = parent_marking.birth_revision;
1333
1334 mark_unmerged_scalar(parent_marking.parent_name,
1335 make_pair(parent_n->parent, parent_n->name),
1336 new_rid,
1337 make_pair(n->parent, n->name),
1338 new_marking.parent_name);
1339
1340 if (is_file_t(n))
1341 mark_unmerged_scalar(parent_marking.file_content,
1342 downcast_to_file_t(parent_n)->content,
1343 new_rid,
1344 downcast_to_file_t(n)->content,
1345 new_marking.file_content);
1346
1347 for (full_attr_map_t::const_iterator i = n->attrs.begin();
1348 i != n->attrs.end(); ++i)
1349 {
1350 set<revision_id> & new_marks = new_marking.attrs[i->first];
1351 I(new_marks.empty());
1352 full_attr_map_t::const_iterator j = parent_n->attrs.find(i->first);
1353 if (j == parent_n->attrs.end())
1354 new_marks.insert(new_rid);
1355 else
1356 mark_unmerged_scalar(safe_get(parent_marking.attrs, i->first),
1357 j->second,
1358 new_rid, i->second, new_marks);
1359 }
1360 }
1361
1362 void
1363 mark_merged_node(marking_t const & left_marking,
1364 set<revision_id> left_uncommon_ancestors,
1365 node_t ln,
1366 marking_t const & right_marking,
1367 set<revision_id> right_uncommon_ancestors,
1368 node_t rn,
1369 revision_id const & new_rid,
1370 node_t n,
1371 marking_t & new_marking)
1372 {
1373 I(same_type(ln, n) && same_type(rn, n));
1374 I(left_marking.birth_revision == right_marking.birth_revision);
1375 new_marking.birth_revision = left_marking.birth_revision;
1376
1377 // name
1378 mark_merged_scalar(left_marking.parent_name, left_uncommon_ancestors,
1379 make_pair(ln->parent, ln->name),
1380 right_marking.parent_name, right_uncommon_ancestors,
1381 make_pair(rn->parent, rn->name),
1382 new_rid,
1383 make_pair(n->parent, n->name),
1384 new_marking.parent_name);
1385 // content
1386 if (is_file_t(n))
1387 {
1388 file_t f = downcast_to_file_t(n);
1389 file_t lf = downcast_to_file_t(ln);
1390 file_t rf = downcast_to_file_t(rn);
1391 mark_merged_scalar(left_marking.file_content, left_uncommon_ancestors,
1392 lf->content,
1393 right_marking.file_content, right_uncommon_ancestors,
1394 rf->content,
1395 new_rid, f->content, new_marking.file_content);
1396 }
1397 // attrs
1398 for (full_attr_map_t::const_iterator i = n->attrs.begin();
1399 i != n->attrs.end(); ++i)
1400 {
1401 attr_key const & key = i->first;
1402 full_attr_map_t::const_iterator li = ln->attrs.find(key);
1403 full_attr_map_t::const_iterator ri = rn->attrs.find(key);
1404 I(new_marking.attrs.find(key) == new_marking.attrs.end());
1405 // [], when used to refer to a non-existent element, default
1406 // constructs that element and returns a reference to it. We make use
1407 // of this here.
1408 set<revision_id> & new_marks = new_marking.attrs[key];
1409
1410 if (li == ln->attrs.end() && ri == rn->attrs.end())
1411 // this is a brand new attribute, never before seen
1412 safe_insert(new_marks, new_rid);
1413
1414 else if (li != ln->attrs.end() && ri == rn->attrs.end())
1415 // only the left side has seen this attr before
1416 mark_unmerged_scalar(safe_get(left_marking.attrs, key),
1417 li->second,
1418 new_rid, i->second, new_marks);
1419
1420 else if (li == ln->attrs.end() && ri != rn->attrs.end())
1421 // only the right side has seen this attr before
1422 mark_unmerged_scalar(safe_get(right_marking.attrs, key),
1423 ri->second,
1424 new_rid, i->second, new_marks);
1425
1426 else
1427 // both sides have seen this attr before
1428 mark_merged_scalar(safe_get(left_marking.attrs, key),
1429 left_uncommon_ancestors,
1430 li->second,
1431 safe_get(right_marking.attrs, key),
1432 right_uncommon_ancestors,
1433 ri->second,
1434 new_rid, i->second, new_marks);
1435 }
1436
1437 // some extra sanity checking -- attributes are not allowed to be deleted,
1438 // so we double check that they haven't.
1439 // SPEEDUP?: this code could probably be made more efficient -- but very
1440 // rarely will any node have more than, say, one attribute, so it probably
1441 // doesn't matter.
1442 for (full_attr_map_t::const_iterator i = ln->attrs.begin();
1443 i != ln->attrs.end(); ++i)
1444 I(n->attrs.find(i->first) != n->attrs.end());
1445 for (full_attr_map_t::const_iterator i = rn->attrs.begin();
1446 i != rn->attrs.end(); ++i)
1447 I(n->attrs.find(i->first) != n->attrs.end());
1448 }
1449
1450
1451 // This function is also responsible for verifying ancestry invariants --
1452 // those invariants on a roster that involve the structure of the roster's
1453 // parents, rather than just the structure of the roster itself.
1454 void
1455 mark_merge_roster(roster_t const & left_roster,
1456 marking_map const & left_markings,
1457 set<revision_id> const & left_uncommon_ancestors,
1458 roster_t const & right_roster,
1459 marking_map const & right_markings,
1460 set<revision_id> const & right_uncommon_ancestors,
1461 revision_id const & new_rid,
1462 roster_t const & merge,
1463 marking_map & new_markings)
1464 {
1465 for (node_map::const_iterator i = merge.all_nodes().begin();
1466 i != merge.all_nodes().end(); ++i)
1467 {
1468 node_t const & n = i->second;
1469 // SPEEDUP?: instead of using find repeatedly, iterate everything in
1470 // parallel
1471 map<node_id, node_t>::const_iterator lni = left_roster.all_nodes().find(i->first);
1472 map<node_id, node_t>::const_iterator rni = right_roster.all_nodes().find(i->first);
1473
1474 bool exists_in_left = (lni != left_roster.all_nodes().end());
1475 bool exists_in_right = (rni != right_roster.all_nodes().end());
1476
1477 marking_t new_marking;
1478
1479 if (!exists_in_left && !exists_in_right)
1480 mark_new_node(new_rid, n, new_marking);
1481
1482 else if (!exists_in_left && exists_in_right)
1483 {
1484 node_t const & right_node = rni->second;
1485 marking_t const & right_marking = safe_get(right_markings, n->self);
1486 // must be unborn on the left (as opposed to dead)
1487 I(right_uncommon_ancestors.find(right_marking.birth_revision)
1488 != right_uncommon_ancestors.end());
1489 mark_unmerged_node(right_marking, right_node,
1490 new_rid, n, new_marking);
1491 }
1492 else if (exists_in_left && !exists_in_right)
1493 {
1494 node_t const & left_node = lni->second;
1495 marking_t const & left_marking = safe_get(left_markings, n->self);
1496 // must be unborn on the right (as opposed to dead)
1497 I(left_uncommon_ancestors.find(left_marking.birth_revision)
1498 != left_uncommon_ancestors.end());
1499 mark_unmerged_node(left_marking, left_node,
1500 new_rid, n, new_marking);
1501 }
1502 else
1503 {
1504 node_t const & left_node = lni->second;
1505 node_t const & right_node = rni->second;
1506 mark_merged_node(safe_get(left_markings, n->self),
1507 left_uncommon_ancestors, left_node,
1508 safe_get(right_markings, n->self),
1509 right_uncommon_ancestors, right_node,
1510 new_rid, n, new_marking);
1511 }
1512
1513 safe_insert(new_markings, make_pair(i->first, new_marking));
1514 }
1515 }
1516
1517
1518 class editable_roster_for_nonmerge
1519 : public editable_roster_base
1520 {
1521 public:
1522 editable_roster_for_nonmerge(roster_t & r, node_id_source & nis,
1523 revision_id const & rid,
1524 marking_map & markings)
1525 : editable_roster_base(r, nis),
1526 rid(rid), markings(markings)
1527 {}
1528
1529 virtual node_id detach_node(split_path const & src)
1530 {
1531 node_id nid = this->editable_roster_base::detach_node(src);
1532 marking_map::iterator marking = markings.find(nid);
1533 I(marking != markings.end());
1534 marking->second.parent_name.clear();
1535 marking->second.parent_name.insert(rid);
1536 return nid;
1537 }
1538
1539 virtual void drop_detached_node(node_id nid)
1540 {
1541 this->editable_roster_base::drop_detached_node(nid);
1542 safe_erase(markings, nid);
1543 }
1544
1545 virtual node_id create_dir_node()
1546 {
1547 return handle_new(this->editable_roster_base::create_dir_node());
1548 }
1549
1550 virtual node_id create_file_node(file_id const & content)
1551 {
1552 return handle_new(this->editable_roster_base::create_file_node(content));
1553 }
1554
1555 virtual void apply_delta(split_path const & pth,
1556 file_id const & old_id, file_id const & new_id)
1557 {
1558 this->editable_roster_base::apply_delta(pth, old_id, new_id);
1559 node_id nid = r.get_node(pth)->self;
1560 marking_map::iterator marking = markings.find(nid);
1561 I(marking != markings.end());
1562 marking->second.file_content.clear();
1563 marking->second.file_content.insert(rid);
1564 }
1565
1566 virtual void clear_attr(split_path const & pth, attr_key const & name)
1567 {
1568 this->editable_roster_base::clear_attr(pth, name);
1569 handle_attr(pth, name);
1570 }
1571
1572 virtual void set_attr(split_path const & pth, attr_key const & name,
1573 attr_value const & val)
1574 {
1575 this->editable_roster_base::set_attr(pth, name, val);
1576 handle_attr(pth, name);
1577 }
1578
1579 node_id handle_new(node_id nid)
1580 {
1581 node_t n = r.get_node(nid);
1582 marking_t new_marking;
1583 mark_new_node(rid, n, new_marking);
1584 safe_insert(markings, make_pair(nid, new_marking));
1585 return nid;
1586 }
1587
1588 void handle_attr(split_path const & pth, attr_key const & name)
1589 {
1590 node_id nid = r.get_node(pth)->self;
1591 marking_map::iterator marking = markings.find(nid);
1592 map<attr_key, set<revision_id> >::iterator am = marking->second.attrs.find(name);
1593 if (am == marking->second.attrs.end())
1594 {
1595 marking->second.attrs.insert(make_pair(name, set<revision_id>()));
1596 am = marking->second.attrs.find(name);
1597 }
1598
1599 I(am != marking->second.attrs.end());
1600 am->second.clear();
1601 am->second.insert(rid);
1602 }
1603
1604 private:
1605 revision_id const & rid;
1606 // markings starts out as the parent's markings
1607 marking_map & markings;
1608 };
1609
1610
1611 // yes, this function takes 14 arguments. I'm very sorry.
1612 void
1613 make_roster_for_merge(revision_id const & left_rid,
1614 roster_t const & left_roster,
1615 marking_map const & left_markings,
1616 cset const & left_cs,
1617 set<revision_id> left_uncommon_ancestors,
1618
1619 revision_id const & right_rid,
1620 roster_t const & right_roster,
1621 marking_map const & right_markings,
1622 cset const & right_cs,
1623 set<revision_id> right_uncommon_ancestors,
1624
1625 revision_id const & new_rid,
1626 roster_t & new_roster,
1627 marking_map & new_markings,
1628 node_id_source & nis)
1629 {
1630 I(!null_id(left_rid) && !null_id(right_rid));
1631 I(left_uncommon_ancestors.find(left_rid) != left_uncommon_ancestors.end());
1632 I(left_uncommon_ancestors.find(right_rid) == left_uncommon_ancestors.end());
1633 I(right_uncommon_ancestors.find(right_rid) != right_uncommon_ancestors.end());
1634 I(right_uncommon_ancestors.find(left_rid) == right_uncommon_ancestors.end());
1635 MM(left_rid);
1636 MM(left_roster);
1637 MM(left_markings);
1638 MM(left_cs);
1639 MM(left_uncommon_ancestors);
1640 MM(right_rid);
1641 MM(right_roster);
1642 MM(right_markings);
1643 MM(right_cs);
1644 MM(right_uncommon_ancestors);
1645 MM(new_rid);
1646 MM(new_roster);
1647 MM(new_markings);
1648 {
1649 temp_node_id_source temp_nis;
1650 new_roster = left_roster;
1651 roster_t from_right_r(right_roster);
1652 MM(from_right_r);
1653
1654 editable_roster_for_merge from_left_er(new_roster, temp_nis);
1655 editable_roster_for_merge from_right_er(from_right_r, temp_nis);
1656
1657 left_cs.apply_to(from_left_er);
1658 right_cs.apply_to(from_right_er);
1659
1660 set<node_id> new_ids;
1661 unify_rosters(new_roster, from_left_er.new_nodes,
1662 from_right_r, from_right_er.new_nodes,
1663 nis);
1664 I(new_roster == from_right_r);
1665 }
1666
1667 // SPEEDUP?: instead of constructing new marking from scratch, track which
1668 // nodes were modified, and scan only them
1669 // load one of the parent markings directly into the new marking map
1670 new_markings.clear();
1671 mark_merge_roster(left_roster, left_markings, left_uncommon_ancestors,
1672 right_roster, right_markings, right_uncommon_ancestors,
1673 new_rid, new_roster, new_markings);
1674 }
1675
1676
1677 // WARNING: this function is not tested directly (no unit tests). Do not
1678 // put real logic in it.
1679 void
1680 make_roster_for_merge(revision_id const & left_rid, cset const & left_cs,
1681 revision_id const & right_rid, cset const & right_cs,
1682 revision_id const & new_rid,
1683 roster_t & new_roster, marking_map & new_markings,
1684 app_state & app)
1685 {
1686 I(!null_id(left_rid) && !null_id(right_rid));
1687 roster_t left_roster, right_roster;
1688 marking_map left_marking, right_marking;
1689 app.db.get_roster(left_rid, left_roster, left_marking);
1690 app.db.get_roster(right_rid, right_roster, right_marking);
1691 true_node_id_source tnis = true_node_id_source(app);
1692
1693 set<revision_id> left_uncommon_ancestors, right_uncommon_ancestors;
1694 app.db.get_uncommon_ancestors(left_rid, right_rid,
1695 left_uncommon_ancestors,
1696 right_uncommon_ancestors);
1697 make_roster_for_merge(left_rid, left_roster, left_marking, left_cs,
1698 left_uncommon_ancestors,
1699 right_rid, right_roster, right_marking, right_cs,
1700 right_uncommon_ancestors,
1701 new_rid,
1702 new_roster, new_markings,
1703 tnis);
1704 }
1705
1706 // Warning: this function expects the parent's roster and markings in the
1707 // 'new_roster' and 'new_markings' parameters, and they are modified
1708 // destructively!
1709 // This function performs an almost identical task to
1710 // mark_roster_with_one_parent; however, for efficiency, it is implemented
1711 // in a different, destructive way.
1712 void
1713 make_roster_for_nonmerge(cset const & cs,
1714 revision_id const & new_rid,
1715 roster_t & new_roster, marking_map & new_markings,
1716 node_id_source & nis)
1717 {
1718 editable_roster_for_nonmerge er(new_roster, nis, new_rid, new_markings);
1719 cs.apply_to(er);
1720 }
1721
1722 // WARNING: this function is not tested directly (no unit tests). Do not
1723 // put real logic in it.
1724 void
1725 make_roster_for_nonmerge(revision_id const & parent_rid,
1726 cset const & parent_cs,
1727 revision_id const & new_rid,
1728 roster_t & new_roster, marking_map & new_markings,
1729 app_state & app)
1730 {
1731 app.db.get_roster(parent_rid, new_roster, new_markings);
1732 true_node_id_source nis(app);
1733 make_roster_for_nonmerge(parent_cs, new_rid, new_roster, new_markings, nis);
1734 }
1735}
1736
1737// WARNING: this function is not tested directly (no unit tests). Do not
1738// put real logic in it.
1739void
1740make_roster_for_base_plus_cset(revision_id const & base, cset const & cs,
1741 revision_id const & new_rid,
1742 roster_t & new_roster, marking_map & new_markings,
1743 node_id_source & nis,
1744 app_state & app)
1745{
1746 MM(base);
1747 MM(cs);
1748 app.db.get_roster(base, new_roster, new_markings);
1749 make_roster_for_nonmerge(cs, new_rid, new_roster, new_markings, nis);
1750}
1751
1752void
1753mark_roster_with_no_parents(revision_id const & rid,
1754 roster_t const & roster,
1755 marking_map & markings)
1756{
1757 roster_t mock_parent;
1758 marking_map mock_parent_markings;
1759 mark_roster_with_one_parent(mock_parent, mock_parent_markings,
1760 rid, roster, markings);
1761}
1762
1763void
1764mark_roster_with_one_parent(roster_t const & parent,
1765 marking_map const & parent_markings,
1766 revision_id const & child_rid,
1767 roster_t const & child,
1768 marking_map & child_markings)
1769{
1770 MM(parent);
1771 MM(parent_markings);
1772 MM(child_rid);
1773 MM(child);
1774 MM(child_markings);
1775
1776 I(!null_id(child_rid));
1777 child_markings.clear();
1778
1779 for (node_map::const_iterator i = child.all_nodes().begin();
1780 i != child.all_nodes().end(); ++i)
1781 {
1782 marking_t new_marking;
1783 if (parent.has_node(i->first))
1784 mark_unmerged_node(safe_get(parent_markings, i->first),
1785 parent.get_node(i->first),
1786 child_rid, i->second, new_marking);
1787 else
1788 mark_new_node(child_rid, i->second, new_marking);
1789 safe_insert(child_markings, make_pair(i->first, new_marking));
1790 }
1791
1792 child.check_sane_against(child_markings, true);
1793}
1794
1795// WARNING: this function is not tested directly (no unit tests). Do not put
1796// real logic in it.
1797void
1798make_roster_for_revision(revision_t const & rev, revision_id const & new_rid,
1799 roster_t & new_roster, marking_map & new_markings,
1800 app_state & app)
1801{
1802 MM(rev);
1803 MM(new_rid);
1804 MM(new_roster);
1805 MM(new_markings);
1806 if (rev.edges.size() == 1)
1807 make_roster_for_nonmerge(edge_old_revision(rev.edges.begin()),
1808 edge_changes(rev.edges.begin()),
1809 new_rid, new_roster, new_markings, app);
1810 else if (rev.edges.size() == 2)
1811 {
1812 edge_map::const_iterator i = rev.edges.begin();
1813 revision_id const & left_rid = edge_old_revision(i);
1814 cset const & left_cs = edge_changes(i);
1815 ++i;
1816 revision_id const & right_rid = edge_old_revision(i);
1817 cset const & right_cs = edge_changes(i);
1818 make_roster_for_merge(left_rid, left_cs, right_rid, right_cs,
1819 new_rid, new_roster, new_markings, app);
1820 }
1821 else
1822 I(false);
1823
1824 new_roster.check_sane_against(new_markings);
1825}
1826
1827
1828////////////////////////////////////////////////////////////////////
1829// Calculation of a cset
1830////////////////////////////////////////////////////////////////////
1831
1832
1833namespace
1834{
1835
1836 void delta_only_in_from(roster_t const & from,
1837 node_id nid, node_t n,
1838 cset & cs)
1839 {
1840 split_path sp;
1841 from.get_name(nid, sp);
1842 safe_insert(cs.nodes_deleted, sp);
1843 }
1844
1845
1846 void delta_only_in_to(roster_t const & to, node_id nid, node_t n,
1847 cset & cs)
1848 {
1849 split_path sp;
1850 to.get_name(nid, sp);
1851 if (is_file_t(n))
1852 {
1853 safe_insert(cs.files_added,
1854 make_pair(sp, downcast_to_file_t(n)->content));
1855 }
1856 else
1857 {
1858 safe_insert(cs.dirs_added, sp);
1859 }
1860 for (full_attr_map_t::const_iterator i = n->attrs.begin();
1861 i != n->attrs.end(); ++i)
1862 if (i->second.first)
1863 safe_insert(cs.attrs_set,
1864 make_pair(make_pair(sp, i->first), i->second.second));
1865 }
1866
1867 void delta_in_both(node_id nid,
1868 roster_t const & from, node_t from_n,
1869 roster_t const & to, node_t to_n,
1870 cset & cs)
1871 {
1872 I(same_type(from_n, to_n));
1873 I(from_n->self == to_n->self);
1874
1875 if (shallow_equal(from_n, to_n, false))
1876 return;
1877
1878 split_path from_sp, to_sp;
1879 from.get_name(nid, from_sp);
1880 to.get_name(nid, to_sp);
1881
1882 // Compare name and path.
1883 if (from_n->name != to_n->name || from_n->parent != to_n->parent)
1884 safe_insert(cs.nodes_renamed, make_pair(from_sp, to_sp));
1885
1886 // Compare file content.
1887 if (is_file_t(from_n))
1888 {
1889 file_t from_f = downcast_to_file_t(from_n);
1890 file_t to_f = downcast_to_file_t(to_n);
1891 if (!(from_f->content == to_f->content))
1892 {
1893 safe_insert(cs.deltas_applied,
1894 make_pair(to_sp, make_pair(from_f->content,
1895 to_f->content)));
1896 }
1897 }
1898
1899 // Compare attrs.
1900 {
1901 parallel::iter<full_attr_map_t> i(from_n->attrs, to_n->attrs);
1902 while (i.next())
1903 {
1904 MM(i);
1905 if ((i.state() == parallel::in_left
1906 || (i.state() == parallel::in_both && !i.right_data().first))
1907 && i.left_data().first)
1908 {
1909 safe_insert(cs.attrs_cleared,
1910 make_pair(to_sp, i.left_key()));
1911 }
1912 else if ((i.state() == parallel::in_right
1913 || (i.state() == parallel::in_both && !i.left_data().first))
1914 && i.right_data().first)
1915 {
1916 safe_insert(cs.attrs_set,
1917 make_pair(make_pair(to_sp, i.right_key()),
1918 i.right_data().second));
1919 }
1920 else if (i.state() == parallel::in_both
1921 && i.right_data().first
1922 && i.left_data().first
1923 && i.right_data().second != i.left_data().second)
1924 {
1925 safe_insert(cs.attrs_set,
1926 make_pair(make_pair(to_sp, i.right_key()),
1927 i.right_data().second));
1928 }
1929 }
1930 }
1931 }
1932}
1933
1934void
1935make_cset(roster_t const & from, roster_t const & to, cset & cs)
1936{
1937 cs.clear();
1938 parallel::iter<node_map> i(from.all_nodes(), to.all_nodes());
1939 while (i.next())
1940 {
1941 MM(i);
1942 switch (i.state())
1943 {
1944 case parallel::invalid:
1945 I(false);
1946
1947 case parallel::in_left:
1948 // deleted
1949 delta_only_in_from(from, i.left_key(), i.left_data(), cs);
1950 break;
1951
1952 case parallel::in_right:
1953 // added
1954 delta_only_in_to(to, i.right_key(), i.right_data(), cs);
1955 break;
1956
1957 case parallel::in_both:
1958 // moved/renamed/patched/attribute changes
1959 delta_in_both(i.left_key(), from, i.left_data(), to, i.right_data(), cs);
1960 break;
1961 }
1962 }
1963}
1964
1965
1966// we assume our input is sane
1967bool
1968equal_up_to_renumbering(roster_t const & a, marking_map const & a_markings,
1969 roster_t const & b, marking_map const & b_markings)
1970{
1971 if (a.all_nodes().size() != b.all_nodes().size())
1972 return false;
1973
1974 for (node_map::const_iterator i = a.all_nodes().begin();
1975 i != a.all_nodes().end(); ++i)
1976 {
1977 split_path sp;
1978 a.get_name(i->first, sp);
1979 if (!b.has_node(sp))
1980 return false;
1981 node_t b_n = b.get_node(sp);
1982 // we already know names are the same
1983 if (!same_type(i->second, b_n))
1984 return false;
1985 if (i->second->attrs != b_n->attrs)
1986 return false;
1987 if (is_file_t(i->second))
1988 {
1989 if (!(downcast_to_file_t(i->second)->content
1990 == downcast_to_file_t(b_n)->content))
1991 return false;
1992 }
1993 // nodes match, check the markings too
1994 if (!(safe_get(a_markings, i->first) == safe_get(b_markings, b_n->self)))
1995 return false;
1996 }
1997 return true;
1998}
1999
2000
2001void make_restricted_csets(roster_t const & from, roster_t const & to,
2002 cset & included, cset & excluded,
2003 node_restriction const & mask)
2004{
2005 included.clear();
2006 excluded.clear();
2007
2008 L(FL("building restricted csets"));
2009 parallel::iter<node_map> i(from.all_nodes(), to.all_nodes());
2010 while (i.next())
2011 {
2012 MM(i);
2013 switch (i.state())
2014 {
2015 case parallel::invalid:
2016 I(false);
2017
2018 case parallel::in_left:
2019 // deleted
2020 if (mask.includes(from, i.left_key()))
2021 {
2022 delta_only_in_from(from, i.left_key(), i.left_data(), included);
2023 L(FL("included left %d") % i.left_key());
2024 }
2025 else
2026 {
2027 delta_only_in_from(from, i.left_key(), i.left_data(), excluded);
2028 L(FL("excluded left %d") % i.left_key());
2029 }
2030 break;
2031
2032 case parallel::in_right:
2033 // added
2034 if (mask.includes(to, i.right_key()))
2035 {
2036 delta_only_in_to(to, i.right_key(), i.right_data(), included);
2037 L(FL("included right %d") % i.right_key());
2038 }
2039 else
2040 {
2041 delta_only_in_to(to, i.right_key(), i.right_data(), excluded);
2042 L(FL("excluded right %d") % i.right_key());
2043 }
2044 break;
2045
2046 case parallel::in_both:
2047 // moved/renamed/patched/attribute changes
2048 if (mask.includes(from, i.left_key()) || mask.includes(to, i.right_key()))
2049 {
2050 delta_in_both(i.left_key(), from, i.left_data(), to, i.right_data(), included);
2051 L(FL("in both %d %d") % i.left_key() % i.right_key());
2052 }
2053 else
2054 {
2055 delta_in_both(i.left_key(), from, i.left_data(), to, i.right_data(), excluded);
2056 L(FL("in both %d %d") % i.left_key() % i.right_key());
2057 }
2058 break;
2059 }
2060 }
2061
2062}
2063
2064class editable_roster_for_check
2065 : public editable_roster_base
2066{
2067 public:
2068 editable_roster_for_check(roster_t & r);
2069 virtual node_id detach_node(split_path const & src);
2070 virtual void drop_detached_node(node_id nid);
2071 virtual void attach_node(node_id nid, split_path const & dst);
2072 int problems;
2073
2074 private:
2075 temp_node_id_source nis;
2076 map<node_id, pair<split_path, vector<path_component> > > detached_dirs;
2077};
2078
2079editable_roster_for_check::editable_roster_for_check(roster_t & r)
2080 : editable_roster_base(r, nis), problems(0)
2081{
2082 // Ensure that this roster has no temp nodes.
2083 // If this every becomes a problem, then we could instead have our
2084 // temp_node_id_source start at the largest node used + 1.
2085 node_map nodes = r.all_nodes();
2086 for (node_map::const_iterator i = nodes.begin(); i != nodes.end(); ++i)
2087 I(!temp_node(i->first));
2088}
2089
2090node_id
2091editable_roster_for_check::detach_node(split_path const & src)
2092{
2093 node_t n = r.get_node(src);
2094 if (is_dir_t(n))
2095 {
2096 dir_t dir = downcast_to_dir_t(n);
2097 vector<path_component> children;
2098 for (dir_map::const_iterator
2099 i = dir->children.begin(); i != dir->children.end(); ++i)
2100 {
2101 children.push_back(i->first);
2102 }
2103 detached_dirs.insert(make_pair(dir->self,
2104 make_pair(src, children)));
2105 }
2106
2107 return this->editable_roster_base::detach_node(src);
2108}
2109
2110void
2111editable_roster_for_check::drop_detached_node(node_id nid)
2112{
2113 node_t n = r.get_node(nid);
2114 if (is_dir_t(n) && !downcast_to_dir_t(n)->children.empty())
2115 {
2116 map<node_id, pair<split_path, vector<path_component> > >::const_iterator
2117 i = detached_dirs.find(nid);
2118 I(i != detached_dirs.end());
2119
2120 split_path dir = i->second.first;
2121 for (vector<path_component>::const_iterator
2122 p = i->second.second.begin(); p != i->second.second.end(); ++p)
2123 {
2124 split_path child(dir);
2125 child.push_back(*p);
2126 W(F("restriction includes deletion of '%s' but excludes deletion of '%s'")
2127 % dir % child);
2128 problems++;
2129 }
2130 }
2131 else
2132 {
2133 this->editable_roster_base::drop_detached_node(nid);
2134 }
2135}
2136
2137void
2138editable_roster_for_check::attach_node(node_id nid, split_path const & dst)
2139{
2140 split_path dirname;
2141 path_component basename;
2142 dirname_basename(dst, dirname, basename);
2143
2144 if (!dirname.empty() && !r.has_node(dirname))
2145 {
2146 W(F("restriction excludes addition of '%s' but includes addition of '%s'")
2147 % dirname % dst);
2148 problems++;
2149 }
2150 else
2151 {
2152 this->editable_roster_base::attach_node(nid, dst);
2153 }
2154}
2155
2156void
2157check_restricted_cset(roster_t const & roster, cset const & cs)
2158{
2159 // This command checks that a cset generated by make_restricted_cset still
2160 // is sensical when applied to the given roster -- e.g., it does not
2161 // include a deletion of a directory that would be empty, except that some
2162 // of the deletions/renames that emptied it were not included in the
2163 // restriction, it does not include the addition of a file when the
2164 // addition of its parent directory was not included, etc.
2165
2166 // 'roster' must not contain any temp nids, for now; this is checked, and
2167 // easily fixed if it becomes a problem.
2168
2169 MM(roster);
2170 MM(cs);
2171
2172 // make a copy of the roster to apply the cset to destructively
2173 roster_t tmp(roster);
2174 editable_roster_for_check e(tmp);
2175 cs.apply_to(e);
2176
2177 N(e.problems == 0, F("invalid restriction"));
2178}
2179
2180
2181void
2182select_nodes_modified_by_cset(cset const & cs,
2183 roster_t const & old_roster,
2184 roster_t const & new_roster,
2185 set<node_id> & nodes_modified)
2186{
2187 nodes_modified.clear();
2188
2189 path_set modified_prestate_nodes;
2190 path_set modified_poststate_nodes;
2191
2192 // Pre-state damage
2193
2194 copy(cs.nodes_deleted.begin(), cs.nodes_deleted.end(),
2195 inserter(modified_prestate_nodes, modified_prestate_nodes.begin()));
2196
2197 for (map<split_path, split_path>::const_iterator i = cs.nodes_renamed.begin();
2198 i != cs.nodes_renamed.end(); ++i)
2199 modified_prestate_nodes.insert(i->first);
2200
2201 // Post-state damage
2202
2203 copy(cs.dirs_added.begin(), cs.dirs_added.end(),
2204 inserter(modified_poststate_nodes, modified_poststate_nodes.begin()));
2205
2206 for (map<split_path, file_id>::const_iterator i = cs.files_added.begin();
2207 i != cs.files_added.end(); ++i)
2208 modified_poststate_nodes.insert(i->first);
2209
2210 for (map<split_path, split_path>::const_iterator i = cs.nodes_renamed.begin();
2211 i != cs.nodes_renamed.end(); ++i)
2212 modified_poststate_nodes.insert(i->second);
2213
2214 for (map<split_path, pair<file_id, file_id> >::const_iterator i = cs.deltas_applied.begin();
2215 i != cs.deltas_applied.end(); ++i)
2216 modified_poststate_nodes.insert(i->first);
2217
2218 for (set<pair<split_path, attr_key> >::const_iterator i = cs.attrs_cleared.begin();
2219 i != cs.attrs_cleared.end(); ++i)
2220 modified_poststate_nodes.insert(i->first);
2221
2222 for (map<pair<split_path, attr_key>, attr_value>::const_iterator i = cs.attrs_set.begin();
2223 i != cs.attrs_set.end(); ++i)
2224 modified_poststate_nodes.insert(i->first.first);
2225
2226 // Finale
2227
2228 for (path_set::const_iterator i = modified_prestate_nodes.begin();
2229 i != modified_prestate_nodes.end(); ++i)
2230 {
2231 I(old_roster.has_node(*i));
2232 nodes_modified.insert(old_roster.get_node(*i)->self);
2233 }
2234
2235 for (path_set::const_iterator i = modified_poststate_nodes.begin();
2236 i != modified_poststate_nodes.end(); ++i)
2237 {
2238 I(new_roster.has_node(*i));
2239 nodes_modified.insert(new_roster.get_node(*i)->self);
2240 }
2241
2242}
2243
2244////////////////////////////////////////////////////////////////////
2245// getting rosters from the workspace
2246////////////////////////////////////////////////////////////////////
2247
2248// TODO: doesn't that mean they should go in work.cc ?
2249// perhaps do that after propagating back to n.v.m.experiment.rosters
2250// or to mainline so that diffs are more informative
2251
2252inline static bool
2253inodeprint_unchanged(inodeprint_map const & ipm, file_path const & path)
2254{
2255 inodeprint_map::const_iterator old_ip = ipm.find(path);
2256 if (old_ip != ipm.end())
2257 {
2258 hexenc<inodeprint> ip;
2259 if (inodeprint_file(path, ip) && ip == old_ip->second)
2260 return true; // unchanged
2261 else
2262 return false; // changed or unavailable
2263 }
2264 else
2265 return false; // unavailable
2266}
2267
2268// TODO: unchanged, changed, missing might be better as set<node_id>
2269
2270// note that this does not take a restriction because it is used only by
2271// automate_inventory which operates on the entire, unrestricted, working
2272// directory.
2273
2274void
2275classify_roster_paths(roster_t const & ros,
2276 path_set & unchanged,
2277 path_set & changed,
2278 path_set & missing,
2279 app_state & app)
2280{
2281 temp_node_id_source nis;
2282 inodeprint_map ipm;
2283
2284 if (in_inodeprints_mode())
2285 {
2286 data dat;
2287 read_inodeprints(dat);
2288 read_inodeprint_map(dat, ipm);
2289 }
2290
2291 // this code is speed critical, hence the use of inode fingerprints so be
2292 // careful when making changes in here and preferably do some timing tests
2293
2294 if (!ros.has_root())
2295 return;
2296
2297 node_map const & nodes = ros.all_nodes();
2298 for (node_map::const_iterator i = nodes.begin(); i != nodes.end(); ++i)
2299 {
2300 node_id nid = i->first;
2301 node_t node = i->second;
2302
2303 split_path sp;
2304 ros.get_name(nid, sp);
2305
2306 file_path fp(sp);
2307
2308 if (is_dir_t(node) || inodeprint_unchanged(ipm, fp))
2309 {
2310 // dirs don't have content changes
2311 unchanged.insert(sp);
2312 }
2313 else
2314 {
2315 file_t file = downcast_to_file_t(node);
2316 file_id fid;
2317 if (ident_existing_file(fp, fid, app.lua))
2318 {
2319 if (file->content == fid)
2320 unchanged.insert(sp);
2321 else
2322 changed.insert(sp);
2323 }
2324 else
2325 {
2326 missing.insert(sp);
2327 }
2328 }
2329 }
2330}
2331
2332void
2333update_current_roster_from_filesystem(roster_t & ros,
2334 node_restriction const & mask,
2335 app_state & app)
2336{
2337 temp_node_id_source nis;
2338 inodeprint_map ipm;
2339
2340 if (in_inodeprints_mode())
2341 {
2342 data dat;
2343 read_inodeprints(dat);
2344 read_inodeprint_map(dat, ipm);
2345 }
2346
2347 size_t missing_files = 0;
2348
2349 // this code is speed critical, hence the use of inode fingerprints so be
2350 // careful when making changes in here and preferably do some timing tests
2351
2352 if (!ros.has_root())
2353 return;
2354
2355 node_map const & nodes = ros.all_nodes();
2356 for (node_map::const_iterator i = nodes.begin(); i != nodes.end(); ++i)
2357 {
2358 node_id nid = i->first;
2359 node_t node = i->second;
2360
2361 // Only analyze files further, not dirs.
2362 if (! is_file_t(node))
2363 continue;
2364
2365 // Only analyze restriction-included files.
2366 if (!mask.includes(ros, nid))
2367 continue;
2368
2369 split_path sp;
2370 ros.get_name(nid, sp);
2371 file_path fp(sp);
2372
2373 // Only analyze changed files (or all files if inodeprints mode
2374 // is disabled).
2375 if (inodeprint_unchanged(ipm, fp))
2376 continue;
2377
2378 file_t file = downcast_to_file_t(node);
2379 if (!ident_existing_file(fp, file->content, app.lua))
2380 {
2381 W(F("missing %s") % (fp));
2382 missing_files++;
2383 }
2384 }
2385
2386 N(missing_files == 0,
2387 F("%d missing files; use '%s ls missing' to view\n"
2388 "to restore consistency, on each missing file run either\n"
2389 "'%s drop FILE' to remove it permanently, or\n"
2390 "'%s revert FILE' to restore it\n"
2391 "or to handle all at once, simply '%s drop --missing'\n"
2392 "or '%s revert --missing'")
2393 % missing_files % app.prog_name % app.prog_name % app.prog_name
2394 % app.prog_name % app.prog_name);
2395}
2396
2397void
2398update_current_roster_from_filesystem(roster_t & ros,
2399 app_state & app)
2400{
2401 node_restriction tmp(app);
2402 update_current_roster_from_filesystem(ros, tmp, app);
2403}
2404
2405void
2406roster_t::extract_path_set(path_set & paths) const
2407{
2408 paths.clear();
2409 if (has_root())
2410 {
2411 for (dfs_iter i(root_dir); !i.finished(); ++i)
2412 {
2413 node_t curr = *i;
2414 split_path pth;
2415 get_name(curr->self, pth);
2416 if (!workspace_root(pth))
2417 paths.insert(pth);
2418 }
2419 }
2420}
2421
2422
2423////////////////////////////////////////////////////////////////////
2424// I/O routines
2425////////////////////////////////////////////////////////////////////
2426
2427
2428namespace
2429{
2430 namespace syms
2431 {
2432 // roster symbols
2433 symbol const format_version("format_version");
2434 symbol const dir("dir");
2435 symbol const file("file");
2436 symbol const content("content");
2437 symbol const attr("attr");
2438
2439 // 'local' roster and marking symbols
2440 symbol const ident("ident");
2441 symbol const birth("birth");
2442 symbol const dormant_attr("dormant_attr");
2443
2444 symbol const path_mark("path_mark");
2445 symbol const content_mark("content_mark");
2446 symbol const attr_mark("attr_mark");
2447 }
2448}
2449
2450
2451static void
2452push_marking(basic_io::stanza & st,
2453 node_t curr,
2454 marking_t const & mark)
2455{
2456
2457 I(!null_id(mark.birth_revision));
2458 st.push_hex_pair(syms::birth, mark.birth_revision.inner());
2459
2460 for (set<revision_id>::const_iterator i = mark.parent_name.begin();
2461 i != mark.parent_name.end(); ++i)
2462 st.push_hex_pair(syms::path_mark, i->inner());
2463
2464 if (is_file_t(curr))
2465 {
2466 for (set<revision_id>::const_iterator i = mark.file_content.begin();
2467 i != mark.file_content.end(); ++i)
2468 st.push_hex_pair(syms::content_mark, i->inner());
2469 }
2470 else
2471 I(mark.file_content.empty());
2472
2473 for (full_attr_map_t::const_iterator i = curr->attrs.begin();
2474 i != curr->attrs.end(); ++i)
2475 {
2476 map<attr_key, set<revision_id> >::const_iterator am = mark.attrs.find(i->first);
2477 I(am != mark.attrs.end());
2478 for (set<revision_id>::const_iterator j = am->second.begin();
2479 j != am->second.end(); ++j)
2480 st.push_hex_triple(syms::attr_mark, i->first(), j->inner());
2481 }
2482}
2483
2484static void
2485push_marking_binary(string_queue &out,
2486 node_t curr,
2487 marking_t const & mark)
2488{
2489
2490 I(!null_id(mark.birth_revision));
2491 out += 'm';
2492 insert_variable_length_string(mark.birth_revision.inner()(),out);
2493
2494 for (set<revision_id>::const_iterator i = mark.parent_name.begin();
2495 i != mark.parent_name.end(); ++i) {
2496 out += 'p';
2497 insert_variable_length_string(i->inner()(),out);
2498 }
2499
2500 if (is_file_t(curr))
2501 {
2502 for (set<revision_id>::const_iterator i = mark.file_content.begin();
2503 i != mark.file_content.end(); ++i) {
2504 out += 'c';
2505 insert_variable_length_string(i->inner()(),out);
2506 }
2507 }
2508 else
2509 I(mark.file_content.empty());
2510
2511 for (full_attr_map_t::const_iterator i = curr->attrs.begin();
2512 i != curr->attrs.end(); ++i)
2513 {
2514 map<attr_key, set<revision_id> >::const_iterator am = mark.attrs.find(i->first);
2515 I(am != mark.attrs.end());
2516 for (set<revision_id>::const_iterator j = am->second.begin();
2517 j != am->second.end(); ++j) {
2518 out += 'a';
2519 insert_variable_length_string(i->first(), out);
2520 insert_variable_length_string(j->inner()(), out);
2521 }
2522 }
2523}
2524
2525
2526void
2527parse_marking(basic_io::parser & pa,
2528 node_t n,
2529 marking_t & marking)
2530{
2531 while (pa.symp())
2532 {
2533 string rev;
2534 if (pa.symp(syms::birth))
2535 {
2536 pa.sym();
2537 pa.hex(rev);
2538 marking.birth_revision = revision_id(rev);
2539 }
2540 else if (pa.symp(syms::path_mark))
2541 {
2542 pa.sym();
2543 pa.hex(rev);
2544 safe_insert(marking.parent_name, revision_id(rev));
2545 }
2546 else if (pa.symp(syms::content_mark))
2547 {
2548 pa.sym();
2549 pa.hex(rev);
2550 safe_insert(marking.file_content, revision_id(rev));
2551 }
2552 else if (pa.symp(syms::attr_mark))
2553 {
2554 string k;
2555 pa.sym();
2556 pa.str(k);
2557 pa.hex(rev);
2558 attr_key key = attr_key(k);
2559 I(n->attrs.find(key) != n->attrs.end());
2560 safe_insert(marking.attrs[key], revision_id(rev));
2561 }
2562 else break;
2563 }
2564}
2565
2566void
2567parse_marking_binary(const string &from, unsigned &from_pos,
2568 node_t n,
2569 marking_t & marking)
2570{
2571 I(from_pos < from.size());
2572 if ('m' != from[from_pos]) {
2573 cout << "Error parsing:\n" << from << "\nError parsing\n";
2574 }
2575
2576 I('m' == from[from_pos]);
2577 ++from_pos;
2578 string rev;
2579 extract_variable_length_string(from, rev, from_pos, "birth marking");
2580 marking.birth_revision = revision_id(rev);
2581
2582 while (1) {
2583 I(from_pos < from.size());
2584 if ('p' != from[from_pos]) {
2585 break;
2586 }
2587 ++from_pos;
2588 extract_variable_length_string(from, rev, from_pos, "parent_name path_mark");
2589 safe_insert(marking.parent_name, revision_id(rev));
2590 }
2591 while (1) {
2592 I(from_pos < from.size());
2593 if ('c' != from[from_pos]) {
2594 break;
2595 }
2596 ++from_pos;
2597 extract_variable_length_string(from, rev, from_pos, "file_content revision");
2598 safe_insert(marking.file_content, revision_id(rev));
2599 }
2600
2601 string k;
2602 while (1) {
2603 I(from_pos < from.size());
2604 if ('a' != from[from_pos]) {
2605 break;
2606 }
2607 ++from_pos;
2608 extract_variable_length_string(from, k, from_pos, "attribute key");
2609 extract_variable_length_string(from, rev, from_pos, "attribute revision");
2610 attr_key key = attr_key(k);
2611 I(n->attrs.find(key) != n->attrs.end());
2612 safe_insert(marking.attrs[key], revision_id(rev));
2613 }
2614}
2615
2616// SPEEDUP?: hand-writing a parser for manifests was a measurable speed win,
2617// and the original parser was much simpler than basic_io. After benchmarking
2618// consider replacing the roster disk format with something that can be
2619// processed more efficiently.
2620
2621void
2622roster_t::print_to(basic_io::printer & pr,
2623 marking_map const & mm,
2624 bool print_local_parts) const
2625{
2626 I(has_root());
2627 {
2628 basic_io::stanza st;
2629 st.push_str_pair(syms::format_version, "1");
2630 pr.print_stanza(st);
2631 }
2632 for (dfs_iter i(root_dir); !i.finished(); ++i)
2633 {
2634 node_t curr = *i;
2635 split_path pth;
2636 get_name(curr->self, pth);
2637
2638 file_path fp = file_path(pth);
2639
2640 basic_io::stanza st;
2641 if (is_dir_t(curr))
2642 {
2643 // L(FL("printing dir %s") % fp);
2644 st.push_file_pair(syms::dir, fp);
2645 }
2646 else
2647 {
2648 file_t ftmp = downcast_to_file_t(curr);
2649 st.push_file_pair(syms::file, fp);
2650 st.push_hex_pair(syms::content, ftmp->content.inner());
2651 // L(FL("printing file %s") % fp);
2652 }
2653
2654 if (print_local_parts)
2655 {
2656 I(curr->self != the_null_node);
2657 st.push_str_pair(syms::ident, lexical_cast<string>(curr->self));
2658 }
2659
2660 // Push the non-dormant part of the attr map
2661 for (full_attr_map_t::const_iterator j = curr->attrs.begin();
2662 j != curr->attrs.end(); ++j)
2663 {
2664 if (j->second.first)
2665 {
2666 // L(FL("printing attr %s : %s = %s") % fp % j->first % j->second);
2667 st.push_str_triple(syms::attr, j->first(), j->second.second());
2668 }
2669 }
2670
2671 if (print_local_parts)
2672 {
2673 // Push the dormant part of the attr map
2674 for (full_attr_map_t::const_iterator j = curr->attrs.begin();
2675 j != curr->attrs.end(); ++j)
2676 {
2677 if (!j->second.first)
2678 {
2679 I(j->second.second().empty());
2680 st.push_str_pair(syms::dormant_attr, j->first());
2681 }
2682 }
2683
2684 marking_map::const_iterator m = mm.find(curr->self);
2685 I(m != mm.end());
2686 push_marking(st, curr, m->second);
2687 }
2688
2689 pr.print_stanza(st);
2690 }
2691}
2692
2693void
2694roster_t::print_binary(string_queue &out,
2695 marking_map const & mm) const
2696{
2697 I(has_root());
2698 out += 'V';
2699 out += (char)1;
2700
2701 unsigned node_count = 0;
2702 for (dfs_iter i(root_dir); !i.finished(); ++i) {
2703 ++node_count;
2704 }
2705
2706 for (dfs_iter i(root_dir); !i.finished(); ++i)
2707 {
2708 node_t curr = *i;
2709 split_path pth;
2710 get_name(curr->self, pth);
2711
2712 file_path fp = file_path(pth);
2713
2714 I(curr->self != the_null_node);
2715 I(sizeof(node_id) == 4);
2716
2717 out += is_dir_t(curr) ? 'D' : 'F';
2718 insert_datum_lsb((u32)curr->self, out);
2719 u32 record_size_pos = out.size();
2720 insert_datum_lsb((u32)0, out);
2721 insert_variable_length_string(fp.as_internal(),out);
2722 if (!is_dir_t(curr))
2723 {
2724 file_t ftmp = downcast_to_file_t(curr);
2725 insert_variable_length_string(ftmp->content.inner()(),out);
2726 }
2727
2728 // Push the non-dormant part of the attr map
2729 for (full_attr_map_t::const_iterator j = curr->attrs.begin();
2730 j != curr->attrs.end(); ++j)
2731 {
2732 if (j->second.first)
2733 {
2734 // L(FL("printing attr %s : %s = %s") % fp % j->first % j->second);
2735 // st.push_str_triple(syms::attr, j->first(), j->second.second());
2736 out += 'n';
2737 insert_variable_length_string(j->first(),out);
2738 insert_variable_length_string(j->second.second(),out);
2739 }
2740 }
2741
2742 // Push the dormant part of the attr map
2743 for (full_attr_map_t::const_iterator j = curr->attrs.begin();
2744 j != curr->attrs.end(); ++j)
2745 {
2746 if (!j->second.first)
2747 {
2748 I(j->second.second().empty());
2749 out += 'd';
2750 insert_variable_length_string(j->first(),out);
2751 }
2752 }
2753
2754 marking_map::const_iterator m = mm.find(curr->self);
2755 I(m != mm.end());
2756 push_marking_binary(out, curr, m->second);
2757 string tmp;
2758 insert_datum_lsb((u32)(out.size()-(record_size_pos+4)), tmp);
2759 I(tmp.size() == 4);
2760 for(unsigned i=0; i<4; ++i)
2761 {
2762 out[record_size_pos+i] = tmp[i];
2763 }
2764 }
2765 u32 index_start_offset = out.size();
2766 out += 'X';
2767}
2768
2769inline size_t
2770read_num(string const & s)
2771{
2772 size_t n = 0;
2773
2774 for (string::const_iterator i = s.begin(); i != s.end(); i++)
2775 {
2776 I(*i >= '0' && *i <= '9');
2777 n *= 10;
2778 n += static_cast<size_t>(*i - '0');
2779 }
2780 return n;
2781}
2782
2783static void
2784skip_to_next_record(basic_io::input_source &src)
2785{
2786 src.col = 1;
2787 // records start at two newlines in a row
2788 while(LIKELY(src.curr != src.in.end())) {
2789 if (*src.curr == '\n') {
2790 ++src.line;
2791 if ((src.curr+1) != src.in.end() && *(src.curr+1) == '\n') {
2792 ++src.curr;
2793 ++src.curr;
2794 ++src.line;
2795 break;
2796 }
2797 }
2798 ++src.curr;
2799 }
2800}
2801
2802static void
2803skip_to_record_key(basic_io::input_source &src)
2804{
2805 while(LIKELY(src.curr != src.in.end())) {
2806 if (*src.curr == ' ') {
2807 // skip
2808 } else if (isalpha(*src.curr)) {
2809 break;
2810 } else {
2811 I(false);
2812 }
2813 ++src.curr;
2814 }
2815}
2816
2817static bool
2818next_str_is(basic_io::input_source &src, const string &str)
2819{
2820 std::string::const_iterator i = src.curr;
2821 std::string::const_iterator j = str.begin();
2822 while(j != str.end()) {
2823 if (i == src.in.end() || *i != *j) {
2824 return false;
2825 }
2826 ++i;
2827 ++j;
2828 }
2829 src.curr = i;
2830 return true;
2831}
2832
2833static void
2834skip_to_next_line(basic_io::input_source &src)
2835{
2836 while(LIKELY(src.curr != src.in.end())) {
2837 if (*src.curr == '\n') {
2838 ++src.curr;
2839 break;
2840 }
2841 ++src.curr;
2842 }
2843 ++src.line;
2844}
2845
2846
2847static bool
2848roster_get_revision_fid_info_ascii(const roster_data &dat,
2849 node_id const &file_id_int,
2850 file_t &file,
2851 marking_t &marks)
2852{
2853 // there has to be a better way
2854 char buf[30];
2855 sprintf(buf,"\"%u\"",file_id_int);
2856 string file_id_token(buf);
2857
2858 basic_io::input_source src(dat.inner()(), "roster");
2859 basic_io::tokenizer tok(src);
2860 basic_io::parser pars(tok);
2861 {
2862 pars.esym(syms::format_version);
2863 string vers;
2864 pars.str(vers);
2865 I(vers == "1");
2866 }
2867
2868 // Make sure we're at the beginning we expect to be at...
2869 I(pars.token == "dir");
2870 I(src.lookahead == ' ');
2871 src.advance();
2872 I(src.lookahead == '"');
2873 src.advance();
2874 I(src.lookahead == '"');
2875 src.advance();
2876 I(src.lookahead == '\n');
2877 skip_to_next_record(src);
2878
2879 while(src.curr != src.in.end()) {
2880 std::string::const_iterator save_record_start = src.curr;
2881 skip_to_record_key(src);
2882 I(src.curr != src.in.end());
2883 if (next_str_is(src,"dir ")) {
2884 // skip
2885 } else if (next_str_is(src,"file ")) {
2886 skip_to_next_line(src);
2887 skip_to_record_key(src);
2888 bool ok = next_str_is(src,"content ");
2889 I(ok);
2890 skip_to_next_line(src);
2891 skip_to_record_key(src);
2892 ok = next_str_is(src,"ident ");
2893 I(ok);
2894 if (next_str_is(src,file_id_token)) {
2895 src.curr = save_record_start;
2896 src.line -= 2; // backup by file, content
2897 pars.advance();
2898
2899 file_t n;
2900 if (pars.symp(syms::file))
2901 {
2902 string content, pth, ident;
2903 pars.sym();
2904 pars.str(pth);
2905 pars.esym(syms::content);
2906 pars.hex(content);
2907 pars.esym(syms::ident);
2908 pars.str(ident);
2909 n = file_t(new file_node(read_num(ident),
2910 file_id(content)));
2911 }
2912 else
2913 {
2914 I(false);
2915 }
2916 I(static_cast<bool>(n));
2917 while(pars.symp(syms::attr))
2918 {
2919 pars.sym();
2920 string k, v;
2921 pars.str(k);
2922 pars.str(v);
2923 safe_insert(n->attrs, make_pair(attr_key(k),
2924 make_pair(true, attr_value(v))));
2925 }
2926
2927 // Dormant attrs
2928 while(pars.symp(syms::dormant_attr))
2929 {
2930 pars.sym();
2931 string k;
2932 pars.str(k);
2933 safe_insert(n->attrs, make_pair(attr_key(k),
2934 make_pair(false, attr_value())));
2935 }
2936 parse_marking(pars, n, marks);
2937 file = n;
2938 return true;
2939 }
2940 } else {
2941 I(false);
2942 }
2943 skip_to_next_record(src);
2944 }
2945 I(src.curr == src.in.end());
2946 return false;
2947}
2948
2949static void
2950parse_binary_attrs(node_t &n, const std::string &from, size_t &from_pos)
2951{
2952 // Non-dormant attrs
2953 while(1)
2954 {
2955 I(from_pos < from.size());
2956 if ('n' != from[from_pos])
2957 {
2958 break;
2959 }
2960 ++from_pos;
2961 string k,v;
2962 extract_variable_length_string(from, k, from_pos, nondormant_attr_key_str);
2963 extract_variable_length_string(from, v, from_pos, nondormant_attr_value_str);
2964 safe_insert(n->attrs, make_pair(attr_key(k),
2965 make_pair(true, attr_value(v))));
2966 }
2967
2968 // Dormant attrs
2969 while(1)
2970 {
2971 I(from_pos < from.size());
2972 if ('d' != from[from_pos])
2973 {
2974 break;
2975 }
2976 ++from_pos;
2977 string k;
2978 extract_variable_length_string(from, k, from_pos, "dormant attr key");
2979 safe_insert(n->attrs, make_pair(attr_key(k),
2980 make_pair(false, attr_value())));
2981 }
2982}
2983
2984bool
2985roster_get_revision_fid_info_binary(const roster_data &dat,
2986 node_id const file_id_int,
2987 file_t &file,
2988 marking_t &marks)
2989{
2990 const string from(dat.inner()());
2991
2992 I(from.size() >= 10);
2993 I(from[0] == 'V');
2994 I(from[1] == (char)1);
2995 size_t from_pos = 2;
2996 while(1)
2997 {
2998 I(from_pos < from.size());
2999 if ('X' == from[from_pos])
3000 {
3001 return false;
3002 }
3003 I('D' == from[from_pos] || 'F' == from[from_pos]);
3004 ++from_pos;
3005 u32 node_id = extract_datum_lsb<u32>(from, from_pos, node_id_str);
3006 if (node_id != file_id_int)
3007 {
3008 u32 record_size = extract_datum_lsb<u32>(from, from_pos, record_size_str);
3009 from_pos += record_size;
3010 continue;
3011 }
3012 from_pos -= 5;
3013 break;
3014 }
3015 I(from_pos >= 2 && from_pos < from.size());
3016 I('F' == from[from_pos]);
3017 ++from_pos;
3018 u32 ident = extract_datum_lsb<u32>(from, from_pos, ident_str);
3019 I(ident == file_id_int);
3020 u32 record_size = extract_datum_lsb<u32>(from, from_pos, record_size_str);
3021 string content, path;
3022 extract_variable_length_string(from, path, from_pos, path_str);
3023 extract_variable_length_string(from, content, from_pos, content_str);
3024 node_t n = file_t(new file_node(ident,file_id(content)));
3025 parse_binary_attrs(n,from,from_pos);
3026 parse_marking_binary(from, from_pos, n, marks);
3027 I(from_pos < from.size());
3028 I('F' == from[from_pos] || 'D' == from[from_pos] || 'X' == from[from_pos]);
3029 file = downcast_to_file_t(n);
3030 return true;
3031}
3032
3033bool
3034roster_get_revision_fid_info(const roster_data &dat,
3035 node_id const &file_id_int,
3036 file_t &file,
3037 marking_t &marks)
3038{
3039 if ('V' == dat.inner()()[0])
3040 {
3041 return roster_get_revision_fid_info_binary(dat, file_id_int, file, marks);
3042 }
3043 else if ('f' == dat.inner()()[0])
3044 {
3045 return roster_get_revision_fid_info_ascii(dat, file_id_int, file, marks);
3046 }
3047 else
3048 {
3049 throw logic_error((F("Expected roster to start with either 'V' or 'f', not '%c'") % dat.inner()()[0]).str());
3050 }
3051}
3052
3053
3054void
3055roster_t::parse_from(basic_io::parser & pa,
3056 marking_map & mm)
3057{
3058 // Instantiate some lookaside caches to ensure this roster reuses
3059 // string storage across ATOMIC elements.
3060 id::symtab id_syms;
3061 path_component::symtab path_syms;
3062 attr_key::symtab attr_key_syms;
3063 attr_value::symtab attr_value_syms;
3064
3065
3066 // We *always* parse the local part of a roster, because we do not
3067 // actually send the non-local part over the network; the only times
3068 // we serialize a manifest (non-local roster) is when we're printing
3069 // it out for a user, or when we're hashing it for a manifest ID.
3070 nodes.clear();
3071 root_dir.reset();
3072 mm.clear();
3073
3074 {
3075 pa.esym(syms::format_version);
3076 string vers;
3077 pa.str(vers);
3078 I(vers == "1");
3079 }
3080
3081 while(pa.symp())
3082 {
3083 string pth, ident, rev;
3084 node_t n;
3085
3086 if (pa.symp(syms::file))
3087 {
3088 string content;
3089 pa.sym();
3090 pa.str(pth);
3091 pa.esym(syms::content);
3092 pa.hex(content);
3093 pa.esym(syms::ident);
3094 pa.str(ident);
3095 n = file_t(new file_node(read_num(ident),
3096 file_id(content)));
3097 }
3098 else if (pa.symp(syms::dir))
3099 {
3100 pa.sym();
3101 pa.str(pth);
3102 pa.esym(syms::ident);
3103 pa.str(ident);
3104 n = dir_t(new dir_node(read_num(ident)));
3105 }
3106 else
3107 break;
3108
3109 I(static_cast<bool>(n));
3110
3111 safe_insert(nodes, make_pair(n->self, n));
3112 if (is_dir_t(n) && pth.empty())
3113 {
3114 I(! has_root());
3115 root_dir = downcast_to_dir_t(n);
3116 }
3117 else
3118 {
3119 I(!pth.empty());
3120 split_path sp;
3121 internal_string_to_split_path(pth, sp);
3122 attach_node(n->self, sp);
3123 }
3124
3125 // Non-dormant attrs
3126 while(pa.symp(syms::attr))
3127 {
3128 pa.sym();
3129 string k, v;
3130 pa.str(k);
3131 pa.str(v);
3132 safe_insert(n->attrs, make_pair(attr_key(k),
3133 make_pair(true, attr_value(v))));
3134 }
3135
3136 // Dormant attrs
3137 while(pa.symp(syms::dormant_attr))
3138 {
3139 pa.sym();
3140 string k;
3141 pa.str(k);
3142 safe_insert(n->attrs, make_pair(attr_key(k),
3143 make_pair(false, attr_value())));
3144 }
3145
3146 {
3147 marking_t & m(safe_insert(mm, make_pair(n->self, marking_t()))->second);
3148 parse_marking(pa, n, m);
3149 }
3150 }
3151}
3152
3153void
3154roster_t::parse_binary(const std::string &from,
3155 marking_map & mm)
3156{
3157 // Instantiate some lookaside caches to ensure this roster reuses
3158 // string storage across ATOMIC elements.
3159 id::symtab id_syms;
3160 path_component::symtab path_syms;
3161 attr_key::symtab attr_key_syms;
3162 attr_value::symtab attr_value_syms;
3163
3164 // We *always* parse the local part of a roster, because we do not
3165 // actually send the non-local part over the network; the only times
3166 // we serialize a manifest (non-local roster) is when we're printing
3167 // it out for a user, or when we're hashing it for a manifest ID.
3168 nodes.clear();
3169 root_dir.reset();
3170 mm.clear();
3171
3172 I(from.size() >= 2);
3173 I(from[0] == 'V');
3174 I(from[1] == (char)1);
3175 size_t from_pos = 2;
3176
3177 while(1)
3178 {
3179 I(from_pos < from.size());
3180 string pth, rev;
3181 node_t n;
3182
3183 if ('X' == from[from_pos])
3184 { // index, done
3185 break;
3186 }
3187 char type = from[from_pos];
3188 ++from_pos;
3189 if ('F' != type && 'D' != type)
3190 {
3191 throw logic_error((F("Unexpected character '%c' at position %d while parsing roster, expected X,F,D") % from[from_pos] % from_pos).str());
3192 }
3193 u32 ident = extract_datum_lsb<u32>(from, from_pos, ident_str);
3194 u32 record_size = extract_datum_lsb<u32>(from, from_pos, record_size_str);
3195 u32 start_record = from_pos;
3196 extract_variable_length_string(from, pth, from_pos, path_str);
3197
3198 if ('F' == type)
3199 {
3200 string content;
3201 extract_variable_length_string(from, content, from_pos, content_str);
3202 n = file_t(new file_node(ident,file_id(content)));
3203 }
3204 else if ('D' == type)
3205 {
3206 n = dir_t(new dir_node(ident));
3207 }
3208
3209 safe_insert(nodes, make_pair(n->self, n));
3210 if (is_dir_t(n) && pth.empty())
3211 {
3212 I(! has_root());
3213 root_dir = downcast_to_dir_t(n);
3214 }
3215 else
3216 {
3217 I(!pth.empty());
3218 split_path sp;
3219 internal_string_to_split_path(pth, sp);
3220 attach_node(n->self, sp);
3221 }
3222
3223 parse_binary_attrs(n,from,from_pos);
3224 {
3225 marking_t & m(safe_insert(mm, make_pair(n->self, marking_t()))->second);
3226 parse_marking_binary(from, from_pos, n, m);
3227 }
3228 I(record_size == from_pos - start_record);
3229 }
3230}
3231
3232void
3233read_roster_and_marking(roster_data const & dat,
3234 roster_t & ros,
3235 marking_map & mm)
3236{
3237 if ('V' == dat.inner()()[0])
3238 {
3239 ros.parse_binary(dat.inner()(), mm);
3240 }
3241 else if ('f' == dat.inner()()[0])
3242 {
3243 basic_io::input_source src(dat.inner()(), "roster");
3244 basic_io::tokenizer tok(src);
3245 basic_io::parser pars(tok);
3246 ros.parse_from(pars, mm);
3247 I(src.lookahead == EOF);
3248 }
3249 else
3250 {
3251 throw logic_error((F("Expected roster to start with either 'V' or 'f', not '%c'") % dat.inner()()[0]).str());
3252 }
3253 ros.check_sane_against(mm);
3254}
3255
3256
3257static void
3258write_roster_and_marking(roster_t const & ros,
3259 marking_map const & mm,
3260 roster_data & dat,
3261 bool print_local_parts,
3262 bool binary_ok)
3263{
3264 if (print_local_parts)
3265 ros.check_sane_against(mm);
3266 else
3267 ros.check_sane(true);
3268 if (binary_ok && print_local_parts) {
3269 // the complete roster is internal, it looks like the manifest varient
3270 // is used in a bunch of hashes, so we really can't safely move to a
3271 // binary format. Interestingly, because the ident is only printed if
3272 // print_local_parts is set, EricAnderson doesn't think that parse_from
3273 // will parse it because it expects to see an ident after a file chunk
3274 // of data.
3275
3276 string_queue out;
3277 ros.print_binary(out, mm);
3278 dat = roster_data(out.substr(0,out.size()));
3279 } else {
3280 basic_io::printer pr;
3281 ros.print_to(pr, mm, print_local_parts);
3282 dat = roster_data(pr.buf);
3283 }
3284}
3285
3286
3287void
3288write_roster_and_marking(roster_t const & ros,
3289 marking_map const & mm,
3290 roster_data & dat)
3291{
3292 write_roster_and_marking(ros, mm, dat, true, true);
3293}
3294
3295void
3296write_roster_and_marking_ascii(roster_t const & ros,
3297 marking_map const & mm,
3298 roster_data & dat)
3299{
3300 write_roster_and_marking(ros, mm, dat, true, false);
3301}
3302
3303
3304void
3305write_manifest_of_roster(roster_t const & ros,
3306 roster_data & dat)
3307{
3308 marking_map mm;
3309 write_roster_and_marking(ros, mm, dat, false, false);
3310}
3311
3312void calculate_ident(roster_t const & ros,
3313 manifest_id & ident)
3314{
3315 roster_data tmp;
3316 roster_id tid;
3317 if (!ros.all_nodes().empty())
3318 {
3319 write_manifest_of_roster(ros, tmp);
3320 calculate_ident(tmp, tid);
3321 }
3322 ident = tid.inner();
3323}
3324
3325////////////////////////////////////////////////////////////////////
3326// testing
3327////////////////////////////////////////////////////////////////////
3328
3329#ifdef BUILD_UNIT_TESTS
3330#include "unit_tests.hh"
3331#include "sanity.hh"
3332#include "constants.hh"
3333#include "randomizer.hh"
3334
3335#include <string>
3336#include <cstdlib>
3337#include <boost/lexical_cast.hpp>
3338
3339using std::logic_error;
3340using std::search;
3341
3342using boost::shared_ptr;
3343
3344using randomizer::uniform;
3345using randomizer::flip;
3346
3347static void
3348make_fake_marking_for(roster_t const & r, marking_map & mm)
3349{
3350 mm.clear();
3351 revision_id rid(string("0123456789abcdef0123456789abcdef01234567"));
3352 for (node_map::const_iterator i = r.all_nodes().begin(); i != r.all_nodes().end();
3353 ++i)
3354 {
3355 marking_t fake_marks;
3356 mark_new_node(rid, i->second, fake_marks);
3357 mm.insert(make_pair(i->first, fake_marks));
3358 }
3359}
3360
3361static void
3362do_testing_on_one_roster(roster_t const & r)
3363{
3364 if (!r.has_root())
3365 {
3366 I(r.all_nodes().size() == 0);
3367 // not much testing to be done on an empty roster -- can't iterate over
3368 // it or read/write it.
3369 return;
3370 }
3371
3372 MM(r);
3373 // test dfs_iter by making sure it returns the same number of items as there
3374 // are items in all_nodes()
3375 int n; MM(n);
3376 n = r.all_nodes().size();
3377 int dfs_counted = 0; MM(dfs_counted);
3378 split_path root_name;
3379 file_path().split(root_name);
3380 for (dfs_iter i(downcast_to_dir_t(r.get_node(root_name))); !i.finished(); ++i)
3381 ++dfs_counted;
3382 I(n == dfs_counted);
3383
3384 // do a read/write spin
3385 roster_data r_dat; MM(r_dat);
3386 marking_map fm;
3387 make_fake_marking_for(r, fm);
3388 write_roster_and_marking(r, fm, r_dat);
3389 roster_t r2; MM(r2);
3390 marking_map fm2;
3391 read_roster_and_marking(r_dat, r2, fm2);
3392 I(r == r2);
3393 I(fm == fm2);
3394 roster_data r2_dat; MM(r2_dat);
3395 write_roster_and_marking(r2, fm2, r2_dat);
3396 I(r_dat == r2_dat);
3397}
3398
3399static void
3400do_testing_on_two_equivalent_csets(cset const & a, cset const & b)
3401{
3402 // we do all this reading/writing/comparing of both strings and objects to
3403 // cross-check the reading, writing, and comparison logic against each
3404 // other. (if, say, there is a field in cset that == forgets to check but
3405 // that write remembers to include, this should catch it).
3406 MM(a);
3407 MM(b);
3408 I(a == b);
3409
3410 data a_dat, b_dat, a2_dat, b2_dat;
3411 MM(a_dat);
3412 MM(b_dat);
3413 MM(a2_dat);
3414 MM(b2_dat);
3415
3416 write_cset(a, a_dat);
3417 write_cset(b, b_dat);
3418 I(a_dat == b_dat);
3419 cset a2, b2;
3420 MM(a2);
3421 MM(b2);
3422 read_cset(a_dat, a2);
3423 read_cset(b_dat, b2);
3424 I(a2 == a);
3425 I(b2 == b);
3426 I(b2 == a);
3427 I(a2 == b);
3428 I(a2 == b2);
3429 write_cset(a2, a2_dat);
3430 write_cset(b2, b2_dat);
3431 I(a_dat == a2_dat);
3432 I(b_dat == b2_dat);
3433}
3434
3435static void
3436apply_cset_and_do_testing(roster_t & r, cset const & cs, node_id_source & nis)
3437{
3438 MM(r);
3439 MM(cs);
3440 roster_t original = r;
3441 MM(original);
3442 I(original == r);
3443
3444 editable_roster_base e(r, nis);
3445 cs.apply_to(e);
3446
3447 cset derived;
3448 MM(derived);
3449 make_cset(original, r, derived);
3450
3451 do_testing_on_two_equivalent_csets(cs, derived);
3452 do_testing_on_one_roster(r);
3453}
3454
3455static void
3456tests_on_two_rosters(roster_t const & a, roster_t const & b, node_id_source & nis)
3457{
3458 MM(a);
3459 MM(b);
3460
3461 do_testing_on_one_roster(a);
3462 do_testing_on_one_roster(b);
3463
3464 cset a_to_b; MM(a_to_b);
3465 cset b_to_a; MM(b_to_a);
3466 make_cset(a, b, a_to_b);
3467 make_cset(b, a, b_to_a);
3468 roster_t a2(b); MM(a2);
3469 roster_t b2(a); MM(b2);
3470 // we can't use a cset to entirely empty out a roster, so don't bother doing
3471 // the apply_to tests towards an empty roster
3472 // (NOTE: if you notice this special case in a time when root dirs can be
3473 // renamed or deleted, remove it, it will no longer be necessary.
3474 if (!a.all_nodes().empty())
3475 {
3476 editable_roster_base eb(a2, nis);
3477 b_to_a.apply_to(eb);
3478 }
3479 else
3480 a2 = a;
3481 if (!b.all_nodes().empty())
3482 {
3483 editable_roster_base ea(b2, nis);
3484 a_to_b.apply_to(ea);
3485 }
3486 else
3487 b2 = b;
3488 // We'd like to assert that a2 == a and b2 == b, but we can't, because they
3489 // will have new ids assigned.
3490 // But they _will_ have the same manifests, assuming things are working
3491 // correctly.
3492 roster_data a_dat; MM(a_dat);
3493 roster_data a2_dat; MM(a2_dat);
3494 roster_data b_dat; MM(b_dat);
3495 roster_data b2_dat; MM(b2_dat);
3496 if (a.has_root())
3497 write_manifest_of_roster(a, a_dat);
3498 if (a2.has_root())
3499 write_manifest_of_roster(a2, a2_dat);
3500 if (b.has_root())
3501 write_manifest_of_roster(b, b_dat);
3502 if (b2.has_root())
3503 write_manifest_of_roster(b2, b2_dat);
3504 I(a_dat == a2_dat);
3505 I(b_dat == b2_dat);
3506
3507 cset a2_to_b2; MM(a2_to_b2);
3508 cset b2_to_a2; MM(b2_to_a2);
3509 make_cset(a2, b2, a2_to_b2);
3510 make_cset(b2, a2, b2_to_a2);
3511 do_testing_on_two_equivalent_csets(a_to_b, a2_to_b2);
3512 do_testing_on_two_equivalent_csets(b_to_a, b2_to_a2);
3513}
3514
3515template<typename M>
3516typename M::const_iterator
3517random_element(M const & m)
3518{
3519 size_t i = randomizer::uniform(m.size());
3520 typename M::const_iterator j = m.begin();
3521 while (i > 0)
3522 {
3523 I(j != m.end());
3524 --i;
3525 ++j;
3526 }
3527 return j;
3528}
3529
3530string new_word()
3531{
3532 static string wordchars = "abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
3533 static unsigned tick = 0;
3534 string tmp;
3535 do
3536 {
3537 tmp += wordchars[uniform(wordchars.size())];
3538 }
3539 while (tmp.size() < 10 && !flip(10));
3540 return tmp + lexical_cast<string>(tick++);
3541}
3542
3543file_id new_ident()
3544{
3545 static string tab = "0123456789abcdef";
3546 string tmp;
3547 tmp.reserve(constants::idlen);
3548 for (unsigned i = 0; i < constants::idlen; ++i)
3549 tmp += tab[uniform(tab.size())];
3550 return file_id(tmp);
3551}
3552
3553path_component new_component()
3554{
3555 split_path pieces;
3556 file_path_internal(new_word()).split(pieces);
3557 return pieces.back();
3558}
3559
3560
3561attr_key pick_attr(full_attr_map_t const & attrs)
3562{
3563 return random_element(attrs)->first;
3564}
3565
3566attr_key pick_attr(attr_map_t const & attrs)
3567{
3568 return random_element(attrs)->first;
3569}
3570
3571bool parent_of(split_path const & p,
3572 split_path const & c)
3573{
3574 bool is_parent = false;
3575
3576 if (p.size() <= c.size())
3577 {
3578 split_path::const_iterator c_anchor =
3579 search(c.begin(), c.end(),
3580 p.begin(), p.end());
3581
3582 is_parent = (c_anchor == c.begin());
3583 }
3584
3585 // L(FL("path '%s' is%s parent of '%s'")
3586 // % file_path(p)
3587 // % (is_parent ? "" : " not")
3588 // % file_path(c));
3589
3590 return is_parent;
3591}
3592
3593struct
3594change_automaton
3595{
3596
3597 void perform_random_action(roster_t & r, node_id_source & nis)
3598 {
3599 cset c;
3600 while (c.empty())
3601 {
3602 if (r.all_nodes().empty())
3603 {
3604 // Must add, couldn't find anything to work with
3605 split_path root;
3606 root.push_back(the_null_component);
3607 c.dirs_added.insert(root);
3608 }
3609 else
3610 {
3611 node_t n = random_element(r.all_nodes())->second;
3612 split_path pth;
3613 r.get_name(n->self, pth);
3614 // L(FL("considering acting on '%s'") % file_path(pth));
3615
3616 switch (uniform(7))
3617 {
3618 default:
3619 case 0:
3620 case 1:
3621 case 2:
3622 if (is_file_t(n) || (pth.size() > 1 && flip()))
3623 // Add a sibling of an existing entry.
3624 pth[pth.size() - 1] = new_component();
3625
3626 else
3627 // Add a child of an existing entry.
3628 pth.push_back(new_component());
3629
3630 if (flip())
3631 {
3632 // L(FL("adding dir '%s'") % file_path(pth));
3633 safe_insert(c.dirs_added, pth);
3634 }
3635 else
3636 {
3637 // L(FL("adding file '%s'") % file_path(pth));
3638 safe_insert(c.files_added, make_pair(pth, new_ident()));
3639 }
3640 break;
3641
3642 case 3:
3643 if (is_file_t(n))
3644 {
3645 safe_insert(c.deltas_applied,
3646 make_pair
3647 (pth, make_pair(downcast_to_file_t(n)->content,
3648 new_ident())));
3649 }
3650 break;
3651
3652 case 4:
3653 {
3654 node_t n2 = random_element(r.all_nodes())->second;
3655 split_path pth2;
3656 r.get_name(n2->self, pth2);
3657
3658 if (n == n2)
3659 continue;
3660
3661 if (is_file_t(n2) || (pth2.size() > 1 && flip()))
3662 {
3663 // L(FL("renaming to a sibling of an existing entry '%s'") % file_path(pth2));
3664 // Move to a sibling of an existing entry.
3665 pth2[pth2.size() - 1] = new_component();
3666 }
3667
3668 else
3669 {
3670 // L(FL("renaming to a child of an existing entry '%s'") % file_path(pth2));
3671 // Move to a child of an existing entry.
3672 pth2.push_back(new_component());
3673 }
3674
3675 if (!parent_of(pth, pth2))
3676 {
3677 // L(FL("renaming '%s' -> '%s") % file_path(pth) % file_path(pth2));
3678 safe_insert(c.nodes_renamed, make_pair(pth, pth2));
3679 }
3680 }
3681 break;
3682
3683 case 5:
3684 if (!null_node(n->parent) &&
3685 (is_file_t(n) || downcast_to_dir_t(n)->children.empty()))
3686 {
3687 // L(FL("deleting '%s'") % file_path(pth));
3688 safe_insert(c.nodes_deleted, pth);
3689 }
3690 break;
3691
3692 case 6:
3693 if (!n->attrs.empty() && flip())
3694 {
3695 attr_key k = pick_attr(n->attrs);
3696 if (safe_get(n->attrs, k).first)
3697 {
3698 if (flip())
3699 {
3700 // L(FL("clearing attr on '%s'") % file_path(pth));
3701 safe_insert(c.attrs_cleared, make_pair(pth, k));
3702 }
3703 else
3704 {
3705 // L(FL("changing attr on '%s'\n) % file_path(pth));
3706 safe_insert(c.attrs_set, make_pair(make_pair(pth, k), new_word()));
3707 }
3708 }
3709 else
3710 {
3711 // L(FL("setting previously set attr on '%s'") % file_path(pth));
3712 safe_insert(c.attrs_set, make_pair(make_pair(pth, k), new_word()));
3713 }
3714 }
3715 else
3716 {
3717 // L(FL("setting attr on '%s'") % file_path(pth));
3718 safe_insert(c.attrs_set, make_pair(make_pair(pth, new_word()), new_word()));
3719 }
3720 break;
3721 }
3722 }
3723 }
3724 // now do it
3725 apply_cset_and_do_testing(r, c, nis);
3726 }
3727};
3728
3729testing_node_id_source::testing_node_id_source()
3730 : curr(first_node)
3731{}
3732
3733node_id
3734testing_node_id_source::next()
3735{
3736 // L(FL("creating node %x") % curr);
3737 node_id n = curr++;
3738 I(!temp_node(n));
3739 return n;
3740}
3741
3742template <> void
3743dump(int const & i, string & out)
3744{
3745 out = lexical_cast<string>(i) + "\n";
3746}
3747
3748static void
3749automaton_roster_test()
3750{
3751 roster_t r;
3752 change_automaton aut;
3753 testing_node_id_source nis;
3754
3755 roster_t empty, prev;
3756
3757 for (int i = 0; i < 2000; ++i)
3758 {
3759 MM(i);
3760 if (i % 100 == 0)
3761 P(F("performing random action %d") % i);
3762 // test operator==
3763 I(r == r);
3764 aut.perform_random_action(r, nis);
3765 if (i == 0)
3766 prev = r;
3767 else
3768 {
3769 // test operator==
3770 I(!(prev == r));
3771 }
3772 // some randomly made up magic numbers, just to make sure we do tests on
3773 // rosters that have a number of changes between them, not just a single
3774 // change.
3775 if (i == 4 || i == 50 || i == 100 || i == 200 || i == 205
3776 || i == 500 || i == 640 || i == 1200 || i == 1900 || i == 1910)
3777 {
3778 tests_on_two_rosters(prev, r, nis);
3779 tests_on_two_rosters(empty, r, nis);
3780 prev = r;
3781 }
3782 }
3783}
3784
3785// some of our raising operations leave our state corrupted. so rather than
3786// trying to do all the illegal things in one pass, we re-run this function a
3787// bunch of times, and each time we do only one of these potentially
3788// corrupting tests. Test numbers are in the range [0, total).
3789
3790#define MAYBE(code) if (total == to_run) { L(FL(#code)); code; return; } ++total
3791
3792static void
3793check_sane_roster_do_tests(int to_run, int& total)
3794{
3795 total = 0;
3796 testing_node_id_source nis;
3797 roster_t r;
3798 MM(r);
3799
3800 // roster must have a root dir
3801 MAYBE(BOOST_CHECK_THROW(r.check_sane(false), logic_error));
3802 MAYBE(BOOST_CHECK_THROW(r.check_sane(true), logic_error));
3803
3804 split_path sp_, sp_foo, sp_foo_bar, sp_foo_baz;
3805 file_path().split(sp_);
3806 file_path_internal("foo").split(sp_foo);
3807 file_path_internal("foo/bar").split(sp_foo_bar);
3808 file_path_internal("foo/baz").split(sp_foo_baz);
3809 node_id nid_f = r.create_file_node(file_id(string("0000000000000000000000000000000000000000")),
3810 nis);
3811 // root must be a directory, not a file
3812 MAYBE(BOOST_CHECK_THROW(r.attach_node(nid_f, sp_), logic_error));
3813
3814 node_id root_dir = r.create_dir_node(nis);
3815 r.attach_node(root_dir, sp_);
3816 // has a root dir, but a detached file
3817 MAYBE(BOOST_CHECK_THROW(r.check_sane(false), logic_error));
3818 MAYBE(BOOST_CHECK_THROW(r.check_sane(true), logic_error));
3819
3820 r.attach_node(nid_f, sp_foo);
3821 // now should be sane
3822 BOOST_CHECK_NOT_THROW(r.check_sane(false), logic_error);
3823 BOOST_CHECK_NOT_THROW(r.check_sane(true), logic_error);
3824
3825 node_id nid_d = r.create_dir_node(nis);
3826 // if "foo" exists, can't attach another node at "foo"
3827 MAYBE(BOOST_CHECK_THROW(r.attach_node(nid_d, sp_foo), logic_error));
3828 // if "foo" is a file, can't attach a node at "foo/bar"
3829 MAYBE(BOOST_CHECK_THROW(r.attach_node(nid_d, sp_foo_bar), logic_error));
3830
3831 BOOST_CHECK(r.detach_node(sp_foo) == nid_f);
3832 r.attach_node(nid_d, sp_foo);
3833 r.attach_node(nid_f, sp_foo_bar);
3834 BOOST_CHECK_NOT_THROW(r.check_sane(false), logic_error);
3835 BOOST_CHECK_NOT_THROW(r.check_sane(true), logic_error);
3836
3837 temp_node_id_source nis_tmp;
3838 node_id nid_tmp = r.create_dir_node(nis_tmp);
3839 // has a detached node
3840 MAYBE(BOOST_CHECK_THROW(r.check_sane(false), logic_error));
3841 MAYBE(BOOST_CHECK_THROW(r.check_sane(true), logic_error));
3842 r.attach_node(nid_tmp, sp_foo_baz);
3843 // now has no detached nodes, but one temp node
3844 MAYBE(BOOST_CHECK_THROW(r.check_sane(false), logic_error));
3845 BOOST_CHECK_NOT_THROW(r.check_sane(true), logic_error);
3846}
3847
3848#undef MAYBE
3849
3850static void
3851check_sane_roster_test()
3852{
3853 int total;
3854 check_sane_roster_do_tests(-1, total);
3855 for (int to_run = 0; to_run < total; ++to_run)
3856 {
3857 L(FL("check_sane_roster_test: loop = %i (of %i)") % to_run % (total - 1));
3858 int tmp;
3859 check_sane_roster_do_tests(to_run, tmp);
3860 }
3861}
3862
3863static void
3864check_sane_roster_loop_test()
3865{
3866 testing_node_id_source nis;
3867 roster_t r; MM(r);
3868 split_path root, foo_bar;
3869 file_path().split(root);
3870 file_path_internal("foo/bar").split(foo_bar);
3871 r.attach_node(r.create_dir_node(nis), root);
3872 node_id nid_foo = r.create_dir_node(nis);
3873 node_id nid_bar = r.create_dir_node(nis);
3874 r.attach_node(nid_foo, nid_bar, foo_bar[1]);
3875 r.attach_node(nid_bar, nid_foo, foo_bar[2]);
3876 BOOST_CHECK_THROW(r.check_sane(true), logic_error);
3877}
3878
3879static void
3880check_sane_roster_screwy_dir_map()
3881{
3882 testing_node_id_source nis;
3883 roster_t r; MM(r);
3884 split_path root, foo;
3885 file_path().split(root);
3886 file_path_internal("foo").split(foo);
3887 r.attach_node(r.create_dir_node(nis), root);
3888 roster_t other; MM(other);
3889 node_id other_nid = other.create_dir_node(nis);
3890 dir_t root_n = downcast_to_dir_t(r.get_node(root));
3891 root_n->children.insert(make_pair(*(foo.end()-1), other.get_node(other_nid)));
3892 BOOST_CHECK_THROW(r.check_sane(), logic_error);
3893 // well, but that one was easy, actually, because a dir traversal will hit
3894 // more nodes than actually exist... so let's make it harder, by making sure
3895 // that a dir traversal will hit exactly as many nodes as actually exist.
3896 node_id distractor_nid = r.create_dir_node(nis);
3897 BOOST_CHECK_THROW(r.check_sane(), logic_error);
3898 // and even harder, by making that node superficially valid too
3899 dir_t distractor_n = downcast_to_dir_t(r.get_node(distractor_nid));
3900 distractor_n->parent = distractor_nid;
3901 distractor_n->name = *(foo.end()-1);
3902 distractor_n->children.insert(make_pair(distractor_n->name, distractor_n));
3903 BOOST_CHECK_THROW(r.check_sane(), logic_error);
3904}
3905
3906static void
3907bad_attr_test()
3908{
3909 testing_node_id_source nis;
3910 roster_t r; MM(r);
3911 split_path root;
3912 file_path().split(root);
3913 r.attach_node(r.create_dir_node(nis), root);
3914 BOOST_CHECK_THROW(r.set_attr(root, attr_key("test_key1"),
3915 make_pair(false, attr_value("invalid"))),
3916 logic_error);
3917 BOOST_CHECK_NOT_THROW(r.check_sane(true), logic_error);
3918 safe_insert(r.get_node(root)->attrs,
3919 make_pair(attr_key("test_key2"),
3920 make_pair(false, attr_value("invalid"))));
3921 BOOST_CHECK_THROW(r.check_sane(true), logic_error);
3922}
3923
3924////////////////////////////////////////////////////////////////////////
3925// exhaustive marking tests
3926////////////////////////////////////////////////////////////////////////
3927
3928// The marking/roster generation code is extremely critical. It is the very
3929// core of monotone's versioning technology, very complex, and bugs can result
3930// in corrupt and nonsensical histories (not to mention erroneous merges and
3931// the like). Furthermore, the code that implements it is littered with
3932// case-by-case analysis, where copy-paste errors could easily occur. So the
3933// purpose of this section is to systematically and exhaustively test every
3934// possible case.
3935//
3936// Our underlying merger, *-merge, works on scalars, case-by-case. The cases are:
3937// 0 parent:
3938// a*
3939// 1 parent:
3940// a a
3941// | |
3942// a b*
3943// 2 parents:
3944// a a a a a b a b
3945// \ / \ / \ / \ /
3946// a b* c* a?
3947//
3948// Each node has a number of scalars associated with it:
3949// * basename+parent
3950// * file content (iff a file)
3951// * attributes
3952//
3953// So for each scalar, we want to test each way it can appear in each of the
3954// above shapes. This is made more complex by lifecycles. We can achieve a 0
3955// parent node as:
3956// * a node in a 0-parent roster (root revision)
3957// * a newly added node in a 1-parent roster
3958// * a newly added node in a 2-parent roster
3959// a 1 parent node as:
3960// * a pre-existing node in a 1-parent roster
3961// * a node in a 2-parent roster that only existed in one of the parents
3962// a 2 parent node as:
3963// * a pre-existing node in a 2-parent roster
3964//
3965// Because the basename+parent and file_content scalars have lifetimes that
3966// exactly match the lifetime of the node they are on, those are all the cases
3967// for these scalars. However, attrs make things a bit more complicated,
3968// because they can be added. An attr can have 0 parents:
3969// * in any of the above cases, with an attribute newly added on the node
3970// And one parent:
3971// * in any of the cases above with one node parent and the attr pre-existing
3972// * in a 2-parent node where the attr exists in only one of the parents
3973//
3974// Plus, just to be sure, in the merge cases we check both the given example
3975// and the mirror-reversed one, since the code implementing this could
3976// conceivably mark merge(A, B) right but get merge(B, A) wrong. And for the
3977// scalars that can appear on either files or dirs, we check both.
3978
3979// The following somewhat elaborate code implements all these checks. The
3980// most important background assumption to know, is that it always assumes
3981// (and this assumption is hard-coded in various places) that it is looking at
3982// one of the following topologies:
3983//
3984// old
3985//
3986// old
3987// |
3988// new
3989//
3990// old
3991// / \.
3992// left right
3993// \ /
3994// new
3995//
3996// There is various tricksiness in making sure that the root directory always
3997// has the right birth_revision, that nodes are created with good birth
3998// revisions and sane markings on the scalars we are not interested in, etc.
3999// This code is ugly and messy and could use refactoring, but it seems to
4000// work.
4001
4002////////////////
4003// These are some basic utility pieces handy for the exhaustive mark tests
4004
4005namespace
4006{
4007 template <typename T> set<T>
4008 singleton(T const & t)
4009 {
4010 set<T> s;
4011 s.insert(t);
4012 return s;
4013 }
4014
4015 template <typename T> set<T>
4016 doubleton(T const & t1, T const & t2)
4017 {
4018 set<T> s;
4019 s.insert(t1);
4020 s.insert(t2);
4021 return s;
4022 }
4023
4024 revision_id old_rid(string("0000000000000000000000000000000000000000"));
4025 revision_id left_rid(string("1111111111111111111111111111111111111111"));
4026 revision_id right_rid(string("2222222222222222222222222222222222222222"));
4027 revision_id new_rid(string("4444444444444444444444444444444444444444"));
4028
4029 split_path
4030 split(string const & s)
4031 {
4032 split_path sp;
4033 file_path_internal(s).split(sp);
4034 return sp;
4035 }
4036
4037////////////////
4038// These classes encapsulate information about all the different scalars
4039// that *-merge applies to.
4040
4041 typedef enum { scalar_a, scalar_b, scalar_c,
4042 scalar_none, scalar_none_2 } scalar_val;
4043
4044 void
4045 dump(scalar_val const & val, string & out)
4046 {
4047 switch (val)
4048 {
4049 case scalar_a: out = "scalar_a"; break;
4050 case scalar_b: out = "scalar_b"; break;
4051 case scalar_c: out = "scalar_c"; break;
4052 case scalar_none: out = "scalar_none"; break;
4053 case scalar_none_2: out = "scalar_none_2"; break;
4054 }
4055 out += "\n";
4056 }
4057
4058 struct a_scalar
4059 {
4060 virtual void set(revision_id const & scalar_origin_rid,
4061 scalar_val val, set<revision_id> const & this_scalar_mark,
4062 roster_t & roster, marking_map & markings)
4063 = 0;
4064 virtual ~a_scalar() {};
4065
4066 node_id_source & nis;
4067 node_id const root_nid;
4068 node_id const obj_under_test_nid;
4069 a_scalar(node_id_source & nis)
4070 : nis(nis), root_nid(nis.next()), obj_under_test_nid(nis.next())
4071 {}
4072
4073 void setup(roster_t & roster, marking_map & markings)
4074 {
4075 roster.create_dir_node(root_nid);
4076 roster.attach_node(root_nid, split(""));
4077 marking_t marking;
4078 marking.birth_revision = old_rid;
4079 marking.parent_name.insert(old_rid);
4080 safe_insert(markings, make_pair(root_nid, marking));
4081 }
4082
4083 virtual string my_type() const = 0;
4084
4085 virtual void dump(string & out) const
4086 {
4087 ostringstream oss;
4088 oss << "type: " << my_type() << "\n"
4089 << "root_nid: " << root_nid << "\n"
4090 << "obj_under_test_nid: " << obj_under_test_nid << "\n";
4091 out = oss.str();
4092 }
4093 };
4094
4095 void
4096 dump(a_scalar const & s, string & out)
4097 {
4098 s.dump(out);
4099 }
4100
4101 struct file_maker
4102 {
4103 static void make_obj(revision_id const & scalar_origin_rid, node_id nid,
4104 roster_t & roster, marking_map & markings)
4105 {
4106 make_file(scalar_origin_rid, nid,
4107 file_id(string("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")),
4108 roster, markings);
4109 }
4110 static void make_file(revision_id const & scalar_origin_rid, node_id nid,
4111 file_id const & fid,
4112 roster_t & roster, marking_map & markings)
4113 {
4114 roster.create_file_node(fid, nid);
4115 marking_t marking;
4116 marking.birth_revision = scalar_origin_rid;
4117 marking.parent_name = marking.file_content = singleton(scalar_origin_rid);
4118 safe_insert(markings, make_pair(nid, marking));
4119 }
4120 };
4121
4122 struct dir_maker
4123 {
4124 static void make_obj(revision_id const & scalar_origin_rid, node_id nid,
4125 roster_t & roster, marking_map & markings)
4126 {
4127 roster.create_dir_node(nid);
4128 marking_t marking;
4129 marking.birth_revision = scalar_origin_rid;
4130 marking.parent_name = singleton(scalar_origin_rid);
4131 safe_insert(markings, make_pair(nid, marking));
4132 }
4133 };
4134
4135 struct file_content_scalar : public a_scalar
4136 {
4137 virtual string my_type() const { return "file_content_scalar"; }
4138
4139 map<scalar_val, file_id> values;
4140 file_content_scalar(node_id_source & nis)
4141 : a_scalar(nis)
4142 {
4143 safe_insert(values,
4144 make_pair(scalar_a,
4145 file_id(string("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))));
4146 safe_insert(values,
4147 make_pair(scalar_b,
4148 file_id(string("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"))));
4149 safe_insert(values,
4150 make_pair(scalar_c,
4151 file_id(string("cccccccccccccccccccccccccccccccccccccccc"))));
4152 }
4153 virtual void
4154 set(revision_id const & scalar_origin_rid, scalar_val val,
4155 std::set<revision_id> const & this_scalar_mark,
4156 roster_t & roster, marking_map & markings)
4157 {
4158 setup(roster, markings);
4159 if (val != scalar_none)
4160 {
4161 file_maker::make_file(scalar_origin_rid, obj_under_test_nid,
4162 safe_get(values, val),
4163 roster, markings);
4164 roster.attach_node(obj_under_test_nid, split("foo"));
4165 markings[obj_under_test_nid].file_content = this_scalar_mark;
4166 }
4167 roster.check_sane_against(markings);
4168 }
4169 };
4170
4171 template <typename T>
4172 struct X_basename_scalar : public a_scalar
4173 {
4174 virtual string my_type() const { return "X_basename_scalar"; }
4175
4176 map<scalar_val, split_path> values;
4177 X_basename_scalar(node_id_source & nis)
4178 : a_scalar(nis)
4179 {
4180 safe_insert(values, make_pair(scalar_a, split("a")));
4181 safe_insert(values, make_pair(scalar_b, split("b")));
4182 safe_insert(values, make_pair(scalar_c, split("c")));
4183 }
4184 virtual void
4185 set(revision_id const & scalar_origin_rid, scalar_val val,
4186 std::set<revision_id> const & this_scalar_mark,
4187 roster_t & roster, marking_map & markings)
4188 {
4189 setup(roster, markings);
4190 if (val != scalar_none)
4191 {
4192 T::make_obj(scalar_origin_rid, obj_under_test_nid, roster, markings);
4193 roster.attach_node(obj_under_test_nid, safe_get(values, val));
4194 markings[obj_under_test_nid].parent_name = this_scalar_mark;
4195 }
4196 roster.check_sane_against(markings);
4197 }
4198 };
4199
4200 template <typename T>
4201 struct X_parent_scalar : public a_scalar
4202 {
4203 virtual string my_type() const { return "X_parent_scalar"; }
4204
4205 map<scalar_val, split_path> values;
4206 node_id const a_nid, b_nid, c_nid;
4207 X_parent_scalar(node_id_source & nis)
4208 : a_scalar(nis), a_nid(nis.next()), b_nid(nis.next()), c_nid(nis.next())
4209 {
4210 safe_insert(values, make_pair(scalar_a, split("dir_a/foo")));
4211 safe_insert(values, make_pair(scalar_b, split("dir_b/foo")));
4212 safe_insert(values, make_pair(scalar_c, split("dir_c/foo")));
4213 }
4214 void
4215 setup_dirs(roster_t & roster, marking_map & markings)
4216 {
4217 roster.create_dir_node(a_nid);
4218 roster.attach_node(a_nid, split("dir_a"));
4219 roster.create_dir_node(b_nid);
4220 roster.attach_node(b_nid, split("dir_b"));
4221 roster.create_dir_node(c_nid);
4222 roster.attach_node(c_nid, split("dir_c"));
4223 marking_t marking;
4224 marking.birth_revision = old_rid;
4225 marking.parent_name.insert(old_rid);
4226 safe_insert(markings, make_pair(a_nid, marking));
4227 safe_insert(markings, make_pair(b_nid, marking));
4228 safe_insert(markings, make_pair(c_nid, marking));
4229 }
4230 virtual void
4231 set(revision_id const & scalar_origin_rid, scalar_val val,
4232 std::set<revision_id> const & this_scalar_mark,
4233 roster_t & roster, marking_map & markings)
4234 {
4235 setup(roster, markings);
4236 setup_dirs(roster, markings);
4237 if (val != scalar_none)
4238 {
4239 T::make_obj(scalar_origin_rid, obj_under_test_nid, roster, markings);
4240 roster.attach_node(obj_under_test_nid, safe_get(values, val));
4241 markings[obj_under_test_nid].parent_name = this_scalar_mark;
4242 }
4243 roster.check_sane_against(markings);
4244 }
4245 };
4246
4247 // this scalar represents an attr whose node already exists, and we put an
4248 // attr on it.
4249 template <typename T>
4250 struct X_attr_existing_node_scalar : public a_scalar
4251 {
4252 virtual string my_type() const { return "X_attr_scalar"; }
4253
4254 map<scalar_val, pair<bool, attr_value> > values;
4255 X_attr_existing_node_scalar(node_id_source & nis)
4256 : a_scalar(nis)
4257 {
4258 safe_insert(values, make_pair(scalar_a, make_pair(true, attr_value("a"))));
4259 safe_insert(values, make_pair(scalar_b, make_pair(true, attr_value("b"))));
4260 safe_insert(values, make_pair(scalar_c, make_pair(true, attr_value("c"))));
4261 }
4262 virtual void
4263 set(revision_id const & scalar_origin_rid, scalar_val val,
4264 std::set<revision_id> const & this_scalar_mark,
4265 roster_t & roster, marking_map & markings)
4266 {
4267 setup(roster, markings);
4268 // _not_ scalar_origin_rid, because our object exists everywhere, regardless of
4269 // when the attr shows up
4270 T::make_obj(old_rid, obj_under_test_nid, roster, markings);
4271 roster.attach_node(obj_under_test_nid, split("foo"));
4272 if (val != scalar_none)
4273 {
4274 safe_insert(roster.get_node(obj_under_test_nid)->attrs,
4275 make_pair(attr_key("test_key"), safe_get(values, val)));
4276 markings[obj_under_test_nid].attrs[attr_key("test_key")] = this_scalar_mark;
4277 }
4278 roster.check_sane_against(markings);
4279 }
4280 };
4281
4282 // this scalar represents an attr whose node does not exist; we create the
4283 // node when we create the attr.
4284 template <typename T>
4285 struct X_attr_new_node_scalar : public a_scalar
4286 {
4287 virtual string my_type() const { return "X_attr_scalar"; }
4288
4289 map<scalar_val, pair<bool, attr_value> > values;
4290 X_attr_new_node_scalar(node_id_source & nis)
4291 : a_scalar(nis)
4292 {
4293 safe_insert(values, make_pair(scalar_a, make_pair(true, attr_value("a"))));
4294 safe_insert(values, make_pair(scalar_b, make_pair(true, attr_value("b"))));
4295 safe_insert(values, make_pair(scalar_c, make_pair(true, attr_value("c"))));
4296 }
4297 virtual void
4298 set(revision_id const & scalar_origin_rid, scalar_val val,
4299 std::set<revision_id> const & this_scalar_mark,
4300 roster_t & roster, marking_map & markings)
4301 {
4302 setup(roster, markings);
4303 if (val != scalar_none)
4304 {
4305 T::make_obj(scalar_origin_rid, obj_under_test_nid, roster, markings);
4306 roster.attach_node(obj_under_test_nid, split("foo"));
4307 safe_insert(roster.get_node(obj_under_test_nid)->attrs,
4308 make_pair(attr_key("test_key"), safe_get(values, val)));
4309 markings[obj_under_test_nid].attrs[attr_key("test_key")] = this_scalar_mark;
4310 }
4311 roster.check_sane_against(markings);
4312 }
4313 };
4314
4315 typedef vector<shared_ptr<a_scalar> > scalars;
4316 scalars
4317 all_scalars(node_id_source & nis)
4318 {
4319 scalars ss;
4320 ss.push_back(shared_ptr<a_scalar>(new file_content_scalar(nis)));
4321 ss.push_back(shared_ptr<a_scalar>(new X_basename_scalar<file_maker>(nis)));
4322 ss.push_back(shared_ptr<a_scalar>(new X_basename_scalar<dir_maker>(nis)));
4323 ss.push_back(shared_ptr<a_scalar>(new X_parent_scalar<file_maker>(nis)));
4324 ss.push_back(shared_ptr<a_scalar>(new X_parent_scalar<dir_maker>(nis)));
4325 ss.push_back(shared_ptr<a_scalar>(new X_attr_existing_node_scalar<file_maker>(nis)));
4326 ss.push_back(shared_ptr<a_scalar>(new X_attr_existing_node_scalar<dir_maker>(nis)));
4327 ss.push_back(shared_ptr<a_scalar>(new X_attr_new_node_scalar<file_maker>(nis)));
4328 ss.push_back(shared_ptr<a_scalar>(new X_attr_new_node_scalar<dir_maker>(nis)));
4329 return ss;
4330 }
4331}
4332
4333////////////////
4334// These functions encapsulate the logic for running a particular mark
4335// scenario with a particular scalar with 0, 1, or 2 roster parents.
4336
4337static void
4338run_with_0_roster_parents(a_scalar & s, revision_id scalar_origin_rid,
4339 scalar_val new_val,
4340 set<revision_id> const & new_mark_set,
4341 node_id_source & nis)
4342{
4343 MM(s);
4344 MM(scalar_origin_rid);
4345 MM(new_val);
4346 MM(new_mark_set);
4347 roster_t expected_roster; MM(expected_roster);
4348 marking_map expected_markings; MM(expected_markings);
4349
4350 s.set(scalar_origin_rid, new_val, new_mark_set, expected_roster, expected_markings);
4351
4352 roster_t empty_roster;
4353 cset cs; MM(cs);
4354 make_cset(empty_roster, expected_roster, cs);
4355
4356 roster_t new_roster; MM(new_roster);
4357 marking_map new_markings; MM(new_markings);
4358 // this function takes the old parent roster/marking and modifies them; in
4359 // our case, the parent roster/marking are empty, and so are our
4360 // roster/marking, so we don't need to do anything special.
4361 make_roster_for_nonmerge(cs, old_rid, new_roster, new_markings, nis);
4362
4363 I(equal_up_to_renumbering(expected_roster, expected_markings,
4364 new_roster, new_markings));
4365
4366 marking_map new_markings2; MM(new_markings2);
4367 mark_roster_with_no_parents(old_rid, new_roster, new_markings2);
4368 I(new_markings == new_markings2);
4369
4370 marking_map new_markings3; MM(new_markings3);
4371 roster_t parent3;
4372 marking_map old_markings3;
4373 mark_roster_with_one_parent(parent3, old_markings3, old_rid, new_roster,
4374 new_markings3);
4375 I(new_markings == new_markings3);
4376}
4377
4378static void
4379run_with_1_roster_parent(a_scalar & s,
4380 revision_id scalar_origin_rid,
4381 scalar_val parent_val,
4382 set<revision_id> const & parent_mark_set,
4383 scalar_val new_val,
4384 set<revision_id> const & new_mark_set,
4385 node_id_source & nis)
4386{
4387 MM(s);
4388 MM(scalar_origin_rid);
4389 MM(parent_val);
4390 MM(parent_mark_set);
4391 MM(new_val);
4392 MM(new_mark_set);
4393 roster_t parent_roster; MM(parent_roster);
4394 marking_map parent_markings; MM(parent_markings);
4395 roster_t expected_roster; MM(expected_roster);
4396 marking_map expected_markings; MM(expected_markings);
4397
4398 s.set(scalar_origin_rid, parent_val, parent_mark_set, parent_roster, parent_markings);
4399 s.set(scalar_origin_rid, new_val, new_mark_set, expected_roster, expected_markings);
4400
4401 cset cs; MM(cs);
4402 make_cset(parent_roster, expected_roster, cs);
4403
4404 roster_t new_roster; MM(new_roster);
4405 marking_map new_markings; MM(new_markings);
4406 new_roster = parent_roster;
4407 new_markings = parent_markings;
4408 make_roster_for_nonmerge(cs, new_rid, new_roster, new_markings, nis);
4409
4410 I(equal_up_to_renumbering(expected_roster, expected_markings,
4411 new_roster, new_markings));
4412
4413 marking_map new_markings2; MM(new_markings2);
4414 mark_roster_with_one_parent(parent_roster, parent_markings,
4415 new_rid, new_roster, new_markings2);
4416 I(new_markings == new_markings2);
4417}
4418
4419static void
4420run_with_2_roster_parents(a_scalar & s,
4421 revision_id scalar_origin_rid,
4422 scalar_val left_val,
4423 set<revision_id> const & left_mark_set,
4424 scalar_val right_val,
4425 set<revision_id> const & right_mark_set,
4426 scalar_val new_val,
4427 set<revision_id> const & new_mark_set,
4428 node_id_source & nis)
4429{
4430 MM(s);
4431 MM(scalar_origin_rid);
4432 MM(left_val);
4433 MM(left_mark_set);
4434 MM(right_val);
4435 MM(right_mark_set);
4436 MM(new_val);
4437 MM(new_mark_set);
4438 roster_t left_roster; MM(left_roster);
4439 roster_t right_roster; MM(right_roster);
4440 roster_t expected_roster; MM(expected_roster);
4441 marking_map left_markings; MM(left_markings);
4442 marking_map right_markings; MM(right_markings);
4443 marking_map expected_markings; MM(expected_markings);
4444
4445 s.set(scalar_origin_rid, left_val, left_mark_set, left_roster, left_markings);
4446 s.set(scalar_origin_rid, right_val, right_mark_set, right_roster, right_markings);
4447 s.set(scalar_origin_rid, new_val, new_mark_set, expected_roster, expected_markings);
4448
4449 cset left_cs; MM(left_cs);
4450 cset right_cs; MM(right_cs);
4451 make_cset(left_roster, expected_roster, left_cs);
4452 make_cset(right_roster, expected_roster, right_cs);
4453
4454 set<revision_id> left_uncommon_ancestors; MM(left_uncommon_ancestors);
4455 left_uncommon_ancestors.insert(left_rid);
4456 set<revision_id> right_uncommon_ancestors; MM(right_uncommon_ancestors);
4457 right_uncommon_ancestors.insert(right_rid);
4458
4459 roster_t new_roster; MM(new_roster);
4460 marking_map new_markings; MM(new_markings);
4461 make_roster_for_merge(left_rid, left_roster, left_markings, left_cs,
4462 left_uncommon_ancestors,
4463 right_rid, right_roster, right_markings, right_cs,
4464 right_uncommon_ancestors,
4465 new_rid, new_roster, new_markings,
4466 nis);
4467
4468 I(equal_up_to_renumbering(expected_roster, expected_markings,
4469 new_roster, new_markings));
4470}
4471
4472////////////////
4473// These functions encapsulate all the different ways to get a 0 parent node,
4474// a 1 parent node, and a 2 parent node.
4475
4476////////////////
4477// These functions encapsulate all the different ways to get a 0 parent
4478// scalar, a 1 parent scalar, and a 2 parent scalar.
4479
4480// FIXME: have clients just use s.nis instead of passing it separately...?
4481
4482static void
4483run_a_2_scalar_parent_mark_scenario_exact(revision_id const & scalar_origin_rid,
4484 scalar_val left_val,
4485 set<revision_id> const & left_mark_set,
4486 scalar_val right_val,
4487 set<revision_id> const & right_mark_set,
4488 scalar_val new_val,
4489 set<revision_id> const & new_mark_set)
4490{
4491 testing_node_id_source nis;
4492 scalars ss = all_scalars(nis);
4493 for (scalars::const_iterator i = ss.begin(); i != ss.end(); ++i)
4494 {
4495 run_with_2_roster_parents(**i, scalar_origin_rid,
4496 left_val, left_mark_set,
4497 right_val, right_mark_set,
4498 new_val, new_mark_set,
4499 nis);
4500 }
4501}
4502
4503static revision_id
4504flip_revision_id(revision_id const & rid)
4505{
4506 if (rid == old_rid || rid == new_rid)
4507 return rid;
4508 else if (rid == left_rid)
4509 return right_rid;
4510 else if (rid == right_rid)
4511 return left_rid;
4512 else
4513 I(false);
4514}
4515
4516static set<revision_id>
4517flip_revision(set<revision_id> const & rids)
4518{
4519 set<revision_id> flipped_rids;
4520 for (set<revision_id>::const_iterator i = rids.begin(); i != rids.end(); ++i)
4521 flipped_rids.insert(flip_revision_id(*i));
4522 return flipped_rids;
4523}
4524
4525static void
4526run_a_2_scalar_parent_mark_scenario(revision_id const & scalar_origin_rid,
4527 scalar_val left_val,
4528 set<revision_id> const & left_mark_set,
4529 scalar_val right_val,
4530 set<revision_id> const & right_mark_set,
4531 scalar_val new_val,
4532 set<revision_id> const & new_mark_set)
4533{
4534 // run both what we're given...
4535 run_a_2_scalar_parent_mark_scenario_exact(scalar_origin_rid,
4536 left_val, left_mark_set,
4537 right_val, right_mark_set,
4538 new_val, new_mark_set);
4539 // ...and its symmetric reflection. but we have to flip the mark set,
4540 // because the exact stuff has hard-coded the names of the various
4541 // revisions and their uncommon ancestor sets.
4542 {
4543 set<revision_id> flipped_left_mark_set = flip_revision(left_mark_set);
4544 set<revision_id> flipped_right_mark_set = flip_revision(right_mark_set);
4545 set<revision_id> flipped_new_mark_set = flip_revision(new_mark_set);
4546
4547 run_a_2_scalar_parent_mark_scenario_exact(flip_revision_id(scalar_origin_rid),
4548 right_val, flipped_right_mark_set,
4549 left_val, flipped_left_mark_set,
4550 new_val, flipped_new_mark_set);
4551 }
4552}
4553
4554static void
4555run_a_2_scalar_parent_mark_scenario(scalar_val left_val,
4556 set<revision_id> const & left_mark_set,
4557 scalar_val right_val,
4558 set<revision_id> const & right_mark_set,
4559 scalar_val new_val,
4560 set<revision_id> const & new_mark_set)
4561{
4562 run_a_2_scalar_parent_mark_scenario(old_rid,
4563 left_val, left_mark_set,
4564 right_val, right_mark_set,
4565 new_val, new_mark_set);
4566}
4567
4568static void
4569run_a_1_scalar_parent_mark_scenario(scalar_val parent_val,
4570 set<revision_id> const & parent_mark_set,
4571 scalar_val new_val,
4572 set<revision_id> const & new_mark_set)
4573{
4574 {
4575 testing_node_id_source nis;
4576 scalars ss = all_scalars(nis);
4577 for (scalars::const_iterator i = ss.begin(); i != ss.end(); ++i)
4578 run_with_1_roster_parent(**i, old_rid,
4579 parent_val, parent_mark_set,
4580 new_val, new_mark_set,
4581 nis);
4582 }
4583 // this is an asymmetric, test, so run it via the code that will test it
4584 // both ways
4585 run_a_2_scalar_parent_mark_scenario(left_rid,
4586 parent_val, parent_mark_set,
4587 scalar_none, set<revision_id>(),
4588 new_val, new_mark_set);
4589}
4590
4591static void
4592run_a_0_scalar_parent_mark_scenario()
4593{
4594 {
4595 testing_node_id_source nis;
4596 scalars ss = all_scalars(nis);
4597 for (scalars::const_iterator i = ss.begin(); i != ss.end(); ++i)
4598 {
4599 run_with_0_roster_parents(**i, old_rid, scalar_a, singleton(old_rid), nis);
4600 run_with_1_roster_parent(**i, new_rid,
4601 scalar_none, set<revision_id>(),
4602 scalar_a, singleton(new_rid),
4603 nis);
4604 run_with_2_roster_parents(**i, new_rid,
4605 scalar_none, set<revision_id>(),
4606 scalar_none, set<revision_id>(),
4607 scalar_a, singleton(new_rid),
4608 nis);
4609 }
4610 }
4611}
4612
4613////////////////
4614// These functions contain the actual list of *-merge cases that we would like
4615// to test.
4616
4617static void
4618test_all_0_scalar_parent_mark_scenarios()
4619{
4620 L(FL("TEST: begin checking 0-parent marking"));
4621 // a*
4622 run_a_0_scalar_parent_mark_scenario();
4623 L(FL("TEST: end checking 0-parent marking"));
4624}
4625
4626static void
4627test_all_1_scalar_parent_mark_scenarios()
4628{
4629 L(FL("TEST: begin checking 1-parent marking"));
4630 // a
4631 // |
4632 // a
4633 run_a_1_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4634 scalar_a, singleton(old_rid));
4635 // a*
4636 // |
4637 // a
4638 run_a_1_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4639 scalar_a, singleton(left_rid));
4640 // a* a*
4641 // \ /
4642 // a
4643 // |
4644 // a
4645 run_a_1_scalar_parent_mark_scenario(scalar_a, doubleton(left_rid, right_rid),
4646 scalar_a, doubleton(left_rid, right_rid));
4647 // a
4648 // |
4649 // b*
4650 run_a_1_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4651 scalar_b, singleton(new_rid));
4652 // a*
4653 // |
4654 // b*
4655 run_a_1_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4656 scalar_b, singleton(new_rid));
4657 // a* a*
4658 // \ /
4659 // a
4660 // |
4661 // b*
4662 run_a_1_scalar_parent_mark_scenario(scalar_a, doubleton(left_rid, right_rid),
4663 scalar_b, singleton(new_rid));
4664 L(FL("TEST: end checking 1-parent marking"));
4665}
4666
4667static void
4668test_all_2_scalar_parent_mark_scenarios()
4669{
4670 L(FL("TEST: begin checking 2-parent marking"));
4671 ///////////////////////////////////////////////////////////////////
4672 // a a
4673 // \ /
4674 // a
4675 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4676 scalar_a, singleton(old_rid),
4677 scalar_a, singleton(old_rid));
4678 // a a*
4679 // \ /
4680 // a
4681 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4682 scalar_a, singleton(right_rid),
4683 scalar_a, doubleton(old_rid, right_rid));
4684 // a* a*
4685 // \ /
4686 // a
4687 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4688 scalar_a, singleton(right_rid),
4689 scalar_a, doubleton(left_rid, right_rid));
4690
4691 ///////////////////////////////////////////////////////////////////
4692 // a a
4693 // \ /
4694 // b*
4695 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4696 scalar_a, singleton(old_rid),
4697 scalar_b, singleton(new_rid));
4698 // a a*
4699 // \ /
4700 // b*
4701 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4702 scalar_a, singleton(right_rid),
4703 scalar_b, singleton(new_rid));
4704 // a* a*
4705 // \ /
4706 // b*
4707 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4708 scalar_a, singleton(right_rid),
4709 scalar_b, singleton(new_rid));
4710
4711 ///////////////////////////////////////////////////////////////////
4712 // a* b*
4713 // \ /
4714 // c*
4715 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4716 scalar_b, singleton(right_rid),
4717 scalar_c, singleton(new_rid));
4718 // a b*
4719 // \ /
4720 // c*
4721 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4722 scalar_b, singleton(right_rid),
4723 scalar_c, singleton(new_rid));
4724 // this case cannot actually arise, because if *(a) = *(b) then val(a) =
4725 // val(b). but hey.
4726 // a b
4727 // \ /
4728 // c*
4729 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4730 scalar_b, singleton(old_rid),
4731 scalar_c, singleton(new_rid));
4732
4733 ///////////////////////////////////////////////////////////////////
4734 // a* b*
4735 // \ /
4736 // a*
4737 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4738 scalar_b, singleton(right_rid),
4739 scalar_a, singleton(new_rid));
4740 // a b*
4741 // \ /
4742 // a*
4743 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(old_rid),
4744 scalar_b, singleton(right_rid),
4745 scalar_a, singleton(new_rid));
4746 // a* b
4747 // \ /
4748 // a
4749 run_a_2_scalar_parent_mark_scenario(scalar_a, singleton(left_rid),
4750 scalar_b, singleton(old_rid),
4751 scalar_a, singleton(left_rid));
4752
4753 // FIXME: be nice to test:
4754 // a* a* b
4755 // \ / /
4756 // a /
4757 // \ /
4758 // a
4759 L(FL("TEST: end checking 2-parent marking"));
4760}
4761
4762// there is _one_ remaining case that the above tests miss, because they
4763// couple scalar lifetimes and node lifetimes. Maybe they shouldn't do that,
4764// but anyway... until someone decides to refactor, we need this. The basic
4765// issue is that for content and name scalars, the scalar lifetime and the
4766// node lifetime are identical. For attrs, this isn't necessarily true. This
4767// is why we have two different attr scalars. Let's say that "." means a node
4768// that doesn't exist, and "+" means a node that exists but has no roster.
4769// The first scalar checks cases like
4770// +
4771// |
4772// a
4773//
4774// + +
4775// \ /
4776// a*
4777//
4778// a* +
4779// \ /
4780// a
4781// and the second one checks cases like
4782// .
4783// |
4784// a
4785//
4786// . .
4787// \ /
4788// a*
4789//
4790// a* .
4791// \ /
4792// a
4793// Between them, they cover _almost_ all possibilities. The one that they
4794// miss is:
4795// . +
4796// \ /
4797// a*
4798// (and its reflection).
4799// That is what this test checks.
4800// Sorry it's so code-duplication-iferous. Refactors would be good...
4801
4802namespace
4803{
4804 // this scalar represents an attr whose node may or may not already exist
4805 template <typename T>
4806 struct X_attr_mixed_scalar : public a_scalar
4807 {
4808 virtual string my_type() const { return "X_attr_scalar"; }
4809
4810 map<scalar_val, pair<bool, attr_value> > values;
4811 X_attr_mixed_scalar(node_id_source & nis)
4812 : a_scalar(nis)
4813 {
4814 safe_insert(values, make_pair(scalar_a, make_pair(true, attr_value("a"))));
4815 safe_insert(values, make_pair(scalar_b, make_pair(true, attr_value("b"))));
4816 safe_insert(values, make_pair(scalar_c, make_pair(true, attr_value("c"))));
4817 }
4818 virtual void
4819 set(revision_id const & scalar_origin_rid, scalar_val val,
4820 std::set<revision_id> const & this_scalar_mark,
4821 roster_t & roster, marking_map & markings)
4822 {
4823 setup(roster, markings);
4824 // scalar_none is . in the above notation
4825 // and scalar_none_2 is +
4826 if (val != scalar_none)
4827 {
4828 T::make_obj(scalar_origin_rid, obj_under_test_nid, roster, markings);
4829 roster.attach_node(obj_under_test_nid, split("foo"));
4830 }
4831 if (val != scalar_none && val != scalar_none_2)
4832 {
4833 safe_insert(roster.get_node(obj_under_test_nid)->attrs,
4834 make_pair(attr_key("test_key"), safe_get(values, val)));
4835 markings[obj_under_test_nid].attrs[attr_key("test_key")] = this_scalar_mark;
4836 }
4837 roster.check_sane_against(markings);
4838 }
4839 };
4840}
4841
4842static void
4843test_residual_attr_mark_scenario()
4844{
4845 L(FL("TEST: begin checking residual attr marking case"));
4846 {
4847 testing_node_id_source nis;
4848 X_attr_mixed_scalar<file_maker> s(nis);
4849 run_with_2_roster_parents(s, left_rid,
4850 scalar_none_2, set<revision_id>(),
4851 scalar_none, set<revision_id>(),
4852 scalar_a, singleton(new_rid),
4853 nis);
4854 }
4855 {
4856 testing_node_id_source nis;
4857 X_attr_mixed_scalar<dir_maker> s(nis);
4858 run_with_2_roster_parents(s, left_rid,
4859 scalar_none_2, set<revision_id>(),
4860 scalar_none, set<revision_id>(),
4861 scalar_a, singleton(new_rid),
4862 nis);
4863 }
4864 {
4865 testing_node_id_source nis;
4866 X_attr_mixed_scalar<file_maker> s(nis);
4867 run_with_2_roster_parents(s, right_rid,
4868 scalar_none, set<revision_id>(),
4869 scalar_none_2, set<revision_id>(),
4870 scalar_a, singleton(new_rid),
4871 nis);
4872 }
4873 {
4874 testing_node_id_source nis;
4875 X_attr_mixed_scalar<dir_maker> s(nis);
4876 run_with_2_roster_parents(s, right_rid,
4877 scalar_none, set<revision_id>(),
4878 scalar_none_2, set<revision_id>(),
4879 scalar_a, singleton(new_rid),
4880 nis);
4881 }
4882 L(FL("TEST: end checking residual attr marking case"));
4883}
4884
4885static void
4886test_all_mark_scenarios()
4887{
4888 test_all_0_scalar_parent_mark_scenarios();
4889 test_all_1_scalar_parent_mark_scenarios();
4890 test_all_2_scalar_parent_mark_scenarios();
4891 test_residual_attr_mark_scenario();
4892}
4893
4894////////////////////////////////////////////////////////////////////////
4895// end of exhaustive tests
4896////////////////////////////////////////////////////////////////////////
4897
4898////////////////////////////////////////////////////////////////////////
4899// lifecyle tests
4900////////////////////////////////////////////////////////////////////////
4901
4902// nodes can't survive dying on one side of a merge
4903static void
4904test_die_die_die_merge()
4905{
4906 roster_t left_roster; MM(left_roster);
4907 marking_map left_markings; MM(left_markings);
4908 roster_t right_roster; MM(right_roster);
4909 marking_map right_markings; MM(right_markings);
4910 testing_node_id_source nis;
4911
4912 // left roster is empty except for the root
4913 left_roster.attach_node(left_roster.create_dir_node(nis), split(""));
4914 marking_t an_old_marking;
4915 an_old_marking.birth_revision = old_rid;
4916 an_old_marking.parent_name = singleton(old_rid);
4917 safe_insert(left_markings, make_pair(left_roster.get_node(split(""))->self,
4918 an_old_marking));
4919 // right roster is identical, except for a dir created in the old rev
4920 right_roster = left_roster;
4921 right_markings = left_markings;
4922 right_roster.attach_node(right_roster.create_dir_node(nis), split("foo"));
4923 safe_insert(right_markings, make_pair(right_roster.get_node(split("foo"))->self,
4924 an_old_marking));
4925
4926 left_roster.check_sane_against(left_markings);
4927 right_roster.check_sane_against(right_markings);
4928
4929 cset left_cs; MM(left_cs);
4930 // we add the node
4931 left_cs.dirs_added.insert(split("foo"));
4932 // we do nothing
4933 cset right_cs; MM(right_cs);
4934
4935 roster_t new_roster; MM(new_roster);
4936 marking_map new_markings; MM(new_markings);
4937
4938 // because the dir was created in the old rev, the left side has logically
4939 // seen it and killed it, so it needs to be dead in the result.
4940 BOOST_CHECK_THROW(
4941 make_roster_for_merge(left_rid, left_roster, left_markings, left_cs,
4942 singleton(left_rid),
4943 right_rid, right_roster, right_markings, right_cs,
4944 singleton(right_rid),
4945 new_rid, new_roster, new_markings,
4946 nis),
4947 logic_error);
4948 BOOST_CHECK_THROW(
4949 make_roster_for_merge(right_rid, right_roster, right_markings, right_cs,
4950 singleton(right_rid),
4951 left_rid, left_roster, left_markings, left_cs,
4952 singleton(left_rid),
4953 new_rid, new_roster, new_markings,
4954 nis),
4955 logic_error);
4956}
4957// nodes can't change type file->dir or dir->file
4958// make_cset fails
4959// merging a file and a dir with the same nid and no mention of what should
4960// happen to them fails
4961
4962static void
4963test_same_nid_diff_type()
4964{
4965 testing_node_id_source nis;
4966
4967 roster_t dir_roster; MM(dir_roster);
4968 marking_map dir_markings; MM(dir_markings);
4969 dir_roster.attach_node(dir_roster.create_dir_node(nis), split(""));
4970 marking_t marking;
4971 marking.birth_revision = old_rid;
4972 marking.parent_name = singleton(old_rid);
4973 safe_insert(dir_markings, make_pair(dir_roster.get_node(split(""))->self,
4974 marking));
4975
4976 roster_t file_roster; MM(file_roster);
4977 marking_map file_markings; MM(file_markings);
4978 file_roster = dir_roster;
4979 file_markings = dir_markings;
4980
4981 // okay, they both have the root dir
4982 node_id nid = nis.next();
4983 dir_roster.create_dir_node(nid);
4984 dir_roster.attach_node(nid, split("foo"));
4985 safe_insert(dir_markings, make_pair(nid, marking));
4986
4987 file_roster.create_file_node(new_ident(), nid);
4988 file_roster.attach_node(nid, split("foo"));
4989 marking.file_content = singleton(old_rid);
4990 safe_insert(file_markings, make_pair(nid, marking));
4991
4992 dir_roster.check_sane_against(dir_markings);
4993 file_roster.check_sane_against(file_markings);
4994
4995 cset cs; MM(cs);
4996 BOOST_CHECK_THROW(make_cset(dir_roster, file_roster, cs), logic_error);
4997 BOOST_CHECK_THROW(make_cset(file_roster, dir_roster, cs), logic_error);
4998
4999 cset left_cs; MM(left_cs);
5000 cset right_cs; MM(right_cs);
5001 roster_t new_roster; MM(new_roster);
5002 marking_map new_markings; MM(new_markings);
5003 BOOST_CHECK_THROW(
5004 make_roster_for_merge(left_rid, dir_roster, dir_markings, left_cs,
5005 singleton(left_rid),
5006 right_rid, file_roster, file_markings, right_cs,
5007 singleton(right_rid),
5008 new_rid, new_roster, new_markings,
5009 nis),
5010 logic_error);
5011 BOOST_CHECK_THROW(
5012 make_roster_for_merge(left_rid, file_roster, file_markings, left_cs,
5013 singleton(left_rid),
5014 right_rid, dir_roster, dir_markings, right_cs,
5015 singleton(right_rid),
5016 new_rid, new_roster, new_markings,
5017 nis),
5018 logic_error);
5019
5020}
5021
5022
5023static void
5024write_roster_test()
5025{
5026 L(FL("TEST: write_roster_test"));
5027 roster_t r; MM(r);
5028 marking_map mm; MM(mm);
5029
5030 testing_node_id_source nis;
5031 split_path root, foo, xx, fo, foo_bar, foo_ang, foo_zoo;
5032 file_path().split(root);
5033 file_path_internal("foo").split(foo);
5034 file_path_internal("foo/ang").split(foo_ang);
5035 file_path_internal("foo/bar").split(foo_bar);
5036 file_path_internal("foo/zoo").split(foo_zoo);
5037 file_path_internal("fo").split(fo);
5038 file_path_internal("xx").split(xx);
5039
5040 file_id f1(string("1111111111111111111111111111111111111111"));
5041 revision_id rid(string("1234123412341234123412341234123412341234"));
5042 node_id nid;
5043
5044 // if adding new nodes, add them at the end to keep the node_id order
5045
5046 nid = nis.next();
5047 r.create_dir_node(nid);
5048 r.attach_node(nid, root);
5049 mark_new_node(rid, r.get_node(nid), mm[nid]);
5050
5051 nid = nis.next();
5052 r.create_dir_node(nid);
5053 r.attach_node(nid, foo);
5054 mark_new_node(rid, r.get_node(nid), mm[nid]);
5055
5056 nid = nis.next();
5057 r.create_dir_node(nid);