// monotone -- Mtn source tree
//
// Root/paths.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include <iostream>
11#include <string>
12#include <sstream>
13
14#include <boost/filesystem/path.hpp>
15#include <boost/filesystem/operations.hpp>
16#include <boost/filesystem/convenience.hpp>
17
18#include "constants.hh"
19#include "paths.hh"
20#include "platform.hh"
21#include "sanity.hh"
22#include "interner.hh"
23#include "charset.hh"
24#include "simplestring_xform.hh"
25
26using std::exception;
27using std::ostream;
28using std::ostringstream;
29using std::string;
30
31// some structure to ensure we aren't doing anything broken when resolving
32// filenames. the idea is to make sure
33// -- we don't depend on the existence of something before it has been set
34// -- we don't re-set something that has already been used
35// -- sometimes, we use the _non_-existence of something, so we shouldn't
36// set anything whose un-setted-ness has already been used
37template <typename T>
38struct access_tracker
39{
40 void set(T const & val, bool may_be_initialized)
41 {
42 I(may_be_initialized || !initialized);
43 I(!very_uninitialized);
44 I(!used);
45 initialized = true;
46 value = val;
47 }
48 T const & get()
49 {
50 I(initialized);
51 used = true;
52 return value;
53 }
54 T const & get_but_unused()
55 {
56 I(initialized);
57 return value;
58 }
59 void may_not_initialize()
60 {
61 I(!initialized);
62 very_uninitialized = true;
63 }
64 // for unit tests
65 void unset()
66 {
67 used = initialized = very_uninitialized = false;
68 }
69 T value;
70 bool initialized, used, very_uninitialized;
71 access_tracker() : initialized(false), used(false), very_uninitialized(false) {};
72};
73
74// paths to use in interpreting paths from various sources,
75// conceptually:
76// working_root / initial_rel_path == initial_abs_path
77
78// initial_abs_path is for interpreting relative system_path's
79static access_tracker<system_path> initial_abs_path;
80// initial_rel_path is for interpreting external file_path's
81// for now we just make it an fs::path for convenience; we used to make it a
82// file_path, but then you can't run monotone from inside the _MTN/ dir (even
83// when referring to files outside the _MTN/ dir).
84static access_tracker<fs::path> initial_rel_path;
85// working_root is for converting file_path's and bookkeeping_path's to
86// system_path's.
87static access_tracker<system_path> working_root;
88
89bookkeeping_path const bookkeeping_root("_MTN");
90path_component const bookkeeping_root_component("_MTN");
91
92void
93save_initial_path()
94{
95 // FIXME: BUG: this only works if the current working dir is in utf8
96 initial_abs_path.set(system_path(get_current_working_dir()), false);
97 // We still use boost::fs, so let's continue to initialize it properly.
98 fs::initial_path();
99 fs::path::default_name_check(fs::native);
100 L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused());
101}
102
103///////////////////////////////////////////////////////////////////////////
104// verifying that internal paths are indeed normalized.
105// this code must be superfast
106///////////////////////////////////////////////////////////////////////////
107
// What "normalized" means for an internal path:
//   -- '/' is the path separator
//   -- it is not an absolute path (on either posix or win32);
//      operationally: first character != '/', first character != '\',
//      second character != ':'
//   -- it contains no illegal characters
//      -- 0x00 -- 0x1f, 0x7f and \ are the illegal characters.  \ is illegal
//         unconditionally, to stop people checking in files on posix that
//         would be interpreted differently on win32
//      -- (we may want to allow 0x0a and 0x0d (LF and CR) in the future, but
//         that is blocked on the manifest format changing)
//         (it also requires changes to 'automate inventory', and possibly
//         others, to handle quoting)
//   -- no doubled /'s
//   -- no trailing /
//   -- no "." or ".." path components
//
// bad_component rejects a single '/'-separated piece: the empty string
// (which is what a leading, doubled or trailing '/' produces), "." and "..".
static inline bool
bad_component(string const & component)
{
  return component.empty()
    || component == "."
    || component == "..";
}
137
138static inline bool
139has_bad_chars(string const & path)
140{
141 for (string::const_iterator c = path.begin(); LIKELY(c != path.end()); c++)
142 {
143 // char is often a signed type; convert to unsigned to ensure that
144 // bytes 0x80-0xff are considered > 0x1f.
145 u8 x = (u8)*c;
146 // 0x5c is '\\'; we use the hex constant to make the dependency on
147 // ASCII encoding explicit.
148 if (UNLIKELY(x <= 0x1f || x == 0x5c || x == 0x7f))
149 return true;
150 }
151 return false;
152}
153
154// fully_normalized_path performs very similar function to file_path.split().
155// if want_split is set, split_path will be filled with the '/' separated
156// components of the path.
157static inline bool
158fully_normalized_path_split(string const & path, bool want_split,
159 split_path & sp)
160{
161 // empty path is fine
162 if (path.empty())
163 return true;
164 // could use is_absolute_somewhere, but this is the only part of it that
165 // wouldn't be redundant
166 if (path.size() > 1 && path[1] == ':')
167 return false;
168 // first scan for completely illegal bytes
169 if (has_bad_chars(path))
170 return false;
171 // now check each component
172 string::size_type start, stop;
173 start = 0;
174 while (1)
175 {
176 stop = path.find('/', start);
177 if (stop == string::npos)
178 {
179 string const & s(path.substr(start));
180 if (bad_component(s))
181 return false;
182 if (want_split)
183 sp.push_back(s);
184 break;
185 }
186 string const & s(path.substr(start, stop - start));
187 if (bad_component(s))
188 return false;
189 if (want_split)
190 sp.push_back(s);
191 start = stop + 1;
192 }
193 return true;
194}
195
196static inline bool
197fully_normalized_path(string const & path)
198{
199 split_path sp;
200 return fully_normalized_path_split(path, false, sp);
201}
202
// This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping
// paths, because on case insensitive filesystems, files put in any of them
// may end up in _MTN instead.  This allows arbitrary code execution.  A
// better solution would be to fix this in the working directory writing code
// -- this prevents all-unix projects from naming things "mt", which is a bit
// rude -- but as a temporary security kluge it works.
static inline bool
in_bookkeeping_dir(string const & path)
{
  string::size_type const len = path.size();
  if (len < 4)
    return false;

  bool const prefix_matches =
    path[0] == '_'
    && (path[1] == 'M' || path[1] == 'm')
    && (path[2] == 'T' || path[2] == 't')
    && (path[3] == 'N' || path[3] == 'n');
  if (!prefix_matches)
    return false;

  // The first four characters are _, M, T and N, with the letters in either
  // case.  This is a bookkeeping path if that is the whole path, or if the
  // next character is '/'.
  return len == 4 || path[4] == '/';
}
227
228static inline bool
229is_valid_internal(string const & path)
230{
231 return (fully_normalized_path(path)
232 && !in_bookkeeping_dir(path));
233}
234
235// equivalent to file_path_internal(path).split(sp), but
236// avoids splitting the string twice
237void
238internal_string_to_split_path(string const & path, split_path & sp)
239{
240 I(utf8_validate(path));
241 I(!in_bookkeeping_dir(path));
242 sp.clear();
243 sp.reserve(8);
244 sp.push_back(the_null_component);
245 I(fully_normalized_path_split(path, true, sp));
246}
247
248file_path::file_path(file_path::source_type type, string const & path)
249{
250 MM(path);
251 I(utf8_validate(path));
252 switch (type)
253 {
254 case internal:
255 data = path;
256 break;
257 case external:
258 if (!initial_rel_path.initialized)
259 {
260 // we are not in a workspace; treat this as an internal
261 // path, and set the access_tracker() into a very uninitialised
262 // state so that we will hit an exception if we do eventually
263 // enter a workspace
264 initial_rel_path.may_not_initialize();
265 data = path;
266 N(is_valid_internal(path) && !in_bookkeeping_dir(path),
267 F("path '%s' is invalid") % path);
268 break;
269 }
270 N(!path.empty(), F("empty path '%s' is invalid") % path);
271 fs::path out, base, relative;
272 try
273 {
274 base = initial_rel_path.get();
275 // the fs::native is needed to get it to accept paths like ".foo".
276 relative = fs::path(path, fs::native);
277 out = (base / relative).normalize();
278 }
279 catch (exception &)
280 {
281 N(false, F("path '%s' is invalid") % path);
282 }
283 data = utf8(out.string());
284 if (data() == ".")
285 data = string("");
286 N(!relative.has_root_path(),
287 F("absolute path '%s' is invalid") % relative.string());
288 N(fully_normalized_path(data()), F("path '%s' is invalid") % data);
289 N(!in_bookkeeping_dir(data()), F("path '%s' is in bookkeeping dir") % data);
290 break;
291 }
292 MM(data);
293 I(is_valid_internal(data()));
294}
295
296bookkeeping_path::bookkeeping_path(string const & path)
297{
298 I(fully_normalized_path(path));
299 I(in_bookkeeping_dir(path));
300 data = path;
301}
302
303bool
304bookkeeping_path::is_bookkeeping_path(string const & path)
305{
306 return in_bookkeeping_dir(path);
307}
308
309///////////////////////////////////////////////////////////////////////////
310// splitting/joining
311// this code must be superfast
312// it depends very much on knowing that it can only be applied to fully
313// normalized, relative, paths.
314///////////////////////////////////////////////////////////////////////////
315
316// This function takes a vector of path components and joins them into a
317// single file_path. This is the inverse to file_path::split. It takes a
318// vector of the form:
319//
320// ["", p[0], p[1], ..., p[n]]
321//
322// and constructs the path:
323//
324// p[0]/p[1]/.../p[n]
325//
326file_path::file_path(split_path const & sp)
327{
328 split_path::const_iterator i = sp.begin();
329 I(i != sp.end());
330 I(null_name(*i));
331 string tmp;
332 bool start = true;
333 for (++i; i != sp.end(); ++i)
334 {
335 I(!null_name(*i));
336 if (!start)
337 tmp += "/";
338 tmp += (*i)();
339 if (start)
340 start = false;
341 }
342 I(!in_bookkeeping_dir(tmp));
343 data = tmp;
344}
345
346//
347// this takes a path of the form
348//
349// "p[0]/p[1]/.../p[n-1]/p[n]"
350//
351// and fills in a vector of paths corresponding to p[0] ... p[n]. This is the
352// inverse to the file_path::file_path(split_path) constructor.
353//
354// The first entry in this vector is always the null component, "". This path
355// is the root of the tree. So we actually output a vector like:
356// ["", p[0], p[1], ..., p[n]]
357// with n+1 members.
358void
359file_path::split(split_path & sp) const
360{
361 sp.clear();
362 sp.push_back(the_null_component);
363 if (empty())
364 return;
365 string::size_type start, stop;
366 start = 0;
367 string const & s = data();
368 while (1)
369 {
370 stop = s.find('/', start);
371 if (stop == string::npos)
372 {
373 sp.push_back(s.substr(start));
374 break;
375 }
376 sp.push_back(s.substr(start, stop - start));
377 start = stop + 1;
378 }
379}
380
381template <>
382void dump(split_path const & sp, string & out)
383{
384 ostringstream oss;
385
386 for (split_path::const_iterator i = sp.begin(); i != sp.end(); ++i)
387 {
388 if (null_name(*i))
389 oss << ".";
390 else
391 oss << "/" << *i;
392 }
393
394 oss << "\n";
395
396 out = oss.str();
397}
398
399
400///////////////////////////////////////////////////////////////////////////
401// localizing file names (externalizing them)
402// this code must be superfast when there is no conversion needed
403///////////////////////////////////////////////////////////////////////////
404
405string
406any_path::as_external() const
407{
408#ifdef __APPLE__
409 // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of
410 // locale.
411 return data();
412#else
413 // on normal systems we actually have some work to do, alas.
414 // not much, though, because utf8_to_system does all the hard work. it is
415 // carefully optimized. do not screw it up.
416 external out;
417 utf8_to_system(data, out);
418 return out();
419#endif
420}
421
422///////////////////////////////////////////////////////////////////////////
423// writing out paths
424///////////////////////////////////////////////////////////////////////////
425
426ostream &
427operator <<(ostream & o, any_path const & a)
428{
429 o << a.as_internal();
430 return o;
431}
432
433ostream &
434operator <<(ostream & o, split_path const & sp)
435{
436 file_path tmp(sp);
437 return o << tmp;
438}
439
440///////////////////////////////////////////////////////////////////////////
441// path manipulation
442// this code's speed does not matter much
443///////////////////////////////////////////////////////////////////////////
444
// True if PATH is absolute on the platform we were built for: a leading '/'
// everywhere, and additionally a leading '\' or a ':' in second position
// when built for WIN32.
static bool
is_absolute_here(string const & path)
{
  if (path.empty())
    return false;
  char const first = path[0];
#ifdef WIN32
  if (first == '\\' || (path.size() > 1 && path[1] == ':'))
    return true;
#endif
  return first == '/';
}
460
// True if PATH would be absolute on any supported platform: a leading '/'
// or '\', or a ':' in second position.
static inline bool
is_absolute_somewhere(string const & path)
{
  if (path.empty())
    return false;
  switch (path[0])
    {
    case '/':
    case '\\':
      return true;
    default:
      break;
    }
  return path.size() > 1 && path[1] == ':';
}
474
475file_path
476file_path::operator /(string const & to_append) const
477{
478 I(!is_absolute_somewhere(to_append));
479 if (empty())
480 return file_path_internal(to_append);
481 else
482 return file_path_internal(data() + "/" + to_append);
483}
484
485bookkeeping_path
486bookkeeping_path::operator /(string const & to_append) const
487{
488 I(!is_absolute_somewhere(to_append));
489 I(!empty());
490 return bookkeeping_path(data() + "/" + to_append);
491}
492
493system_path
494system_path::operator /(string const & to_append) const
495{
496 I(!empty());
497 I(!is_absolute_here(to_append));
498 return system_path(data() + "/" + to_append);
499}
500
501///////////////////////////////////////////////////////////////////////////
502// system_path
503///////////////////////////////////////////////////////////////////////////
504
505static string
506normalize_out_dots(string const & path)
507{
508#ifdef WIN32
509 return fs::path(path, fs::native).normalize().string();
510#else
511 return fs::path(path, fs::native).normalize().native_file_string();
512#endif
513}
514
515system_path::system_path(any_path const & other, bool in_true_workspace)
516{
517 if (is_absolute_here(other.as_internal()))
518 // another system_path. the normalizing isn't really necessary, but it
519 // makes me feel warm and fuzzy.
520 data = normalize_out_dots(other.as_internal());
521 else
522 {
523 system_path wr;
524 if (in_true_workspace)
525 wr = working_root.get();
526 else
527 wr = working_root.get_but_unused();
528 data = normalize_out_dots((wr / other.as_internal()).as_internal());
529 }
530}
531
532static inline string const_system_path(utf8 const & path)
533{
534 N(!path().empty(), F("invalid path ''"));
535 string expanded = tilde_expand(path)();
536 if (is_absolute_here(expanded))
537 return normalize_out_dots(expanded);
538 else
539 return normalize_out_dots((initial_abs_path.get() / expanded).as_internal());
540}
541
542system_path::system_path(string const & path)
543{
544 data = const_system_path(path);
545}
546
547system_path::system_path(utf8 const & path)
548{
549 data = const_system_path(path);
550}
551
552///////////////////////////////////////////////////////////////////////////
553// utility
554///////////////////////////////////////////////////////////////////////////
555
556bool
557workspace_root(split_path const & sp)
558{
559 I(null_name(idx(sp,0)));
560 return sp.size() == 1;
561}
562
563void
564dirname_basename(split_path const & sp,
565 split_path & dirname, path_component & basename)
566{
567 I(!sp.empty());
568 // L(FL("dirname_basename('%s' [%d components],...)") % file_path(sp) % sp.size());
569 dirname = sp;
570 dirname.pop_back();
571 basename = sp.back();
572 if (dirname.empty())
573 {
574 // L(FL("basename %d vs. null component %d") % basename % the_null_component);
575 I(null_name(basename));
576 }
577}
578
579///////////////////////////////////////////////////////////////////////////
580// workspace (and path root) handling
581///////////////////////////////////////////////////////////////////////////
582
583system_path
584current_root_path()
585{
586 return system_path(fs::initial_path().root_path().string());
587}
588
589
590bool
591find_and_go_to_workspace(system_path const & search_root)
592{
593 fs::path root(search_root.as_external(), fs::native);
594 fs::path bookdir(bookkeeping_root.as_external(), fs::native);
595 fs::path current(fs::initial_path());
596 fs::path removed;
597 fs::path check = current / bookdir;
598
599 // check that the current directory is below the specified search root
600
601 fs::path::iterator ri = root.begin();
602 fs::path::iterator ci = current.begin();
603
604 while (ri != root.end() && ci != current.end() && *ri == *ci)
605 {
606 ++ri;
607 ++ci;
608 }
609
610 // if it's not then issue a warning and abort the search
611
612 if (ri != root.end())
613 {
614 W(F("current directory '%s' is not below root '%s'")
615 % current.string()
616 % root.string());
617 return false;
618 }
619
620 L(FL("searching for '%s' directory with root '%s'")
621 % bookdir.string()
622 % root.string());
623
624 while (current != root
625 && current.has_branch_path()
626 && current.has_leaf()
627 && !fs::exists(check))
628 {
629 L(FL("'%s' not found in '%s' with '%s' removed")
630 % bookdir.string() % current.string() % removed.string());
631 removed = fs::path(current.leaf(), fs::native) / removed;
632 current = current.branch_path();
633 check = current / bookdir;
634 }
635
636 L(FL("search for '%s' ended at '%s' with '%s' removed")
637 % bookdir.string() % current.string() % removed.string());
638
639 if (!fs::exists(check))
640 {
641 L(FL("'%s' does not exist") % check.string());
642 return false;
643 }
644
645 if (!fs::is_directory(check))
646 {
647 L(FL("'%s' is not a directory") % check.string());
648 return false;
649 }
650
651 // check for _MTN/. and _MTN/.. to see if mt dir is readable
652 if (!fs::exists(check / ".") || !fs::exists(check / ".."))
653 {
654 L(FL("problems with '%s' (missing '.' or '..')") % check.string());
655 return false;
656 }
657
658 working_root.set(current.native_file_string(), true);
659 initial_rel_path.set(removed, true);
660
661 L(FL("working root is '%s'") % working_root.get_but_unused());
662 L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused().string());
663
664 change_current_working_dir(working_root.get_but_unused());
665
666 return true;
667}
668
669void
670go_to_workspace(system_path const & new_workspace)
671{
672 working_root.set(new_workspace, true);
673 initial_rel_path.set(fs::path(), true);
674 change_current_working_dir(new_workspace);
675}
676
677///////////////////////////////////////////////////////////////////////////
678// tests
679///////////////////////////////////////////////////////////////////////////
680
681#ifdef BUILD_UNIT_TESTS
682#include "unit_tests.hh"
683
684using std::logic_error;
685
686static void test_null_name()
687{
688 BOOST_CHECK(null_name(the_null_component));
689}
690
691static void test_file_path_internal()
692{
693 char const * baddies[] = {"/foo",
694 "foo//bar",
695 "foo/../bar",
696 "../bar",
697 "_MTN",
698 "_MTN/blah",
699 "foo/bar/",
700 "foo/./bar",
701 "./foo",
702 ".",
703 "..",
704 "c:\\foo",
705 "c:foo",
706 "c:/foo",
707 // some baddies made bad by a security kluge --
708 // see the comment in in_bookkeeping_dir
709 "_mtn",
710 "_mtN",
711 "_mTn",
712 "_Mtn",
713 "_MTn",
714 "_MtN",
715 "_mTN",
716 "_mtn/foo",
717 "_mtN/foo",
718 "_mTn/foo",
719 "_Mtn/foo",
720 "_MTn/foo",
721 "_MtN/foo",
722 "_mTN/foo",
723 0 };
724 initial_rel_path.unset();
725 initial_rel_path.set(fs::path(), true);
726 for (char const ** c = baddies; *c; ++c)
727 {
728 BOOST_CHECK_THROW(file_path_internal(*c), logic_error);
729 }
730 initial_rel_path.unset();
731 initial_rel_path.set(fs::path("blah/blah/blah", fs::native), true);
732 for (char const ** c = baddies; *c; ++c)
733 {
734 BOOST_CHECK_THROW(file_path_internal(*c), logic_error);
735 }
736
737 BOOST_CHECK(file_path().empty());
738 BOOST_CHECK(file_path_internal("").empty());
739
740 char const * goodies[] = {"",
741 "a",
742 "foo",
743 "foo/bar/baz",
744 "foo/bar.baz",
745 "foo/with-hyphen/bar",
746 "foo/with_underscore/bar",
747 "foo/with,other+@weird*%#$=stuff/bar",
748 ".foo/bar",
749 "..foo/bar",
750 "_MTNfoo/bar",
751 "foo:bar",
752 0 };
753
754 for (int i = 0; i < 2; ++i)
755 {
756 initial_rel_path.unset();
757 initial_rel_path.set(i ? fs::path()
758 : fs::path("blah/blah/blah", fs::native),
759 true);
760 for (char const ** c = goodies; *c; ++c)
761 {
762 file_path fp = file_path_internal(*c);
763 BOOST_CHECK(fp.as_internal() == *c);
764 BOOST_CHECK(file_path_internal(fp.as_internal()) == fp);
765 split_path split_test;
766 fp.split(split_test);
767 BOOST_CHECK(!split_test.empty());
768 file_path fp2(split_test);
769 BOOST_CHECK(fp == fp2);
770 BOOST_CHECK(null_name(split_test[0]));
771 for (split_path::const_iterator
772 i = split_test.begin() + 1; i != split_test.end(); ++i)
773 BOOST_CHECK(!null_name(*i));
774 }
775 }
776
777 initial_rel_path.unset();
778}
779
780static void check_fp_normalizes_to(char * before, char * after)
781{
782 L(FL("check_fp_normalizes_to: '%s' -> '%s'") % before % after);
783 file_path fp = file_path_external(string(before));
784 L(FL(" (got: %s)") % fp);
785 BOOST_CHECK(fp.as_internal() == after);
786 BOOST_CHECK(file_path_internal(fp.as_internal()) == fp);
787 // we compare after to the external form too, since as far as we know
788 // relative normalized posix paths are always good win32 paths too
789 BOOST_CHECK(fp.as_external() == after);
790 split_path split_test;
791 fp.split(split_test);
792 BOOST_CHECK(!split_test.empty());
793 file_path fp2(split_test);
794 BOOST_CHECK(fp == fp2);
795 BOOST_CHECK(null_name(split_test[0]));
796 for (split_path::const_iterator
797 i = split_test.begin() + 1; i != split_test.end(); ++i)
798 BOOST_CHECK(!null_name(*i));
799}
800
801static void test_file_path_external_null_prefix()
802{
803 initial_rel_path.unset();
804 initial_rel_path.set(fs::path(), true);
805
806 char const * baddies[] = {"/foo",
807 "../bar",
808 "_MTN/blah",
809 "_MTN",
810 "//blah",
811 "\\foo",
812 "..",
813 "c:\\foo",
814 "c:foo",
815 "c:/foo",
816 "",
817 // some baddies made bad by a security kluge --
818 // see the comment in in_bookkeeping_dir
819 "_mtn",
820 "_mtN",
821 "_mTn",
822 "_Mtn",
823 "_MTn",
824 "_MtN",
825 "_mTN",
826 "_mtn/foo",
827 "_mtN/foo",
828 "_mTn/foo",
829 "_Mtn/foo",
830 "_MTn/foo",
831 "_MtN/foo",
832 "_mTN/foo",
833 0 };
834 for (char const ** c = baddies; *c; ++c)
835 {
836 L(FL("test_file_path_external_null_prefix: trying baddie: %s") % *c);
837 BOOST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
838 }
839
840 check_fp_normalizes_to("a", "a");
841 check_fp_normalizes_to("foo", "foo");
842 check_fp_normalizes_to("foo/bar", "foo/bar");
843 check_fp_normalizes_to("foo/bar/baz", "foo/bar/baz");
844 check_fp_normalizes_to("foo/bar.baz", "foo/bar.baz");
845 check_fp_normalizes_to("foo/with-hyphen/bar", "foo/with-hyphen/bar");
846 check_fp_normalizes_to("foo/with_underscore/bar", "foo/with_underscore/bar");
847 check_fp_normalizes_to(".foo/bar", ".foo/bar");
848 check_fp_normalizes_to("..foo/bar", "..foo/bar");
849 check_fp_normalizes_to(".", "");
850#ifndef WIN32
851 check_fp_normalizes_to("foo:bar", "foo:bar");
852#endif
853 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
854 "foo/with,other+@weird*%#$=stuff/bar");
855
856 // Why are these tests with // in them commented out? because boost::fs
857 // sucks and can't normalize them. FIXME.
858 //check_fp_normalizes_to("foo//bar", "foo/bar");
859 check_fp_normalizes_to("foo/../bar", "bar");
860 check_fp_normalizes_to("foo/bar/", "foo/bar");
861 check_fp_normalizes_to("foo/./bar/", "foo/bar");
862 check_fp_normalizes_to("./foo", "foo");
863 //check_fp_normalizes_to("foo///.//", "foo");
864
865 initial_rel_path.unset();
866}
867
868static void test_file_path_external_prefix__MTN()
869{
870 initial_rel_path.unset();
871 initial_rel_path.set(fs::path("_MTN"), true);
872
873 BOOST_CHECK_THROW(file_path_external(utf8("foo")), informative_failure);
874 BOOST_CHECK_THROW(file_path_external(utf8(".")), informative_failure);
875 BOOST_CHECK_THROW(file_path_external(utf8("./blah")), informative_failure);
876 check_fp_normalizes_to("..", "");
877 check_fp_normalizes_to("../foo", "foo");
878}
879
880static void test_file_path_external_prefix_a_b()
881{
882 initial_rel_path.unset();
883 initial_rel_path.set(fs::path("a/b"), true);
884
885 char const * baddies[] = {"/foo",
886 "../../../bar",
887 "../../..",
888 "../../_MTN",
889 "../../_MTN/foo",
890 "//blah",
891 "\\foo",
892 "c:\\foo",
893#ifdef WIN32
894 "c:foo",
895 "c:/foo",
896#endif
897 "",
898 // some baddies made bad by a security kluge --
899 // see the comment in in_bookkeeping_dir
900 "../../_mtn",
901 "../../_mtN",
902 "../../_mTn",
903 "../../_Mtn",
904 "../../_MTn",
905 "../../_MtN",
906 "../../_mTN",
907 "../../_mtn/foo",
908 "../../_mtN/foo",
909 "../../_mTn/foo",
910 "../../_Mtn/foo",
911 "../../_MTn/foo",
912 "../../_MtN/foo",
913 "../../_mTN/foo",
914 0 };
915 for (char const ** c = baddies; *c; ++c)
916 {
917 L(FL("test_file_path_external_prefix_a_b: trying baddie: %s") % *c);
918 BOOST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
919 }
920
921 check_fp_normalizes_to("foo", "a/b/foo");
922 check_fp_normalizes_to("a", "a/b/a");
923 check_fp_normalizes_to("foo/bar", "a/b/foo/bar");
924 check_fp_normalizes_to("foo/bar/baz", "a/b/foo/bar/baz");
925 check_fp_normalizes_to("foo/bar.baz", "a/b/foo/bar.baz");
926 check_fp_normalizes_to("foo/with-hyphen/bar", "a/b/foo/with-hyphen/bar");
927 check_fp_normalizes_to("foo/with_underscore/bar", "a/b/foo/with_underscore/bar");
928 check_fp_normalizes_to(".foo/bar", "a/b/.foo/bar");
929 check_fp_normalizes_to("..foo/bar", "a/b/..foo/bar");
930 check_fp_normalizes_to(".", "a/b");
931#ifndef WIN32
932 check_fp_normalizes_to("foo:bar", "a/b/foo:bar");
933#endif
934 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
935 "a/b/foo/with,other+@weird*%#$=stuff/bar");
936 // why are the tests with // in them commented out? because boost::fs sucks
937 // and can't normalize them. FIXME.
938 //check_fp_normalizes_to("foo//bar", "a/b/foo/bar");
939 check_fp_normalizes_to("foo/../bar", "a/b/bar");
940 check_fp_normalizes_to("foo/bar/", "a/b/foo/bar");
941 check_fp_normalizes_to("foo/./bar/", "a/b/foo/bar");
942 check_fp_normalizes_to("./foo", "a/b/foo");
943 //check_fp_normalizes_to("foo///.//", "a/b/foo");
944 // things that would have been bad without the initial_rel_path:
945 check_fp_normalizes_to("../foo", "a/foo");
946 check_fp_normalizes_to("..", "a");
947 check_fp_normalizes_to("../..", "");
948 check_fp_normalizes_to("_MTN/foo", "a/b/_MTN/foo");
949 check_fp_normalizes_to("_MTN", "a/b/_MTN");
950#ifndef WIN32
951 check_fp_normalizes_to("c:foo", "a/b/c:foo");
952 check_fp_normalizes_to("c:/foo", "a/b/c:/foo");
953#endif
954
955 initial_rel_path.unset();
956}
957
958static void test_split_join()
959{
960 file_path fp1 = file_path_internal("foo/bar/baz");
961 file_path fp2 = file_path_internal("bar/baz/foo");
962 split_path split1, split2;
963 fp1.split(split1);
964 fp2.split(split2);
965 BOOST_CHECK(fp1 == file_path(split1));
966 BOOST_CHECK(fp2 == file_path(split2));
967 BOOST_CHECK(!(fp1 == file_path(split2)));
968 BOOST_CHECK(!(fp2 == file_path(split1)));
969 BOOST_CHECK(split1.size() == 4);
970 BOOST_CHECK(split2.size() == 4);
971 BOOST_CHECK(split1[1] != split1[2]);
972 BOOST_CHECK(split1[1] != split1[3]);
973 BOOST_CHECK(split1[2] != split1[3]);
974 BOOST_CHECK(null_name(split1[0])
975 && !null_name(split1[1])
976 && !null_name(split1[2])
977 && !null_name(split1[3]));
978 BOOST_CHECK(split1[1] == split2[3]);
979 BOOST_CHECK(split1[2] == split2[1]);
980 BOOST_CHECK(split1[3] == split2[2]);
981
982 file_path fp3 = file_path_internal("");
983 split_path split3;
984 fp3.split(split3);
985 BOOST_CHECK(split3.size() == 1 && null_name(split3[0]));
986
987 // empty split_path is invalid
988 split_path split4;
989 // this comparison tricks the compiler into not completely eliminating this
990 // code as dead...
991 BOOST_CHECK_THROW(file_path(split4) == file_path(), logic_error);
992 split4.push_back(the_null_component);
993 BOOST_CHECK(file_path(split4) == file_path());
994
995 // split_path without null first item is invalid
996 split4.clear();
997 split4.push_back(split1[1]);
998 // this comparison tricks the compiler into not completely eliminating this
999 // code as dead...
1000 BOOST_CHECK_THROW(file_path(split4) == file_path(), logic_error);
1001
1002 // split_path with non-first item item null is invalid
1003 split4.clear();
1004 split4.push_back(the_null_component);
1005 split4.push_back(split1[0]);
1006 split4.push_back(the_null_component);
1007 // this comparison tricks the compiler into not completely eliminating this
1008 // code as dead...
1009 BOOST_CHECK_THROW(file_path(split4) == file_path(), logic_error);
1010
1011 // Make sure that we can't use joining to create a path into the bookkeeping
1012 // dir
1013 {
1014 split_path split_mt1, split_mt2;
1015 file_path_internal("foo/_MTN").split(split_mt1);
1016 BOOST_CHECK(split_mt1.size() == 3);
1017 I(split_mt1[2] == bookkeeping_root_component);
1018 split_mt2.push_back(the_null_component);
1019 split_mt2.push_back(split_mt1[2]);
1020 // split_mt2 now contains the component "_MTN"
1021 BOOST_CHECK_THROW(file_path(split_mt2) == file_path(), logic_error);
1022 split_mt2.push_back(split_mt1[1]);
1023 // split_mt2 now contains the components "_MTN", "foo" in that order
1024 // this comparison tricks the compiler into not completely eliminating this
1025 // code as dead...
1026 BOOST_CHECK_THROW(file_path(split_mt2) == file_path(), logic_error);
1027 }
1028 // and make sure it fails for the klugy security cases -- see comments on
1029 // in_bookkeeping_dir
1030 {
1031 split_path split_mt1, split_mt2;
1032 file_path_internal("foo/_mTn").split(split_mt1);
1033 BOOST_CHECK(split_mt1.size() == 3);
1034 split_mt2.push_back(the_null_component);
1035 split_mt2.push_back(split_mt1[2]);
1036 // split_mt2 now contains the component "_mTn"
1037 BOOST_CHECK_THROW(file_path(split_mt2) == file_path(), logic_error);
1038 split_mt2.push_back(split_mt1[1]);
1039 // split_mt2 now contains the components "_mTn", "foo" in that order
1040 // this comparison tricks the compiler into not completely eliminating this
1041 // code as dead...
1042 BOOST_CHECK_THROW(file_path(split_mt2) == file_path(), logic_error);
1043 }
1044}
1045
1046static void check_bk_normalizes_to(char * before, char * after)
1047{
1048 bookkeeping_path bp(bookkeeping_root / before);
1049 L(FL("normalizing %s to %s (got %s)") % before % after % bp);
1050 BOOST_CHECK(bp.as_external() == after);
1051 BOOST_CHECK(bookkeeping_path(bp.as_internal()).as_internal() == bp.as_internal());
1052}
1053
1054static void test_bookkeeping_path()
1055{
1056 char const * baddies[] = {"/foo",
1057 "foo//bar",
1058 "foo/../bar",
1059 "../bar",
1060 "foo/bar/",
1061 "foo/./bar",
1062 "./foo",
1063 ".",
1064 "..",
1065 "c:\\foo",
1066 "c:foo",
1067 "c:/foo",
1068 "",
1069 "a:b",
1070 0 };
1071 string tmp_path_string;
1072
1073 for (char const ** c = baddies; *c; ++c)
1074 {
1075 L(FL("test_bookkeeping_path baddie: trying '%s'") % *c);
1076 BOOST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign(*c)), logic_error);
1077 BOOST_CHECK_THROW(bookkeeping_root / tmp_path_string.assign(*c), logic_error);
1078 }
1079 BOOST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign("foo/bar")), logic_error);
1080 BOOST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign("a")), logic_error);
1081
1082 check_bk_normalizes_to("a", "_MTN/a");
1083 check_bk_normalizes_to("foo", "_MTN/foo");
1084 check_bk_normalizes_to("foo/bar", "_MTN/foo/bar");
1085 check_bk_normalizes_to("foo/bar/baz", "_MTN/foo/bar/baz");
1086}
1087
1088static void check_system_normalizes_to(char * before, char * after)
1089{
1090 system_path sp(before);
1091 L(FL("normalizing '%s' to '%s' (got '%s')") % before % after % sp);
1092 BOOST_CHECK(sp.as_external() == after);
1093 BOOST_CHECK(system_path(sp.as_internal()).as_internal() == sp.as_internal());
1094}
1095
1096static void test_system_path()
1097{
1098 initial_abs_path.unset();
1099 initial_abs_path.set(system_path("/a/b"), true);
1100
1101 BOOST_CHECK_THROW(system_path(""), informative_failure);
1102
1103 check_system_normalizes_to("foo", "/a/b/foo");
1104 check_system_normalizes_to("foo/bar", "/a/b/foo/bar");
1105 check_system_normalizes_to("/foo/bar", "/foo/bar");
1106 check_system_normalizes_to("//foo/bar", "//foo/bar");
1107#ifdef WIN32
1108 check_system_normalizes_to("c:foo", "c:foo");
1109 check_system_normalizes_to("c:/foo", "c:/foo");
1110 check_system_normalizes_to("c:\\foo", "c:/foo");
1111#else
1112 check_system_normalizes_to("c:foo", "/a/b/c:foo");
1113 check_system_normalizes_to("c:/foo", "/a/b/c:/foo");
1114 check_system_normalizes_to("c:\\foo", "/a/b/c:\\foo");
1115 check_system_normalizes_to("foo:bar", "/a/b/foo:bar");
1116#endif
1117 // we require that system_path normalize out ..'s, because of the following
1118 // case:
1119 // /work mkdir newdir
1120 // /work$ cd newdir
1121 // /work/newdir$ monotone setup --db=../foo.db
1122 // Now they have either "/work/foo.db" or "/work/newdir/../foo.db" in
1123 // _MTN/options
1124 // /work/newdir$ cd ..
1125 // /work$ mv newdir newerdir # better name
1126 // Oops, now, if we stored the version with ..'s in, this workspace
1127 // is broken.
1128 check_system_normalizes_to("../foo", "/a/foo");
1129 check_system_normalizes_to("foo/..", "/a/b");
1130 check_system_normalizes_to("/foo/bar/..", "/foo");
1131 check_system_normalizes_to("/foo/..", "/");
1132 // can't do particularly interesting checking of tilde expansion, but at
1133 // least we can check that it's doing _something_...
1134 string tilde_expanded = system_path("~/foo").as_external();
1135#ifdef WIN32
1136 BOOST_CHECK(tilde_expanded[1] == ':');
1137#else
1138 BOOST_CHECK(tilde_expanded[0] == '/');
1139#endif
1140 BOOST_CHECK(tilde_expanded.find('~') == string::npos);
1141 // and check for the weird WIN32 version
1142#ifdef WIN32
1143 string tilde_expanded2 = system_path("~this_user_does_not_exist_anywhere").as_external();
1144 BOOST_CHECK(tilde_expanded2[0] = '/');
1145 BOOST_CHECK(tilde_expanded2.find('~') == string::npos);
1146#else
1147 BOOST_CHECK_THROW(system_path("~this_user_does_not_exist_anywhere"), informative_failure);
1148#endif
1149
1150 // finally, make sure that the copy-from-any_path constructor works right
1151 // in particular, it should interpret the paths it gets as being relative to
1152 // the project root, not the initial path
1153 working_root.unset();
1154 working_root.set(system_path("/working/root"), true);
1155 initial_rel_path.unset();
1156 initial_rel_path.set(fs::path("rel/initial"), true);
1157
1158 BOOST_CHECK(system_path(system_path("foo/bar")).as_internal() == "/a/b/foo/bar");
1159 BOOST_CHECK(!working_root.used);
1160 BOOST_CHECK(system_path(system_path("/foo/bar")).as_internal() == "/foo/bar");
1161 BOOST_CHECK(!working_root.used);
1162 BOOST_CHECK(system_path(file_path_internal("foo/bar"), false).as_internal()
1163 == "/working/root/foo/bar");
1164 BOOST_CHECK(!working_root.used);
1165 BOOST_CHECK(system_path(file_path_internal("foo/bar")).as_internal()
1166 == "/working/root/foo/bar");
1167 BOOST_CHECK(working_root.used);
1168 BOOST_CHECK(system_path(file_path_external(string("foo/bar"))).as_external()
1169 == "/working/root/rel/initial/foo/bar");
1170 file_path a_file_path;
1171 BOOST_CHECK(system_path(a_file_path).as_external()
1172 == "/working/root");
1173 BOOST_CHECK(system_path(bookkeeping_path("_MTN/foo/bar")).as_internal()
1174 == "/working/root/_MTN/foo/bar");
1175 BOOST_CHECK(system_path(bookkeeping_root).as_internal()
1176 == "/working/root/_MTN");
1177 initial_abs_path.unset();
1178 working_root.unset();
1179 initial_rel_path.unset();
1180}
1181
1182static void test_access_tracker()
1183{
1184 access_tracker<int> a;
1185 BOOST_CHECK_THROW(a.get(), logic_error);
1186 a.set(1, false);
1187 BOOST_CHECK_THROW(a.set(2, false), logic_error);
1188 a.set(2, true);
1189 BOOST_CHECK_THROW(a.set(3, false), logic_error);
1190 BOOST_CHECK(a.get() == 2);
1191 BOOST_CHECK_THROW(a.set(3, true), logic_error);
1192 a.unset();
1193 a.may_not_initialize();
1194 BOOST_CHECK_THROW(a.set(1, false), logic_error);
1195 BOOST_CHECK_THROW(a.set(2, true), logic_error);
1196 a.unset();
1197 a.set(1, false);
1198 BOOST_CHECK_THROW(a.may_not_initialize(), logic_error);
1199}
1200
1201static void test_a_path_ordering(string const & left, string const & right)
1202{
1203 MM(left);
1204 MM(right);
1205 split_path left_sp, right_sp;
1206 file_path_internal(left).split(left_sp);
1207 file_path_internal(right).split(right_sp);
1208 I(left_sp < right_sp);
1209}
1210
1211static void test_path_ordering()
1212{
1213 // this ordering is very important:
1214 // -- it is used to determine the textual form of csets and manifests
1215 // (in particular, it cannot be changed)
1216 // -- it is used to determine in what order cset operations can be applied
1217 // (in particular, foo must sort before foo/bar, so that we can use it
1218 // to do top-down and bottom-up traversals of a set of paths).
1219 test_a_path_ordering("a", "b");
1220 test_a_path_ordering("a", "c");
1221 test_a_path_ordering("ab", "ac");
1222 test_a_path_ordering("a", "ab");
1223 test_a_path_ordering("", "a");
1224 test_a_path_ordering("", ".foo");
1225 test_a_path_ordering("foo", "foo/bar");
1226 // . is before / asciibetically, so sorting by strings will give the wrong
1227 // answer on this:
1228 test_a_path_ordering("foo/bar", "foo.bar");
1229
1230 // path_components used to be interned strings, and we used the default sort
1231 // order, which meant that in practice path components would sort in the
1232 // _order they were first used in the program_. So let's put in a test that
1233 // would catch this sort of brokenness.
1234 test_a_path_ordering("fallanopic_not_otherwise_mentioned", "xyzzy");
1235 test_a_path_ordering("fallanoooo_not_otherwise_mentioned_and_smaller", "fallanopic_not_otherwise_mentioned");
1236}
1237
1238
1239void add_paths_tests(test_suite * suite)
1240{
1241 I(suite);
1242 suite->add(BOOST_TEST_CASE(&test_path_ordering));
1243 suite->add(BOOST_TEST_CASE(&test_null_name));
1244 suite->add(BOOST_TEST_CASE(&test_file_path_internal));
1245 suite->add(BOOST_TEST_CASE(&test_file_path_external_null_prefix));
1246 suite->add(BOOST_TEST_CASE(&test_file_path_external_prefix__MTN));
1247 suite->add(BOOST_TEST_CASE(&test_file_path_external_prefix_a_b));
1248 suite->add(BOOST_TEST_CASE(&test_split_join));
1249 suite->add(BOOST_TEST_CASE(&test_bookkeeping_path));
1250 suite->add(BOOST_TEST_CASE(&test_system_path));
1251 suite->add(BOOST_TEST_CASE(&test_access_tracker));
1252}
1253
1254#endif // BUILD_UNIT_TESTS
1255
1256// Local Variables:
1257// mode: C++
1258// fill-column: 76
1259// c-file-style: "gnu"
1260// indent-tabs-mode: nil
1261// End:
1262// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status