monotone

monotone Mtn Source Tree

Root/paths.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <sstream>
12
13#include "paths.hh"
14#include "file_io.hh"
15#include "charset.hh"
16
17using std::exception;
18using std::ostream;
19using std::ostringstream;
20using std::string;
21using std::vector;
22
23// some structure to ensure we aren't doing anything broken when resolving
24// filenames. the idea is to make sure
25// -- we don't depend on the existence of something before it has been set
26// -- we don't re-set something that has already been used
27// -- sometimes, we use the _non_-existence of something, so we shouldn't
28// set anything whose un-setted-ness has already been used
// Wraps a lazily configured value together with three flags that record
// how it has been touched so far:
//   initialized        -- set() has stored a value
//   used               -- get() has handed the value out
//   very_uninitialized -- may_not_initialize() recorded that somebody
//                         relied on the value NOT being set
// Every operation asserts, via I(), that it is still legal given the flags.
template <typename T>
struct access_tracker
{
  T value;
  bool initialized, used, very_uninitialized;

  access_tracker()
    : initialized(false), used(false), very_uninitialized(false)
  {}

  // Store val.  Overwriting an existing value is only permitted when
  // may_be_initialized is true, and never once the value has been used or
  // its absence has been relied upon.
  void set(T const & val, bool may_be_initialized)
  {
    I(may_be_initialized || !initialized);
    I(!very_uninitialized);
    I(!used);
    initialized = true;
    value = val;
  }

  // Fetch the value and remember that it has been used.
  T const & get()
  {
    I(initialized);
    used = true;
    return value;
  }

  // Fetch the value without marking it as used.
  T const & get_but_unused()
  {
    I(initialized);
    return value;
  }

  // Record that a caller depended on the value being unset; any later
  // set() will then fail its invariant.
  void may_not_initialize()
  {
    I(!initialized);
    very_uninitialized = true;
  }

  // for unit tests: forget all recorded state (the stored value itself is
  // left untouched).
  void unset()
  {
    initialized = false;
    used = false;
    very_uninitialized = false;
  }
};
65
66// paths to use in interpreting paths from various sources,
67// conceptually:
68// working_root / initial_rel_path == initial_abs_path
69
70// initial_abs_path is for interpreting relative system_path's
71static access_tracker<system_path> initial_abs_path;
72// initial_rel_path is for interpreting external file_path's
73// we used to make it a file_path, but then you can't run monotone from
74// inside the _MTN/ dir (even when referring to files outside the _MTN/
75// dir). use of a bare string requires some caution but does work.
76static access_tracker<string> initial_rel_path;
77// working_root is for converting file_path's and bookkeeping_path's to
78// system_path's.
79static access_tracker<system_path> working_root;
80
81bookkeeping_path const bookkeeping_root("_MTN");
82path_component const bookkeeping_root_component("_MTN");
83path_component const old_bookkeeping_root_component("MT");
84
85void
86save_initial_path()
87{
88 // FIXME: BUG: this only works if the current working dir is in utf8
89 initial_abs_path.set(system_path(get_current_working_dir()), false);
90 L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused());
91}
92
93///////////////////////////////////////////////////////////////////////////
94// verifying that internal paths are indeed normalized.
95// this code must be superfast
96///////////////////////////////////////////////////////////////////////////
97
98// normalized means:
99// -- / as path separator
100// -- not an absolute path (on either posix or win32)
101// operationally, this means: first character != '/', first character != '\',
102// second character != ':'
103// -- no illegal characters
104// -- 0x00 -- 0x1f, 0x7f, \ are the illegal characters. \ is illegal
105// unconditionally to prevent people checking in files on posix that
106// have a different interpretation on win32
107// -- (may want to allow 0x0a and 0x0d (LF and CR) in the future, but this
108// is blocked on manifest format changing)
109// (also requires changes to 'automate inventory', possibly others, to
110// handle quoting)
111// -- no doubled /'s
112// -- no trailing /
113// -- no "." or ".." path components
114
// A component is bad when it is empty, "." or "..".
static inline bool
bad_component(string const & component)
{
  return component.empty()
    || component == "."
    || component == "..";
}
126
// True if path contains any byte that is illegal in a normalized path:
// 0x00-0x1f, 0x7f, or a backslash.
static inline bool
has_bad_chars(string const & path)
{
  string::const_iterator const last = path.end();
  for (string::const_iterator it = path.begin(); LIKELY(it != last); ++it)
    {
      // char may be signed; go through an unsigned byte so that 0x80-0xff
      // do not compare as small (negative) values.
      u8 const byte = static_cast<u8>(*it);
      // 0x5c is '\\'; the hex constant makes the dependency on ASCII
      // encoding explicit.
      if (UNLIKELY(byte < 0x20 || byte == 0x5c || byte == 0x7f))
        return true;
    }
  return false;
}
142
// Like has_bad_chars, but for a single path component: '/' is rejected
// in addition to 0x00-0x1f, 0x7f and backslash.
static inline bool
has_bad_component_chars(string const & pc)
{
  string::const_iterator const last = pc.end();
  for (string::const_iterator it = pc.begin(); LIKELY(it != last); ++it)
    {
      // char may be signed; go through an unsigned byte so that 0x80-0xff
      // do not compare as small (negative) values.
      u8 const byte = static_cast<u8>(*it);
      // 0x2f is '/' and 0x5c is '\\'; hex constants make the dependency
      // on ASCII encoding explicit.
      if (UNLIKELY(byte < 0x20 || byte == 0x2f
                   || byte == 0x5c || byte == 0x7f))
        return true;
    }
  return false;
}
160
// True if path is absolute on the platform being compiled for: a leading
// '/' everywhere, plus (under WIN32 only) a leading backslash or a ':' in
// second position.  The empty path is not absolute.
static bool
is_absolute_here(string const & path)
{
  if (path.empty())
    return false;
#ifdef WIN32
  if (path[0] == '\\' || (path.size() > 1 && path[1] == ':'))
    return true;
#endif
  return path[0] == '/';
}
176
// True if path would be absolute on any supported platform: it starts
// with '/' or backslash, or has ':' as its second character.  The empty
// path is not absolute.
static inline bool
is_absolute_somewhere(string const & path)
{
  if (path.empty())
    return false;
  char const first = path[0];
  return first == '/'
    || first == '\\'
    || (path.size() > 1 && path[1] == ':');
}
190
191// fully_normalized_path verifies a complete pathname for validity and
192// having been properly normalized (as if by normalize_path, below).
193static inline bool
194fully_normalized_path(string const & path)
195{
196 // empty path is fine
197 if (path.empty())
198 return true;
199 // could use is_absolute_somewhere, but this is the only part of it that
200 // wouldn't be redundant
201 if (path.size() > 1 && path[1] == ':')
202 return false;
203 // first scan for completely illegal bytes
204 if (has_bad_chars(path))
205 return false;
206 // now check each component
207 string::size_type start = 0, stop;
208 while (1)
209 {
210 stop = path.find('/', start);
211 if (stop == string::npos)
212 break;
213 string const & s(path.substr(start, stop - start));
214 if (bad_component(s))
215 return false;
216 start = stop + 1;
217 }
218
219 string const & s(path.substr(start));
220 return !bad_component(s);
221}
222
// Decide whether path is, or lies inside, the bookkeeping directory.
//
// _MTN, _MTn, _MtN, _mtn etc. are all treated as the bookkeeping
// directory, because on case insensitive filesystems files put in any of
// them may end up in _MTN instead, which allows arbitrary code
// execution.  A better solution would be to fix this in the working
// directory writing code -- this prevents all-unix projects from naming
// things "_mtn", which is less rude than when the bookkeeping root was
// "MT", but still rude -- but as a temporary security kluge it works.
static inline bool
in_bookkeeping_dir(string const & path)
{
  // anything shorter than "_MTN" cannot match
  if (path.size() < 4)
    return false;
  if (path[0] != '_')
    return false;
  if (path[1] != 'M' && path[1] != 'm')
    return false;
  if (path[2] != 'T' && path[2] != 't')
    return false;
  if (path[3] != 'N' && path[3] != 'n')
    return false;
  // The first four characters are '_', 'M', 'T', 'N' (letters in either
  // case).  This is a bookkeeping path if that is the whole path or if
  // the next character is '/'.
  return path.size() == 4 || path[4] == '/';
}
248
249static inline bool
250is_valid_internal(string const & path)
251{
252 return (fully_normalized_path(path)
253 && !in_bookkeeping_dir(path));
254}
255
256static string
257normalize_path(string const & in)
258{
259 string inT = in;
260 string leader;
261 MM(inT);
262
263#ifdef WIN32
264 // the first thing we do is kill all the backslashes
265 for (string::iterator i = inT.begin(); i != inT.end(); i++)
266 if (*i == '\\')
267 *i = '/';
268#endif
269
270 if (is_absolute_here (inT))
271 {
272 if (inT[0] == '/')
273 {
274 leader = "/";
275 inT = inT.substr(1);
276
277 if (inT.size() > 0 && inT[0] == '/')
278 {
279 // if there are exactly two slashes at the beginning they
280 // are both preserved. three or more are the same as one.
281 string::size_type f = inT.find_first_not_of("/");
282 if (f == string::npos)
283 f = inT.size();
284 if (f == 1)
285 leader = "//";
286 inT = inT.substr(f);
287 }
288 }
289#ifdef WIN32
290 else
291 {
292 I(inT[1] == ':');
293 if (inT.size() > 2 && inT[2] == '/')
294 {
295 leader = inT.substr(0, 3);
296 inT = inT.substr(3);
297 }
298 else
299 {
300 leader = inT.substr(0, 2);
301 inT = inT.substr(2);
302 }
303 }
304#endif
305
306 I(!is_absolute_here(inT));
307 if (inT.size() == 0)
308 return leader;
309 }
310
311 vector<string> stack;
312 string::const_iterator head, tail;
313 string::size_type size_estimate = leader.size();
314 for (head = inT.begin(); head != inT.end(); head = tail)
315 {
316 tail = head;
317 while (tail != inT.end() && *tail != '/')
318 tail++;
319
320 string elt(head, tail);
321 while (tail != inT.end() && *tail == '/')
322 tail++;
323
324 if (elt == ".")
325 continue;
326 // remove foo/.. element pairs; leave leading .. components alone
327 if (elt == ".." && !stack.empty() && stack.back() != "..")
328 {
329 stack.pop_back();
330 continue;
331 }
332
333 size_estimate += elt.size() + 1;
334 stack.push_back(elt);
335 }
336
337 leader.reserve(size_estimate);
338 for (vector<string>::const_iterator i = stack.begin(); i != stack.end(); i++)
339 {
340 if (i != stack.begin())
341 leader += "/";
342 leader += *i;
343 }
344 return leader;
345}
346
347static void
348normalize_external_path(string const & path, string & normalized)
349{
350 if (!initial_rel_path.initialized)
351 {
352 // we are not in a workspace; treat this as an internal
353 // path, and set the access_tracker() into a very uninitialised
354 // state so that we will hit an exception if we do eventually
355 // enter a workspace
356 initial_rel_path.may_not_initialize();
357 normalized = path;
358 N(is_valid_internal(path),
359 F("path '%s' is invalid") % path);
360 }
361 else
362 {
363 N(!path.empty(), F("empty path '%s' is invalid") % path);
364 N(!is_absolute_here(path), F("absolute path '%s' is invalid") % path);
365 string base;
366 try
367 {
368 base = initial_rel_path.get();
369 if (base == "")
370 normalized = normalize_path(path);
371 else
372 normalized = normalize_path(base + "/" + path);
373 }
374 catch (exception &)
375 {
376 N(false, F("path '%s' is invalid") % path);
377 }
378 if (normalized == ".")
379 normalized = string("");
380 N(fully_normalized_path(normalized),
381 F("path '%s' is invalid") % normalized);
382 }
383}
384
385///////////////////////////////////////////////////////////////////////////
386// single path component handling.
387///////////////////////////////////////////////////////////////////////////
388
389// these constructors confirm that what they are passed is a legitimate
390// component. note that the empty string is a legitimate component,
391// but is not acceptable to bad_component (above) and therefore we have
392// to open-code most of those checks.
393path_component::path_component(utf8 const & d)
394 : data(d())
395{
396 MM(data);
397 I(!has_bad_component_chars(data) && data != "." && data != "..");
398}
399
400path_component::path_component(string const & d)
401 : data(d)
402{
403 MM(data);
404 I(utf8_validate(utf8(data))
405 && !has_bad_component_chars(data)
406 && data != "." && data != "..");
407}
408
409path_component::path_component(char const * d)
410 : data(d)
411{
412 MM(data);
413 I(utf8_validate(utf8(data))
414 && !has_bad_component_chars(data)
415 && data != "." && data != "..");
416}
417
418std::ostream & operator<<(std::ostream & s, path_component const & pc)
419{
420 return s << pc();
421}
422
423template <> void dump(path_component const & pc, std::string & to)
424{
425 to = pc();
426}
427
428///////////////////////////////////////////////////////////////////////////
429// complete paths to files within a working directory
430///////////////////////////////////////////////////////////////////////////
431
432file_path::file_path(file_path::source_type type, string const & path)
433{
434 MM(path);
435 I(utf8_validate(utf8(path)));
436 if (type == external)
437 {
438 string normalized;
439 normalize_external_path(path, normalized);
440 N(!in_bookkeeping_dir(normalized),
441 F("path '%s' is in bookkeeping dir") % normalized);
442 data = normalized;
443 }
444 else
445 data = path;
446 MM(data);
447 I(is_valid_internal(data));
448}
449
450file_path::file_path(file_path::source_type type, utf8 const & path)
451{
452 MM(path);
453 I(utf8_validate(path));
454 if (type == external)
455 {
456 string normalized;
457 normalize_external_path(path(), normalized);
458 N(!in_bookkeeping_dir(normalized),
459 F("path '%s' is in bookkeeping dir") % normalized);
460 data = normalized;
461 }
462 else
463 data = path();
464 MM(data);
465 I(is_valid_internal(data));
466}
467
468bookkeeping_path::bookkeeping_path(string const & path)
469{
470 I(fully_normalized_path(path));
471 I(in_bookkeeping_dir(path));
472 data = path;
473}
474
475bool
476bookkeeping_path::external_string_is_bookkeeping_path(utf8 const & path)
477{
478 // FIXME: this charset casting everywhere is ridiculous
479 string normalized;
480 normalize_external_path(path(), normalized);
481 return internal_string_is_bookkeeping_path(utf8(normalized));
482}
483bool bookkeeping_path::internal_string_is_bookkeeping_path(utf8 const & path)
484{
485 return in_bookkeeping_dir(path());
486}
487
488///////////////////////////////////////////////////////////////////////////
489// splitting/joining
490// this code must be superfast
491// it depends very much on knowing that it can only be applied to fully
492// normalized, relative, paths.
493///////////////////////////////////////////////////////////////////////////
494
495// this peels off the last component of any path and returns it.
496// the last component of a path with no slashes in it is the complete path.
497// the last component of a path referring to the root directory is an
498// empty string.
499path_component
500any_path::basename() const
501{
502 string const & s = data;
503 string::size_type sep = s.rfind('/');
504#ifdef WIN32
505 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
506 sep = 1;
507#endif
508 if (sep == string::npos)
509 return path_component(s, 0); // force use of short circuit
510 if (sep == s.size())
511 return path_component();
512 return path_component(s, sep + 1);
513}
514
515// this returns all but the last component of any path. It has to take
516// care at the root.
517any_path
518any_path::dirname() const
519{
520 string const & s = data;
521 string::size_type sep = s.rfind('/');
522#ifdef WIN32
523 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
524 sep = 1;
525#endif
526 if (sep == string::npos)
527 return any_path();
528
529 // dirname() of the root directory is itself
530 if (sep == s.size() - 1)
531 return *this;
532
533 // dirname() of a direct child of the root is the root
534 if (sep == 0 || (sep == 1 && s[1] == '/')
535#ifdef WIN32
536 || (sep == 1 || sep == 2 && s[1] == ':')
537#endif
538 )
539 return any_path(s, 0, sep+1);
540
541 return any_path(s, 0, sep);
542}
543
544// these variations exist to get the return type right. also,
545// file_path dirname() can be a little simpler.
546file_path
547file_path::dirname() const
548{
549 string const & s = data;
550 string::size_type sep = s.rfind('/');
551 if (sep == string::npos)
552 return file_path();
553 return file_path(s, 0, sep);
554}
555
556system_path
557system_path::dirname() const
558{
559 string const & s = data;
560 string::size_type sep = s.rfind('/');
561#ifdef WIN32
562 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
563 sep = 1;
564#endif
565 I(sep != string::npos);
566
567 // dirname() of the root directory is itself
568 if (sep == s.size() - 1)
569 return *this;
570
571 // dirname() of a direct child of the root is the root
572 if (sep == 0 || (sep == 1 && s[1] == '/')
573#ifdef WIN32
574 || (sep == 1 || sep == 2 && s[1] == ':')
575#endif
576 )
577 return system_path(s, 0, sep+1);
578
579 return system_path(s, 0, sep);
580}
581
582
583// produce dirname and basename at the same time
584void
585file_path::dirname_basename(file_path & dir, path_component & base) const
586{
587 string const & s = data;
588 string::size_type sep = s.rfind('/');
589 if (sep == string::npos)
590 {
591 dir = file_path();
592 base = path_component(s, 0);
593 }
594 else
595 {
596 I(sep < s.size() - 1); // last component must have at least one char
597 dir = file_path(s, 0, sep);
598 base = path_component(s, sep + 1);
599 }
600}
601
602// count the number of /-separated components of the path.
603unsigned int
604file_path::depth() const
605{
606 if (data.empty())
607 return 0;
608
609 unsigned int components = 1;
610 for (string::const_iterator p = data.begin(); p != data.end(); p++)
611 if (*p == '/')
612 components++;
613
614 return components;
615}
616
617///////////////////////////////////////////////////////////////////////////
618// localizing file names (externalizing them)
619// this code must be superfast when there is no conversion needed
620///////////////////////////////////////////////////////////////////////////
621
622string
623any_path::as_external() const
624{
625#ifdef __APPLE__
626 // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of
627 // locale.
628 return data;
629#else
630 // on normal systems we actually have some work to do, alas.
631 // not much, though, because utf8_to_system_string does all the hard work.
632 // it is carefully optimized. do not screw it up.
633 external out;
634 utf8_to_system_strict(utf8(data), out);
635 return out();
636#endif
637}
638
639///////////////////////////////////////////////////////////////////////////
640// writing out paths
641///////////////////////////////////////////////////////////////////////////
642
643ostream &
644operator <<(ostream & o, any_path const & a)
645{
646 o << a.as_internal();
647 return o;
648}
649
650template <>
651void dump(file_path const & p, string & out)
652{
653 ostringstream oss;
654 oss << p << '\n';
655 out = oss.str();
656}
657
658template <>
659void dump(system_path const & p, string & out)
660{
661 ostringstream oss;
662 oss << p << '\n';
663 out = oss.str();
664}
665
666template <>
667void dump(bookkeeping_path const & p, string & out)
668{
669 ostringstream oss;
670 oss << p << '\n';
671 out = oss.str();
672}
673
674///////////////////////////////////////////////////////////////////////////
675// path manipulation
676// this code's speed does not matter much
677///////////////////////////////////////////////////////////////////////////
678
679// relies on its arguments already being validated, except that you may not
680// append the empty path component, and if you are appending to the empty
681// path, you may not create an absolute path or a path into the bookkeeping
682// directory.
683file_path
684file_path::operator /(path_component const & to_append) const
685{
686 I(!to_append.empty());
687 if (empty())
688 {
689 string const & s = to_append();
690 I(!is_absolute_somewhere(s) && !in_bookkeeping_dir(s));
691 return file_path(s, 0, string::npos);
692 }
693 else
694 return file_path(data + "/" + to_append(), 0, string::npos);
695}
696
697// similarly, but even less checking is needed.
698file_path
699file_path::operator /(file_path const & to_append) const
700{
701 I(!to_append.empty());
702 if (empty())
703 return to_append;
704 return file_path(data + "/" + to_append.as_internal(), 0, string::npos);
705}
706
707bookkeeping_path
708bookkeeping_path::operator /(path_component const & to_append) const
709{
710 I(!to_append.empty());
711 I(!empty());
712 return bookkeeping_path(data + "/" + to_append(), 0, string::npos);
713}
714
715system_path
716system_path::operator /(path_component const & to_append) const
717{
718 I(!to_append.empty());
719 I(!empty());
720 return system_path(data + "/" + to_append(), 0, string::npos);
721}
722
723any_path
724any_path::operator /(path_component const & to_append) const
725{
726 I(!to_append.empty());
727 I(!empty());
728 return any_path(data + "/" + to_append(), 0, string::npos);
729}
730
731// these take strings and validate
732bookkeeping_path
733bookkeeping_path::operator /(char const * to_append) const
734{
735 I(!is_absolute_somewhere(to_append));
736 I(!empty());
737 return bookkeeping_path(data + "/" + to_append);
738}
739
740system_path
741system_path::operator /(char const * to_append) const
742{
743 I(!empty());
744 I(!is_absolute_here(to_append));
745 return system_path(data + "/" + to_append);
746}
747
748///////////////////////////////////////////////////////////////////////////
749// system_path
750///////////////////////////////////////////////////////////////////////////
751
752system_path::system_path(any_path const & other, bool in_true_workspace)
753{
754 if (is_absolute_here(other.as_internal()))
755 // another system_path. the normalizing isn't really necessary, but it
756 // makes me feel warm and fuzzy.
757 data = normalize_path(other.as_internal());
758 else
759 {
760 system_path wr;
761 if (in_true_workspace)
762 wr = working_root.get();
763 else
764 wr = working_root.get_but_unused();
765 data = normalize_path(wr.as_internal() + "/" + other.as_internal());
766 }
767}
768
769static inline string const_system_path(utf8 const & path)
770{
771 N(!path().empty(), F("invalid path ''"));
772 string expanded = tilde_expand(path());
773 if (is_absolute_here(expanded))
774 return normalize_path(expanded);
775 else
776 return normalize_path(initial_abs_path.get().as_internal()
777 + "/" + path());
778}
779
780system_path::system_path(string const & path)
781{
782 data = const_system_path(utf8(path));
783}
784
785system_path::system_path(utf8 const & path)
786{
787 data = const_system_path(utf8(path));
788}
789
790///////////////////////////////////////////////////////////////////////////
791// workspace (and path root) handling
792///////////////////////////////////////////////////////////////////////////
793
794static bool
795find_bookdir(system_path const & root, path_component const & bookdir,
796 system_path & current, string & removed)
797{
798 current = initial_abs_path.get();
799 removed.clear();
800
801 // check that the current directory is below the specified search root
802 if (current.as_internal().find(root.as_internal()) != 0)
803 {
804 W(F("current directory '%s' is not below root '%s'") % current % root);
805 return false;
806 }
807
808 L(FL("searching for '%s' directory with root '%s'") % bookdir % root);
809
810 system_path check;
811 while (!(current == root))
812 {
813 check = current / bookdir;
814 switch (get_path_status(check))
815 {
816 case path::nonexistent:
817 L(FL("'%s' not found in '%s' with '%s' removed")
818 % bookdir % current % removed);
819 if (removed.empty())
820 removed = current.basename()();
821 else
822 removed = current.basename()() + "/" + removed;
823 current = current.dirname();
824 continue;
825
826 case path::file:
827 L(FL("'%s' is not a directory") % check);
828 return false;
829
830 case path::directory:
831 goto found;
832 }
833 }
834
835 // if we get here, we have hit the root; try once more
836 check = current / bookdir;
837 switch (get_path_status(check))
838 {
839 case path::nonexistent:
840 L(FL("'%s' not found in '%s' with '%s' removed")
841 % bookdir % current % removed);
842 return false;
843
844 case path::file:
845 L(FL("'%s' is not a directory") % check);
846 return false;
847
848 case path::directory:
849 goto found;
850 }
851 return false;
852
853 found:
854 // check for _MTN/. and _MTN/.. to see if mt dir is readable
855 try
856 {
857 if (!path_exists(check / ".") || !path_exists(check / ".."))
858 {
859 L(FL("problems with '%s' (missing '.' or '..')") % check);
860 return false;
861 }
862 }
863 catch(exception &)
864 {
865 L(FL("problems with '%s' (cannot check for '.' or '..')") % check);
866 return false;
867 }
868 return true;
869}
870
871
872bool
873find_and_go_to_workspace(string const & search_root)
874{
875 system_path root, current;
876 string removed;
877
878 if (search_root.empty())
879 {
880#ifdef WIN32
881 std::string cur_str = get_current_working_dir();
882 current = cur_str;
883 if (cur_str[0] == '/' || cur_str[0] == '\\')
884 {
885 if (cur_str.size() > 1 && (cur_str[1] == '/' || cur_str[1] == '\\'))
886 {
887 // UNC name
888 string::size_type uncend = cur_str.find_first_of("\\/", 2);
889 if (uncend == string::npos)
890 root = system_path(cur_str + "/");
891 else
892 root = system_path(cur_str.substr(0, uncend));
893 }
894 else
895 root = system_path("/");
896 }
897 else if (cur_str.size() > 1 && cur_str[1] == ':')
898 {
899 root = system_path(cur_str.substr(0,2) + "/");
900 }
901 else I(false);
902#else
903 root = system_path("/");
904#endif
905 }
906 else
907 {
908 root = system_path(search_root);
909 L(FL("limiting search for workspace to %s") % root);
910
911 require_path_is_directory(root,
912 F("search root '%s' does not exist") % root,
913 F("search root '%s' is not a directory") % root);
914 }
915
916 // first look for the current name of the bookkeeping directory.
917 // if we don't find it, look for it under the old name, so that
918 // migration has a chance to work.
919 if (!find_bookdir(root, bookkeeping_root_component, current, removed))
920 if (!find_bookdir(root, old_bookkeeping_root_component, current, removed))
921 return false;
922
923 working_root.set(current, true);
924 initial_rel_path.set(removed, true);
925
926 L(FL("working root is '%s'") % working_root.get_but_unused());
927 L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused());
928
929 change_current_working_dir(working_root.get_but_unused());
930
931 return true;
932}
933
934void
935go_to_workspace(system_path const & new_workspace)
936{
937 working_root.set(new_workspace, true);
938 initial_rel_path.set(string(), true);
939 change_current_working_dir(new_workspace);
940}
941
942void
943mark_std_paths_used(void)
944{
945 working_root.get();
946 initial_rel_path.get();
947}
948
949///////////////////////////////////////////////////////////////////////////
950// tests
951///////////////////////////////////////////////////////////////////////////
952
953#ifdef BUILD_UNIT_TESTS
954#include "unit_tests.hh"
955#include "randomizer.hh"
956
957using std::logic_error;
958
959UNIT_TEST(paths, path_component)
960{
961 char const * baddies[] = {".",
962 "..",
963 "/foo",
964 "\\foo",
965 "foo/bar",
966 "foo\\bar",
967 0 };
968
969 // these would not be okay in a full file_path, but are okay here.
970 char const * goodies[] = {"c:foo",
971 "_mtn",
972 "_mtN",
973 "_mTn",
974 "_Mtn",
975 "_MTn",
976 "_MtN",
977 "_MTN",
978 0 };
979
980
981 for (char const ** c = baddies; *c; ++c)
982 {
983 // the comparison prevents the compiler from eliminating the
984 // expression.
985 UNIT_TEST_CHECK_THROW((path_component(*c)()) == *c, logic_error);
986 }
987 for (char const **c = goodies; *c; ++c)
988 {
989 path_component p(*c);
990 UNIT_TEST_CHECK_THROW(file_path() / p, logic_error);
991 }
992
993 UNIT_TEST_CHECK_THROW(file_path_internal("foo") / path_component(),
994 logic_error);
995}
996
997
998UNIT_TEST(paths, file_path_internal)
999{
1000 char const * baddies[] = {"/foo",
1001 "foo//bar",
1002 "foo/../bar",
1003 "../bar",
1004 "_MTN",
1005 "_MTN/blah",
1006 "foo/bar/",
1007 "foo/bar/.",
1008 "foo/bar/./",
1009 "foo/./bar",
1010 "./foo",
1011 ".",
1012 "..",
1013 "c:\\foo",
1014 "c:foo",
1015 "c:/foo",
1016 // some baddies made bad by a security kluge --
1017 // see the comment in in_bookkeeping_dir
1018 "_mtn",
1019 "_mtN",
1020 "_mTn",
1021 "_Mtn",
1022 "_MTn",
1023 "_MtN",
1024 "_mTN",
1025 "_mtn/foo",
1026 "_mtN/foo",
1027 "_mTn/foo",
1028 "_Mtn/foo",
1029 "_MTn/foo",
1030 "_MtN/foo",
1031 "_mTN/foo",
1032 0 };
1033 initial_rel_path.unset();
1034 initial_rel_path.set(string(), true);
1035 for (char const ** c = baddies; *c; ++c)
1036 {
1037 UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error);
1038 }
1039 initial_rel_path.unset();
1040 initial_rel_path.set("blah/blah/blah", true);
1041 for (char const ** c = baddies; *c; ++c)
1042 {
1043 UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error);
1044 }
1045
1046 UNIT_TEST_CHECK(file_path().empty());
1047 UNIT_TEST_CHECK(file_path_internal("").empty());
1048
1049 char const * goodies[] = {"",
1050 "a",
1051 "foo",
1052 "foo/bar/baz",
1053 "foo/bar.baz",
1054 "foo/with-hyphen/bar",
1055 "foo/with_underscore/bar",
1056 "foo/with,other+@weird*%#$=stuff/bar",
1057 ".foo/bar",
1058 "..foo/bar",
1059 "_MTNfoo/bar",
1060 "foo:bar",
1061 0 };
1062
1063 for (int i = 0; i < 2; ++i)
1064 {
1065 initial_rel_path.unset();
1066 initial_rel_path.set(i ? string()
1067 : string("blah/blah/blah"),
1068 true);
1069 for (char const ** c = goodies; *c; ++c)
1070 {
1071 file_path fp = file_path_internal(*c);
1072 UNIT_TEST_CHECK(fp.as_internal() == *c);
1073 UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp);
1074 }
1075 }
1076
1077 initial_rel_path.unset();
1078}
1079
1080static void check_fp_normalizes_to(char * before, char * after)
1081{
1082 L(FL("check_fp_normalizes_to: '%s' -> '%s'") % before % after);
1083 file_path fp = file_path_external(utf8(before));
1084 L(FL(" (got: %s)") % fp);
1085 UNIT_TEST_CHECK(fp.as_internal() == after);
1086 UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp);
1087 // we compare after to the external form too, since as far as we know
1088 // relative normalized posix paths are always good win32 paths too
1089 UNIT_TEST_CHECK(fp.as_external() == after);
1090}
1091
1092UNIT_TEST(paths, file_path_external_null_prefix)
1093{
1094 initial_rel_path.unset();
1095 initial_rel_path.set(string(), true);
1096
1097 char const * baddies[] = {"/foo",
1098 "../bar",
1099 "_MTN/blah",
1100 "_MTN",
1101 "//blah",
1102 "\\foo",
1103 "..",
1104 "c:\\foo",
1105 "c:foo",
1106 "c:/foo",
1107 "",
1108 // some baddies made bad by a security kluge --
1109 // see the comment in in_bookkeeping_dir
1110 "_mtn",
1111 "_mtN",
1112 "_mTn",
1113 "_Mtn",
1114 "_MTn",
1115 "_MtN",
1116 "_mTN",
1117 "_mtn/foo",
1118 "_mtN/foo",
1119 "_mTn/foo",
1120 "_Mtn/foo",
1121 "_MTn/foo",
1122 "_MtN/foo",
1123 "_mTN/foo",
1124 0 };
1125 for (char const ** c = baddies; *c; ++c)
1126 {
1127 L(FL("test_file_path_external_null_prefix: trying baddie: %s") % *c);
1128 UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
1129 }
1130
1131 check_fp_normalizes_to("a", "a");
1132 check_fp_normalizes_to("foo", "foo");
1133 check_fp_normalizes_to("foo/bar", "foo/bar");
1134 check_fp_normalizes_to("foo/bar/baz", "foo/bar/baz");
1135 check_fp_normalizes_to("foo/bar.baz", "foo/bar.baz");
1136 check_fp_normalizes_to("foo/with-hyphen/bar", "foo/with-hyphen/bar");
1137 check_fp_normalizes_to("foo/with_underscore/bar", "foo/with_underscore/bar");
1138 check_fp_normalizes_to(".foo/bar", ".foo/bar");
1139 check_fp_normalizes_to("..foo/bar", "..foo/bar");
1140 check_fp_normalizes_to(".", "");
1141#ifndef WIN32
1142 check_fp_normalizes_to("foo:bar", "foo:bar");
1143#endif
1144 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
1145 "foo/with,other+@weird*%#$=stuff/bar");
1146
1147 // Why are these tests with // in them commented out? because boost::fs
1148 // sucks and can't normalize them. FIXME.
1149 //check_fp_normalizes_to("foo//bar", "foo/bar");
1150 check_fp_normalizes_to("foo/../bar", "bar");
1151 check_fp_normalizes_to("foo/bar/", "foo/bar");
1152 check_fp_normalizes_to("foo/bar/.", "foo/bar");
1153 check_fp_normalizes_to("foo/bar/./", "foo/bar");
1154 check_fp_normalizes_to("foo/./bar/", "foo/bar");
1155 check_fp_normalizes_to("./foo", "foo");
1156 //check_fp_normalizes_to("foo///.//", "foo");
1157
1158 initial_rel_path.unset();
1159}
1160
1161UNIT_TEST(paths, file_path_external_prefix__MTN)
1162{
1163 initial_rel_path.unset();
1164 initial_rel_path.set(string("_MTN"), true);
1165
1166 UNIT_TEST_CHECK_THROW(file_path_external(utf8("foo")), informative_failure);
1167 UNIT_TEST_CHECK_THROW(file_path_external(utf8(".")), informative_failure);
1168 UNIT_TEST_CHECK_THROW(file_path_external(utf8("./blah")), informative_failure);
1169 check_fp_normalizes_to("..", "");
1170 check_fp_normalizes_to("../foo", "foo");
1171}
1172
1173UNIT_TEST(paths, file_path_external_prefix_a_b)
1174{
1175 initial_rel_path.unset();
1176 initial_rel_path.set(string("a/b"), true);
1177
1178 char const * baddies[] = {"/foo",
1179 "../../../bar",
1180 "../../..",
1181 "../../_MTN",
1182 "../../_MTN/foo",
1183 "//blah",
1184 "\\foo",
1185 "c:\\foo",
1186#ifdef WIN32
1187 "c:foo",
1188 "c:/foo",
1189#endif
1190 "",
1191 // some baddies made bad by a security kluge --
1192 // see the comment in in_bookkeeping_dir
1193 "../../_mtn",
1194 "../../_mtN",
1195 "../../_mTn",
1196 "../../_Mtn",
1197 "../../_MTn",
1198 "../../_MtN",
1199 "../../_mTN",
1200 "../../_mtn/foo",
1201 "../../_mtN/foo",
1202 "../../_mTn/foo",
1203 "../../_Mtn/foo",
1204 "../../_MTn/foo",
1205 "../../_MtN/foo",
1206 "../../_mTN/foo",
1207 0 };
1208 for (char const ** c = baddies; *c; ++c)
1209 {
1210 L(FL("test_file_path_external_prefix_a_b: trying baddie: %s") % *c);
1211 UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
1212 }
1213
1214 check_fp_normalizes_to("foo", "a/b/foo");
1215 check_fp_normalizes_to("a", "a/b/a");
1216 check_fp_normalizes_to("foo/bar", "a/b/foo/bar");
1217 check_fp_normalizes_to("foo/bar/baz", "a/b/foo/bar/baz");
1218 check_fp_normalizes_to("foo/bar.baz", "a/b/foo/bar.baz");
1219 check_fp_normalizes_to("foo/with-hyphen/bar", "a/b/foo/with-hyphen/bar");
1220 check_fp_normalizes_to("foo/with_underscore/bar", "a/b/foo/with_underscore/bar");
1221 check_fp_normalizes_to(".foo/bar", "a/b/.foo/bar");
1222 check_fp_normalizes_to("..foo/bar", "a/b/..foo/bar");
1223 check_fp_normalizes_to(".", "a/b");
1224#ifndef WIN32
1225 check_fp_normalizes_to("foo:bar", "a/b/foo:bar");
1226#endif
1227 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
1228 "a/b/foo/with,other+@weird*%#$=stuff/bar");
1229 // why are the tests with // in them commented out? because boost::fs sucks
1230 // and can't normalize them. FIXME.
1231 //check_fp_normalizes_to("foo//bar", "a/b/foo/bar");
1232 check_fp_normalizes_to("foo/../bar", "a/b/bar");
1233 check_fp_normalizes_to("foo/bar/", "a/b/foo/bar");
1234 check_fp_normalizes_to("foo/bar/.", "a/b/foo/bar");
1235 check_fp_normalizes_to("foo/bar/./", "a/b/foo/bar");
1236 check_fp_normalizes_to("foo/./bar/", "a/b/foo/bar");
1237 check_fp_normalizes_to("./foo", "a/b/foo");
1238 //check_fp_normalizes_to("foo///.//", "a/b/foo");
1239 // things that would have been bad without the initial_rel_path:
1240 check_fp_normalizes_to("../foo", "a/foo");
1241 check_fp_normalizes_to("..", "a");
1242 check_fp_normalizes_to("../..", "");
1243 check_fp_normalizes_to("_MTN/foo", "a/b/_MTN/foo");
1244 check_fp_normalizes_to("_MTN", "a/b/_MTN");
1245#ifndef WIN32
1246 check_fp_normalizes_to("c:foo", "a/b/c:foo");
1247 check_fp_normalizes_to("c:/foo", "a/b/c:/foo");
1248#endif
1249
1250 initial_rel_path.unset();
1251}
1252
1253UNIT_TEST(paths, basename)
1254{
1255 struct t
1256 {
1257 char const * in;
1258 char const * out;
1259 };
1260 // file_paths cannot be absolute, but may be the empty string.
1261 struct t const fp_cases[] = {
1262 { "", "" },
1263 { "foo", "foo" },
1264 { "foo/bar", "bar" },
1265 { "foo/bar/baz", "baz" },
1266 { 0, 0 }
1267 };
1268 // bookkeeping_paths cannot be absolute and must start with the
1269 // bookkeeping_root_component.
1270 struct t const bp_cases[] = {
1271 { "_MTN", "_MTN" },
1272 { "_MTN/foo", "foo" },
1273 { "_MTN/foo/bar", "bar" },
1274 { 0, 0 }
1275 };
1276
1277 // system_paths must be absolute. this relies on the setting of
1278 // initial_abs_path below. note that most of the cases whose full paths
1279 // vary between Unix and Windows will still have the same basenames.
1280 struct t const sp_cases[] = {
1281 { "/", "" },
1282 { "//", "" },
1283 { "foo", "foo" },
1284 { "/foo", "foo" },
1285 { "//foo", "foo" },
1286 { "~/foo", "foo" },
1287 { "c:/foo", "foo" },
1288 { "foo/bar", "bar" },
1289 { "/foo/bar", "bar" },
1290 { "//foo/bar", "bar" },
1291 { "~/foo/bar", "bar" },
1292 { "c:/foo/bar", "bar" },
1293#ifdef WIN32
1294 { "c:/", "" },
1295 { "c:foo", "foo" },
1296#else
1297 { "c:/", "c:" },
1298 { "c:foo", "c:foo" },
1299#endif
1300 { 0, 0 }
1301 };
1302
1303 UNIT_TEST_CHECKPOINT("file_path basenames");
1304 for (struct t const *p = fp_cases; p->in; p++)
1305 {
1306 file_path fp = file_path_internal(p->in);
1307 path_component pc(fp.basename());
1308 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1309 FL("basename('%s') = '%s' (expect '%s')")
1310 % p->in % pc % p->out);
1311 }
1312
1313 UNIT_TEST_CHECKPOINT("bookkeeping_path basenames");
1314 for (struct t const *p = bp_cases; p->in; p++)
1315 {
1316 bookkeeping_path fp(p->in);
1317 path_component pc(fp.basename());
1318 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1319 FL("basename('%s') = '%s' (expect '%s')")
1320 % p->in % pc % p->out);
1321 }
1322
1323
1324 UNIT_TEST_CHECKPOINT("system_path basenames");
1325
1326 initial_abs_path.unset();
1327 initial_abs_path.set(system_path("/a/b"), true);
1328
1329 for (struct t const *p = sp_cases; p->in; p++)
1330 {
1331 system_path fp(p->in);
1332 path_component pc(fp.basename());
1333 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1334 FL("basename('%s') = '%s' (expect '%s')")
1335 % p->in % pc % p->out);
1336 }
1337
1338 // any_path::basename() should return exactly the same thing that
1339 // the corresponding specialized basename() does, but with type any_path.
1340 UNIT_TEST_CHECKPOINT("any_path basenames");
1341 for (struct t const *p = fp_cases; p->in; p++)
1342 {
1343 any_path ap(file_path_internal(p->in));
1344 path_component pc(ap.basename());
1345 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1346 FL("basename('%s') = '%s' (expect '%s')")
1347 % p->in % pc % p->out);
1348 }
1349 for (struct t const *p = bp_cases; p->in; p++)
1350 {
1351 any_path ap(bookkeeping_path(p->in));
1352 path_component pc(ap.basename());
1353 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1354 FL("basename('%s') = '%s' (expect '%s')")
1355 % p->in % pc % p->out);
1356 }
1357 for (struct t const *p = sp_cases; p->in; p++)
1358 {
1359 any_path ap(system_path(p->in));
1360 path_component pc(ap.basename());
1361 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1362 FL("basename('%s') = '%s' (expect '%s')")
1363 % p->in % pc % p->out);
1364 }
1365
1366 initial_abs_path.unset();
1367}
1368
1369UNIT_TEST(paths, dirname)
1370{
1371 struct t
1372 {
1373 char const * in;
1374 char const * out;
1375 };
1376 // file_paths cannot be absolute, but may be the empty string.
1377 struct t const fp_cases[] = {
1378 { "", "" },
1379 { "foo", "" },
1380 { "foo/bar", "foo" },
1381 { "foo/bar/baz", "foo/bar" },
1382 { 0, 0 }
1383 };
1384
1385 // system_paths must be absolute. this relies on the setting of
1386 // initial_abs_path below.
1387 struct t const sp_cases[] = {
1388 { "/", "/" },
1389 { "//", "//" },
1390 { "foo", "/a/b" },
1391 { "/foo", "/" },
1392 { "//foo", "//" },
1393 { "~/foo", "~" },
1394 { "foo/bar", "/a/b/foo" },
1395 { "/foo/bar", "/foo" },
1396 { "//foo/bar", "//foo" },
1397 { "~/foo/bar", "~/foo" },
1398#ifdef WIN32
1399 { "c:", "c:" },
1400 { "c:foo", "c:" },
1401 { "c:/", "c:/" },
1402 { "c:/foo", "c:/" },
1403 { "c:/foo/bar", "c:/foo" },
1404#else
1405 { "c:", "/a/b" },
1406 { "c:foo", "/a/b" },
1407 { "c:/", "/a/b" },
1408 { "c:/foo", "/a/b/c:" },
1409 { "c:/foo/bar", "/a/b/c:/foo" },
1410#endif
1411 { 0, 0 }
1412 };
1413
1414 initial_abs_path.unset();
1415
1416 UNIT_TEST_CHECKPOINT("file_path dirnames");
1417 for (struct t const *p = fp_cases; p->in; p++)
1418 {
1419 file_path fp = file_path_internal(p->in);
1420 file_path dn = fp.dirname();
1421 UNIT_TEST_CHECK_MSG(dn == file_path_internal(p->out),
1422 FL("dirname('%s') = '%s' (expect '%s')")
1423 % p->in % dn % p->out);
1424 }
1425
1426
1427 initial_abs_path.set(system_path("/a/b"), true);
1428 UNIT_TEST_CHECKPOINT("system_path dirnames");
1429 for (struct t const *p = sp_cases; p->in; p++)
1430 {
1431 system_path fp(p->in);
1432 system_path dn(fp.dirname());
1433
1434 UNIT_TEST_CHECK_MSG(dn == system_path(p->out),
1435 FL("dirname('%s') = '%s' (expect '%s')")
1436 % p->in % dn % p->out);
1437 }
1438
1439 // any_path::dirname() should return exactly the same thing that
1440 // the corresponding specialized dirname() does, but with type any_path.
1441 UNIT_TEST_CHECKPOINT("any_path dirnames");
1442 for (struct t const *p = fp_cases; p->in; p++)
1443 {
1444 any_path ap(file_path_internal(p->in));
1445 any_path dn(ap.dirname());
1446 any_path rf(file_path_internal(p->out));
1447 UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(),
1448 FL("dirname('%s') = '%s' (expect '%s')")
1449 % p->in % dn % rf);
1450 }
1451 for (struct t const *p = sp_cases; p->in; p++)
1452 {
1453 any_path ap(system_path(p->in));
1454 any_path dn(ap.dirname());
1455 any_path rf(system_path(p->out));
1456 UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(),
1457 FL("dirname('%s') = '%s' (expect '%s')")
1458 % p->in % dn % rf);
1459 }
1460
1461 initial_abs_path.unset();
1462}
1463
1464UNIT_TEST(paths, depth)
1465{
1466 char const * const cases[] = {"", "foo", "foo/bar", "foo/bar/baz", 0};
1467 for (unsigned int i = 0; cases[i]; i++)
1468 {
1469 file_path fp = file_path_internal(cases[i]);
1470 unsigned int d = fp.depth();
1471 UNIT_TEST_CHECK_MSG(d == i,
1472 FL("depth('%s') = %d (expect %d)") % fp % d % i);
1473 }
1474}
1475
1476static void check_bk_normalizes_to(char * before, char * after)
1477{
1478 bookkeeping_path bp(bookkeeping_root / before);
1479 L(FL("normalizing %s to %s (got %s)") % before % after % bp);
1480 UNIT_TEST_CHECK(bp.as_external() == after);
1481 UNIT_TEST_CHECK(bookkeeping_path(bp.as_internal()).as_internal() == bp.as_internal());
1482}
1483
1484UNIT_TEST(paths, bookkeeping)
1485{
1486 char const * baddies[] = {"/foo",
1487 "foo//bar",
1488 "foo/../bar",
1489 "../bar",
1490 "foo/bar/",
1491 "foo/bar/.",
1492 "foo/bar/./",
1493 "foo/./bar",
1494 "./foo",
1495 ".",
1496 "..",
1497 "c:\\foo",
1498 "c:foo",
1499 "c:/foo",
1500 "",
1501 "a:b",
1502 0 };
1503 string tmp_path_string;
1504
1505 for (char const ** c = baddies; *c; ++c)
1506 {
1507 L(FL("test_bookkeeping_path baddie: trying '%s'") % *c);
1508 UNIT_TEST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign(*c)),
1509 logic_error);
1510 UNIT_TEST_CHECK_THROW(bookkeeping_root / *c, logic_error);
1511 }
1512
1513 // these are legitimate as things to append to bookkeeping_root, but
1514 // not as bookkeeping_paths in themselves.
1515 UNIT_TEST_CHECK_THROW(bookkeeping_path("a"), logic_error);
1516 UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "a", logic_error);
1517 UNIT_TEST_CHECK_THROW(bookkeeping_path("foo/bar"), logic_error);
1518 UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "foo/bar", logic_error);
1519
1520 check_bk_normalizes_to("a", "_MTN/a");
1521 check_bk_normalizes_to("foo", "_MTN/foo");
1522 check_bk_normalizes_to("foo/bar", "_MTN/foo/bar");
1523 check_bk_normalizes_to("foo/bar/baz", "_MTN/foo/bar/baz");
1524}
1525
1526static void check_system_normalizes_to(char * before, char * after)
1527{
1528 system_path sp(before);
1529 L(FL("normalizing '%s' to '%s' (got '%s')") % before % after % sp);
1530 UNIT_TEST_CHECK(sp.as_external() == after);
1531 UNIT_TEST_CHECK(system_path(sp.as_internal()).as_internal() == sp.as_internal());
1532}
1533
1534UNIT_TEST(paths, system)
1535{
1536 initial_abs_path.unset();
1537 initial_abs_path.set(system_path("/a/b"), true);
1538
1539 UNIT_TEST_CHECK_THROW(system_path(""), informative_failure);
1540
1541 check_system_normalizes_to("foo", "/a/b/foo");
1542 check_system_normalizes_to("foo/bar", "/a/b/foo/bar");
1543 check_system_normalizes_to("/foo/bar", "/foo/bar");
1544 check_system_normalizes_to("//foo/bar", "//foo/bar");
1545#ifdef WIN32
1546 check_system_normalizes_to("c:foo", "c:foo");
1547 check_system_normalizes_to("c:/foo", "c:/foo");
1548 check_system_normalizes_to("c:\\foo", "c:/foo");
1549#else
1550 check_system_normalizes_to("c:foo", "/a/b/c:foo");
1551 check_system_normalizes_to("c:/foo", "/a/b/c:/foo");
1552 check_system_normalizes_to("c:\\foo", "/a/b/c:\\foo");
1553 check_system_normalizes_to("foo:bar", "/a/b/foo:bar");
1554#endif
1555 // we require that system_path normalize out ..'s, because of the following
1556 // case:
1557 // /work mkdir newdir
1558 // /work$ cd newdir
1559 // /work/newdir$ monotone setup --db=../foo.db
1560 // Now they have either "/work/foo.db" or "/work/newdir/../foo.db" in
1561 // _MTN/options
1562 // /work/newdir$ cd ..
1563 // /work$ mv newdir newerdir # better name
1564 // Oops, now, if we stored the version with ..'s in, this workspace
1565 // is broken.
1566 check_system_normalizes_to("../foo", "/a/foo");
1567 check_system_normalizes_to("foo/..", "/a/b");
1568 check_system_normalizes_to("/foo/bar/..", "/foo");
1569 check_system_normalizes_to("/foo/..", "/");
1570 // can't do particularly interesting checking of tilde expansion, but at
1571 // least we can check that it's doing _something_...
1572 string tilde_expanded = system_path("~/foo").as_external();
1573#ifdef WIN32
1574 UNIT_TEST_CHECK(tilde_expanded[1] == ':');
1575#else
1576 UNIT_TEST_CHECK(tilde_expanded[0] == '/');
1577#endif
1578 UNIT_TEST_CHECK(tilde_expanded.find('~') == string::npos);
1579 // on Windows, ~name is not expanded
1580#ifdef WIN32
1581 UNIT_TEST_CHECK(system_path("~this_user_does_not_exist_anywhere")
1582 .as_external()
1583 == "/a/b/~this_user_does_not_exist_anywhere");
1584#else
1585 UNIT_TEST_CHECK_THROW(system_path("~this_user_does_not_exist_anywhere"),
1586 informative_failure);
1587#endif
1588
1589 // finally, make sure that the copy-from-any_path constructor works right
1590 // in particular, it should interpret the paths it gets as being relative to
1591 // the project root, not the initial path
1592 working_root.unset();
1593 working_root.set(system_path("/working/root"), true);
1594 initial_rel_path.unset();
1595 initial_rel_path.set(string("rel/initial"), true);
1596
1597 UNIT_TEST_CHECK(system_path(system_path("foo/bar")).as_internal() == "/a/b/foo/bar");
1598 UNIT_TEST_CHECK(!working_root.used);
1599 UNIT_TEST_CHECK(system_path(system_path("/foo/bar")).as_internal() == "/foo/bar");
1600 UNIT_TEST_CHECK(!working_root.used);
1601 UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar"), false).as_internal()
1602 == "/working/root/foo/bar");
1603 UNIT_TEST_CHECK(!working_root.used);
1604 UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar")).as_internal()
1605 == "/working/root/foo/bar");
1606 UNIT_TEST_CHECK(working_root.used);
1607 UNIT_TEST_CHECK(system_path(file_path_external(utf8("foo/bar"))).as_external()
1608 == "/working/root/rel/initial/foo/bar");
1609 file_path a_file_path;
1610 UNIT_TEST_CHECK(system_path(a_file_path).as_external()
1611 == "/working/root");
1612 UNIT_TEST_CHECK(system_path(bookkeeping_path("_MTN/foo/bar")).as_internal()
1613 == "/working/root/_MTN/foo/bar");
1614 UNIT_TEST_CHECK(system_path(bookkeeping_root).as_internal()
1615 == "/working/root/_MTN");
1616 initial_abs_path.unset();
1617 working_root.unset();
1618 initial_rel_path.unset();
1619}
1620
1621UNIT_TEST(paths, access_tracker)
1622{
1623 access_tracker<int> a;
1624 UNIT_TEST_CHECK_THROW(a.get(), logic_error);
1625 a.set(1, false);
1626 UNIT_TEST_CHECK_THROW(a.set(2, false), logic_error);
1627 a.set(2, true);
1628 UNIT_TEST_CHECK_THROW(a.set(3, false), logic_error);
1629 UNIT_TEST_CHECK(a.get() == 2);
1630 UNIT_TEST_CHECK_THROW(a.set(3, true), logic_error);
1631 a.unset();
1632 a.may_not_initialize();
1633 UNIT_TEST_CHECK_THROW(a.set(1, false), logic_error);
1634 UNIT_TEST_CHECK_THROW(a.set(2, true), logic_error);
1635 a.unset();
1636 a.set(1, false);
1637 UNIT_TEST_CHECK_THROW(a.may_not_initialize(), logic_error);
1638}
1639
1640static void test_path_less_than(string const & left, string const & right)
1641{
1642 MM(left);
1643 MM(right);
1644 file_path left_fp = file_path_internal(left);
1645 file_path right_fp = file_path_internal(right);
1646 I(left_fp < right_fp);
1647}
1648
1649static void test_path_equal(string const & left, string const & right)
1650{
1651 MM(left);
1652 MM(right);
1653 file_path left_fp = file_path_internal(left);
1654 file_path right_fp = file_path_internal(right);
1655 I(left_fp == right_fp);
1656}
1657
1658UNIT_TEST(paths, ordering)
1659{
1660 // this ordering is very important:
1661 // -- it is used to determine the textual form of csets and manifests
1662 // (in particular, it cannot be changed)
1663 // -- it is used to determine in what order cset operations can be applied
1664 // (in particular, foo must sort before foo/bar, so that we can use it
1665 // to do top-down and bottom-up traversals of a set of paths).
1666 test_path_less_than("a", "b");
1667 test_path_less_than("a", "c");
1668 test_path_less_than("ab", "ac");
1669 test_path_less_than("a", "ab");
1670 test_path_less_than("", "a");
1671 test_path_less_than("", ".foo");
1672 test_path_less_than("foo", "foo/bar");
1673 // . is before / asciibetically, so sorting by strings will give the wrong
1674 // answer on this:
1675 test_path_less_than("foo/bar", "foo.bar");
1676
1677 // path_components used to be interned strings, and we used the default sort
1678 // order, which meant that in practice path components would sort in the
1679 // _order they were first used in the program_. So let's put in a test that
1680 // would catch this sort of brokenness.
1681 test_path_less_than("fallanopic_not_otherwise_mentioned", "xyzzy");
1682 test_path_less_than("fallanoooo_not_otherwise_mentioned_and_smaller",
1683 "fallanopic_not_otherwise_mentioned");
1684}
1685
1686UNIT_TEST(paths, ordering_random)
1687{
1688 char x[4] = {0,0,0,0};
1689 char y[4] = {0,0,0,0};
1690 u8 a, b, c, d;
1691 const int ntrials = 1000;
1692 int i;
1693 randomizer rng;
1694
1695 // use of numbers is intentional; these strings are defined to be UTF-8.
1696
1697 UNIT_TEST_CHECKPOINT("a and b");
1698 for (i = 0; i < ntrials; i++)
1699 {
1700 do a = rng.uniform(0x7f - 0x20) + 0x20;
1701 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1702
1703 do b = rng.uniform(0x7f - 0x20) + 0x20;
1704 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1705
1706 x[0] = a;
1707 y[0] = b;
1708 if (a < b)
1709 test_path_less_than(x, y);
1710 else if (a > b)
1711 test_path_less_than(y, x);
1712 else
1713 test_path_equal(x, y);
1714 }
1715
1716 UNIT_TEST_CHECKPOINT("ab and cd");
1717 for (i = 0; i < ntrials; i++)
1718 {
1719 do
1720 {
1721 do a = rng.uniform(0x7f - 0x20) + 0x20;
1722 while (a == 0x5c || a == 0x2f); // '\\', '/'
1723
1724 do b = rng.uniform(0x7f - 0x20) + 0x20;
1725 while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':'
1726 }
1727 while (a == 0x2e && b == 0x2e); // ".."
1728
1729 do
1730 {
1731 do c = rng.uniform(0x7f - 0x20) + 0x20;
1732 while (c == 0x5c || c == 0x2f); // '\\', '/'
1733
1734 do d = rng.uniform(0x7f - 0x20) + 0x20;
1735 while (d == 0x5c || d == 0x2f || d == 0x3a); // '\\', '/', ':'
1736 }
1737 while (c == 0x2e && d == 0x2e); // ".."
1738
1739 x[0] = a;
1740 x[1] = b;
1741 y[0] = c;
1742 y[1] = d;
1743
1744 if (a < c || (a == c && b < d))
1745 test_path_less_than(x, y);
1746 else if (a > c || (a == c && b > d))
1747 test_path_less_than(y, x);
1748 else
1749 test_path_equal(x, y);
1750 }
1751
1752 UNIT_TEST_CHECKPOINT("a and b/c");
1753 x[1] = 0;
1754 y[1] = '/';
1755 for (i = 0; i < ntrials; i++)
1756 {
1757 do a = rng.uniform(0x7f - 0x20) + 0x20;
1758 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1759
1760 do b = rng.uniform(0x7f - 0x20) + 0x20;
1761 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1762
1763 do c = rng.uniform(0x7f - 0x20) + 0x20;
1764 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1765
1766 x[0] = a;
1767 y[0] = b;
1768 y[2] = c;
1769
1770 // only the order of a and b matters. 1 sorts before 1/2.
1771 if (a <= b)
1772 test_path_less_than(x, y);
1773 else
1774 test_path_less_than(y, x);
1775 }
1776
1777 UNIT_TEST_CHECKPOINT("ab and c/d");
1778 for (i = 0; i < ntrials; i++)
1779 {
1780 do
1781 {
1782 do a = rng.uniform(0x7f - 0x20) + 0x20;
1783 while (a == 0x5c || a == 0x2f); // '\\', '/'
1784
1785 do b = rng.uniform(0x7f - 0x20) + 0x20;
1786 while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':'
1787 }
1788 while (a == 0x2e && b == 0x2e); // ".."
1789
1790 do c = rng.uniform(0x7f - 0x20) + 0x20;
1791 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1792
1793 do d = rng.uniform(0x7f - 0x20) + 0x20;
1794 while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.'
1795
1796
1797 x[0] = a;
1798 x[1] = b;
1799 y[0] = c;
1800 y[2] = d;
1801
1802 // only the order of a and c matters,
1803 // but this time, 12 sorts after 1/2.
1804 if (a < c)
1805 test_path_less_than(x, y);
1806 else
1807 test_path_less_than(y, x);
1808 }
1809
1810
1811 UNIT_TEST_CHECKPOINT("a/b and c/d");
1812 x[1] = '/';
1813 for (i = 0; i < ntrials; i++)
1814 {
1815 do a = rng.uniform(0x7f - 0x20) + 0x20;
1816 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1817
1818 do b = rng.uniform(0x7f - 0x20) + 0x20;
1819 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1820
1821 do c = rng.uniform(0x7f - 0x20) + 0x20;
1822 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1823
1824 do d = rng.uniform(0x7f - 0x20) + 0x20;
1825 while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.'
1826
1827 x[0] = a;
1828 x[2] = b;
1829 y[0] = c;
1830 y[2] = d;
1831
1832 if (a < c || (a == c && b < d))
1833 test_path_less_than(x, y);
1834 else if (a > c || (a == c && b > d))
1835 test_path_less_than(y, x);
1836 else
1837 test_path_equal(x, y);
1838 }
1839}
1840
1841UNIT_TEST(paths, test_internal_string_is_bookkeeping_path)
1842{
1843 char const * yes[] = {"_MTN",
1844 "_MTN/foo",
1845 "_mtn/Foo",
1846 0 };
1847 char const * no[] = {"foo/_MTN",
1848 "foo/bar",
1849 0 };
1850 for (char const ** c = yes; *c; ++c)
1851 UNIT_TEST_CHECK(bookkeeping_path
1852 ::internal_string_is_bookkeeping_path(utf8(std::string(*c))));
1853 for (char const ** c = no; *c; ++c)
1854 UNIT_TEST_CHECK(!bookkeeping_path
1855 ::internal_string_is_bookkeeping_path(utf8(std::string(*c))));
1856}
1857
1858UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_none)
1859{
1860 initial_rel_path.unset();
1861 initial_rel_path.set(string(), true);
1862
1863 char const * yes[] = {"_MTN",
1864 "_MTN/foo",
1865 "_mtn/Foo",
1866 "_MTN/foo/..",
1867 0 };
1868 char const * no[] = {"foo/_MTN",
1869 "foo/bar",
1870 "_MTN/..",
1871 0 };
1872 for (char const ** c = yes; *c; ++c)
1873 UNIT_TEST_CHECK(bookkeeping_path
1874 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1875 for (char const ** c = no; *c; ++c)
1876 UNIT_TEST_CHECK(!bookkeeping_path
1877 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1878}
1879
1880UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_a_b)
1881{
1882 initial_rel_path.unset();
1883 initial_rel_path.set(string("a/b"), true);
1884
1885 char const * yes[] = {"../../_MTN",
1886 "../../_MTN/foo",
1887 "../../_mtn/Foo",
1888 "../../_MTN/foo/..",
1889 "../../foo/../_MTN/foo",
1890 0 };
1891 char const * no[] = {"foo/_MTN",
1892 "foo/bar",
1893 "_MTN",
1894 "../../foo/_MTN",
1895 0 };
1896 for (char const ** c = yes; *c; ++c)
1897 UNIT_TEST_CHECK(bookkeeping_path
1898 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1899 for (char const ** c = no; *c; ++c)
1900 UNIT_TEST_CHECK(!bookkeeping_path
1901 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1902}
1903
1904UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix__MTN)
1905{
1906 initial_rel_path.unset();
1907 initial_rel_path.set(string("_MTN"), true);
1908
1909 char const * yes[] = {".",
1910 "foo",
1911 "../_MTN/foo/..",
1912 "../_mtn/foo",
1913 "../foo/../_MTN/foo",
1914 0 };
1915 char const * no[] = {"../foo",
1916 "../foo/bar",
1917 "../foo/_MTN",
1918 0 };
1919 for (char const ** c = yes; *c; ++c)
1920 UNIT_TEST_CHECK(bookkeeping_path
1921 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1922 for (char const ** c = no; *c; ++c)
1923 UNIT_TEST_CHECK(!bookkeeping_path
1924 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1925}
1926
1927#endif // BUILD_UNIT_TESTS
1928
1929// Local Variables:
1930// mode: C++
1931// fill-column: 76
1932// c-file-style: "gnu"
1933// indent-tabs-mode: nil
1934// End:
1935// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status