monotone

monotone Mtn Source Tree

Root/paths.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <sstream>
12
13#include "paths.hh"
14#include "file_io.hh"
15#include "charset.hh"
16#include "lua.hh"
17
18using std::exception;
19using std::ostream;
20using std::ostringstream;
21using std::string;
22using std::vector;
23
24// some structure to ensure we aren't doing anything broken when resolving
25// filenames. the idea is to make sure
26// -- we don't depend on the existence of something before it has been set
27// -- we don't re-set something that has already been used
28// -- sometimes, we use the _non_-existence of something, so we shouldn't
29// set anything whose un-setted-ness has already been used
30template <typename T>
31struct access_tracker
32{
33 void set(T const & val, bool may_be_initialized)
34 {
35 I(may_be_initialized || !initialized);
36 I(!very_uninitialized);
37 I(!used);
38 initialized = true;
39 value = val;
40 }
41 T const & get()
42 {
43 I(initialized);
44 used = true;
45 return value;
46 }
47 T const & get_but_unused()
48 {
49 I(initialized);
50 return value;
51 }
52 void may_not_initialize()
53 {
54 I(!initialized);
55 very_uninitialized = true;
56 }
57 // for unit tests
58 void unset()
59 {
60 used = initialized = very_uninitialized = false;
61 }
62 T value;
63 bool initialized, used, very_uninitialized;
64 access_tracker() : initialized(false), used(false), very_uninitialized(false) {};
65};
66
67// paths to use in interpreting paths from various sources,
68// conceptually:
69// working_root / initial_rel_path == initial_abs_path
70
71// initial_abs_path is for interpreting relative system_path's
72static access_tracker<system_path> initial_abs_path;
73// initial_rel_path is for interpreting external file_path's
74// we used to make it a file_path, but then you can't run monotone from
75// inside the _MTN/ dir (even when referring to files outside the _MTN/
76// dir). use of a bare string requires some caution but does work.
77static access_tracker<string> initial_rel_path;
78// working_root is for converting file_path's and bookkeeping_path's to
79// system_path's.
80static access_tracker<system_path> working_root;
81
82void
83save_initial_path()
84{
85 // FIXME: BUG: this only works if the current working dir is in utf8
86 initial_abs_path.set(system_path(get_current_working_dir()), false);
87 L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused());
88}
89
90///////////////////////////////////////////////////////////////////////////
91// verifying that internal paths are indeed normalized.
92// this code must be superfast
93///////////////////////////////////////////////////////////////////////////
94
95// normalized means:
96// -- / as path separator
97// -- not an absolute path (on either posix or win32)
98// operationally, this means: first character != '/', first character != '\',
99// second character != ':'
100// -- no illegal characters
101// -- 0x00 -- 0x1f, 0x7f, \ are the illegal characters. \ is illegal
102// unconditionally to prevent people checking in files on posix that
103// have a different interpretation on win32
104// -- (may want to allow 0x0a and 0x0d (LF and CR) in the future, but this
105// is blocked on manifest format changing)
106// (also requires changes to 'automate inventory', possibly others, to
107// handle quoting)
108// -- no doubled /'s
109// -- no trailing /
110// -- no "." or ".." path components
111
// a component of a normalized path may be neither empty nor one of the
// special directory names "." and "..".
static inline bool
bad_component(string const & component)
{
  return component.empty()
    || component == "."
    || component == "..";
}
123
124static inline bool
125has_bad_chars(string const & path)
126{
127 for (string::const_iterator c = path.begin(); LIKELY(c != path.end()); c++)
128 {
129 // char is often a signed type; convert to unsigned to ensure that
130 // bytes 0x80-0xff are considered > 0x1f.
131 u8 x = (u8)*c;
132 // 0x5c is '\\'; we use the hex constant to make the dependency on
133 // ASCII encoding explicit.
134 if (UNLIKELY(x <= 0x1f || x == 0x5c || x == 0x7f))
135 return true;
136 }
137 return false;
138}
139
140// as above, but disallows / as well.
141static inline bool
142has_bad_component_chars(string const & pc)
143{
144 for (string::const_iterator c = pc.begin(); LIKELY(c != pc.end()); c++)
145 {
146 // char is often a signed type; convert to unsigned to ensure that
147 // bytes 0x80-0xff are considered > 0x1f.
148 u8 x = (u8)*c;
149 // 0x2f is '/' and 0x5c is '\\'; we use hex constants to make the
150 // dependency on ASCII encoding explicit.
151 if (UNLIKELY(x <= 0x1f || x == 0x2f || x == 0x5c || x == 0x7f))
152 return true;
153 }
154 return false;
155
156}
157
// true if path is absolute by the rules of the platform we were built
// for: a leading '/' everywhere, plus a leading '\' or a drive letter
// ("x:") on win32.
static bool
is_absolute_here(string const & path)
{
  if (path.empty())
    return false;

  char const first = path[0];
#ifdef WIN32
  return first == '/'
    || first == '\\'
    || (path.size() > 1 && path[1] == ':');
#else
  return first == '/';
#endif
}
173
// true if path would be absolute on any supported platform (posix or
// win32), regardless of which one we are running on.
static inline bool
is_absolute_somewhere(string const & path)
{
  if (path.empty())
    return false;

  switch (path[0])
    {
    case '/':
    case '\\':
      return true;
    default:
      break;
    }

  // a drive letter makes the path absolute on win32
  return path.size() > 1 && path[1] == ':';
}
187
188// fully_normalized_path verifies a complete pathname for validity and
189// having been properly normalized (as if by normalize_path, below).
190static inline bool
191fully_normalized_path(string const & path)
192{
193 // empty path is fine
194 if (path.empty())
195 return true;
196 // could use is_absolute_somewhere, but this is the only part of it that
197 // wouldn't be redundant
198 if (path.size() > 1 && path[1] == ':')
199 return false;
200 // first scan for completely illegal bytes
201 if (has_bad_chars(path))
202 return false;
203 // now check each component
204 string::size_type start = 0, stop;
205 while (1)
206 {
207 stop = path.find('/', start);
208 if (stop == string::npos)
209 break;
210 string const & s(path.substr(start, stop - start));
211 if (bad_component(s))
212 return false;
213 start = stop + 1;
214 }
215
216 string const & s(path.substr(start));
217 return !bad_component(s);
218}
219
// This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping
// paths, because on case insensitive filesystems, files put in any of them
// may end up in _MTN instead.  This allows arbitrary code execution.  A
// better solution would be to fix this in the working directory writing
// code -- this prevents all-unix projects from naming things "_mtn", which
// is less rude than when the bookkeeping root was "MT", but still rude --
// but as a temporary security kluge it works.
static inline bool
in_bookkeeping_dir(string const & path)
{
  // anything shorter than "_MTN" cannot be inside the bookkeeping dir
  if (path.size() < 4)
    return false;

  // the first four characters must be '_', 'M', 'T', 'N', the letters in
  // either upper or lower case
  bool const prefix_matches =
    path[0] == '_'
    && (path[1] == 'M' || path[1] == 'm')
    && (path[2] == 'T' || path[2] == 't')
    && (path[3] == 'N' || path[3] == 'n');
  if (!prefix_matches)
    return false;

  // it is a bookkeeping path if that is the whole path, or if the prefix
  // is immediately followed by '/'
  return path.size() == 4 || path[4] == '/';
}
245
246static inline bool
247is_valid_internal(string const & path)
248{
249 return (fully_normalized_path(path)
250 && !in_bookkeeping_dir(path));
251}
252
253static string
254normalize_path(string const & in)
255{
256 string inT = in;
257 string leader;
258 MM(inT);
259
260#ifdef WIN32
261 // the first thing we do is kill all the backslashes
262 for (string::iterator i = inT.begin(); i != inT.end(); i++)
263 if (*i == '\\')
264 *i = '/';
265#endif
266
267 if (is_absolute_here (inT))
268 {
269 if (inT[0] == '/')
270 {
271 leader = "/";
272 inT = inT.substr(1);
273
274 if (!inT.empty() && inT[0] == '/')
275 {
276 // if there are exactly two slashes at the beginning they
277 // are both preserved. three or more are the same as one.
278 string::size_type f = inT.find_first_not_of("/");
279 if (f == string::npos)
280 f = inT.size();
281 if (f == 1)
282 leader = "//";
283 inT = inT.substr(f);
284 }
285 }
286#ifdef WIN32
287 else
288 {
289 I(inT[1] == ':');
290 if (inT.size() > 2 && inT[2] == '/')
291 {
292 leader = inT.substr(0, 3);
293 inT = inT.substr(3);
294 }
295 else
296 {
297 leader = inT.substr(0, 2);
298 inT = inT.substr(2);
299 }
300 }
301#endif
302
303 I(!is_absolute_here(inT));
304 if (inT.empty())
305 return leader;
306 }
307
308 vector<string> stack;
309 string::const_iterator head, tail;
310 string::size_type size_estimate = leader.size();
311 for (head = inT.begin(); head != inT.end(); head = tail)
312 {
313 tail = head;
314 while (tail != inT.end() && *tail != '/')
315 tail++;
316
317 string elt(head, tail);
318 while (tail != inT.end() && *tail == '/')
319 tail++;
320
321 if (elt == ".")
322 continue;
323 // remove foo/.. element pairs; leave leading .. components alone
324 if (elt == ".." && !stack.empty() && stack.back() != "..")
325 {
326 stack.pop_back();
327 continue;
328 }
329
330 size_estimate += elt.size() + 1;
331 stack.push_back(elt);
332 }
333
334 leader.reserve(size_estimate);
335 for (vector<string>::const_iterator i = stack.begin(); i != stack.end(); i++)
336 {
337 if (i != stack.begin())
338 leader += "/";
339 leader += *i;
340 }
341 return leader;
342}
343
344LUAEXT(normalize_path, )
345{
346 const char *pathstr = luaL_checkstring(L, -1);
347 N(pathstr, F("%s called with an invalid parameter") % "normalize_path");
348
349 lua_pushstring(L, normalize_path(string(pathstr)).c_str());
350 return 1;
351}
352
353static void
354normalize_external_path(string const & path, string & normalized)
355{
356 if (!initial_rel_path.initialized)
357 {
358 // we are not in a workspace; treat this as an internal
359 // path, and set the access_tracker() into a very uninitialised
360 // state so that we will hit an exception if we do eventually
361 // enter a workspace
362 initial_rel_path.may_not_initialize();
363 normalized = path;
364 N(is_valid_internal(path),
365 F("path '%s' is invalid") % path);
366 }
367 else
368 {
369 N(!path.empty(), F("empty path '%s' is invalid") % path);
370 N(!is_absolute_here(path), F("absolute path '%s' is invalid") % path);
371 string base;
372 try
373 {
374 base = initial_rel_path.get();
375 if (base == "")
376 normalized = normalize_path(path);
377 else
378 normalized = normalize_path(base + "/" + path);
379 }
380 catch (exception &)
381 {
382 N(false, F("path '%s' is invalid") % path);
383 }
384 if (normalized == ".")
385 normalized = string("");
386 N(fully_normalized_path(normalized),
387 F("path '%s' is invalid") % normalized);
388 }
389}
390
391///////////////////////////////////////////////////////////////////////////
392// single path component handling.
393///////////////////////////////////////////////////////////////////////////
394
395// these constructors confirm that what they are passed is a legitimate
396// component. note that the empty string is a legitimate component,
397// but is not acceptable to bad_component (above) and therefore we have
398// to open-code most of those checks.
399path_component::path_component(utf8 const & d)
400 : data(d())
401{
402 MM(data);
403 I(!has_bad_component_chars(data) && data != "." && data != "..");
404}
405
406path_component::path_component(string const & d)
407 : data(d)
408{
409 MM(data);
410 I(utf8_validate(utf8(data))
411 && !has_bad_component_chars(data)
412 && data != "." && data != "..");
413}
414
415path_component::path_component(char const * d)
416 : data(d)
417{
418 MM(data);
419 I(utf8_validate(utf8(data))
420 && !has_bad_component_chars(data)
421 && data != "." && data != "..");
422}
423
424std::ostream & operator<<(std::ostream & s, path_component const & pc)
425{
426 return s << pc();
427}
428
429template <> void dump(path_component const & pc, std::string & to)
430{
431 to = pc();
432}
433
434///////////////////////////////////////////////////////////////////////////
435// complete paths to files within a working directory
436///////////////////////////////////////////////////////////////////////////
437
438file_path::file_path(file_path::source_type type, string const & path)
439{
440 MM(path);
441 I(utf8_validate(utf8(path)));
442 if (type == external)
443 {
444 string normalized;
445 normalize_external_path(path, normalized);
446 N(!in_bookkeeping_dir(normalized),
447 F("path '%s' is in bookkeeping dir") % normalized);
448 data = normalized;
449 }
450 else
451 data = path;
452 MM(data);
453 I(is_valid_internal(data));
454}
455
456file_path::file_path(file_path::source_type type, utf8 const & path)
457{
458 MM(path);
459 I(utf8_validate(path));
460 if (type == external)
461 {
462 string normalized;
463 normalize_external_path(path(), normalized);
464 N(!in_bookkeeping_dir(normalized),
465 F("path '%s' is in bookkeeping dir") % normalized);
466 data = normalized;
467 }
468 else
469 data = path();
470 MM(data);
471 I(is_valid_internal(data));
472}
473
474bookkeeping_path::bookkeeping_path(string const & path)
475{
476 I(fully_normalized_path(path));
477 I(in_bookkeeping_dir(path));
478 data = path;
479}
480
481bool
482bookkeeping_path::external_string_is_bookkeeping_path(utf8 const & path)
483{
484 // FIXME: this charset casting everywhere is ridiculous
485 string normalized;
486 normalize_external_path(path(), normalized);
487 return internal_string_is_bookkeeping_path(utf8(normalized));
488}
489bool bookkeeping_path::internal_string_is_bookkeeping_path(utf8 const & path)
490{
491 return in_bookkeeping_dir(path());
492}
493
494///////////////////////////////////////////////////////////////////////////
495// splitting/joining
496// this code must be superfast
497// it depends very much on knowing that it can only be applied to fully
498// normalized, relative, paths.
499///////////////////////////////////////////////////////////////////////////
500
501// this peels off the last component of any path and returns it.
502// the last component of a path with no slashes in it is the complete path.
503// the last component of a path referring to the root directory is an
504// empty string.
505path_component
506any_path::basename() const
507{
508 string const & s = data;
509 string::size_type sep = s.rfind('/');
510#ifdef WIN32
511 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
512 sep = 1;
513#endif
514 if (sep == string::npos)
515 return path_component(s, 0); // force use of short circuit
516 if (sep == s.size())
517 return path_component();
518 return path_component(s, sep + 1);
519}
520
521// this returns all but the last component of any path. It has to take
522// care at the root.
523any_path
524any_path::dirname() const
525{
526 string const & s = data;
527 string::size_type sep = s.rfind('/');
528#ifdef WIN32
529 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
530 sep = 1;
531#endif
532 if (sep == string::npos)
533 return any_path();
534
535 // dirname() of the root directory is itself
536 if (sep == s.size() - 1)
537 return *this;
538
539 // dirname() of a direct child of the root is the root
540 if (sep == 0 || (sep == 1 && s[1] == '/')
541#ifdef WIN32
542 || (sep == 1 || sep == 2 && s[1] == ':')
543#endif
544 )
545 return any_path(s, 0, sep+1);
546
547 return any_path(s, 0, sep);
548}
549
550// these variations exist to get the return type right. also,
551// file_path dirname() can be a little simpler.
552file_path
553file_path::dirname() const
554{
555 string const & s = data;
556 string::size_type sep = s.rfind('/');
557 if (sep == string::npos)
558 return file_path();
559 return file_path(s, 0, sep);
560}
561
562system_path
563system_path::dirname() const
564{
565 string const & s = data;
566 string::size_type sep = s.rfind('/');
567#ifdef WIN32
568 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
569 sep = 1;
570#endif
571 I(sep != string::npos);
572
573 // dirname() of the root directory is itself
574 if (sep == s.size() - 1)
575 return *this;
576
577 // dirname() of a direct child of the root is the root
578 if (sep == 0 || (sep == 1 && s[1] == '/')
579#ifdef WIN32
580 || (sep == 1 || sep == 2 && s[1] == ':')
581#endif
582 )
583 return system_path(s, 0, sep+1);
584
585 return system_path(s, 0, sep);
586}
587
588
589// produce dirname and basename at the same time
590void
591file_path::dirname_basename(file_path & dir, path_component & base) const
592{
593 string const & s = data;
594 string::size_type sep = s.rfind('/');
595 if (sep == string::npos)
596 {
597 dir = file_path();
598 base = path_component(s, 0);
599 }
600 else
601 {
602 I(sep < s.size() - 1); // last component must have at least one char
603 dir = file_path(s, 0, sep);
604 base = path_component(s, sep + 1);
605 }
606}
607
608// count the number of /-separated components of the path.
609unsigned int
610file_path::depth() const
611{
612 if (data.empty())
613 return 0;
614
615 unsigned int components = 1;
616 for (string::const_iterator p = data.begin(); p != data.end(); p++)
617 if (*p == '/')
618 components++;
619
620 return components;
621}
622
623///////////////////////////////////////////////////////////////////////////
624// localizing file names (externalizing them)
625// this code must be superfast when there is no conversion needed
626///////////////////////////////////////////////////////////////////////////
627
628string
629any_path::as_external() const
630{
631#ifdef __APPLE__
632 // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of
633 // locale.
634 return data;
635#else
636 // on normal systems we actually have some work to do, alas.
637 // not much, though, because utf8_to_system_string does all the hard work.
638 // it is carefully optimized. do not screw it up.
639 external out;
640 utf8_to_system_strict(utf8(data), out);
641 return out();
642#endif
643}
644
645///////////////////////////////////////////////////////////////////////////
646// writing out paths
647///////////////////////////////////////////////////////////////////////////
648
649ostream &
650operator <<(ostream & o, any_path const & a)
651{
652 o << a.as_internal();
653 return o;
654}
655
656template <>
657void dump(file_path const & p, string & out)
658{
659 ostringstream oss;
660 oss << p << '\n';
661 out = oss.str();
662}
663
664template <>
665void dump(system_path const & p, string & out)
666{
667 ostringstream oss;
668 oss << p << '\n';
669 out = oss.str();
670}
671
672template <>
673void dump(bookkeeping_path const & p, string & out)
674{
675 ostringstream oss;
676 oss << p << '\n';
677 out = oss.str();
678}
679
680///////////////////////////////////////////////////////////////////////////
681// path manipulation
682// this code's speed does not matter much
683///////////////////////////////////////////////////////////////////////////
684
685// relies on its arguments already being validated, except that you may not
686// append the empty path component, and if you are appending to the empty
687// path, you may not create an absolute path or a path into the bookkeeping
688// directory.
689file_path
690file_path::operator /(path_component const & to_append) const
691{
692 I(!to_append.empty());
693 if (empty())
694 {
695 string const & s = to_append();
696 I(!is_absolute_somewhere(s) && !in_bookkeeping_dir(s));
697 return file_path(s, 0, string::npos);
698 }
699 else
700 return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
701 + to_append(), 0, string::npos);
702}
703
704// similarly, but even less checking is needed.
705file_path
706file_path::operator /(file_path const & to_append) const
707{
708 I(!to_append.empty());
709 if (empty())
710 return to_append;
711 return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
712 + to_append.as_internal(), 0, string::npos);
713}
714
715bookkeeping_path
716bookkeeping_path::operator /(path_component const & to_append) const
717{
718 I(!to_append.empty());
719 I(!empty());
720 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
721 + to_append(), 0, string::npos);
722}
723
724system_path
725system_path::operator /(path_component const & to_append) const
726{
727 I(!to_append.empty());
728 I(!empty());
729 return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
730 + to_append(), 0, string::npos);
731}
732
733any_path
734any_path::operator /(path_component const & to_append) const
735{
736 I(!to_append.empty());
737 I(!empty());
738 return any_path(((*(data.end() - 1) == '/') ? data : data + "/")
739 + to_append(), 0, string::npos);
740}
741
742// these take strings and validate
743bookkeeping_path
744bookkeeping_path::operator /(char const * to_append) const
745{
746 I(!is_absolute_somewhere(to_append));
747 I(!empty());
748 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
749 + to_append);
750}
751
752system_path
753system_path::operator /(char const * to_append) const
754{
755 I(!empty());
756 I(!is_absolute_here(to_append));
757 return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
758 + to_append);
759}
760
761///////////////////////////////////////////////////////////////////////////
762// system_path
763///////////////////////////////////////////////////////////////////////////
764
765system_path::system_path(any_path const & other, bool in_true_workspace)
766{
767 if (is_absolute_here(other.as_internal()))
768 // another system_path. the normalizing isn't really necessary, but it
769 // makes me feel warm and fuzzy.
770 data = normalize_path(other.as_internal());
771 else
772 {
773 system_path wr;
774 if (in_true_workspace)
775 wr = working_root.get();
776 else
777 wr = working_root.get_but_unused();
778 data = normalize_path(wr.as_internal() + "/" + other.as_internal());
779 }
780}
781
782static inline string const_system_path(utf8 const & path)
783{
784 N(!path().empty(), F("invalid path ''"));
785 string expanded = tilde_expand(path());
786 if (is_absolute_here(expanded))
787 return normalize_path(expanded);
788 else
789 return normalize_path(initial_abs_path.get().as_internal()
790 + "/" + path());
791}
792
793system_path::system_path(string const & path)
794{
795 data = const_system_path(utf8(path));
796}
797
798system_path::system_path(utf8 const & path)
799{
800 data = const_system_path(utf8(path));
801}
802
803///////////////////////////////////////////////////////////////////////////
804// workspace (and path root) handling
805///////////////////////////////////////////////////////////////////////////
806
807static bool
808find_bookdir(system_path const & root, path_component const & bookdir,
809 system_path & current, string & removed)
810{
811 current = initial_abs_path.get();
812 removed.clear();
813
814 // check that the current directory is below the specified search root
815 if (current.as_internal().find(root.as_internal()) != 0)
816 {
817 W(F("current directory '%s' is not below root '%s'") % current % root);
818 return false;
819 }
820
821 L(FL("searching for '%s' directory with root '%s'") % bookdir % root);
822
823 system_path check;
824 while (!(current == root))
825 {
826 check = current / bookdir;
827 switch (get_path_status(check))
828 {
829 case path::nonexistent:
830 L(FL("'%s' not found in '%s' with '%s' removed")
831 % bookdir % current % removed);
832 if (removed.empty())
833 removed = current.basename()();
834 else
835 removed = current.basename()() + "/" + removed;
836 current = current.dirname();
837 continue;
838
839 case path::file:
840 L(FL("'%s' is not a directory") % check);
841 return false;
842
843 case path::directory:
844 goto found;
845 }
846 }
847
848 // if we get here, we have hit the root; try once more
849 check = current / bookdir;
850 switch (get_path_status(check))
851 {
852 case path::nonexistent:
853 L(FL("'%s' not found in '%s' with '%s' removed")
854 % bookdir % current % removed);
855 return false;
856
857 case path::file:
858 L(FL("'%s' is not a directory") % check);
859 return false;
860
861 case path::directory:
862 goto found;
863 }
864 return false;
865
866 found:
867 // check for _MTN/. and _MTN/.. to see if mt dir is readable
868 try
869 {
870 if (!path_exists(check / ".") || !path_exists(check / ".."))
871 {
872 L(FL("problems with '%s' (missing '.' or '..')") % check);
873 return false;
874 }
875 }
876 catch(exception &)
877 {
878 L(FL("problems with '%s' (cannot check for '.' or '..')") % check);
879 return false;
880 }
881 return true;
882}
883
884
885bool
886find_and_go_to_workspace(string const & search_root)
887{
888 system_path root, current;
889 string removed;
890
891 if (search_root.empty())
892 {
893#ifdef WIN32
894 std::string cur_str = get_current_working_dir();
895 current = cur_str;
896 if (cur_str[0] == '/' || cur_str[0] == '\\')
897 {
898 if (cur_str.size() > 1 && (cur_str[1] == '/' || cur_str[1] == '\\'))
899 {
900 // UNC name
901 string::size_type uncend = cur_str.find_first_of("\\/", 2);
902 if (uncend == string::npos)
903 root = system_path(cur_str + "/");
904 else
905 root = system_path(cur_str.substr(0, uncend));
906 }
907 else
908 root = system_path("/");
909 }
910 else if (cur_str.size() > 1 && cur_str[1] == ':')
911 {
912 root = system_path(cur_str.substr(0,2) + "/");
913 }
914 else I(false);
915#else
916 root = system_path("/");
917#endif
918 }
919 else
920 {
921 root = system_path(search_root);
922 L(FL("limiting search for workspace to %s") % root);
923
924 require_path_is_directory(root,
925 F("search root '%s' does not exist") % root,
926 F("search root '%s' is not a directory") % root);
927 }
928
929 // first look for the current name of the bookkeeping directory.
930 // if we don't find it, look for it under the old name, so that
931 // migration has a chance to work.
932 if (!find_bookdir(root, bookkeeping_root_component, current, removed))
933 if (!find_bookdir(root, old_bookkeeping_root_component, current, removed))
934 return false;
935
936 working_root.set(current, true);
937 initial_rel_path.set(removed, true);
938
939 L(FL("working root is '%s'") % working_root.get_but_unused());
940 L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused());
941
942 change_current_working_dir(working_root.get_but_unused());
943
944 return true;
945}
946
947void
948go_to_workspace(system_path const & new_workspace)
949{
950 working_root.set(new_workspace, true);
951 initial_rel_path.set(string(), true);
952 change_current_working_dir(new_workspace);
953}
954
955void
956mark_std_paths_used(void)
957{
958 working_root.get();
959 initial_rel_path.get();
960}
961
962///////////////////////////////////////////////////////////////////////////
963// tests
964///////////////////////////////////////////////////////////////////////////
965
966#ifdef BUILD_UNIT_TESTS
967#include "unit_tests.hh"
968#include "randomizer.hh"
969
970using std::logic_error;
971
972UNIT_TEST(paths, path_component)
973{
974 char const * const baddies[] = {".",
975 "..",
976 "/foo",
977 "\\foo",
978 "foo/bar",
979 "foo\\bar",
980 0 };
981
982 // these would not be okay in a full file_path, but are okay here.
983 char const * const goodies[] = {"c:foo",
984 "_mtn",
985 "_mtN",
986 "_mTn",
987 "_Mtn",
988 "_MTn",
989 "_MtN",
990 "_MTN",
991 0 };
992
993
994 for (char const * const * c = baddies; *c; ++c)
995 {
996 // the comparison prevents the compiler from eliminating the
997 // expression.
998 UNIT_TEST_CHECK_THROW((path_component(*c)()) == *c, logic_error);
999 }
1000 for (char const * const *c = goodies; *c; ++c)
1001 {
1002 path_component p(*c);
1003 UNIT_TEST_CHECK_THROW(file_path() / p, logic_error);
1004 }
1005
1006 UNIT_TEST_CHECK_THROW(file_path_internal("foo") / path_component(),
1007 logic_error);
1008}
1009
1010
1011UNIT_TEST(paths, file_path_internal)
1012{
1013 char const * const baddies[] = {"/foo",
1014 "foo//bar",
1015 "foo/../bar",
1016 "../bar",
1017 "_MTN",
1018 "_MTN/blah",
1019 "foo/bar/",
1020 "foo/bar/.",
1021 "foo/bar/./",
1022 "foo/./bar",
1023 "./foo",
1024 ".",
1025 "..",
1026 "c:\\foo",
1027 "c:foo",
1028 "c:/foo",
1029 // some baddies made bad by a security kluge --
1030 // see the comment in in_bookkeeping_dir
1031 "_mtn",
1032 "_mtN",
1033 "_mTn",
1034 "_Mtn",
1035 "_MTn",
1036 "_MtN",
1037 "_mTN",
1038 "_mtn/foo",
1039 "_mtN/foo",
1040 "_mTn/foo",
1041 "_Mtn/foo",
1042 "_MTn/foo",
1043 "_MtN/foo",
1044 "_mTN/foo",
1045 0 };
1046 initial_rel_path.unset();
1047 initial_rel_path.set(string(), true);
1048 for (char const * const * c = baddies; *c; ++c)
1049 {
1050 UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error);
1051 }
1052 initial_rel_path.unset();
1053 initial_rel_path.set("blah/blah/blah", true);
1054 for (char const * const * c = baddies; *c; ++c)
1055 {
1056 UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error);
1057 }
1058
1059 UNIT_TEST_CHECK(file_path().empty());
1060 UNIT_TEST_CHECK(file_path_internal("").empty());
1061
1062 char const * const goodies[] = {"",
1063 "a",
1064 "foo",
1065 "foo/bar/baz",
1066 "foo/bar.baz",
1067 "foo/with-hyphen/bar",
1068 "foo/with_underscore/bar",
1069 "foo/with,other+@weird*%#$=stuff/bar",
1070 ".foo/bar",
1071 "..foo/bar",
1072 "_MTNfoo/bar",
1073 "foo:bar",
1074 0 };
1075
1076 for (int i = 0; i < 2; ++i)
1077 {
1078 initial_rel_path.unset();
1079 initial_rel_path.set(i ? string()
1080 : string("blah/blah/blah"),
1081 true);
1082 for (char const * const * c = goodies; *c; ++c)
1083 {
1084 file_path fp = file_path_internal(*c);
1085 UNIT_TEST_CHECK(fp.as_internal() == *c);
1086 UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp);
1087 }
1088 }
1089
1090 initial_rel_path.unset();
1091}
1092
1093static void check_fp_normalizes_to(char const * before, char const * after)
1094{
1095 L(FL("check_fp_normalizes_to: '%s' -> '%s'") % before % after);
1096 file_path fp = file_path_external(utf8(before));
1097 L(FL(" (got: %s)") % fp);
1098 UNIT_TEST_CHECK(fp.as_internal() == after);
1099 UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp);
1100 // we compare after to the external form too, since as far as we know
1101 // relative normalized posix paths are always good win32 paths too
1102 UNIT_TEST_CHECK(fp.as_external() == after);
1103}
1104
1105UNIT_TEST(paths, file_path_external_null_prefix)
1106{
1107 initial_rel_path.unset();
1108 initial_rel_path.set(string(), true);
1109
1110 char const * const baddies[] = {"/foo",
1111 "../bar",
1112 "_MTN/blah",
1113 "_MTN",
1114 "//blah",
1115 "\\foo",
1116 "..",
1117 "c:\\foo",
1118 "c:foo",
1119 "c:/foo",
1120 "",
1121 // some baddies made bad by a security kluge --
1122 // see the comment in in_bookkeeping_dir
1123 "_mtn",
1124 "_mtN",
1125 "_mTn",
1126 "_Mtn",
1127 "_MTn",
1128 "_MtN",
1129 "_mTN",
1130 "_mtn/foo",
1131 "_mtN/foo",
1132 "_mTn/foo",
1133 "_Mtn/foo",
1134 "_MTn/foo",
1135 "_MtN/foo",
1136 "_mTN/foo",
1137 0 };
1138 for (char const * const * c = baddies; *c; ++c)
1139 {
1140 L(FL("test_file_path_external_null_prefix: trying baddie: %s") % *c);
1141 UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
1142 }
1143
1144 check_fp_normalizes_to("a", "a");
1145 check_fp_normalizes_to("foo", "foo");
1146 check_fp_normalizes_to("foo/bar", "foo/bar");
1147 check_fp_normalizes_to("foo/bar/baz", "foo/bar/baz");
1148 check_fp_normalizes_to("foo/bar.baz", "foo/bar.baz");
1149 check_fp_normalizes_to("foo/with-hyphen/bar", "foo/with-hyphen/bar");
1150 check_fp_normalizes_to("foo/with_underscore/bar", "foo/with_underscore/bar");
1151 check_fp_normalizes_to(".foo/bar", ".foo/bar");
1152 check_fp_normalizes_to("..foo/bar", "..foo/bar");
1153 check_fp_normalizes_to(".", "");
1154#ifndef WIN32
1155 check_fp_normalizes_to("foo:bar", "foo:bar");
1156#endif
1157 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
1158 "foo/with,other+@weird*%#$=stuff/bar");
1159
1160 // Why are these tests with // in them commented out? because boost::fs
1161 // sucks and can't normalize them. FIXME.
1162 //check_fp_normalizes_to("foo//bar", "foo/bar");
1163 check_fp_normalizes_to("foo/../bar", "bar");
1164 check_fp_normalizes_to("foo/bar/", "foo/bar");
1165 check_fp_normalizes_to("foo/bar/.", "foo/bar");
1166 check_fp_normalizes_to("foo/bar/./", "foo/bar");
1167 check_fp_normalizes_to("foo/./bar/", "foo/bar");
1168 check_fp_normalizes_to("./foo", "foo");
1169 //check_fp_normalizes_to("foo///.//", "foo");
1170
1171 initial_rel_path.unset();
1172}
1173
1174UNIT_TEST(paths, file_path_external_prefix__MTN)
1175{
1176 initial_rel_path.unset();
1177 initial_rel_path.set(string("_MTN"), true);
1178
1179 UNIT_TEST_CHECK_THROW(file_path_external(utf8("foo")), informative_failure);
1180 UNIT_TEST_CHECK_THROW(file_path_external(utf8(".")), informative_failure);
1181 UNIT_TEST_CHECK_THROW(file_path_external(utf8("./blah")), informative_failure);
1182 check_fp_normalizes_to("..", "");
1183 check_fp_normalizes_to("../foo", "foo");
1184}
1185
1186UNIT_TEST(paths, file_path_external_prefix_a_b)
1187{
1188 initial_rel_path.unset();
1189 initial_rel_path.set(string("a/b"), true);
1190
1191 char const * const baddies[] = {"/foo",
1192 "../../../bar",
1193 "../../..",
1194 "../../_MTN",
1195 "../../_MTN/foo",
1196 "//blah",
1197 "\\foo",
1198 "c:\\foo",
1199#ifdef WIN32
1200 "c:foo",
1201 "c:/foo",
1202#endif
1203 "",
1204 // some baddies made bad by a security kluge --
1205 // see the comment in in_bookkeeping_dir
1206 "../../_mtn",
1207 "../../_mtN",
1208 "../../_mTn",
1209 "../../_Mtn",
1210 "../../_MTn",
1211 "../../_MtN",
1212 "../../_mTN",
1213 "../../_mtn/foo",
1214 "../../_mtN/foo",
1215 "../../_mTn/foo",
1216 "../../_Mtn/foo",
1217 "../../_MTn/foo",
1218 "../../_MtN/foo",
1219 "../../_mTN/foo",
1220 0 };
1221 for (char const * const * c = baddies; *c; ++c)
1222 {
1223 L(FL("test_file_path_external_prefix_a_b: trying baddie: %s") % *c);
1224 UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
1225 }
1226
1227 check_fp_normalizes_to("foo", "a/b/foo");
1228 check_fp_normalizes_to("a", "a/b/a");
1229 check_fp_normalizes_to("foo/bar", "a/b/foo/bar");
1230 check_fp_normalizes_to("foo/bar/baz", "a/b/foo/bar/baz");
1231 check_fp_normalizes_to("foo/bar.baz", "a/b/foo/bar.baz");
1232 check_fp_normalizes_to("foo/with-hyphen/bar", "a/b/foo/with-hyphen/bar");
1233 check_fp_normalizes_to("foo/with_underscore/bar", "a/b/foo/with_underscore/bar");
1234 check_fp_normalizes_to(".foo/bar", "a/b/.foo/bar");
1235 check_fp_normalizes_to("..foo/bar", "a/b/..foo/bar");
1236 check_fp_normalizes_to(".", "a/b");
1237#ifndef WIN32
1238 check_fp_normalizes_to("foo:bar", "a/b/foo:bar");
1239#endif
1240 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
1241 "a/b/foo/with,other+@weird*%#$=stuff/bar");
1242 // why are the tests with // in them commented out? because boost::fs sucks
1243 // and can't normalize them. FIXME.
1244 //check_fp_normalizes_to("foo//bar", "a/b/foo/bar");
1245 check_fp_normalizes_to("foo/../bar", "a/b/bar");
1246 check_fp_normalizes_to("foo/bar/", "a/b/foo/bar");
1247 check_fp_normalizes_to("foo/bar/.", "a/b/foo/bar");
1248 check_fp_normalizes_to("foo/bar/./", "a/b/foo/bar");
1249 check_fp_normalizes_to("foo/./bar/", "a/b/foo/bar");
1250 check_fp_normalizes_to("./foo", "a/b/foo");
1251 //check_fp_normalizes_to("foo///.//", "a/b/foo");
1252 // things that would have been bad without the initial_rel_path:
1253 check_fp_normalizes_to("../foo", "a/foo");
1254 check_fp_normalizes_to("..", "a");
1255 check_fp_normalizes_to("../..", "");
1256 check_fp_normalizes_to("_MTN/foo", "a/b/_MTN/foo");
1257 check_fp_normalizes_to("_MTN", "a/b/_MTN");
1258#ifndef WIN32
1259 check_fp_normalizes_to("c:foo", "a/b/c:foo");
1260 check_fp_normalizes_to("c:/foo", "a/b/c:/foo");
1261#endif
1262
1263 initial_rel_path.unset();
1264}
1265
1266UNIT_TEST(paths, basename)
1267{
1268 struct t
1269 {
1270 char const * in;
1271 char const * out;
1272 };
1273 // file_paths cannot be absolute, but may be the empty string.
1274 struct t const fp_cases[] = {
1275 { "", "" },
1276 { "foo", "foo" },
1277 { "foo/bar", "bar" },
1278 { "foo/bar/baz", "baz" },
1279 { 0, 0 }
1280 };
1281 // bookkeeping_paths cannot be absolute and must start with the
1282 // bookkeeping_root_component.
1283 struct t const bp_cases[] = {
1284 { "_MTN", "_MTN" },
1285 { "_MTN/foo", "foo" },
1286 { "_MTN/foo/bar", "bar" },
1287 { 0, 0 }
1288 };
1289
1290 // system_paths must be absolute. this relies on the setting of
1291 // initial_abs_path below. note that most of the cases whose full paths
1292 // vary between Unix and Windows will still have the same basenames.
1293 struct t const sp_cases[] = {
1294 { "/", "" },
1295 { "//", "" },
1296 { "foo", "foo" },
1297 { "/foo", "foo" },
1298 { "//foo", "foo" },
1299 { "~/foo", "foo" },
1300 { "c:/foo", "foo" },
1301 { "foo/bar", "bar" },
1302 { "/foo/bar", "bar" },
1303 { "//foo/bar", "bar" },
1304 { "~/foo/bar", "bar" },
1305 { "c:/foo/bar", "bar" },
1306#ifdef WIN32
1307 { "c:/", "" },
1308 { "c:foo", "foo" },
1309#else
1310 { "c:/", "c:" },
1311 { "c:foo", "c:foo" },
1312#endif
1313 { 0, 0 }
1314 };
1315
1316 UNIT_TEST_CHECKPOINT("file_path basenames");
1317 for (struct t const *p = fp_cases; p->in; p++)
1318 {
1319 file_path fp = file_path_internal(p->in);
1320 path_component pc(fp.basename());
1321 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1322 FL("basename('%s') = '%s' (expect '%s')")
1323 % p->in % pc % p->out);
1324 }
1325
1326 UNIT_TEST_CHECKPOINT("bookkeeping_path basenames");
1327 for (struct t const *p = bp_cases; p->in; p++)
1328 {
1329 bookkeeping_path fp(p->in);
1330 path_component pc(fp.basename());
1331 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1332 FL("basename('%s') = '%s' (expect '%s')")
1333 % p->in % pc % p->out);
1334 }
1335
1336
1337 UNIT_TEST_CHECKPOINT("system_path basenames");
1338
1339 initial_abs_path.unset();
1340 initial_abs_path.set(system_path("/a/b"), true);
1341
1342 for (struct t const *p = sp_cases; p->in; p++)
1343 {
1344 system_path fp(p->in);
1345 path_component pc(fp.basename());
1346 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1347 FL("basename('%s') = '%s' (expect '%s')")
1348 % p->in % pc % p->out);
1349 }
1350
1351 // any_path::basename() should return exactly the same thing that
1352 // the corresponding specialized basename() does, but with type any_path.
1353 UNIT_TEST_CHECKPOINT("any_path basenames");
1354 for (struct t const *p = fp_cases; p->in; p++)
1355 {
1356 any_path ap(file_path_internal(p->in));
1357 path_component pc(ap.basename());
1358 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1359 FL("basename('%s') = '%s' (expect '%s')")
1360 % p->in % pc % p->out);
1361 }
1362 for (struct t const *p = bp_cases; p->in; p++)
1363 {
1364 any_path ap(bookkeeping_path(p->in));
1365 path_component pc(ap.basename());
1366 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1367 FL("basename('%s') = '%s' (expect '%s')")
1368 % p->in % pc % p->out);
1369 }
1370 for (struct t const *p = sp_cases; p->in; p++)
1371 {
1372 any_path ap(system_path(p->in));
1373 path_component pc(ap.basename());
1374 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1375 FL("basename('%s') = '%s' (expect '%s')")
1376 % p->in % pc % p->out);
1377 }
1378
1379 initial_abs_path.unset();
1380}
1381
1382UNIT_TEST(paths, dirname)
1383{
1384 struct t
1385 {
1386 char const * in;
1387 char const * out;
1388 };
1389 // file_paths cannot be absolute, but may be the empty string.
1390 struct t const fp_cases[] = {
1391 { "", "" },
1392 { "foo", "" },
1393 { "foo/bar", "foo" },
1394 { "foo/bar/baz", "foo/bar" },
1395 { 0, 0 }
1396 };
1397
1398 // system_paths must be absolute. this relies on the setting of
1399 // initial_abs_path below.
1400 struct t const sp_cases[] = {
1401 { "/", "/" },
1402 { "//", "//" },
1403 { "foo", "/a/b" },
1404 { "/foo", "/" },
1405 { "//foo", "//" },
1406 { "~/foo", "~" },
1407 { "foo/bar", "/a/b/foo" },
1408 { "/foo/bar", "/foo" },
1409 { "//foo/bar", "//foo" },
1410 { "~/foo/bar", "~/foo" },
1411#ifdef WIN32
1412 { "c:", "c:" },
1413 { "c:foo", "c:" },
1414 { "c:/", "c:/" },
1415 { "c:/foo", "c:/" },
1416 { "c:/foo/bar", "c:/foo" },
1417#else
1418 { "c:", "/a/b" },
1419 { "c:foo", "/a/b" },
1420 { "c:/", "/a/b" },
1421 { "c:/foo", "/a/b/c:" },
1422 { "c:/foo/bar", "/a/b/c:/foo" },
1423#endif
1424 { 0, 0 }
1425 };
1426
1427 initial_abs_path.unset();
1428
1429 UNIT_TEST_CHECKPOINT("file_path dirnames");
1430 for (struct t const *p = fp_cases; p->in; p++)
1431 {
1432 file_path fp = file_path_internal(p->in);
1433 file_path dn = fp.dirname();
1434 UNIT_TEST_CHECK_MSG(dn == file_path_internal(p->out),
1435 FL("dirname('%s') = '%s' (expect '%s')")
1436 % p->in % dn % p->out);
1437 }
1438
1439
1440 initial_abs_path.set(system_path("/a/b"), true);
1441 UNIT_TEST_CHECKPOINT("system_path dirnames");
1442 for (struct t const *p = sp_cases; p->in; p++)
1443 {
1444 system_path fp(p->in);
1445 system_path dn(fp.dirname());
1446
1447 UNIT_TEST_CHECK_MSG(dn == system_path(p->out),
1448 FL("dirname('%s') = '%s' (expect '%s')")
1449 % p->in % dn % p->out);
1450 }
1451
1452 // any_path::dirname() should return exactly the same thing that
1453 // the corresponding specialized dirname() does, but with type any_path.
1454 UNIT_TEST_CHECKPOINT("any_path dirnames");
1455 for (struct t const *p = fp_cases; p->in; p++)
1456 {
1457 any_path ap(file_path_internal(p->in));
1458 any_path dn(ap.dirname());
1459 any_path rf(file_path_internal(p->out));
1460 UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(),
1461 FL("dirname('%s') = '%s' (expect '%s')")
1462 % p->in % dn % rf);
1463 }
1464 for (struct t const *p = sp_cases; p->in; p++)
1465 {
1466 any_path ap(system_path(p->in));
1467 any_path dn(ap.dirname());
1468 any_path rf(system_path(p->out));
1469 UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(),
1470 FL("dirname('%s') = '%s' (expect '%s')")
1471 % p->in % dn % rf);
1472 }
1473
1474 initial_abs_path.unset();
1475}
1476
1477UNIT_TEST(paths, depth)
1478{
1479 char const * const cases[] = {"", "foo", "foo/bar", "foo/bar/baz", 0};
1480 for (unsigned int i = 0; cases[i]; i++)
1481 {
1482 file_path fp = file_path_internal(cases[i]);
1483 unsigned int d = fp.depth();
1484 UNIT_TEST_CHECK_MSG(d == i,
1485 FL("depth('%s') = %d (expect %d)") % fp % d % i);
1486 }
1487}
1488
1489static void check_bk_normalizes_to(char const * before, char const * after)
1490{
1491 bookkeeping_path bp(bookkeeping_root / before);
1492 L(FL("normalizing %s to %s (got %s)") % before % after % bp);
1493 UNIT_TEST_CHECK(bp.as_external() == after);
1494 UNIT_TEST_CHECK(bookkeeping_path(bp.as_internal()).as_internal() == bp.as_internal());
1495}
1496
1497UNIT_TEST(paths, bookkeeping)
1498{
1499 char const * const baddies[] = {"/foo",
1500 "foo//bar",
1501 "foo/../bar",
1502 "../bar",
1503 "foo/bar/",
1504 "foo/bar/.",
1505 "foo/bar/./",
1506 "foo/./bar",
1507 "./foo",
1508 ".",
1509 "..",
1510 "c:\\foo",
1511 "c:foo",
1512 "c:/foo",
1513 "",
1514 "a:b",
1515 0 };
1516 string tmp_path_string;
1517
1518 for (char const * const * c = baddies; *c; ++c)
1519 {
1520 L(FL("test_bookkeeping_path baddie: trying '%s'") % *c);
1521 UNIT_TEST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign(*c)),
1522 logic_error);
1523 UNIT_TEST_CHECK_THROW(bookkeeping_root / *c, logic_error);
1524 }
1525
1526 // these are legitimate as things to append to bookkeeping_root, but
1527 // not as bookkeeping_paths in themselves.
1528 UNIT_TEST_CHECK_THROW(bookkeeping_path("a"), logic_error);
1529 UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "a", logic_error);
1530 UNIT_TEST_CHECK_THROW(bookkeeping_path("foo/bar"), logic_error);
1531 UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "foo/bar", logic_error);
1532
1533 check_bk_normalizes_to("a", "_MTN/a");
1534 check_bk_normalizes_to("foo", "_MTN/foo");
1535 check_bk_normalizes_to("foo/bar", "_MTN/foo/bar");
1536 check_bk_normalizes_to("foo/bar/baz", "_MTN/foo/bar/baz");
1537}
1538
1539static void check_system_normalizes_to(char const * before, char const * after)
1540{
1541 system_path sp(before);
1542 L(FL("normalizing '%s' to '%s' (got '%s')") % before % after % sp);
1543 UNIT_TEST_CHECK(sp.as_external() == after);
1544 UNIT_TEST_CHECK(system_path(sp.as_internal()).as_internal() == sp.as_internal());
1545}
1546
1547UNIT_TEST(paths, system)
1548{
1549 initial_abs_path.unset();
1550 initial_abs_path.set(system_path("/a/b"), true);
1551
1552 UNIT_TEST_CHECK_THROW(system_path(""), informative_failure);
1553
1554 check_system_normalizes_to("foo", "/a/b/foo");
1555 check_system_normalizes_to("foo/bar", "/a/b/foo/bar");
1556 check_system_normalizes_to("/foo/bar", "/foo/bar");
1557 check_system_normalizes_to("//foo/bar", "//foo/bar");
1558#ifdef WIN32
1559 check_system_normalizes_to("c:foo", "c:foo");
1560 check_system_normalizes_to("c:/foo", "c:/foo");
1561 check_system_normalizes_to("c:\\foo", "c:/foo");
1562#else
1563 check_system_normalizes_to("c:foo", "/a/b/c:foo");
1564 check_system_normalizes_to("c:/foo", "/a/b/c:/foo");
1565 check_system_normalizes_to("c:\\foo", "/a/b/c:\\foo");
1566 check_system_normalizes_to("foo:bar", "/a/b/foo:bar");
1567#endif
1568 // we require that system_path normalize out ..'s, because of the following
1569 // case:
1570 // /work mkdir newdir
1571 // /work$ cd newdir
1572 // /work/newdir$ monotone setup --db=../foo.db
1573 // Now they have either "/work/foo.db" or "/work/newdir/../foo.db" in
1574 // _MTN/options
1575 // /work/newdir$ cd ..
1576 // /work$ mv newdir newerdir # better name
1577 // Oops, now, if we stored the version with ..'s in, this workspace
1578 // is broken.
1579 check_system_normalizes_to("../foo", "/a/foo");
1580 check_system_normalizes_to("foo/..", "/a/b");
1581 check_system_normalizes_to("/foo/bar/..", "/foo");
1582 check_system_normalizes_to("/foo/..", "/");
1583 // can't do particularly interesting checking of tilde expansion, but at
1584 // least we can check that it's doing _something_...
1585 string tilde_expanded = system_path("~/foo").as_external();
1586#ifdef WIN32
1587 UNIT_TEST_CHECK(tilde_expanded[1] == ':');
1588#else
1589 UNIT_TEST_CHECK(tilde_expanded[0] == '/');
1590#endif
1591 UNIT_TEST_CHECK(tilde_expanded.find('~') == string::npos);
1592 // on Windows, ~name is not expanded
1593#ifdef WIN32
1594 UNIT_TEST_CHECK(system_path("~this_user_does_not_exist_anywhere")
1595 .as_external()
1596 == "/a/b/~this_user_does_not_exist_anywhere");
1597#else
1598 UNIT_TEST_CHECK_THROW(system_path("~this_user_does_not_exist_anywhere"),
1599 informative_failure);
1600#endif
1601
1602 // finally, make sure that the copy-from-any_path constructor works right
1603 // in particular, it should interpret the paths it gets as being relative to
1604 // the project root, not the initial path
1605 working_root.unset();
1606 working_root.set(system_path("/working/root"), true);
1607 initial_rel_path.unset();
1608 initial_rel_path.set(string("rel/initial"), true);
1609
1610 UNIT_TEST_CHECK(system_path(system_path("foo/bar")).as_internal() == "/a/b/foo/bar");
1611 UNIT_TEST_CHECK(!working_root.used);
1612 UNIT_TEST_CHECK(system_path(system_path("/foo/bar")).as_internal() == "/foo/bar");
1613 UNIT_TEST_CHECK(!working_root.used);
1614 UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar"), false).as_internal()
1615 == "/working/root/foo/bar");
1616 UNIT_TEST_CHECK(!working_root.used);
1617 UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar")).as_internal()
1618 == "/working/root/foo/bar");
1619 UNIT_TEST_CHECK(working_root.used);
1620 UNIT_TEST_CHECK(system_path(file_path_external(utf8("foo/bar"))).as_external()
1621 == "/working/root/rel/initial/foo/bar");
1622 file_path a_file_path;
1623 UNIT_TEST_CHECK(system_path(a_file_path).as_external()
1624 == "/working/root");
1625 UNIT_TEST_CHECK(system_path(bookkeeping_path("_MTN/foo/bar")).as_internal()
1626 == "/working/root/_MTN/foo/bar");
1627 UNIT_TEST_CHECK(system_path(bookkeeping_root).as_internal()
1628 == "/working/root/_MTN");
1629 initial_abs_path.unset();
1630 working_root.unset();
1631 initial_rel_path.unset();
1632}
1633
1634UNIT_TEST(paths, access_tracker)
1635{
1636 access_tracker<int> a;
1637 UNIT_TEST_CHECK_THROW(a.get(), logic_error);
1638 a.set(1, false);
1639 UNIT_TEST_CHECK_THROW(a.set(2, false), logic_error);
1640 a.set(2, true);
1641 UNIT_TEST_CHECK_THROW(a.set(3, false), logic_error);
1642 UNIT_TEST_CHECK(a.get() == 2);
1643 UNIT_TEST_CHECK_THROW(a.set(3, true), logic_error);
1644 a.unset();
1645 a.may_not_initialize();
1646 UNIT_TEST_CHECK_THROW(a.set(1, false), logic_error);
1647 UNIT_TEST_CHECK_THROW(a.set(2, true), logic_error);
1648 a.unset();
1649 a.set(1, false);
1650 UNIT_TEST_CHECK_THROW(a.may_not_initialize(), logic_error);
1651}
1652
1653static void test_path_less_than(string const & left, string const & right)
1654{
1655 MM(left);
1656 MM(right);
1657 file_path left_fp = file_path_internal(left);
1658 file_path right_fp = file_path_internal(right);
1659 I(left_fp < right_fp);
1660}
1661
1662static void test_path_equal(string const & left, string const & right)
1663{
1664 MM(left);
1665 MM(right);
1666 file_path left_fp = file_path_internal(left);
1667 file_path right_fp = file_path_internal(right);
1668 I(left_fp == right_fp);
1669}
1670
1671UNIT_TEST(paths, ordering)
1672{
1673 // this ordering is very important:
1674 // -- it is used to determine the textual form of csets and manifests
1675 // (in particular, it cannot be changed)
1676 // -- it is used to determine in what order cset operations can be applied
1677 // (in particular, foo must sort before foo/bar, so that we can use it
1678 // to do top-down and bottom-up traversals of a set of paths).
1679 test_path_less_than("a", "b");
1680 test_path_less_than("a", "c");
1681 test_path_less_than("ab", "ac");
1682 test_path_less_than("a", "ab");
1683 test_path_less_than("", "a");
1684 test_path_less_than("", ".foo");
1685 test_path_less_than("foo", "foo/bar");
1686 // . is before / asciibetically, so sorting by strings will give the wrong
1687 // answer on this:
1688 test_path_less_than("foo/bar", "foo.bar");
1689
1690 // path_components used to be interned strings, and we used the default sort
1691 // order, which meant that in practice path components would sort in the
1692 // _order they were first used in the program_. So let's put in a test that
1693 // would catch this sort of brokenness.
1694 test_path_less_than("fallanopic_not_otherwise_mentioned", "xyzzy");
1695 test_path_less_than("fallanoooo_not_otherwise_mentioned_and_smaller",
1696 "fallanopic_not_otherwise_mentioned");
1697}
1698
1699UNIT_TEST(paths, ordering_random)
1700{
1701 char x[4] = {0,0,0,0};
1702 char y[4] = {0,0,0,0};
1703 u8 a, b, c, d;
1704 const int ntrials = 1000;
1705 int i;
1706 randomizer rng;
1707
1708 // use of numbers is intentional; these strings are defined to be UTF-8.
1709
1710 UNIT_TEST_CHECKPOINT("a and b");
1711 for (i = 0; i < ntrials; i++)
1712 {
1713 do a = rng.uniform(0x7f - 0x20) + 0x20;
1714 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1715
1716 do b = rng.uniform(0x7f - 0x20) + 0x20;
1717 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1718
1719 x[0] = a;
1720 y[0] = b;
1721 if (a < b)
1722 test_path_less_than(x, y);
1723 else if (a > b)
1724 test_path_less_than(y, x);
1725 else
1726 test_path_equal(x, y);
1727 }
1728
1729 UNIT_TEST_CHECKPOINT("ab and cd");
1730 for (i = 0; i < ntrials; i++)
1731 {
1732 do
1733 {
1734 do a = rng.uniform(0x7f - 0x20) + 0x20;
1735 while (a == 0x5c || a == 0x2f); // '\\', '/'
1736
1737 do b = rng.uniform(0x7f - 0x20) + 0x20;
1738 while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':'
1739 }
1740 while (a == 0x2e && b == 0x2e); // ".."
1741
1742 do
1743 {
1744 do c = rng.uniform(0x7f - 0x20) + 0x20;
1745 while (c == 0x5c || c == 0x2f); // '\\', '/'
1746
1747 do d = rng.uniform(0x7f - 0x20) + 0x20;
1748 while (d == 0x5c || d == 0x2f || d == 0x3a); // '\\', '/', ':'
1749 }
1750 while (c == 0x2e && d == 0x2e); // ".."
1751
1752 x[0] = a;
1753 x[1] = b;
1754 y[0] = c;
1755 y[1] = d;
1756
1757 if (a < c || (a == c && b < d))
1758 test_path_less_than(x, y);
1759 else if (a > c || (a == c && b > d))
1760 test_path_less_than(y, x);
1761 else
1762 test_path_equal(x, y);
1763 }
1764
1765 UNIT_TEST_CHECKPOINT("a and b/c");
1766 x[1] = 0;
1767 y[1] = '/';
1768 for (i = 0; i < ntrials; i++)
1769 {
1770 do a = rng.uniform(0x7f - 0x20) + 0x20;
1771 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1772
1773 do b = rng.uniform(0x7f - 0x20) + 0x20;
1774 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1775
1776 do c = rng.uniform(0x7f - 0x20) + 0x20;
1777 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1778
1779 x[0] = a;
1780 y[0] = b;
1781 y[2] = c;
1782
1783 // only the order of a and b matters. 1 sorts before 1/2.
1784 if (a <= b)
1785 test_path_less_than(x, y);
1786 else
1787 test_path_less_than(y, x);
1788 }
1789
1790 UNIT_TEST_CHECKPOINT("ab and c/d");
1791 for (i = 0; i < ntrials; i++)
1792 {
1793 do
1794 {
1795 do a = rng.uniform(0x7f - 0x20) + 0x20;
1796 while (a == 0x5c || a == 0x2f); // '\\', '/'
1797
1798 do b = rng.uniform(0x7f - 0x20) + 0x20;
1799 while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':'
1800 }
1801 while (a == 0x2e && b == 0x2e); // ".."
1802
1803 do c = rng.uniform(0x7f - 0x20) + 0x20;
1804 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1805
1806 do d = rng.uniform(0x7f - 0x20) + 0x20;
1807 while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.'
1808
1809
1810 x[0] = a;
1811 x[1] = b;
1812 y[0] = c;
1813 y[2] = d;
1814
1815 // only the order of a and c matters,
1816 // but this time, 12 sorts after 1/2.
1817 if (a < c)
1818 test_path_less_than(x, y);
1819 else
1820 test_path_less_than(y, x);
1821 }
1822
1823
1824 UNIT_TEST_CHECKPOINT("a/b and c/d");
1825 x[1] = '/';
1826 for (i = 0; i < ntrials; i++)
1827 {
1828 do a = rng.uniform(0x7f - 0x20) + 0x20;
1829 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1830
1831 do b = rng.uniform(0x7f - 0x20) + 0x20;
1832 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1833
1834 do c = rng.uniform(0x7f - 0x20) + 0x20;
1835 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1836
1837 do d = rng.uniform(0x7f - 0x20) + 0x20;
1838 while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.'
1839
1840 x[0] = a;
1841 x[2] = b;
1842 y[0] = c;
1843 y[2] = d;
1844
1845 if (a < c || (a == c && b < d))
1846 test_path_less_than(x, y);
1847 else if (a > c || (a == c && b > d))
1848 test_path_less_than(y, x);
1849 else
1850 test_path_equal(x, y);
1851 }
1852}
1853
1854UNIT_TEST(paths, test_internal_string_is_bookkeeping_path)
1855{
1856 char const * const yes[] = {"_MTN",
1857 "_MTN/foo",
1858 "_mtn/Foo",
1859 0 };
1860 char const * const no[] = {"foo/_MTN",
1861 "foo/bar",
1862 0 };
1863 for (char const * const * c = yes; *c; ++c)
1864 UNIT_TEST_CHECK(bookkeeping_path
1865 ::internal_string_is_bookkeeping_path(utf8(std::string(*c))));
1866 for (char const * const * c = no; *c; ++c)
1867 UNIT_TEST_CHECK(!bookkeeping_path
1868 ::internal_string_is_bookkeeping_path(utf8(std::string(*c))));
1869}
1870
1871UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_none)
1872{
1873 initial_rel_path.unset();
1874 initial_rel_path.set(string(), true);
1875
1876 char const * const yes[] = {"_MTN",
1877 "_MTN/foo",
1878 "_mtn/Foo",
1879 "_MTN/foo/..",
1880 0 };
1881 char const * const no[] = {"foo/_MTN",
1882 "foo/bar",
1883 "_MTN/..",
1884 0 };
1885 for (char const * const * c = yes; *c; ++c)
1886 UNIT_TEST_CHECK(bookkeeping_path
1887 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1888 for (char const * const * c = no; *c; ++c)
1889 UNIT_TEST_CHECK(!bookkeeping_path
1890 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1891}
1892
1893UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_a_b)
1894{
1895 initial_rel_path.unset();
1896 initial_rel_path.set(string("a/b"), true);
1897
1898 char const * const yes[] = {"../../_MTN",
1899 "../../_MTN/foo",
1900 "../../_mtn/Foo",
1901 "../../_MTN/foo/..",
1902 "../../foo/../_MTN/foo",
1903 0 };
1904 char const * const no[] = {"foo/_MTN",
1905 "foo/bar",
1906 "_MTN",
1907 "../../foo/_MTN",
1908 0 };
1909 for (char const * const * c = yes; *c; ++c)
1910 UNIT_TEST_CHECK(bookkeeping_path
1911 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1912 for (char const * const * c = no; *c; ++c)
1913 UNIT_TEST_CHECK(!bookkeeping_path
1914 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1915}
1916
1917UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix__MTN)
1918{
1919 initial_rel_path.unset();
1920 initial_rel_path.set(string("_MTN"), true);
1921
1922 char const * const yes[] = {".",
1923 "foo",
1924 "../_MTN/foo/..",
1925 "../_mtn/foo",
1926 "../foo/../_MTN/foo",
1927 0 };
1928 char const * const no[] = {"../foo",
1929 "../foo/bar",
1930 "../foo/_MTN",
1931 0 };
1932 for (char const * const * c = yes; *c; ++c)
1933 UNIT_TEST_CHECK(bookkeeping_path
1934 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1935 for (char const * const * c = no; *c; ++c)
1936 UNIT_TEST_CHECK(!bookkeeping_path
1937 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1938}
1939
1940#endif // BUILD_UNIT_TESTS
1941
1942// Local Variables:
1943// mode: C++
1944// fill-column: 76
1945// c-file-style: "gnu"
1946// indent-tabs-mode: nil
1947// End:
1948// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status