monotone

monotone Mtn Source Tree

Root/paths.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <sstream>
12
13#include "paths.hh"
14#include "file_io.hh"
15#include "charset.hh"
16
17using std::exception;
18using std::ostream;
19using std::ostringstream;
20using std::string;
21using std::vector;
22
23// some structure to ensure we aren't doing anything broken when resolving
24// filenames. the idea is to make sure
25// -- we don't depend on the existence of something before it has been set
26// -- we don't re-set something that has already been used
27// -- sometimes, we use the _non_-existence of something, so we shouldn't
28// set anything whose un-setted-ness has already been used
29template <typename T>
30struct access_tracker
31{
32 void set(T const & val, bool may_be_initialized)
33 {
34 I(may_be_initialized || !initialized);
35 I(!very_uninitialized);
36 I(!used);
37 initialized = true;
38 value = val;
39 }
40 T const & get()
41 {
42 I(initialized);
43 used = true;
44 return value;
45 }
46 T const & get_but_unused()
47 {
48 I(initialized);
49 return value;
50 }
51 void may_not_initialize()
52 {
53 I(!initialized);
54 very_uninitialized = true;
55 }
56 // for unit tests
57 void unset()
58 {
59 used = initialized = very_uninitialized = false;
60 }
61 T value;
62 bool initialized, used, very_uninitialized;
63 access_tracker() : initialized(false), used(false), very_uninitialized(false) {};
64};
65
66// paths to use in interpreting paths from various sources,
67// conceptually:
68// working_root / initial_rel_path == initial_abs_path
69
70// initial_abs_path is for interpreting relative system_path's
71static access_tracker<system_path> initial_abs_path;
72// initial_rel_path is for interpreting external file_path's
73// we used to make it a file_path, but then you can't run monotone from
74// inside the _MTN/ dir (even when referring to files outside the _MTN/
75// dir). use of a bare string requires some caution but does work.
76static access_tracker<string> initial_rel_path;
77// working_root is for converting file_path's and bookkeeping_path's to
78// system_path's.
79static access_tracker<system_path> working_root;
80
81void
82save_initial_path()
83{
84 // FIXME: BUG: this only works if the current working dir is in utf8
85 initial_abs_path.set(system_path(get_current_working_dir()), false);
86 L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused());
87}
88
89///////////////////////////////////////////////////////////////////////////
90// verifying that internal paths are indeed normalized.
91// this code must be superfast
92///////////////////////////////////////////////////////////////////////////
93
94// normalized means:
95// -- / as path separator
96// -- not an absolute path (on either posix or win32)
97// operationally, this means: first character != '/', first character != '\',
98// second character != ':'
99// -- no illegal characters
100// -- 0x00 -- 0x1f, 0x7f, \ are the illegal characters. \ is illegal
101// unconditionally to prevent people checking in files on posix that
102// have a different interpretation on win32
103// -- (may want to allow 0x0a and 0x0d (LF and CR) in the future, but this
104// is blocked on manifest format changing)
105// (also requires changes to 'automate inventory', possibly others, to
106// handle quoting)
107// -- no doubled /'s
108// -- no trailing /
109// -- no "." or ".." path components
110
// true if COMPONENT can never appear in a normalized path: the empty
// string, "." and ".." are all rejected.
static inline bool
bad_component(string const & component)
{
  return component.empty()
    || component == "."
    || component == "..";
}
122
123static inline bool
124has_bad_chars(string const & path)
125{
126 for (string::const_iterator c = path.begin(); LIKELY(c != path.end()); c++)
127 {
128 // char is often a signed type; convert to unsigned to ensure that
129 // bytes 0x80-0xff are considered > 0x1f.
130 u8 x = (u8)*c;
131 // 0x5c is '\\'; we use the hex constant to make the dependency on
132 // ASCII encoding explicit.
133 if (UNLIKELY(x <= 0x1f || x == 0x5c || x == 0x7f))
134 return true;
135 }
136 return false;
137}
138
139// as above, but disallows / as well.
140static inline bool
141has_bad_component_chars(string const & pc)
142{
143 for (string::const_iterator c = pc.begin(); LIKELY(c != pc.end()); c++)
144 {
145 // char is often a signed type; convert to unsigned to ensure that
146 // bytes 0x80-0xff are considered > 0x1f.
147 u8 x = (u8)*c;
148 // 0x2f is '/' and 0x5c is '\\'; we use hex constants to make the
149 // dependency on ASCII encoding explicit.
150 if (UNLIKELY(x <= 0x1f || x == 0x2f || x == 0x5c || x == 0x7f))
151 return true;
152 }
153 return false;
154
155}
156
// true if PATH is absolute according to the rules of the platform we
// were built for.  a leading '/' is absolute everywhere; a leading
// backslash or a ':' in second position only counts when WIN32 is
// defined.  the empty path is never absolute.
static bool
is_absolute_here(string const & path)
{
  if (path.empty())
    return false;
#ifdef WIN32
  if (path[0] == '\\' || (path.size() > 1 && path[1] == ':'))
    return true;
#endif
  return path[0] == '/';
}
172
// true if PATH would be absolute on either posix or win32: it starts
// with '/' or '\', or has ':' as its second character.  the empty path
// is never absolute.
static inline bool
is_absolute_somewhere(string const & path)
{
  if (path.empty())
    return false;

  char const first = path[0];
  return first == '/'
    || first == '\\'
    || (path.size() > 1 && path[1] == ':');
}
186
187// fully_normalized_path verifies a complete pathname for validity and
188// having been properly normalized (as if by normalize_path, below).
189static inline bool
190fully_normalized_path(string const & path)
191{
192 // empty path is fine
193 if (path.empty())
194 return true;
195 // could use is_absolute_somewhere, but this is the only part of it that
196 // wouldn't be redundant
197 if (path.size() > 1 && path[1] == ':')
198 return false;
199 // first scan for completely illegal bytes
200 if (has_bad_chars(path))
201 return false;
202 // now check each component
203 string::size_type start = 0, stop;
204 while (1)
205 {
206 stop = path.find('/', start);
207 if (stop == string::npos)
208 break;
209 string const & s(path.substr(start, stop - start));
210 if (bad_component(s))
211 return false;
212 start = stop + 1;
213 }
214
215 string const & s(path.substr(start));
216 return !bad_component(s);
217}
218
// true if PATH names the bookkeeping directory or something inside it.
//
// _MTN, _MTn, _MtN, _mtn etc. are all treated as the bookkeeping
// directory, because on case insensitive filesystems files put in any of
// them may end up in _MTN instead, which allows arbitrary code execution.
// A better solution would be to fix this in the working directory writing
// code -- this prevents all-unix projects from naming things "_mtn", which
// is less rude than when the bookkeeping root was "MT", but still rude --
// but as a temporary security kluge it works.
static inline bool
in_bookkeeping_dir(string const & path)
{
  // anything shorter than "_MTN" cannot match
  if (path.size() < 4)
    return false;

  if (path[0] != '_')
    return false;
  if (path[1] != 'M' && path[1] != 'm')
    return false;
  if (path[2] != 'T' && path[2] != 't')
    return false;
  if (path[3] != 'N' && path[3] != 'n')
    return false;

  // the first four characters are '_', 'M', 'T', 'N' (letters in either
  // case).  this is a bookkeeping path if that is the whole path, or if
  // the path continues with a '/'.
  return path.size() == 4 || path[4] == '/';
}
244
245static inline bool
246is_valid_internal(string const & path)
247{
248 return (fully_normalized_path(path)
249 && !in_bookkeeping_dir(path));
250}
251
252static string
253normalize_path(string const & in)
254{
255 string inT = in;
256 string leader;
257 MM(inT);
258
259#ifdef WIN32
260 // the first thing we do is kill all the backslashes
261 for (string::iterator i = inT.begin(); i != inT.end(); i++)
262 if (*i == '\\')
263 *i = '/';
264#endif
265
266 if (is_absolute_here (inT))
267 {
268 if (inT[0] == '/')
269 {
270 leader = "/";
271 inT = inT.substr(1);
272
273 if (inT.size() > 0 && inT[0] == '/')
274 {
275 // if there are exactly two slashes at the beginning they
276 // are both preserved. three or more are the same as one.
277 string::size_type f = inT.find_first_not_of("/");
278 if (f == string::npos)
279 f = inT.size();
280 if (f == 1)
281 leader = "//";
282 inT = inT.substr(f);
283 }
284 }
285#ifdef WIN32
286 else
287 {
288 I(inT[1] == ':');
289 if (inT.size() > 2 && inT[2] == '/')
290 {
291 leader = inT.substr(0, 3);
292 inT = inT.substr(3);
293 }
294 else
295 {
296 leader = inT.substr(0, 2);
297 inT = inT.substr(2);
298 }
299 }
300#endif
301
302 I(!is_absolute_here(inT));
303 if (inT.size() == 0)
304 return leader;
305 }
306
307 vector<string> stack;
308 string::const_iterator head, tail;
309 string::size_type size_estimate = leader.size();
310 for (head = inT.begin(); head != inT.end(); head = tail)
311 {
312 tail = head;
313 while (tail != inT.end() && *tail != '/')
314 tail++;
315
316 string elt(head, tail);
317 while (tail != inT.end() && *tail == '/')
318 tail++;
319
320 if (elt == ".")
321 continue;
322 // remove foo/.. element pairs; leave leading .. components alone
323 if (elt == ".." && !stack.empty() && stack.back() != "..")
324 {
325 stack.pop_back();
326 continue;
327 }
328
329 size_estimate += elt.size() + 1;
330 stack.push_back(elt);
331 }
332
333 leader.reserve(size_estimate);
334 for (vector<string>::const_iterator i = stack.begin(); i != stack.end(); i++)
335 {
336 if (i != stack.begin())
337 leader += "/";
338 leader += *i;
339 }
340 return leader;
341}
342
343static void
344normalize_external_path(string const & path, string & normalized)
345{
346 if (!initial_rel_path.initialized)
347 {
348 // we are not in a workspace; treat this as an internal
349 // path, and set the access_tracker() into a very uninitialised
350 // state so that we will hit an exception if we do eventually
351 // enter a workspace
352 initial_rel_path.may_not_initialize();
353 normalized = path;
354 N(is_valid_internal(path),
355 F("path '%s' is invalid") % path);
356 }
357 else
358 {
359 N(!path.empty(), F("empty path '%s' is invalid") % path);
360 N(!is_absolute_here(path), F("absolute path '%s' is invalid") % path);
361 string base;
362 try
363 {
364 base = initial_rel_path.get();
365 if (base == "")
366 normalized = normalize_path(path);
367 else
368 normalized = normalize_path(base + "/" + path);
369 }
370 catch (exception &)
371 {
372 N(false, F("path '%s' is invalid") % path);
373 }
374 if (normalized == ".")
375 normalized = string("");
376 N(fully_normalized_path(normalized),
377 F("path '%s' is invalid") % normalized);
378 }
379}
380
381///////////////////////////////////////////////////////////////////////////
382// single path component handling.
383///////////////////////////////////////////////////////////////////////////
384
385// these constructors confirm that what they are passed is a legitimate
386// component. note that the empty string is a legitimate component,
387// but is not acceptable to bad_component (above) and therefore we have
388// to open-code most of those checks.
389path_component::path_component(utf8 const & d)
390 : data(d())
391{
392 MM(data);
393 I(!has_bad_component_chars(data) && data != "." && data != "..");
394}
395
396path_component::path_component(string const & d)
397 : data(d)
398{
399 MM(data);
400 I(utf8_validate(utf8(data))
401 && !has_bad_component_chars(data)
402 && data != "." && data != "..");
403}
404
405path_component::path_component(char const * d)
406 : data(d)
407{
408 MM(data);
409 I(utf8_validate(utf8(data))
410 && !has_bad_component_chars(data)
411 && data != "." && data != "..");
412}
413
414std::ostream & operator<<(std::ostream & s, path_component const & pc)
415{
416 return s << pc();
417}
418
419template <> void dump(path_component const & pc, std::string & to)
420{
421 to = pc();
422}
423
424///////////////////////////////////////////////////////////////////////////
425// complete paths to files within a working directory
426///////////////////////////////////////////////////////////////////////////
427
428file_path::file_path(file_path::source_type type, string const & path)
429{
430 MM(path);
431 I(utf8_validate(utf8(path)));
432 if (type == external)
433 {
434 string normalized;
435 normalize_external_path(path, normalized);
436 N(!in_bookkeeping_dir(normalized),
437 F("path '%s' is in bookkeeping dir") % normalized);
438 data = normalized;
439 }
440 else
441 data = path;
442 MM(data);
443 I(is_valid_internal(data));
444}
445
446file_path::file_path(file_path::source_type type, utf8 const & path)
447{
448 MM(path);
449 I(utf8_validate(path));
450 if (type == external)
451 {
452 string normalized;
453 normalize_external_path(path(), normalized);
454 N(!in_bookkeeping_dir(normalized),
455 F("path '%s' is in bookkeeping dir") % normalized);
456 data = normalized;
457 }
458 else
459 data = path();
460 MM(data);
461 I(is_valid_internal(data));
462}
463
464bookkeeping_path::bookkeeping_path(string const & path)
465{
466 I(fully_normalized_path(path));
467 I(in_bookkeeping_dir(path));
468 data = path;
469}
470
471bool
472bookkeeping_path::external_string_is_bookkeeping_path(utf8 const & path)
473{
474 // FIXME: this charset casting everywhere is ridiculous
475 string normalized;
476 normalize_external_path(path(), normalized);
477 return internal_string_is_bookkeeping_path(utf8(normalized));
478}
479bool bookkeeping_path::internal_string_is_bookkeeping_path(utf8 const & path)
480{
481 return in_bookkeeping_dir(path());
482}
483
484///////////////////////////////////////////////////////////////////////////
485// splitting/joining
486// this code must be superfast
487// it depends very much on knowing that it can only be applied to fully
488// normalized, relative, paths.
489///////////////////////////////////////////////////////////////////////////
490
491// this peels off the last component of any path and returns it.
492// the last component of a path with no slashes in it is the complete path.
493// the last component of a path referring to the root directory is an
494// empty string.
495path_component
496any_path::basename() const
497{
498 string const & s = data;
499 string::size_type sep = s.rfind('/');
500#ifdef WIN32
501 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
502 sep = 1;
503#endif
504 if (sep == string::npos)
505 return path_component(s, 0); // force use of short circuit
506 if (sep == s.size())
507 return path_component();
508 return path_component(s, sep + 1);
509}
510
511// this returns all but the last component of any path. It has to take
512// care at the root.
513any_path
514any_path::dirname() const
515{
516 string const & s = data;
517 string::size_type sep = s.rfind('/');
518#ifdef WIN32
519 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
520 sep = 1;
521#endif
522 if (sep == string::npos)
523 return any_path();
524
525 // dirname() of the root directory is itself
526 if (sep == s.size() - 1)
527 return *this;
528
529 // dirname() of a direct child of the root is the root
530 if (sep == 0 || (sep == 1 && s[1] == '/')
531#ifdef WIN32
532 || (sep == 1 || sep == 2 && s[1] == ':')
533#endif
534 )
535 return any_path(s, 0, sep+1);
536
537 return any_path(s, 0, sep);
538}
539
540// these variations exist to get the return type right. also,
541// file_path dirname() can be a little simpler.
542file_path
543file_path::dirname() const
544{
545 string const & s = data;
546 string::size_type sep = s.rfind('/');
547 if (sep == string::npos)
548 return file_path();
549 return file_path(s, 0, sep);
550}
551
552system_path
553system_path::dirname() const
554{
555 string const & s = data;
556 string::size_type sep = s.rfind('/');
557#ifdef WIN32
558 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
559 sep = 1;
560#endif
561 I(sep != string::npos);
562
563 // dirname() of the root directory is itself
564 if (sep == s.size() - 1)
565 return *this;
566
567 // dirname() of a direct child of the root is the root
568 if (sep == 0 || (sep == 1 && s[1] == '/')
569#ifdef WIN32
570 || (sep == 1 || sep == 2 && s[1] == ':')
571#endif
572 )
573 return system_path(s, 0, sep+1);
574
575 return system_path(s, 0, sep);
576}
577
578
579// produce dirname and basename at the same time
580void
581file_path::dirname_basename(file_path & dir, path_component & base) const
582{
583 string const & s = data;
584 string::size_type sep = s.rfind('/');
585 if (sep == string::npos)
586 {
587 dir = file_path();
588 base = path_component(s, 0);
589 }
590 else
591 {
592 I(sep < s.size() - 1); // last component must have at least one char
593 dir = file_path(s, 0, sep);
594 base = path_component(s, sep + 1);
595 }
596}
597
598// count the number of /-separated components of the path.
599unsigned int
600file_path::depth() const
601{
602 if (data.empty())
603 return 0;
604
605 unsigned int components = 1;
606 for (string::const_iterator p = data.begin(); p != data.end(); p++)
607 if (*p == '/')
608 components++;
609
610 return components;
611}
612
613///////////////////////////////////////////////////////////////////////////
614// localizing file names (externalizing them)
615// this code must be superfast when there is no conversion needed
616///////////////////////////////////////////////////////////////////////////
617
618string
619any_path::as_external() const
620{
621#ifdef __APPLE__
622 // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of
623 // locale.
624 return data;
625#else
626 // on normal systems we actually have some work to do, alas.
627 // not much, though, because utf8_to_system_string does all the hard work.
628 // it is carefully optimized. do not screw it up.
629 external out;
630 utf8_to_system_strict(utf8(data), out);
631 return out();
632#endif
633}
634
635///////////////////////////////////////////////////////////////////////////
636// writing out paths
637///////////////////////////////////////////////////////////////////////////
638
639ostream &
640operator <<(ostream & o, any_path const & a)
641{
642 o << a.as_internal();
643 return o;
644}
645
646template <>
647void dump(file_path const & p, string & out)
648{
649 ostringstream oss;
650 oss << p << '\n';
651 out = oss.str();
652}
653
654template <>
655void dump(system_path const & p, string & out)
656{
657 ostringstream oss;
658 oss << p << '\n';
659 out = oss.str();
660}
661
662template <>
663void dump(bookkeeping_path const & p, string & out)
664{
665 ostringstream oss;
666 oss << p << '\n';
667 out = oss.str();
668}
669
670///////////////////////////////////////////////////////////////////////////
671// path manipulation
672// this code's speed does not matter much
673///////////////////////////////////////////////////////////////////////////
674
675// relies on its arguments already being validated, except that you may not
676// append the empty path component, and if you are appending to the empty
677// path, you may not create an absolute path or a path into the bookkeeping
678// directory.
679file_path
680file_path::operator /(path_component const & to_append) const
681{
682 I(!to_append.empty());
683 if (empty())
684 {
685 string const & s = to_append();
686 I(!is_absolute_somewhere(s) && !in_bookkeeping_dir(s));
687 return file_path(s, 0, string::npos);
688 }
689 else
690 return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
691 + to_append(), 0, string::npos);
692}
693
694// similarly, but even less checking is needed.
695file_path
696file_path::operator /(file_path const & to_append) const
697{
698 I(!to_append.empty());
699 if (empty())
700 return to_append;
701 return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
702 + to_append.as_internal(), 0, string::npos);
703}
704
705bookkeeping_path
706bookkeeping_path::operator /(path_component const & to_append) const
707{
708 I(!to_append.empty());
709 I(!empty());
710 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
711 + to_append(), 0, string::npos);
712}
713
714system_path
715system_path::operator /(path_component const & to_append) const
716{
717 I(!to_append.empty());
718 I(!empty());
719 return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
720 + to_append(), 0, string::npos);
721}
722
723any_path
724any_path::operator /(path_component const & to_append) const
725{
726 I(!to_append.empty());
727 I(!empty());
728 return any_path(((*(data.end() - 1) == '/') ? data : data + "/")
729 + to_append(), 0, string::npos);
730}
731
732// these take strings and validate
733bookkeeping_path
734bookkeeping_path::operator /(char const * to_append) const
735{
736 I(!is_absolute_somewhere(to_append));
737 I(!empty());
738 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
739 + to_append);
740}
741
742system_path
743system_path::operator /(char const * to_append) const
744{
745 I(!empty());
746 I(!is_absolute_here(to_append));
747 return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
748 + to_append);
749}
750
751///////////////////////////////////////////////////////////////////////////
752// system_path
753///////////////////////////////////////////////////////////////////////////
754
755system_path::system_path(any_path const & other, bool in_true_workspace)
756{
757 if (is_absolute_here(other.as_internal()))
758 // another system_path. the normalizing isn't really necessary, but it
759 // makes me feel warm and fuzzy.
760 data = normalize_path(other.as_internal());
761 else
762 {
763 system_path wr;
764 if (in_true_workspace)
765 wr = working_root.get();
766 else
767 wr = working_root.get_but_unused();
768 data = normalize_path(wr.as_internal() + "/" + other.as_internal());
769 }
770}
771
772static inline string const_system_path(utf8 const & path)
773{
774 N(!path().empty(), F("invalid path ''"));
775 string expanded = tilde_expand(path());
776 if (is_absolute_here(expanded))
777 return normalize_path(expanded);
778 else
779 return normalize_path(initial_abs_path.get().as_internal()
780 + "/" + path());
781}
782
783system_path::system_path(string const & path)
784{
785 data = const_system_path(utf8(path));
786}
787
788system_path::system_path(utf8 const & path)
789{
790 data = const_system_path(utf8(path));
791}
792
793///////////////////////////////////////////////////////////////////////////
794// workspace (and path root) handling
795///////////////////////////////////////////////////////////////////////////
796
797static bool
798find_bookdir(system_path const & root, path_component const & bookdir,
799 system_path & current, string & removed)
800{
801 current = initial_abs_path.get();
802 removed.clear();
803
804 // check that the current directory is below the specified search root
805 if (current.as_internal().find(root.as_internal()) != 0)
806 {
807 W(F("current directory '%s' is not below root '%s'") % current % root);
808 return false;
809 }
810
811 L(FL("searching for '%s' directory with root '%s'") % bookdir % root);
812
813 system_path check;
814 while (!(current == root))
815 {
816 check = current / bookdir;
817 switch (get_path_status(check))
818 {
819 case path::nonexistent:
820 L(FL("'%s' not found in '%s' with '%s' removed")
821 % bookdir % current % removed);
822 if (removed.empty())
823 removed = current.basename()();
824 else
825 removed = current.basename()() + "/" + removed;
826 current = current.dirname();
827 continue;
828
829 case path::file:
830 L(FL("'%s' is not a directory") % check);
831 return false;
832
833 case path::directory:
834 goto found;
835 }
836 }
837
838 // if we get here, we have hit the root; try once more
839 check = current / bookdir;
840 switch (get_path_status(check))
841 {
842 case path::nonexistent:
843 L(FL("'%s' not found in '%s' with '%s' removed")
844 % bookdir % current % removed);
845 return false;
846
847 case path::file:
848 L(FL("'%s' is not a directory") % check);
849 return false;
850
851 case path::directory:
852 goto found;
853 }
854 return false;
855
856 found:
857 // check for _MTN/. and _MTN/.. to see if mt dir is readable
858 try
859 {
860 if (!path_exists(check / ".") || !path_exists(check / ".."))
861 {
862 L(FL("problems with '%s' (missing '.' or '..')") % check);
863 return false;
864 }
865 }
866 catch(exception &)
867 {
868 L(FL("problems with '%s' (cannot check for '.' or '..')") % check);
869 return false;
870 }
871 return true;
872}
873
874
875bool
876find_and_go_to_workspace(string const & search_root)
877{
878 system_path root, current;
879 string removed;
880
881 if (search_root.empty())
882 {
883#ifdef WIN32
884 std::string cur_str = get_current_working_dir();
885 current = cur_str;
886 if (cur_str[0] == '/' || cur_str[0] == '\\')
887 {
888 if (cur_str.size() > 1 && (cur_str[1] == '/' || cur_str[1] == '\\'))
889 {
890 // UNC name
891 string::size_type uncend = cur_str.find_first_of("\\/", 2);
892 if (uncend == string::npos)
893 root = system_path(cur_str + "/");
894 else
895 root = system_path(cur_str.substr(0, uncend));
896 }
897 else
898 root = system_path("/");
899 }
900 else if (cur_str.size() > 1 && cur_str[1] == ':')
901 {
902 root = system_path(cur_str.substr(0,2) + "/");
903 }
904 else I(false);
905#else
906 root = system_path("/");
907#endif
908 }
909 else
910 {
911 root = system_path(search_root);
912 L(FL("limiting search for workspace to %s") % root);
913
914 require_path_is_directory(root,
915 F("search root '%s' does not exist") % root,
916 F("search root '%s' is not a directory") % root);
917 }
918
919 // first look for the current name of the bookkeeping directory.
920 // if we don't find it, look for it under the old name, so that
921 // migration has a chance to work.
922 if (!find_bookdir(root, bookkeeping_root_component, current, removed))
923 if (!find_bookdir(root, old_bookkeeping_root_component, current, removed))
924 return false;
925
926 working_root.set(current, true);
927 initial_rel_path.set(removed, true);
928
929 L(FL("working root is '%s'") % working_root.get_but_unused());
930 L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused());
931
932 change_current_working_dir(working_root.get_but_unused());
933
934 return true;
935}
936
937void
938go_to_workspace(system_path const & new_workspace)
939{
940 working_root.set(new_workspace, true);
941 initial_rel_path.set(string(), true);
942 change_current_working_dir(new_workspace);
943}
944
945void
946mark_std_paths_used(void)
947{
948 working_root.get();
949 initial_rel_path.get();
950}
951
952///////////////////////////////////////////////////////////////////////////
953// tests
954///////////////////////////////////////////////////////////////////////////
955
956#ifdef BUILD_UNIT_TESTS
957#include "unit_tests.hh"
958#include "randomizer.hh"
959
960using std::logic_error;
961
962UNIT_TEST(paths, path_component)
963{
964 char const * const baddies[] = {".",
965 "..",
966 "/foo",
967 "\\foo",
968 "foo/bar",
969 "foo\\bar",
970 0 };
971
972 // these would not be okay in a full file_path, but are okay here.
973 char const * const goodies[] = {"c:foo",
974 "_mtn",
975 "_mtN",
976 "_mTn",
977 "_Mtn",
978 "_MTn",
979 "_MtN",
980 "_MTN",
981 0 };
982
983
984 for (char const * const * c = baddies; *c; ++c)
985 {
986 // the comparison prevents the compiler from eliminating the
987 // expression.
988 UNIT_TEST_CHECK_THROW((path_component(*c)()) == *c, logic_error);
989 }
990 for (char const * const *c = goodies; *c; ++c)
991 {
992 path_component p(*c);
993 UNIT_TEST_CHECK_THROW(file_path() / p, logic_error);
994 }
995
996 UNIT_TEST_CHECK_THROW(file_path_internal("foo") / path_component(),
997 logic_error);
998}
999
1000
1001UNIT_TEST(paths, file_path_internal)
1002{
1003 char const * const baddies[] = {"/foo",
1004 "foo//bar",
1005 "foo/../bar",
1006 "../bar",
1007 "_MTN",
1008 "_MTN/blah",
1009 "foo/bar/",
1010 "foo/bar/.",
1011 "foo/bar/./",
1012 "foo/./bar",
1013 "./foo",
1014 ".",
1015 "..",
1016 "c:\\foo",
1017 "c:foo",
1018 "c:/foo",
1019 // some baddies made bad by a security kluge --
1020 // see the comment in in_bookkeeping_dir
1021 "_mtn",
1022 "_mtN",
1023 "_mTn",
1024 "_Mtn",
1025 "_MTn",
1026 "_MtN",
1027 "_mTN",
1028 "_mtn/foo",
1029 "_mtN/foo",
1030 "_mTn/foo",
1031 "_Mtn/foo",
1032 "_MTn/foo",
1033 "_MtN/foo",
1034 "_mTN/foo",
1035 0 };
1036 initial_rel_path.unset();
1037 initial_rel_path.set(string(), true);
1038 for (char const * const * c = baddies; *c; ++c)
1039 {
1040 UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error);
1041 }
1042 initial_rel_path.unset();
1043 initial_rel_path.set("blah/blah/blah", true);
1044 for (char const * const * c = baddies; *c; ++c)
1045 {
1046 UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error);
1047 }
1048
1049 UNIT_TEST_CHECK(file_path().empty());
1050 UNIT_TEST_CHECK(file_path_internal("").empty());
1051
1052 char const * const goodies[] = {"",
1053 "a",
1054 "foo",
1055 "foo/bar/baz",
1056 "foo/bar.baz",
1057 "foo/with-hyphen/bar",
1058 "foo/with_underscore/bar",
1059 "foo/with,other+@weird*%#$=stuff/bar",
1060 ".foo/bar",
1061 "..foo/bar",
1062 "_MTNfoo/bar",
1063 "foo:bar",
1064 0 };
1065
1066 for (int i = 0; i < 2; ++i)
1067 {
1068 initial_rel_path.unset();
1069 initial_rel_path.set(i ? string()
1070 : string("blah/blah/blah"),
1071 true);
1072 for (char const * const * c = goodies; *c; ++c)
1073 {
1074 file_path fp = file_path_internal(*c);
1075 UNIT_TEST_CHECK(fp.as_internal() == *c);
1076 UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp);
1077 }
1078 }
1079
1080 initial_rel_path.unset();
1081}
1082
1083static void check_fp_normalizes_to(char const * before, char const * after)
1084{
1085 L(FL("check_fp_normalizes_to: '%s' -> '%s'") % before % after);
1086 file_path fp = file_path_external(utf8(before));
1087 L(FL(" (got: %s)") % fp);
1088 UNIT_TEST_CHECK(fp.as_internal() == after);
1089 UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp);
1090 // we compare after to the external form too, since as far as we know
1091 // relative normalized posix paths are always good win32 paths too
1092 UNIT_TEST_CHECK(fp.as_external() == after);
1093}
1094
1095UNIT_TEST(paths, file_path_external_null_prefix)
1096{
1097 initial_rel_path.unset();
1098 initial_rel_path.set(string(), true);
1099
1100 char const * const baddies[] = {"/foo",
1101 "../bar",
1102 "_MTN/blah",
1103 "_MTN",
1104 "//blah",
1105 "\\foo",
1106 "..",
1107 "c:\\foo",
1108 "c:foo",
1109 "c:/foo",
1110 "",
1111 // some baddies made bad by a security kluge --
1112 // see the comment in in_bookkeeping_dir
1113 "_mtn",
1114 "_mtN",
1115 "_mTn",
1116 "_Mtn",
1117 "_MTn",
1118 "_MtN",
1119 "_mTN",
1120 "_mtn/foo",
1121 "_mtN/foo",
1122 "_mTn/foo",
1123 "_Mtn/foo",
1124 "_MTn/foo",
1125 "_MtN/foo",
1126 "_mTN/foo",
1127 0 };
1128 for (char const * const * c = baddies; *c; ++c)
1129 {
1130 L(FL("test_file_path_external_null_prefix: trying baddie: %s") % *c);
1131 UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
1132 }
1133
1134 check_fp_normalizes_to("a", "a");
1135 check_fp_normalizes_to("foo", "foo");
1136 check_fp_normalizes_to("foo/bar", "foo/bar");
1137 check_fp_normalizes_to("foo/bar/baz", "foo/bar/baz");
1138 check_fp_normalizes_to("foo/bar.baz", "foo/bar.baz");
1139 check_fp_normalizes_to("foo/with-hyphen/bar", "foo/with-hyphen/bar");
1140 check_fp_normalizes_to("foo/with_underscore/bar", "foo/with_underscore/bar");
1141 check_fp_normalizes_to(".foo/bar", ".foo/bar");
1142 check_fp_normalizes_to("..foo/bar", "..foo/bar");
1143 check_fp_normalizes_to(".", "");
1144#ifndef WIN32
1145 check_fp_normalizes_to("foo:bar", "foo:bar");
1146#endif
1147 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
1148 "foo/with,other+@weird*%#$=stuff/bar");
1149
1150 // Why are these tests with // in them commented out? because boost::fs
1151 // sucks and can't normalize them. FIXME.
1152 //check_fp_normalizes_to("foo//bar", "foo/bar");
1153 check_fp_normalizes_to("foo/../bar", "bar");
1154 check_fp_normalizes_to("foo/bar/", "foo/bar");
1155 check_fp_normalizes_to("foo/bar/.", "foo/bar");
1156 check_fp_normalizes_to("foo/bar/./", "foo/bar");
1157 check_fp_normalizes_to("foo/./bar/", "foo/bar");
1158 check_fp_normalizes_to("./foo", "foo");
1159 //check_fp_normalizes_to("foo///.//", "foo");
1160
1161 initial_rel_path.unset();
1162}
1163
1164UNIT_TEST(paths, file_path_external_prefix__MTN)
1165{
1166 initial_rel_path.unset();
1167 initial_rel_path.set(string("_MTN"), true);
1168
1169 UNIT_TEST_CHECK_THROW(file_path_external(utf8("foo")), informative_failure);
1170 UNIT_TEST_CHECK_THROW(file_path_external(utf8(".")), informative_failure);
1171 UNIT_TEST_CHECK_THROW(file_path_external(utf8("./blah")), informative_failure);
1172 check_fp_normalizes_to("..", "");
1173 check_fp_normalizes_to("../foo", "foo");
1174}
1175
1176UNIT_TEST(paths, file_path_external_prefix_a_b)
1177{
1178 initial_rel_path.unset();
1179 initial_rel_path.set(string("a/b"), true);
1180
1181 char const * const baddies[] = {"/foo",
1182 "../../../bar",
1183 "../../..",
1184 "../../_MTN",
1185 "../../_MTN/foo",
1186 "//blah",
1187 "\\foo",
1188 "c:\\foo",
1189#ifdef WIN32
1190 "c:foo",
1191 "c:/foo",
1192#endif
1193 "",
1194 // some baddies made bad by a security kluge --
1195 // see the comment in in_bookkeeping_dir
1196 "../../_mtn",
1197 "../../_mtN",
1198 "../../_mTn",
1199 "../../_Mtn",
1200 "../../_MTn",
1201 "../../_MtN",
1202 "../../_mTN",
1203 "../../_mtn/foo",
1204 "../../_mtN/foo",
1205 "../../_mTn/foo",
1206 "../../_Mtn/foo",
1207 "../../_MTn/foo",
1208 "../../_MtN/foo",
1209 "../../_mTN/foo",
1210 0 };
1211 for (char const * const * c = baddies; *c; ++c)
1212 {
1213 L(FL("test_file_path_external_prefix_a_b: trying baddie: %s") % *c);
1214 UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure);
1215 }
1216
1217 check_fp_normalizes_to("foo", "a/b/foo");
1218 check_fp_normalizes_to("a", "a/b/a");
1219 check_fp_normalizes_to("foo/bar", "a/b/foo/bar");
1220 check_fp_normalizes_to("foo/bar/baz", "a/b/foo/bar/baz");
1221 check_fp_normalizes_to("foo/bar.baz", "a/b/foo/bar.baz");
1222 check_fp_normalizes_to("foo/with-hyphen/bar", "a/b/foo/with-hyphen/bar");
1223 check_fp_normalizes_to("foo/with_underscore/bar", "a/b/foo/with_underscore/bar");
1224 check_fp_normalizes_to(".foo/bar", "a/b/.foo/bar");
1225 check_fp_normalizes_to("..foo/bar", "a/b/..foo/bar");
1226 check_fp_normalizes_to(".", "a/b");
1227#ifndef WIN32
1228 check_fp_normalizes_to("foo:bar", "a/b/foo:bar");
1229#endif
1230 check_fp_normalizes_to("foo/with,other+@weird*%#$=stuff/bar",
1231 "a/b/foo/with,other+@weird*%#$=stuff/bar");
1232 // why are the tests with // in them commented out? because boost::fs sucks
1233 // and can't normalize them. FIXME.
1234 //check_fp_normalizes_to("foo//bar", "a/b/foo/bar");
1235 check_fp_normalizes_to("foo/../bar", "a/b/bar");
1236 check_fp_normalizes_to("foo/bar/", "a/b/foo/bar");
1237 check_fp_normalizes_to("foo/bar/.", "a/b/foo/bar");
1238 check_fp_normalizes_to("foo/bar/./", "a/b/foo/bar");
1239 check_fp_normalizes_to("foo/./bar/", "a/b/foo/bar");
1240 check_fp_normalizes_to("./foo", "a/b/foo");
1241 //check_fp_normalizes_to("foo///.//", "a/b/foo");
1242 // things that would have been bad without the initial_rel_path:
1243 check_fp_normalizes_to("../foo", "a/foo");
1244 check_fp_normalizes_to("..", "a");
1245 check_fp_normalizes_to("../..", "");
1246 check_fp_normalizes_to("_MTN/foo", "a/b/_MTN/foo");
1247 check_fp_normalizes_to("_MTN", "a/b/_MTN");
1248#ifndef WIN32
1249 check_fp_normalizes_to("c:foo", "a/b/c:foo");
1250 check_fp_normalizes_to("c:/foo", "a/b/c:/foo");
1251#endif
1252
1253 initial_rel_path.unset();
1254}
1255
1256UNIT_TEST(paths, basename)
1257{
1258 struct t
1259 {
1260 char const * in;
1261 char const * out;
1262 };
1263 // file_paths cannot be absolute, but may be the empty string.
1264 struct t const fp_cases[] = {
1265 { "", "" },
1266 { "foo", "foo" },
1267 { "foo/bar", "bar" },
1268 { "foo/bar/baz", "baz" },
1269 { 0, 0 }
1270 };
1271 // bookkeeping_paths cannot be absolute and must start with the
1272 // bookkeeping_root_component.
1273 struct t const bp_cases[] = {
1274 { "_MTN", "_MTN" },
1275 { "_MTN/foo", "foo" },
1276 { "_MTN/foo/bar", "bar" },
1277 { 0, 0 }
1278 };
1279
1280 // system_paths must be absolute. this relies on the setting of
1281 // initial_abs_path below. note that most of the cases whose full paths
1282 // vary between Unix and Windows will still have the same basenames.
1283 struct t const sp_cases[] = {
1284 { "/", "" },
1285 { "//", "" },
1286 { "foo", "foo" },
1287 { "/foo", "foo" },
1288 { "//foo", "foo" },
1289 { "~/foo", "foo" },
1290 { "c:/foo", "foo" },
1291 { "foo/bar", "bar" },
1292 { "/foo/bar", "bar" },
1293 { "//foo/bar", "bar" },
1294 { "~/foo/bar", "bar" },
1295 { "c:/foo/bar", "bar" },
1296#ifdef WIN32
1297 { "c:/", "" },
1298 { "c:foo", "foo" },
1299#else
1300 { "c:/", "c:" },
1301 { "c:foo", "c:foo" },
1302#endif
1303 { 0, 0 }
1304 };
1305
1306 UNIT_TEST_CHECKPOINT("file_path basenames");
1307 for (struct t const *p = fp_cases; p->in; p++)
1308 {
1309 file_path fp = file_path_internal(p->in);
1310 path_component pc(fp.basename());
1311 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1312 FL("basename('%s') = '%s' (expect '%s')")
1313 % p->in % pc % p->out);
1314 }
1315
1316 UNIT_TEST_CHECKPOINT("bookkeeping_path basenames");
1317 for (struct t const *p = bp_cases; p->in; p++)
1318 {
1319 bookkeeping_path fp(p->in);
1320 path_component pc(fp.basename());
1321 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1322 FL("basename('%s') = '%s' (expect '%s')")
1323 % p->in % pc % p->out);
1324 }
1325
1326
1327 UNIT_TEST_CHECKPOINT("system_path basenames");
1328
1329 initial_abs_path.unset();
1330 initial_abs_path.set(system_path("/a/b"), true);
1331
1332 for (struct t const *p = sp_cases; p->in; p++)
1333 {
1334 system_path fp(p->in);
1335 path_component pc(fp.basename());
1336 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1337 FL("basename('%s') = '%s' (expect '%s')")
1338 % p->in % pc % p->out);
1339 }
1340
1341 // any_path::basename() should return exactly the same thing that
1342 // the corresponding specialized basename() does, but with type any_path.
1343 UNIT_TEST_CHECKPOINT("any_path basenames");
1344 for (struct t const *p = fp_cases; p->in; p++)
1345 {
1346 any_path ap(file_path_internal(p->in));
1347 path_component pc(ap.basename());
1348 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1349 FL("basename('%s') = '%s' (expect '%s')")
1350 % p->in % pc % p->out);
1351 }
1352 for (struct t const *p = bp_cases; p->in; p++)
1353 {
1354 any_path ap(bookkeeping_path(p->in));
1355 path_component pc(ap.basename());
1356 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1357 FL("basename('%s') = '%s' (expect '%s')")
1358 % p->in % pc % p->out);
1359 }
1360 for (struct t const *p = sp_cases; p->in; p++)
1361 {
1362 any_path ap(system_path(p->in));
1363 path_component pc(ap.basename());
1364 UNIT_TEST_CHECK_MSG(pc == path_component(p->out),
1365 FL("basename('%s') = '%s' (expect '%s')")
1366 % p->in % pc % p->out);
1367 }
1368
1369 initial_abs_path.unset();
1370}
1371
1372UNIT_TEST(paths, dirname)
1373{
1374 struct t
1375 {
1376 char const * in;
1377 char const * out;
1378 };
1379 // file_paths cannot be absolute, but may be the empty string.
1380 struct t const fp_cases[] = {
1381 { "", "" },
1382 { "foo", "" },
1383 { "foo/bar", "foo" },
1384 { "foo/bar/baz", "foo/bar" },
1385 { 0, 0 }
1386 };
1387
1388 // system_paths must be absolute. this relies on the setting of
1389 // initial_abs_path below.
1390 struct t const sp_cases[] = {
1391 { "/", "/" },
1392 { "//", "//" },
1393 { "foo", "/a/b" },
1394 { "/foo", "/" },
1395 { "//foo", "//" },
1396 { "~/foo", "~" },
1397 { "foo/bar", "/a/b/foo" },
1398 { "/foo/bar", "/foo" },
1399 { "//foo/bar", "//foo" },
1400 { "~/foo/bar", "~/foo" },
1401#ifdef WIN32
1402 { "c:", "c:" },
1403 { "c:foo", "c:" },
1404 { "c:/", "c:/" },
1405 { "c:/foo", "c:/" },
1406 { "c:/foo/bar", "c:/foo" },
1407#else
1408 { "c:", "/a/b" },
1409 { "c:foo", "/a/b" },
1410 { "c:/", "/a/b" },
1411 { "c:/foo", "/a/b/c:" },
1412 { "c:/foo/bar", "/a/b/c:/foo" },
1413#endif
1414 { 0, 0 }
1415 };
1416
1417 initial_abs_path.unset();
1418
1419 UNIT_TEST_CHECKPOINT("file_path dirnames");
1420 for (struct t const *p = fp_cases; p->in; p++)
1421 {
1422 file_path fp = file_path_internal(p->in);
1423 file_path dn = fp.dirname();
1424 UNIT_TEST_CHECK_MSG(dn == file_path_internal(p->out),
1425 FL("dirname('%s') = '%s' (expect '%s')")
1426 % p->in % dn % p->out);
1427 }
1428
1429
1430 initial_abs_path.set(system_path("/a/b"), true);
1431 UNIT_TEST_CHECKPOINT("system_path dirnames");
1432 for (struct t const *p = sp_cases; p->in; p++)
1433 {
1434 system_path fp(p->in);
1435 system_path dn(fp.dirname());
1436
1437 UNIT_TEST_CHECK_MSG(dn == system_path(p->out),
1438 FL("dirname('%s') = '%s' (expect '%s')")
1439 % p->in % dn % p->out);
1440 }
1441
1442 // any_path::dirname() should return exactly the same thing that
1443 // the corresponding specialized dirname() does, but with type any_path.
1444 UNIT_TEST_CHECKPOINT("any_path dirnames");
1445 for (struct t const *p = fp_cases; p->in; p++)
1446 {
1447 any_path ap(file_path_internal(p->in));
1448 any_path dn(ap.dirname());
1449 any_path rf(file_path_internal(p->out));
1450 UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(),
1451 FL("dirname('%s') = '%s' (expect '%s')")
1452 % p->in % dn % rf);
1453 }
1454 for (struct t const *p = sp_cases; p->in; p++)
1455 {
1456 any_path ap(system_path(p->in));
1457 any_path dn(ap.dirname());
1458 any_path rf(system_path(p->out));
1459 UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(),
1460 FL("dirname('%s') = '%s' (expect '%s')")
1461 % p->in % dn % rf);
1462 }
1463
1464 initial_abs_path.unset();
1465}
1466
1467UNIT_TEST(paths, depth)
1468{
1469 char const * const cases[] = {"", "foo", "foo/bar", "foo/bar/baz", 0};
1470 for (unsigned int i = 0; cases[i]; i++)
1471 {
1472 file_path fp = file_path_internal(cases[i]);
1473 unsigned int d = fp.depth();
1474 UNIT_TEST_CHECK_MSG(d == i,
1475 FL("depth('%s') = %d (expect %d)") % fp % d % i);
1476 }
1477}
1478
1479static void check_bk_normalizes_to(char const * before, char const * after)
1480{
1481 bookkeeping_path bp(bookkeeping_root / before);
1482 L(FL("normalizing %s to %s (got %s)") % before % after % bp);
1483 UNIT_TEST_CHECK(bp.as_external() == after);
1484 UNIT_TEST_CHECK(bookkeeping_path(bp.as_internal()).as_internal() == bp.as_internal());
1485}
1486
1487UNIT_TEST(paths, bookkeeping)
1488{
1489 char const * const baddies[] = {"/foo",
1490 "foo//bar",
1491 "foo/../bar",
1492 "../bar",
1493 "foo/bar/",
1494 "foo/bar/.",
1495 "foo/bar/./",
1496 "foo/./bar",
1497 "./foo",
1498 ".",
1499 "..",
1500 "c:\\foo",
1501 "c:foo",
1502 "c:/foo",
1503 "",
1504 "a:b",
1505 0 };
1506 string tmp_path_string;
1507
1508 for (char const * const * c = baddies; *c; ++c)
1509 {
1510 L(FL("test_bookkeeping_path baddie: trying '%s'") % *c);
1511 UNIT_TEST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign(*c)),
1512 logic_error);
1513 UNIT_TEST_CHECK_THROW(bookkeeping_root / *c, logic_error);
1514 }
1515
1516 // these are legitimate as things to append to bookkeeping_root, but
1517 // not as bookkeeping_paths in themselves.
1518 UNIT_TEST_CHECK_THROW(bookkeeping_path("a"), logic_error);
1519 UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "a", logic_error);
1520 UNIT_TEST_CHECK_THROW(bookkeeping_path("foo/bar"), logic_error);
1521 UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "foo/bar", logic_error);
1522
1523 check_bk_normalizes_to("a", "_MTN/a");
1524 check_bk_normalizes_to("foo", "_MTN/foo");
1525 check_bk_normalizes_to("foo/bar", "_MTN/foo/bar");
1526 check_bk_normalizes_to("foo/bar/baz", "_MTN/foo/bar/baz");
1527}
1528
1529static void check_system_normalizes_to(char const * before, char const * after)
1530{
1531 system_path sp(before);
1532 L(FL("normalizing '%s' to '%s' (got '%s')") % before % after % sp);
1533 UNIT_TEST_CHECK(sp.as_external() == after);
1534 UNIT_TEST_CHECK(system_path(sp.as_internal()).as_internal() == sp.as_internal());
1535}
1536
1537UNIT_TEST(paths, system)
1538{
1539 initial_abs_path.unset();
1540 initial_abs_path.set(system_path("/a/b"), true);
1541
1542 UNIT_TEST_CHECK_THROW(system_path(""), informative_failure);
1543
1544 check_system_normalizes_to("foo", "/a/b/foo");
1545 check_system_normalizes_to("foo/bar", "/a/b/foo/bar");
1546 check_system_normalizes_to("/foo/bar", "/foo/bar");
1547 check_system_normalizes_to("//foo/bar", "//foo/bar");
1548#ifdef WIN32
1549 check_system_normalizes_to("c:foo", "c:foo");
1550 check_system_normalizes_to("c:/foo", "c:/foo");
1551 check_system_normalizes_to("c:\\foo", "c:/foo");
1552#else
1553 check_system_normalizes_to("c:foo", "/a/b/c:foo");
1554 check_system_normalizes_to("c:/foo", "/a/b/c:/foo");
1555 check_system_normalizes_to("c:\\foo", "/a/b/c:\\foo");
1556 check_system_normalizes_to("foo:bar", "/a/b/foo:bar");
1557#endif
1558 // we require that system_path normalize out ..'s, because of the following
1559 // case:
1560 // /work mkdir newdir
1561 // /work$ cd newdir
1562 // /work/newdir$ monotone setup --db=../foo.db
1563 // Now they have either "/work/foo.db" or "/work/newdir/../foo.db" in
1564 // _MTN/options
1565 // /work/newdir$ cd ..
1566 // /work$ mv newdir newerdir # better name
1567 // Oops, now, if we stored the version with ..'s in, this workspace
1568 // is broken.
1569 check_system_normalizes_to("../foo", "/a/foo");
1570 check_system_normalizes_to("foo/..", "/a/b");
1571 check_system_normalizes_to("/foo/bar/..", "/foo");
1572 check_system_normalizes_to("/foo/..", "/");
1573 // can't do particularly interesting checking of tilde expansion, but at
1574 // least we can check that it's doing _something_...
1575 string tilde_expanded = system_path("~/foo").as_external();
1576#ifdef WIN32
1577 UNIT_TEST_CHECK(tilde_expanded[1] == ':');
1578#else
1579 UNIT_TEST_CHECK(tilde_expanded[0] == '/');
1580#endif
1581 UNIT_TEST_CHECK(tilde_expanded.find('~') == string::npos);
1582 // on Windows, ~name is not expanded
1583#ifdef WIN32
1584 UNIT_TEST_CHECK(system_path("~this_user_does_not_exist_anywhere")
1585 .as_external()
1586 == "/a/b/~this_user_does_not_exist_anywhere");
1587#else
1588 UNIT_TEST_CHECK_THROW(system_path("~this_user_does_not_exist_anywhere"),
1589 informative_failure);
1590#endif
1591
1592 // finally, make sure that the copy-from-any_path constructor works right
1593 // in particular, it should interpret the paths it gets as being relative to
1594 // the project root, not the initial path
1595 working_root.unset();
1596 working_root.set(system_path("/working/root"), true);
1597 initial_rel_path.unset();
1598 initial_rel_path.set(string("rel/initial"), true);
1599
1600 UNIT_TEST_CHECK(system_path(system_path("foo/bar")).as_internal() == "/a/b/foo/bar");
1601 UNIT_TEST_CHECK(!working_root.used);
1602 UNIT_TEST_CHECK(system_path(system_path("/foo/bar")).as_internal() == "/foo/bar");
1603 UNIT_TEST_CHECK(!working_root.used);
1604 UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar"), false).as_internal()
1605 == "/working/root/foo/bar");
1606 UNIT_TEST_CHECK(!working_root.used);
1607 UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar")).as_internal()
1608 == "/working/root/foo/bar");
1609 UNIT_TEST_CHECK(working_root.used);
1610 UNIT_TEST_CHECK(system_path(file_path_external(utf8("foo/bar"))).as_external()
1611 == "/working/root/rel/initial/foo/bar");
1612 file_path a_file_path;
1613 UNIT_TEST_CHECK(system_path(a_file_path).as_external()
1614 == "/working/root");
1615 UNIT_TEST_CHECK(system_path(bookkeeping_path("_MTN/foo/bar")).as_internal()
1616 == "/working/root/_MTN/foo/bar");
1617 UNIT_TEST_CHECK(system_path(bookkeeping_root).as_internal()
1618 == "/working/root/_MTN");
1619 initial_abs_path.unset();
1620 working_root.unset();
1621 initial_rel_path.unset();
1622}
1623
1624UNIT_TEST(paths, access_tracker)
1625{
1626 access_tracker<int> a;
1627 UNIT_TEST_CHECK_THROW(a.get(), logic_error);
1628 a.set(1, false);
1629 UNIT_TEST_CHECK_THROW(a.set(2, false), logic_error);
1630 a.set(2, true);
1631 UNIT_TEST_CHECK_THROW(a.set(3, false), logic_error);
1632 UNIT_TEST_CHECK(a.get() == 2);
1633 UNIT_TEST_CHECK_THROW(a.set(3, true), logic_error);
1634 a.unset();
1635 a.may_not_initialize();
1636 UNIT_TEST_CHECK_THROW(a.set(1, false), logic_error);
1637 UNIT_TEST_CHECK_THROW(a.set(2, true), logic_error);
1638 a.unset();
1639 a.set(1, false);
1640 UNIT_TEST_CHECK_THROW(a.may_not_initialize(), logic_error);
1641}
1642
1643static void test_path_less_than(string const & left, string const & right)
1644{
1645 MM(left);
1646 MM(right);
1647 file_path left_fp = file_path_internal(left);
1648 file_path right_fp = file_path_internal(right);
1649 I(left_fp < right_fp);
1650}
1651
1652static void test_path_equal(string const & left, string const & right)
1653{
1654 MM(left);
1655 MM(right);
1656 file_path left_fp = file_path_internal(left);
1657 file_path right_fp = file_path_internal(right);
1658 I(left_fp == right_fp);
1659}
1660
1661UNIT_TEST(paths, ordering)
1662{
1663 // this ordering is very important:
1664 // -- it is used to determine the textual form of csets and manifests
1665 // (in particular, it cannot be changed)
1666 // -- it is used to determine in what order cset operations can be applied
1667 // (in particular, foo must sort before foo/bar, so that we can use it
1668 // to do top-down and bottom-up traversals of a set of paths).
1669 test_path_less_than("a", "b");
1670 test_path_less_than("a", "c");
1671 test_path_less_than("ab", "ac");
1672 test_path_less_than("a", "ab");
1673 test_path_less_than("", "a");
1674 test_path_less_than("", ".foo");
1675 test_path_less_than("foo", "foo/bar");
1676 // . is before / asciibetically, so sorting by strings will give the wrong
1677 // answer on this:
1678 test_path_less_than("foo/bar", "foo.bar");
1679
1680 // path_components used to be interned strings, and we used the default sort
1681 // order, which meant that in practice path components would sort in the
1682 // _order they were first used in the program_. So let's put in a test that
1683 // would catch this sort of brokenness.
1684 test_path_less_than("fallanopic_not_otherwise_mentioned", "xyzzy");
1685 test_path_less_than("fallanoooo_not_otherwise_mentioned_and_smaller",
1686 "fallanopic_not_otherwise_mentioned");
1687}
1688
1689UNIT_TEST(paths, ordering_random)
1690{
1691 char x[4] = {0,0,0,0};
1692 char y[4] = {0,0,0,0};
1693 u8 a, b, c, d;
1694 const int ntrials = 1000;
1695 int i;
1696 randomizer rng;
1697
1698 // use of numbers is intentional; these strings are defined to be UTF-8.
1699
1700 UNIT_TEST_CHECKPOINT("a and b");
1701 for (i = 0; i < ntrials; i++)
1702 {
1703 do a = rng.uniform(0x7f - 0x20) + 0x20;
1704 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1705
1706 do b = rng.uniform(0x7f - 0x20) + 0x20;
1707 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1708
1709 x[0] = a;
1710 y[0] = b;
1711 if (a < b)
1712 test_path_less_than(x, y);
1713 else if (a > b)
1714 test_path_less_than(y, x);
1715 else
1716 test_path_equal(x, y);
1717 }
1718
1719 UNIT_TEST_CHECKPOINT("ab and cd");
1720 for (i = 0; i < ntrials; i++)
1721 {
1722 do
1723 {
1724 do a = rng.uniform(0x7f - 0x20) + 0x20;
1725 while (a == 0x5c || a == 0x2f); // '\\', '/'
1726
1727 do b = rng.uniform(0x7f - 0x20) + 0x20;
1728 while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':'
1729 }
1730 while (a == 0x2e && b == 0x2e); // ".."
1731
1732 do
1733 {
1734 do c = rng.uniform(0x7f - 0x20) + 0x20;
1735 while (c == 0x5c || c == 0x2f); // '\\', '/'
1736
1737 do d = rng.uniform(0x7f - 0x20) + 0x20;
1738 while (d == 0x5c || d == 0x2f || d == 0x3a); // '\\', '/', ':'
1739 }
1740 while (c == 0x2e && d == 0x2e); // ".."
1741
1742 x[0] = a;
1743 x[1] = b;
1744 y[0] = c;
1745 y[1] = d;
1746
1747 if (a < c || (a == c && b < d))
1748 test_path_less_than(x, y);
1749 else if (a > c || (a == c && b > d))
1750 test_path_less_than(y, x);
1751 else
1752 test_path_equal(x, y);
1753 }
1754
1755 UNIT_TEST_CHECKPOINT("a and b/c");
1756 x[1] = 0;
1757 y[1] = '/';
1758 for (i = 0; i < ntrials; i++)
1759 {
1760 do a = rng.uniform(0x7f - 0x20) + 0x20;
1761 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1762
1763 do b = rng.uniform(0x7f - 0x20) + 0x20;
1764 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1765
1766 do c = rng.uniform(0x7f - 0x20) + 0x20;
1767 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1768
1769 x[0] = a;
1770 y[0] = b;
1771 y[2] = c;
1772
1773 // only the order of a and b matters. 1 sorts before 1/2.
1774 if (a <= b)
1775 test_path_less_than(x, y);
1776 else
1777 test_path_less_than(y, x);
1778 }
1779
1780 UNIT_TEST_CHECKPOINT("ab and c/d");
1781 for (i = 0; i < ntrials; i++)
1782 {
1783 do
1784 {
1785 do a = rng.uniform(0x7f - 0x20) + 0x20;
1786 while (a == 0x5c || a == 0x2f); // '\\', '/'
1787
1788 do b = rng.uniform(0x7f - 0x20) + 0x20;
1789 while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':'
1790 }
1791 while (a == 0x2e && b == 0x2e); // ".."
1792
1793 do c = rng.uniform(0x7f - 0x20) + 0x20;
1794 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1795
1796 do d = rng.uniform(0x7f - 0x20) + 0x20;
1797 while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.'
1798
1799
1800 x[0] = a;
1801 x[1] = b;
1802 y[0] = c;
1803 y[2] = d;
1804
1805 // only the order of a and c matters,
1806 // but this time, 12 sorts after 1/2.
1807 if (a < c)
1808 test_path_less_than(x, y);
1809 else
1810 test_path_less_than(y, x);
1811 }
1812
1813
1814 UNIT_TEST_CHECKPOINT("a/b and c/d");
1815 x[1] = '/';
1816 for (i = 0; i < ntrials; i++)
1817 {
1818 do a = rng.uniform(0x7f - 0x20) + 0x20;
1819 while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.'
1820
1821 do b = rng.uniform(0x7f - 0x20) + 0x20;
1822 while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.'
1823
1824 do c = rng.uniform(0x7f - 0x20) + 0x20;
1825 while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.'
1826
1827 do d = rng.uniform(0x7f - 0x20) + 0x20;
1828 while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.'
1829
1830 x[0] = a;
1831 x[2] = b;
1832 y[0] = c;
1833 y[2] = d;
1834
1835 if (a < c || (a == c && b < d))
1836 test_path_less_than(x, y);
1837 else if (a > c || (a == c && b > d))
1838 test_path_less_than(y, x);
1839 else
1840 test_path_equal(x, y);
1841 }
1842}
1843
1844UNIT_TEST(paths, test_internal_string_is_bookkeeping_path)
1845{
1846 char const * const yes[] = {"_MTN",
1847 "_MTN/foo",
1848 "_mtn/Foo",
1849 0 };
1850 char const * const no[] = {"foo/_MTN",
1851 "foo/bar",
1852 0 };
1853 for (char const * const * c = yes; *c; ++c)
1854 UNIT_TEST_CHECK(bookkeeping_path
1855 ::internal_string_is_bookkeeping_path(utf8(std::string(*c))));
1856 for (char const * const * c = no; *c; ++c)
1857 UNIT_TEST_CHECK(!bookkeeping_path
1858 ::internal_string_is_bookkeeping_path(utf8(std::string(*c))));
1859}
1860
1861UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_none)
1862{
1863 initial_rel_path.unset();
1864 initial_rel_path.set(string(), true);
1865
1866 char const * const yes[] = {"_MTN",
1867 "_MTN/foo",
1868 "_mtn/Foo",
1869 "_MTN/foo/..",
1870 0 };
1871 char const * const no[] = {"foo/_MTN",
1872 "foo/bar",
1873 "_MTN/..",
1874 0 };
1875 for (char const * const * c = yes; *c; ++c)
1876 UNIT_TEST_CHECK(bookkeeping_path
1877 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1878 for (char const * const * c = no; *c; ++c)
1879 UNIT_TEST_CHECK(!bookkeeping_path
1880 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1881}
1882
1883UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_a_b)
1884{
1885 initial_rel_path.unset();
1886 initial_rel_path.set(string("a/b"), true);
1887
1888 char const * const yes[] = {"../../_MTN",
1889 "../../_MTN/foo",
1890 "../../_mtn/Foo",
1891 "../../_MTN/foo/..",
1892 "../../foo/../_MTN/foo",
1893 0 };
1894 char const * const no[] = {"foo/_MTN",
1895 "foo/bar",
1896 "_MTN",
1897 "../../foo/_MTN",
1898 0 };
1899 for (char const * const * c = yes; *c; ++c)
1900 UNIT_TEST_CHECK(bookkeeping_path
1901 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1902 for (char const * const * c = no; *c; ++c)
1903 UNIT_TEST_CHECK(!bookkeeping_path
1904 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1905}
1906
1907UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix__MTN)
1908{
1909 initial_rel_path.unset();
1910 initial_rel_path.set(string("_MTN"), true);
1911
1912 char const * const yes[] = {".",
1913 "foo",
1914 "../_MTN/foo/..",
1915 "../_mtn/foo",
1916 "../foo/../_MTN/foo",
1917 0 };
1918 char const * const no[] = {"../foo",
1919 "../foo/bar",
1920 "../foo/_MTN",
1921 0 };
1922 for (char const * const * c = yes; *c; ++c)
1923 UNIT_TEST_CHECK(bookkeeping_path
1924 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1925 for (char const * const * c = no; *c; ++c)
1926 UNIT_TEST_CHECK(!bookkeeping_path
1927 ::external_string_is_bookkeeping_path(utf8(std::string(*c))));
1928}
1929
1930#endif // BUILD_UNIT_TESTS
1931
1932// Local Variables:
1933// mode: C++
1934// fill-column: 76
1935// c-file-style: "gnu"
1936// indent-tabs-mode: nil
1937// End:
1938// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status