monotone

monotone Mtn Source Tree

Root/src/paths.hh

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2// 2008, 2010 Stephen Leake <stephen_leake@stephe-leake.org>
3//
4// This program is made available under the GNU GPL version 2.0 or
5// greater. See the accompanying file COPYING for details.
6//
7// This program is distributed WITHOUT ANY WARRANTY; without even the
8// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9// PURPOSE.
10
11#ifndef __PATHS_HH__
12#define __PATHS_HH__
13
14// safe, portable, fast, simple path handling -- in that order.
15// but they all count.
16//
17// this file defines the vocabulary we speak in when dealing with the
18// filesystem. this is an extremely complex problem by the time one worries
19// about normalization, security issues, character sets, and so on;
20// furthermore, path manipulation has historically been a performance
21// bottleneck in monotone. so the goal here is the efficient implementation
22// of a design that makes it hard or impossible to introduce as many classes
23// of bugs as possible.
24//
25// Our approach is to have three different types of paths:
26// -- system_path
27// this is a path to anywhere in the fs. it is in native format. it is
28// always absolute. when constructed from a string, it interprets the
29// string as being relative to the directory that monotone was run in.
// (note that this may be different from monotone's current directory,
// because when run in a workspace, monotone chdir's to the project root.)
32//
33// one can also construct a system_path from one of the below two types
34// of paths. this is intelligent, in that it knows that these sorts of
35// paths are considered to be relative to the project root. thus
36// system_path(file_path_internal("foo"))
37// is not, in general, the same as
38// system_path("foo")
39//
40// -- file_path
41// this is a path representing a versioned file. it is always
42// a fully normalized relative path, that does not escape the project
43// root. it is always relative to the project root.
44// you cannot construct a file_path directly from a string; you must pick
45// a constructor:
46// file_path_internal: use this for strings that come from
47// "monotone-internal" places, e.g. parsing revisions. this turns on
48// stricter checking -- the string must already be normalized -- and
49// is extremely fast. such strings are interpreted as being relative
50// to the project root.
51// file_path_external: use this for strings that come from the user.
52// these strings are normalized before being checked, and if there
53// is a problem trigger E() invariants rather than I() invariants.
54// if in a workspace, such strings are interpreted as being
55// _relative to the user's original directory_. if not in a
56// workspace, strings are treated as relative to the tree root. The
57// null string is accepted as referring to the workspace root
58// directory, because that is how file_path.as_external() outputs
59// that directory.
60// file_path's also provide optimized splitting and joining
61// functionality.
62//
63// -- bookkeeping_path
64// this is a path representing something in the _MTN/ directory of a
65// workspace. it has the same format restrictions as a file_path,
66// except instead of being forbidden to point into the _MTN directory, it
67// is _required_ to point into the _MTN directory. the one constructor is
68// strict, and analogous to file_path_internal. however, the normal way
69// to construct bookkeeping_path's is to use the global constant
70// 'bookkeeping_root', which points to the _MTN directory. Thus to
71// construct a path pointing to _MTN/options, use:
72// bookkeeping_root / "options"
73//
74// All path types should always be constructed from utf8-encoded strings.
75//
76// All path types provide an "operator /" which allows one to construct new
77// paths pointing to things underneath a given path. E.g.,
78// file_path_internal("foo") / "bar" == file_path_internal("foo/bar")
79//
80// All path types subclass 'any_path', which provides:
// -- emptiness checking with .empty()
82// -- a method .as_internal(), which returns the utf8-encoded string
83// representing this path for internal use. for instance, this is the
84// string that should be embedded into the text of revisions.
85// -- a method .as_external(), which returns a std::string suitable for
86// passing to filesystem interface functions. in practice, this means
87// that it is recoded into an appropriate character set, etc. For
88// bookkeeping_path and file_path, .as_external() is relative to the
89// workspace root.
// -- an operator<< for ostreams. this should always be used when writing
91// out paths for display to the user. at the moment it just calls one
92// of the above functions, but this is _not_ correct. there are
93// actually 3 different logical character sets -- internal (utf8),
94// user (locale-specific), and filesystem (locale-specific, except
95// when it's not, i.e., on OS X). so we need three distinct operations,
96// and you should use the correct one.
97//
98// all this means that when you want to print out a path, you usually
99// want to just say:
100// F("my path is '%s'") % my_path
// i.e., nothing fancy is necessary; for purposes of F() just treat it as
// if it were a string
103
104#include <boost/shared_ptr.hpp>
105#include <boost/concept_check.hpp>
106#include "origin_type.hh"
107#include <map>
108#include <stdexcept>
109
110class any_path;
111class file_path;
112class roster_t;
113class utf8;
114
115// A path_component is one component of a path. It is always utf8, may not
116// contain either kind of slash, and may not be a magic directory entry ("."
117// or "..") It _may_ be the empty string, but you only get that if you ask
118// for the basename of the root directory. It resembles, but is not, a
119// vocab type.
120
121class path_component : public origin_aware
122{
123public:
124 path_component() : data() {}
125 explicit path_component(utf8 const &);
126 path_component(std::string const &, origin::type);
127 explicit path_component(char const *);
128
129 std::string const & operator()() const { return data; }
130 bool empty() const { return data.empty(); }
131 bool operator<(path_component const & other) const
132 { return data < other(); }
133 bool operator==(path_component const & other) const
134 { return data == other(); }
135 bool operator!=(path_component const & other) const
136 { return data != other(); }
137
138 friend std::ostream & operator<<(std::ostream &, path_component const &);
139
140private:
141 std::string data;
142
143 // constructor for use by trusted operations. bypasses validation.
144 path_component(std::string const & path,
145 std::string::size_type start,
146 std::string::size_type stop = std::string::npos)
147 : data(path.substr(start, stop))
148 {}
149
150 friend class any_path;
151 friend class file_path;
152 friend class roster_t;
153};
154std::ostream & operator<<(std::ostream &, path_component const &);
155template <> void dump(path_component const &, std::string &);
156
157// It's possible this will become a proper virtual interface in the future,
158// but since the implementation is exactly the same in all cases, there isn't
159// much point ATM...
160class any_path : public origin_aware
161{
162public:
163 // converts to native charset and path syntax
164 // this is a path that you can pass to the operating system
165 std::string as_external() const;
166 // leaves as utf8
167 std::string const & as_internal() const
168 { return data; }
169 bool empty() const
170 { return data.empty(); }
171 // returns the trailing component of the path
172 path_component basename() const;
173
174 // a few places need to manipulate any_paths (notably the low-level stuff
175 // in file_io.cc).
176 any_path operator /(path_component const &) const;
177 any_path dirname() const;
178
179 any_path() {}
180 any_path(any_path const & other)
181 : origin_aware(other.made_from), data(other.data) {}
182 any_path & operator=(any_path const & other)
183 { made_from = other.made_from; data = other.data; return *this; }
184
185protected:
186 std::string data;
187 any_path(origin::type whence) : origin_aware(whence) {}
188
189private:
190 any_path(std::string const & path,
191 std::string::size_type start,
192 std::string::size_type stop = std::string::npos)
193 {
194 data = path.substr(start, stop);
195 }
196};
197
198std::ostream & operator<<(std::ostream & o, any_path const & a);
199
200class file_path : public any_path
201{
202public:
203 file_path() {}
204 // join a file_path out of pieces
205 file_path operator /(path_component const & to_append) const;
206 file_path operator /(file_path const & to_append) const;
207
208 // these functions could be defined on any_path but are only needed
209 // for file_path, and not defining them for system_path gets us out
210 // of nailing down the semantics near the absolute root.
211
212 // returns a path with the last component removed.
213 file_path dirname() const;
214
215 // does dirname() and basename() at the same time, for efficiency
216 void dirname_basename(file_path &, path_component &) const;
217
218 // returns true if this path is beneath other
219 bool is_beneath_of(const file_path & other) const;
220
221 // returns the number of /-separated components of the path.
222 // The empty path has depth zero.
223 unsigned int depth() const;
224
225 // ordering...
226 bool operator==(const file_path & other) const
227 { return data == other.data; }
228
229 bool operator!=(const file_path & other) const
230 { return data != other.data; }
231
232 // the ordering on file_path is not exactly that of strings.
233 // see the "ordering" unit test in paths.cc.
234 bool operator <(const file_path & other) const
235 {
236 std::string::const_iterator p = data.begin();
237 std::string::const_iterator plim = data.end();
238 std::string::const_iterator q = other.data.begin();
239 std::string::const_iterator qlim = other.data.end();
240
241 while (p != plim && q != qlim && *p == *q)
242 p++, q++;
243
244 if (p == plim && q == qlim) // equal -> not less
245 return false;
246
247 // must do end of string before everything else, or 'foo' will sort
248 // after 'foo/bar' which is not what we want.
249 if (p == plim)
250 return true;
251 if (q == qlim)
252 return false;
253
254 // the only special case needed is that / sorts before everything -
255 // this gives the effect of component-by-component comparison.
256 if (*p == '/')
257 return true;
258 if (*q == '/')
259 return false;
260
261 // ensure unsigned comparison
262 return static_cast<unsigned char>(*p) < static_cast<unsigned char>(*q);
263 }
264
265 void clear() { data.clear(); }
266
267private:
268 typedef enum { internal, external } source_type;
269 // input is always in utf8, because everything in our world is always in
270 // utf8 (except interface code itself).
271 // external paths:
272 // -- are converted to internal syntax (/ rather than \, etc.)
273 // -- normalized
274 // -- if not 'to_workspace_root', assumed to be relative to the user's
275 // cwd, and munged to become relative to root of the workspace
276 // instead
277 // internal and external paths:
278 // -- are confirmed to be normalized and relative
279 // -- not to be in _MTN/
280 file_path(source_type type, std::string const & path, bool to_workspace_root);
281 file_path(source_type type, utf8 const & path, bool to_workspace_root);
282 friend file_path file_path_internal(std::string const & path);
283 friend file_path file_path_external(utf8 const & path);
284 friend file_path file_path_external_ws(utf8 const & path);
285
286 // private substring constructor, does no validation. used by dirname()
287 // and operator/ with a path_component.
288 file_path(std::string const & path,
289 std::string::size_type start,
290 std::string::size_type stop = std::string::npos)
291 {
292 data = path.substr(start, stop);
293 }
294
295 // roster_t::get_name is allowed to use the private substring constructor.
296 friend class roster_t;
297};
298
299// these are the public file_path constructors. path is relative to the
300// current working directory.
301inline file_path file_path_internal(std::string const & path)
302{
303 return file_path(file_path::internal, path, false);
304}
305inline file_path file_path_external(utf8 const & path)
306{
307 return file_path(file_path::external, path, false);
308}
309
310// path is relative to the workspace root
311inline file_path file_path_external_ws(utf8 const & path)
312{
313 return file_path(file_path::external, path, true);
314}
315
316class bookkeeping_path : public any_path
317{
318public:
319 bookkeeping_path() {}
320 // path _should_ contain the leading _MTN/
321 // and _should_ look like an internal path
322 // usually you should just use the / operator as a constructor!
323 explicit bookkeeping_path(char const * const path);
324 bookkeeping_path(std::string const &, origin::type made_from);
325 bookkeeping_path operator /(char const *) const;
326 bookkeeping_path operator /(path_component const &) const;
327 bookkeeping_path operator /(file_path const & to_append) const;
328
329 // exposed for the use of walk_tree and friends
330 static bool internal_string_is_bookkeeping_path(utf8 const & path);
331 static bool external_string_is_bookkeeping_path(utf8 const & path);
332 bool operator==(const bookkeeping_path & other) const
333 { return data == other.data; }
334
335 bool operator <(const bookkeeping_path & other) const
336 { return data < other.data; }
337
338private:
339 bookkeeping_path(std::string const & path,
340 std::string::size_type start,
341 std::string::size_type stop = std::string::npos)
342 {
343 data = path.substr(start, stop);
344 }
345};
346
// these are #defines so that they will be constructed lazily, when
// used. this is necessary for correct behavior; the path constructors
// use sanity.hh assertions and therefore must not run before
// sanity::initialize is called.
//
// note that, being macros, each use expands to a fresh constructor call.

// the _MTN directory itself, as a bookkeeping_path
#define bookkeeping_root (bookkeeping_path("_MTN"))
// the name "_MTN" as a single path_component
#define bookkeeping_root_component (path_component("_MTN"))
// _MTN/conflicts
#define bookkeeping_conflicts_file (bookkeeping_path("_MTN/conflicts"))
// _MTN/resolutions
#define bookkeeping_resolutions_dir (bookkeeping_path("_MTN/resolutions"))
// for migration
#define old_bookkeeping_root_component (path_component("MT"))
358
359void normalize_external_path(std::string const & path, std::string & normalized, bool to_workspace_root);
360
361// this will always be an absolute path
362class system_path : public any_path
363{
364public:
365 system_path() {};
366 system_path(system_path const & other) : any_path(other) {};
367
368 // the optional argument takes some explanation. this constructor takes a
369 // path relative to the workspace root. the question is how to interpret
370 // that path -- since it's possible to have multiple workspaces over the
371 // course of a the program's execution (e.g., if someone runs 'checkout'
372 // while already in a workspace). if 'true' is passed (the default),
373 // then monotone will trigger an invariant if the workspace changes after
374 // we have already interpreted the path relative to some other working
375 // copy. if 'false' is passed, then the path is taken to be relative to
376 // whatever the current workspace is, and will continue to reference it
377 // even if the workspace later changes.
378 explicit system_path(any_path const & other,
379 bool in_true_workspace = true);
380 // this path can contain anything, and it will be absolutified and
381 // tilde-expanded. it will considered to be relative to the directory
382 // monotone started in. it should be in utf8.
383 explicit system_path(char const * const path);
384 system_path(std::string const & path, origin::type from);
385 explicit system_path(utf8 const & path);
386
387 bool operator==(const system_path & other) const
388 { return data == other.data; }
389 bool operator!=(const system_path & other) const
390 { return data != other.data; }
391 bool operator <(const system_path & other) const
392 { return data < other.data; }
393
394 system_path operator /(path_component const & to_append) const;
395 system_path operator /(char const * to_append) const;
396 system_path dirname() const;
397
398private:
399 system_path(std::string const & path,
400 std::string::size_type start,
401 std::string::size_type stop = std::string::npos)
402 {
403 data = path.substr(start, stop);
404 }
405};
406
407template <> void dump(file_path const & sp, std::string & out);
408template <> void dump(bookkeeping_path const & sp, std::string & out);
409template <> void dump(system_path const & sp, std::string & out);
410
411// Attempt to form the composed path P / S, as operator/ would do, but if
412// this is unsuccessful, warn the user and return false, rather than
413// aborting. For use when scanning directories. Do try to tell
414// safe_compose whether S refers to a directory; it only improves
415// diagnostics, but giving good diagnostics is very important.
416extern void report_failed_path_composition(any_path const & p,
417 char const * s, bool isdir);
418
419template <class T>
420bool safe_compose(T const & p, char const * s, T & result, bool isdir=false)
421{
422 try
423 {
424 result = p / path_component(s);
425 return true;
426 }
427 catch (std::logic_error)
428 {
429 report_failed_path_composition(p, s, isdir);
430 return false;
431 }
432}
433
434// Base class for predicate functors on paths. T must be one of the path
435// classes.
436template <class T>
437struct path_predicate
438{
439 BOOST_CLASS_REQUIRE2(T, any_path, boost, ConvertibleConcept);
440 virtual bool operator()(T const &) const = 0;
441protected:
442 path_predicate() {}
443 virtual ~path_predicate() {}
444};
445
446// paths.cc provides always-true and always-false predicates.
447template <class T>
448struct path_always_true : public path_predicate<T>
449{
450 virtual bool operator()(T const &) const;
451};
452template <class T>
453struct path_always_false : public path_predicate<T>
454{
455 virtual bool operator()(T const &) const;
456};
457
458// Return a file_path, bookkeeping_path, or system_path, as appropriate.
459// 'path' is an external path. If to_workspace_root, path is relative to
460// workspace root, or absolute. Otherwise, it is relative to the current
461// working directory, or absolute.
462boost::shared_ptr<any_path> new_optimal_path(std::string path, bool to_workspace_root);
463
464// record the initial path. must be called before any use of system_path.
465void
466save_initial_path();
467
468// returns true if workspace found, in which case cwd has been changed
469// returns false if workspace not found
470bool
471find_and_go_to_workspace(std::string const & search_root);
472
473// this is like change_current_working_dir, but also initializes the various
474// root paths that are needed to interpret paths
475void
476go_to_workspace(system_path const & new_workspace);
477
478// returns the currently active workspace path
479void
480get_current_workspace(system_path & workspace);
481
482void mark_std_paths_used(void);
483
484// reset path globals to uninitialized; should be done for each new command.
485void reset_std_paths(void);
486
487file_path
488find_new_path_for(std::map<file_path, file_path> const & renames,
489 file_path const & old_path);
490
491#endif
492
493// Local Variables:
494// mode: C++
495// fill-column: 76
496// c-file-style: "gnu"
497// indent-tabs-mode: nil
498// End:
499// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status