1 | #ifndef __GLOBISH_HH__␊ |
2 | #define __GLOBISH_HH__␊ |
3 | ␊ |
4 | // Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>␊ |
5 | //␊ |
6 | // This program is made available under the GNU GPL version 2.0 or␊ |
7 | // greater. See the accompanying file COPYING for details.␊ |
8 | //␊ |
9 | // This program is distributed WITHOUT ANY WARRANTY; without even the␊ |
10 | // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR␊ |
11 | // PURPOSE.␊ |
12 | ␊ |
13 | // a sort of glob-like pattern matcher, for use in specifying branch␊ |
14 | // collections for netsync. it is important that it not be too expensive to␊ |
15 | // match (as opposed to common regex engines, which can be exponential on␊ |
16 | // pathological patterns), because we must match branches against untrusted␊ |
17 | // patterns when doing netsync.␊ |
18 | ␊ |
19 | // all characters stand for themselves except␊ |
20 | //␊ |
21 | // \x matches x, even if x is one of the metacharacters␊ |
22 | // * matches zero or more characters of any kind (greedily)␊ |
23 | // ? matches any single character␊ |
24 | // [...] matches any single character that appears within the brackets␊ |
25 | // [^..] matches any single character that does _not_ appear␊ |
26 | // [!..] same as [^..]␊ |
27 | // {a,b,c} matches a or b or c (may be of arbitrary length, have arbitrary␊ |
28 | // number of alternations; nesting is allowed but only five deep)␊ |
29 | //␊ |
30 | // [\]] is how you put a ] in a character class␊ |
31 | // [\[] similarly (otherwise a syntax error)␊ |
32 | // [\\] similarly␊ |
33 | // [{}?*] within [] these stand for themselves␊ |
34 | //␊ |
35 | // \n matches n, not newline␊ |
36 | // \007 same as '007'␊ |
37 | //␊ |
38 | // to match, the _entire_ target must match the pattern; there is no scan␊ |
39 | // for a substring match, nor is a prefix match a match. the pattern is␊ |
40 | // expected to be utf8, and characters in the 0x00 - 0x1f range are not␊ |
41 | // permitted.␊ |
42 | //␊ |
// as an extra special case, the empty pattern matches nothing, not even an
// empty target string.  this hardly ever matters, but it's nice to have
// some way to say "don't exclude anything", for instance.
46 | ␊ |
47 | #include "vector.hh"␊ |
48 | ␊ |
49 | struct arg_type;␊ |
50 | ␊ |
51 | struct globish␊ |
52 | {␊ |
53 | globish() : compiled_pattern() {}␊ |
54 | globish(char const * pat);␊ |
55 | globish(std::string const & pat);␊ |
56 | globish(std::vector<arg_type> const & pat);␊ |
57 | globish(std::vector<arg_type>::const_iterator const & beg,␊ |
58 | std::vector<arg_type>::const_iterator const & end);␊ |
59 | ␊ |
60 | std::string operator()(void) const;␊ |
61 | bool matches(std::string const & target) const;␊ |
62 | ␊ |
63 | private:␊ |
64 | std::string compiled_pattern;␊ |
65 | };␊ |
66 | ␊ |
67 | std::ostream & operator<<(std::ostream &, globish const &);␊ |
68 | template <> void dump(globish const &, std::string &);␊ |
69 | ␊ |
70 | // convenience functor for when you want to match all things␊ |
71 | // that _do_ match one glob but do _not_ match another␊ |
72 | struct globish_matcher␊ |
73 | {␊ |
74 | globish_matcher(globish const & incl, globish const & excl)␊ |
75 | : included(incl), excluded(excl) {}␊ |
76 | ␊ |
77 | bool operator()(std::string const & s)␊ |
78 | { return included.matches(s) && !excluded.matches(s); }␊ |
79 | ␊ |
80 | private:␊ |
81 | globish included;␊ |
82 | globish excluded;␊ |
83 | };␊ |
84 | ␊ |
85 | ␊ |
86 | // Local Variables:␊ |
87 | // mode: C++␊ |
88 | // fill-column: 76␊ |
89 | // c-file-style: "gnu"␊ |
90 | // indent-tabs-mode: nil␊ |
91 | // End:␊ |
92 | // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:␊ |
93 | ␊ |
94 | #endif␊ |