monotone

monotone Mtn Source Tree

Root/globish.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "sanity.hh"
11#include "globish.hh"
12
13using std::string;
14using std::vector;
15
16using boost::regex_match;
17
// checked_globish_to_regex (below) converts a globish pattern to a regex.
// The regex should be usable by the Boost regex library operating in default
// mode, i.e., it should be a valid ECMAscript regex.
//
// Pattern transformation:
//
// - As a special case, the empty pattern is translated to "$.^", which cannot
//   match any string.
//
// - Alphanumeric characters and _ are copied as they are.  Any other
//   character not described below is copied with a backslash in front of
//   it, so that it can never act as a regex metacharacter.
// - The backslash (\) escapes the following character: the backslash itself
//   is dropped, and the following character is copied as in the previous
//   rule (so it too is backslash-quoted unless it is alphanumeric or _).
// - * is transformed to .* in the regex.
// - ? is transformed to . in the regex.
// - { is transformed to ( in the regex
// - } is transformed to ) in the regex
// - , is transformed to | in the regex, if within { and }
// - Character classes are not supported: [, ], ^ and ! receive no special
//   treatment and are backslash-quoted like any other non-alphanumeric.
//   (The rules once listed here for ^, ! and ] directly after an unescaped [
//   are not implemented by the code in this file.)
// Append C to RE as a literal regex character.  Alphanumerics and '_' are
// appended unchanged; every other character is preceded by a backslash so
// that it cannot be interpreted as a regex metacharacter.
static void
maybe_quote(char c, string & re)
{
  // The <ctype> classification functions are only defined for arguments
  // representable as unsigned char (or EOF).  Plain char may be signed, in
  // which case a byte >= 0x80 -- and the patterns handled in this file are
  // utf8 values -- would be passed as a negative int, which is undefined
  // behaviour.  Convert to unsigned char first.
  if (!(isalnum(static_cast<unsigned char>(c)) || c == '_'))
    {
      re += '\\';
    }
  re += c;
}
48
49static void
50checked_globish_to_regex(string const & glob, string & regex)
51{
52 int in_braces = 0; // counter for levels if {}
53
54 regex.clear();
55 regex.reserve(glob.size() * 2);
56
57 L(FL("checked_globish_to_regex: input = '%s'") % glob);
58
59 if (glob == "")
60 {
61 regex = "$.^";
62 // and the below loop will do nothing
63 }
64 for (string::const_iterator i = glob.begin(); i != glob.end(); ++i)
65 {
66 char c = *i;
67
68 N(in_braces < 5, F("braces nested too deep in pattern '%s'") % glob);
69
70 switch(c)
71 {
72 case '*':
73 regex += ".*";
74 break;
75 case '?':
76 regex += '.';
77 break;
78 case '{':
79 in_braces++;
80 regex += '(';
81 break;
82 case '}':
83 N(in_braces != 0,
84 F("trying to end a brace expression in a glob when none is started"));
85 regex += ')';
86 in_braces--;
87 break;
88 case ',':
89 if (in_braces > 0)
90 regex += '|';
91 else
92 maybe_quote(c, regex);
93 break;
94 case '\\':
95 N(++i != glob.end(), F("pattern '%s' ends with backslash") % glob);
96 maybe_quote(*i, regex);
97 break;
98 default:
99 maybe_quote(c, regex);
100 break;
101 }
102 }
103
104 N(in_braces == 0,
105 F("run-away brace expression in pattern '%s'") % glob);
106
107 L(FL("checked_globish_to_regex: output = '%s'") % regex);
108}
109
110void
111combine_and_check_globish(vector<utf8> const & patterns, utf8 & pattern)
112{
113 string p;
114 if (patterns.size() > 1)
115 p += '{';
116 bool first = true;
117 for (vector<utf8>::const_iterator i = patterns.begin(); i != patterns.end(); ++i)
118 {
119 string tmp;
120 // run for the checking it does
121 checked_globish_to_regex((*i)(), tmp);
122 if (!first)
123 p += ',';
124 first = false;
125 p += (*i)();
126 }
127 if (patterns.size() > 1)
128 p += '}';
129 pattern = utf8(p);
130}
131
132globish_matcher::globish_matcher(utf8 const & include_pat, utf8 const & exclude_pat)
133{
134 string re;
135 checked_globish_to_regex(include_pat(), re);
136 r_inc = re;
137 checked_globish_to_regex(exclude_pat(), re);
138 r_exc = re;
139}
140
141bool
142globish_matcher::operator()(string const & s)
143{
144 // regex_match may throw a runtime_error, if the regex turns out to be
145 // really pathological
146 bool inc_match = regex_match(s, r_inc);
147 bool exc_match = regex_match(s, r_exc);
148 bool result = inc_match && !exc_match;
149 L(FL("matching '%s' against '%s' excluding '%s': %s, %s: %s")
150 % s % r_inc % r_exc
151 % (inc_match ? "included" : "not included")
152 % (exc_match ? "excluded" : "not excluded")
153 % (result ? "matches" : "does not match"));
154 return result;
155}
156
157#ifdef BUILD_UNIT_TESTS
158#include "unit_tests.hh"
159
160static void
161checked_globish_to_regex_test()
162{
163 string pat;
164
165 checked_globish_to_regex("*", pat);
166 BOOST_CHECK(pat == ".*");
167 checked_globish_to_regex("?", pat);
168 BOOST_CHECK(pat == ".");
169 checked_globish_to_regex("{a,b,c}d", pat);
170 BOOST_CHECK(pat == "(a|b|c)d");
171 checked_globish_to_regex("foo{a,{b,c},?*}d", pat);
172 BOOST_CHECK(pat == "foo(a|(b|c)|..*)d");
173 checked_globish_to_regex("\\a\\b\\|\\{\\*", pat);
174 BOOST_CHECK(pat == "ab\\|\\{\\*");
175 checked_globish_to_regex(".+$^{}", pat);
176 BOOST_CHECK(pat == "\\.\\+\\$\\^()");
177 checked_globish_to_regex(",", pat);
178 // we're very conservative about metacharacters, and quote all
179 // non-alphanumerics, hence the backslash
180 BOOST_CHECK(pat == "\\,");
181 checked_globish_to_regex("\\.\\+\\$\\^\\(\\)", pat);
182 BOOST_CHECK(pat == "\\.\\+\\$\\^\\(\\)");
183
184 BOOST_CHECK_THROW(checked_globish_to_regex("foo\\", pat), informative_failure);
185 BOOST_CHECK_THROW(checked_globish_to_regex("{foo", pat), informative_failure);
186 BOOST_CHECK_THROW(checked_globish_to_regex("{foo,bar{baz,quux}", pat), informative_failure);
187 BOOST_CHECK_THROW(checked_globish_to_regex("foo}", pat), informative_failure);
188 BOOST_CHECK_THROW(checked_globish_to_regex("foo,bar{baz,quux}}", pat), informative_failure);
189 BOOST_CHECK_THROW(checked_globish_to_regex("{{{{{{{{{{a,b},c},d},e},f},g},h},i},j},k}", pat), informative_failure);
190}
191
192static void
193combine_and_check_globish_test()
194{
195 vector<utf8> s;
196 s.push_back(utf8("a"));
197 s.push_back(utf8("b"));
198 s.push_back(utf8("c"));
199 utf8 combined;
200 combine_and_check_globish(s, combined);
201 BOOST_CHECK(combined() == "{a,b,c}");
202}
203
204static void
205globish_matcher_test()
206{
207 {
208 globish_matcher m(utf8("{a,b}?*\\*|"), utf8("*c*"));
209 BOOST_CHECK(m("aq*|"));
210 BOOST_CHECK(m("bq*|"));
211 BOOST_CHECK(!m("bc*|"));
212 BOOST_CHECK(!m("bq|"));
213 BOOST_CHECK(!m("b*|"));
214 BOOST_CHECK(!m(""));
215 }
216 {
217 globish_matcher m(utf8("{a,\\\\,b*}"), utf8("*c*"));
218 BOOST_CHECK(m("a"));
219 BOOST_CHECK(!m("ab"));
220 BOOST_CHECK(m("\\"));
221 BOOST_CHECK(!m("\\\\"));
222 BOOST_CHECK(m("b"));
223 BOOST_CHECK(m("bfoobar"));
224 BOOST_CHECK(!m("bfoobarcfoobar"));
225 }
226 {
227 globish_matcher m(utf8("*"), utf8(""));
228 BOOST_CHECK(m("foo"));
229 BOOST_CHECK(m(""));
230 }
231 {
232 globish_matcher m(utf8("{foo}"), utf8(""));
233 BOOST_CHECK(m("foo"));
234 BOOST_CHECK(!m("bar"));
235 }
236}
237
238
239void add_globish_tests(test_suite * suite)
240{
241 I(suite);
242 suite->add(BOOST_TEST_CASE(&checked_globish_to_regex_test));
243 suite->add(BOOST_TEST_CASE(&combine_and_check_globish_test));
244 suite->add(BOOST_TEST_CASE(&globish_matcher_test));
245}
246
247#endif // BUILD_UNIT_TESTS
248
249// Local Variables:
250// mode: C++
251// fill-column: 76
252// c-file-style: "gnu"
253// indent-tabs-mode: nil
254// End:
255// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status