monotone

monotone Mtn Source Tree

Root/globish.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "sanity.hh"
11#include "globish.hh"
12
13using std::string;
14using std::vector;
15
16using boost::regex_match;
17
18// this converts a globish pattern to a regex. The regex should be usable by
19// the Boost regex library operating in default mode, i.e., it should be a
20// valid ECMAscript regex.
21//
// Pattern transformation:
23//
24// - As a special case, the empty pattern is translated to "$.^", which cannot
25// match any string.
26//
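// - Alphanumeric characters and the underscore are copied as they are; any
//   other character not described below is escaped with a backslash.
// - The backslash (\) escapes the following character, which then loses any
//   special meaning. In the regex that character is preceded by a backslash
//   unless it is alphanumeric or an underscore.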
30// - * is transformed to .* in the regex.
31// - ? is transformed to . in the regex.
32// - { is transformed to ( in the regex
33// - } is transformed to ) in the regex
34// - , is transformed to | in the regex, if within { and }
// - [, ], ^ and ! are always escaped with a backslash; bracket expressions
//   (character classes) are not supported.
// Append the character C to the regex under construction in RE so that it
// is matched literally.
//
// Alphanumeric characters and '_' are appended unchanged.  Every other
// character is preceded by a backslash, so it can never be interpreted as a
// regex metacharacter.
static void
maybe_quote(char c, string & re)
{
  // The <cctype> classification functions have undefined behaviour when
  // handed a negative value other than EOF, and plain char may be signed.
  // Converting through unsigned char keeps non-ASCII bytes well defined.
  bool const is_word_char = isalnum(static_cast<unsigned char>(c)) || c == '_';

  if (!is_word_char)
    re += '\\';
  re += c;
}
48
49static void
50checked_globish_to_regex(string const & glob, string & regex)
51{
52 int in_braces = 0; // counter for levels if {}
53
54 regex.clear();
55 regex.reserve(glob.size() * 2);
56
57 L(FL("checked_globish_to_regex: input = '%s'") % glob);
58
59 if (glob == "")
60 {
61 regex = "$.^";
62 // and the below loop will do nothing
63 }
64 for (string::const_iterator i = glob.begin(); i != glob.end(); ++i)
65 {
66 char c = *i;
67
68 N(in_braces < 5, F("braces nested too deep in pattern '%s'") % glob);
69
70 switch(c)
71 {
72 case '*':
73 regex += ".*";
74 break;
75 case '?':
76 regex += '.';
77 break;
78 case '{':
79 in_braces++;
80 regex += '(';
81 break;
82 case '}':
83 N(in_braces != 0,
84 F("trying to end a brace expression in a glob when none is started"));
85 regex += ')';
86 in_braces--;
87 break;
88 case ',':
89 if (in_braces > 0)
90 regex += '|';
91 else
92 maybe_quote(c, regex);
93 break;
94 case '\\':
95 N(++i != glob.end(), F("pattern '%s' ends with backslash") % glob);
96 maybe_quote(*i, regex);
97 break;
98 default:
99 maybe_quote(c, regex);
100 break;
101 }
102 }
103
104 N(in_braces == 0,
105 F("run-away brace expression in pattern '%s'") % glob);
106
107 L(FL("checked_globish_to_regex: output = '%s'") % regex);
108}
109
110void
111combine_and_check_globish(vector<globish> const & patterns, globish & pattern)
112{
113 string p;
114 if (patterns.size() > 1)
115 p += '{';
116 bool first = true;
117 for (vector<globish>::const_iterator i = patterns.begin();
118 i != patterns.end(); ++i)
119 {
120 string tmp;
121 // run for the checking it does
122 checked_globish_to_regex((*i)(), tmp);
123 if (!first)
124 p += ',';
125 first = false;
126 p += (*i)();
127 }
128 if (patterns.size() > 1)
129 p += '}';
130 pattern = globish(p);
131}
132
133globish_matcher::globish_matcher(globish const & include_pat,
134 globish const & exclude_pat)
135{
136 string re;
137 checked_globish_to_regex(include_pat(), re);
138 r_inc = re;
139 checked_globish_to_regex(exclude_pat(), re);
140 r_exc = re;
141}
142
143bool
144globish_matcher::operator()(string const & s)
145{
146 // regex_match may throw a runtime_error, if the regex turns out to be
147 // really pathological
148 bool inc_match = regex_match(s, r_inc);
149 bool exc_match = regex_match(s, r_exc);
150 bool result = inc_match && !exc_match;
151 L(FL("matching '%s' against '%s' excluding '%s': %s, %s: %s")
152 % s % r_inc % r_exc
153 % (inc_match ? "included" : "not included")
154 % (exc_match ? "excluded" : "not excluded")
155 % (result ? "matches" : "does not match"));
156 return result;
157}
158
159#ifdef BUILD_UNIT_TESTS
160#include "unit_tests.hh"
161
162UNIT_TEST(globish, checked_globish_to_regex)
163{
164 string pat;
165
166 checked_globish_to_regex("*", pat);
167 BOOST_CHECK(pat == ".*");
168 checked_globish_to_regex("?", pat);
169 BOOST_CHECK(pat == ".");
170 checked_globish_to_regex("{a,b,c}d", pat);
171 BOOST_CHECK(pat == "(a|b|c)d");
172 checked_globish_to_regex("foo{a,{b,c},?*}d", pat);
173 BOOST_CHECK(pat == "foo(a|(b|c)|..*)d");
174 checked_globish_to_regex("\\a\\b\\|\\{\\*", pat);
175 BOOST_CHECK(pat == "ab\\|\\{\\*");
176 checked_globish_to_regex(".+$^{}", pat);
177 BOOST_CHECK(pat == "\\.\\+\\$\\^()");
178 checked_globish_to_regex(",", pat);
179 // we're very conservative about metacharacters, and quote all
180 // non-alphanumerics, hence the backslash
181 BOOST_CHECK(pat == "\\,");
182 checked_globish_to_regex("\\.\\+\\$\\^\\(\\)", pat);
183 BOOST_CHECK(pat == "\\.\\+\\$\\^\\(\\)");
184
185 BOOST_CHECK_THROW(checked_globish_to_regex("foo\\", pat), informative_failure);
186 BOOST_CHECK_THROW(checked_globish_to_regex("{foo", pat), informative_failure);
187 BOOST_CHECK_THROW(checked_globish_to_regex("{foo,bar{baz,quux}", pat), informative_failure);
188 BOOST_CHECK_THROW(checked_globish_to_regex("foo}", pat), informative_failure);
189 BOOST_CHECK_THROW(checked_globish_to_regex("foo,bar{baz,quux}}", pat), informative_failure);
190 BOOST_CHECK_THROW(checked_globish_to_regex("{{{{{{{{{{a,b},c},d},e},f},g},h},i},j},k}", pat), informative_failure);
191}
192
193UNIT_TEST(globish, combine_and_check_globish)
194{
195 vector<globish> s;
196 s.push_back(globish("a"));
197 s.push_back(globish("b"));
198 s.push_back(globish("c"));
199 globish combined;
200 combine_and_check_globish(s, combined);
201 BOOST_CHECK(combined() == "{a,b,c}");
202}
203
204UNIT_TEST(globish, globish_matcher)
205{
206 {
207 globish_matcher m(globish("{a,b}?*\\*|"), globish("*c*"));
208 BOOST_CHECK(m("aq*|"));
209 BOOST_CHECK(m("bq*|"));
210 BOOST_CHECK(!m("bc*|"));
211 BOOST_CHECK(!m("bq|"));
212 BOOST_CHECK(!m("b*|"));
213 BOOST_CHECK(!m(""));
214 }
215 {
216 globish_matcher m(globish("{a,\\\\,b*}"), globish("*c*"));
217 BOOST_CHECK(m("a"));
218 BOOST_CHECK(!m("ab"));
219 BOOST_CHECK(m("\\"));
220 BOOST_CHECK(!m("\\\\"));
221 BOOST_CHECK(m("b"));
222 BOOST_CHECK(m("bfoobar"));
223 BOOST_CHECK(!m("bfoobarcfoobar"));
224 }
225 {
226 globish_matcher m(globish("*"), globish(""));
227 BOOST_CHECK(m("foo"));
228 BOOST_CHECK(m(""));
229 }
230 {
231 globish_matcher m(globish("{foo}"), globish(""));
232 BOOST_CHECK(m("foo"));
233 BOOST_CHECK(!m("bar"));
234 }
235}
236
237#endif // BUILD_UNIT_TESTS
238
239// Local Variables:
240// mode: C++
241// fill-column: 76
242// c-file-style: "gnu"
243// indent-tabs-mode: nil
244// End:
245// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status