monotone

monotone Mtn Source Tree

Root/globish.cc

1// Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "sanity.hh"
12#include "globish.hh"
13
14using std::string;
15using std::vector;
16
17using boost::regex_match;
18
// this converts a globish pattern to a regex.  The regex should be usable by
// the Boost regex library operating in default mode, i.e., it should be a
// valid ECMAscript regex.
//
// Pattern transformation:
//
// - As a special case, the empty pattern is translated to "$.^", which cannot
//   match any string.
//
// - Alphanumeric characters and _ are copied as they are.
// - The backslash (\) escapes the following character, which then loses any
//   special meaning: it is copied as it is if it is alphanumeric or _, and
//   is preceded by a backslash in the regex otherwise.
// - * is transformed to .* in the regex.
// - ? is transformed to . in the regex.
// - { is transformed to ( in the regex
// - } is transformed to ) in the regex
// - , is transformed to | in the regex, if within { and }
// - Every other character is preceded by a backslash in the regex.  This
//   includes [, ], ^ and !, so character classes are not supported.
// Append the character C to the regex fragment RE as a literal.
//
// Alphanumeric characters and '_' are appended unchanged.  Every other
// character is preceded by a backslash; we are deliberately conservative
// and quote all non-alphanumerics rather than tracking which ones the
// regex engine treats as metacharacters.
static void
maybe_quote(char c, std::string & re)
{
  // isalnum() requires an argument representable as unsigned char (or EOF).
  // Plain char may be signed, so bytes >= 0x80 must be converted first;
  // passing them directly is undefined behaviour.
  unsigned char const uc = static_cast<unsigned char>(c);
  if (!(isalnum(uc) || c == '_'))
    {
      re += '\\';
    }
  re += c;
}
49
50static void
51checked_globish_to_regex(string const & glob, string & regex)
52{
53 int in_braces = 0; // counter for levels if {}
54
55 regex.clear();
56 regex.reserve(glob.size() * 2);
57
58 L(FL("checked_globish_to_regex: input = '%s'") % glob);
59
60 if (glob == "")
61 {
62 regex = "$.^";
63 // and the below loop will do nothing
64 }
65 for (string::const_iterator i = glob.begin(); i != glob.end(); ++i)
66 {
67 char c = *i;
68
69 N(in_braces < 5, F("braces nested too deep in pattern '%s'") % glob);
70
71 switch(c)
72 {
73 case '*':
74 regex += ".*";
75 break;
76 case '?':
77 regex += '.';
78 break;
79 case '{':
80 in_braces++;
81 regex += '(';
82 break;
83 case '}':
84 N(in_braces != 0,
85 F("trying to end a brace expression in a glob when none is started"));
86 regex += ')';
87 in_braces--;
88 break;
89 case ',':
90 if (in_braces > 0)
91 regex += '|';
92 else
93 maybe_quote(c, regex);
94 break;
95 case '\\':
96 N(++i != glob.end(), F("pattern '%s' ends with backslash") % glob);
97 maybe_quote(*i, regex);
98 break;
99 default:
100 maybe_quote(c, regex);
101 break;
102 }
103 }
104
105 N(in_braces == 0,
106 F("run-away brace expression in pattern '%s'") % glob);
107
108 L(FL("checked_globish_to_regex: output = '%s'") % regex);
109}
110
111void
112combine_and_check_globish(vector<globish> const & patterns, globish & pattern)
113{
114 string p;
115 if (patterns.size() > 1)
116 p += '{';
117 bool first = true;
118 for (vector<globish>::const_iterator i = patterns.begin();
119 i != patterns.end(); ++i)
120 {
121 string tmp;
122 // run for the checking it does
123 checked_globish_to_regex((*i)(), tmp);
124 if (!first)
125 p += ',';
126 first = false;
127 p += (*i)();
128 }
129 if (patterns.size() > 1)
130 p += '}';
131 pattern = globish(p);
132}
133
134globish_matcher::globish_matcher(globish const & include_pat,
135 globish const & exclude_pat)
136{
137 string re;
138 checked_globish_to_regex(include_pat(), re);
139 r_inc = re;
140 checked_globish_to_regex(exclude_pat(), re);
141 r_exc = re;
142}
143
144bool
145globish_matcher::operator()(string const & s)
146{
147 // regex_match may throw a runtime_error, if the regex turns out to be
148 // really pathological
149 bool inc_match = regex_match(s, r_inc);
150 bool exc_match = regex_match(s, r_exc);
151 bool result = inc_match && !exc_match;
152 L(FL("matching '%s' against '%s' excluding '%s': %s, %s: %s")
153 % s % r_inc % r_exc
154 % (inc_match ? "included" : "not included")
155 % (exc_match ? "excluded" : "not excluded")
156 % (result ? "matches" : "does not match"));
157 return result;
158}
159
160#ifdef BUILD_UNIT_TESTS
161#include "unit_tests.hh"
162
163UNIT_TEST(globish, checked_globish_to_regex)
164{
165 string pat;
166
167 checked_globish_to_regex("*", pat);
168 UNIT_TEST_CHECK(pat == ".*");
169 checked_globish_to_regex("?", pat);
170 UNIT_TEST_CHECK(pat == ".");
171 checked_globish_to_regex("{a,b,c}d", pat);
172 UNIT_TEST_CHECK(pat == "(a|b|c)d");
173 checked_globish_to_regex("foo{a,{b,c},?*}d", pat);
174 UNIT_TEST_CHECK(pat == "foo(a|(b|c)|..*)d");
175 checked_globish_to_regex("\\a\\b\\|\\{\\*", pat);
176 UNIT_TEST_CHECK(pat == "ab\\|\\{\\*");
177 checked_globish_to_regex(".+$^{}", pat);
178 UNIT_TEST_CHECK(pat == "\\.\\+\\$\\^()");
179 checked_globish_to_regex(",", pat);
180 // we're very conservative about metacharacters, and quote all
181 // non-alphanumerics, hence the backslash
182 UNIT_TEST_CHECK(pat == "\\,");
183 checked_globish_to_regex("\\.\\+\\$\\^\\(\\)", pat);
184 UNIT_TEST_CHECK(pat == "\\.\\+\\$\\^\\(\\)");
185
186 UNIT_TEST_CHECK_THROW(checked_globish_to_regex("foo\\", pat), informative_failure);
187 UNIT_TEST_CHECK_THROW(checked_globish_to_regex("{foo", pat), informative_failure);
188 UNIT_TEST_CHECK_THROW(checked_globish_to_regex("{foo,bar{baz,quux}", pat), informative_failure);
189 UNIT_TEST_CHECK_THROW(checked_globish_to_regex("foo}", pat), informative_failure);
190 UNIT_TEST_CHECK_THROW(checked_globish_to_regex("foo,bar{baz,quux}}", pat), informative_failure);
191 UNIT_TEST_CHECK_THROW(checked_globish_to_regex("{{{{{{{{{{a,b},c},d},e},f},g},h},i},j},k}", pat), informative_failure);
192}
193
194UNIT_TEST(globish, combine_and_check_globish)
195{
196 vector<globish> s;
197 s.push_back(globish("a"));
198 s.push_back(globish("b"));
199 s.push_back(globish("c"));
200 globish combined;
201 combine_and_check_globish(s, combined);
202 UNIT_TEST_CHECK(combined() == "{a,b,c}");
203}
204
205UNIT_TEST(globish, globish_matcher)
206{
207 {
208 globish_matcher m(globish("{a,b}?*\\*|"), globish("*c*"));
209 UNIT_TEST_CHECK(m("aq*|"));
210 UNIT_TEST_CHECK(m("bq*|"));
211 UNIT_TEST_CHECK(!m("bc*|"));
212 UNIT_TEST_CHECK(!m("bq|"));
213 UNIT_TEST_CHECK(!m("b*|"));
214 UNIT_TEST_CHECK(!m(""));
215 }
216 {
217 globish_matcher m(globish("{a,\\\\,b*}"), globish("*c*"));
218 UNIT_TEST_CHECK(m("a"));
219 UNIT_TEST_CHECK(!m("ab"));
220 UNIT_TEST_CHECK(m("\\"));
221 UNIT_TEST_CHECK(!m("\\\\"));
222 UNIT_TEST_CHECK(m("b"));
223 UNIT_TEST_CHECK(m("bfoobar"));
224 UNIT_TEST_CHECK(!m("bfoobarcfoobar"));
225 }
226 {
227 globish_matcher m(globish("*"), globish(""));
228 UNIT_TEST_CHECK(m("foo"));
229 UNIT_TEST_CHECK(m(""));
230 }
231 {
232 globish_matcher m(globish("{foo}"), globish(""));
233 UNIT_TEST_CHECK(m("foo"));
234 UNIT_TEST_CHECK(!m("bar"));
235 }
236}
237
238#endif // BUILD_UNIT_TESTS
239
240// Local Variables:
241// mode: C++
242// fill-column: 76
243// c-file-style: "gnu"
244// indent-tabs-mode: nil
245// End:
246// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status