monotone

monotone Mtn Source Tree

Root/globish.cc

1// copyright (C) 2005 Richard Levitte <richard@levitte.org>
2// copyright (C) 2005 nathaniel smith <njs@pobox.com>
3// all rights reserved.
4// licensed to the public under the terms of the GNU GPL (>= 2)
5// see the file COPYING for details
6
7#include "sanity.hh"
8#include "globish.hh"
9
10// this converts a globish pattern to a regex. The regex should be usable by
11// the Boost regex library operating in default mode, i.e., it should be a
12// valid ECMAscript regex.
13//
14// Pattern transformation:
15//
16// - As a special case, the empty pattern is translated to "$.^", which cannot
17// match any string.
18//
19// - Any character except those described below is copied as it is when it is alphanumeric or '_'; any other character is copied preceded by a backslash.
20// - The backslash (\) escapes the following character. The escaping
21// backslash is copied to the regex along with the following character.
22// - * is transformed to .* in the regex.
23// - ? is transformed to . in the regex.
24// - { is transformed to ( in the regex
25// - } is transformed to ) in the regex
26// - , is transformed to | in the regex, if within { and }
27// - ^ is escaped unless it comes directly after an unescaped [.
28// - ! is transformed to ^ in the regex if it comes directly after an
29// unescaped [.
30// - ] directly following an unescaped [ is escaped.
// Append the character C to the regex RE, preceded by a backslash unless C
// is alphanumeric or an underscore.  Quoting every other character is
// deliberately conservative: it guarantees that nothing copied from the
// glob can act as a regex metacharacter.
//
// The character is converted to unsigned char before being handed to
// isalnum(); passing a negative value (any byte >= 0x80 where plain char is
// signed, e.g. the bytes of a multi-byte UTF-8 sequence) is undefined
// behaviour.
static void
maybe_quote(char c, std::string & re)
{
  unsigned char const uc = static_cast<unsigned char>(c);
  if (!(isalnum(uc) || c == '_'))
    {
      re += '\\';
    }
  re += c;
}
40
41static void
42checked_globish_to_regex(std::string const & glob, std::string & regex)
43{
44 int in_braces = 0; // counter for levels if {}
45
46 regex.clear();
47 regex.reserve(glob.size() * 2);
48
49 L(F("checked_globish_to_regex: input = '%s'\n") % glob);
50
51 if (glob == "")
52 {
53 regex = "$.^";
54 // and the below loop will do nothing
55 }
56 for (std::string::const_iterator i = glob.begin(); i != glob.end(); ++i)
57 {
58 char c = *i;
59
60 N(in_braces < 5, F("braces nested too deep in pattern '%s'") % glob);
61
62 switch(c)
63 {
64 case '*':
65 regex += ".*";
66 break;
67 case '?':
68 regex += '.';
69 break;
70 case '{':
71 in_braces++;
72 regex += '(';
73 break;
74 case '}':
75 N(in_braces != 0,
76 F("trying to end a brace expression in a glob when none is started"));
77 regex += ')';
78 in_braces--;
79 break;
80 case ',':
81 if (in_braces > 0)
82 regex += '|';
83 else
84 maybe_quote(c, regex);
85 break;
86 case '\\':
87 N(++i != glob.end(), F("pattern '%s' ends with backslash") % glob);
88 maybe_quote(*i, regex);
89 break;
90 default:
91 maybe_quote(c, regex);
92 break;
93 }
94 }
95
96 N(in_braces == 0,
97 F("run-away brace expression in pattern '%s'") % glob);
98
99 L(F("checked_globish_to_regex: output = '%s'\n") % regex);
100}
101
102void
103combine_and_check_globish(std::set<utf8> const & patterns, utf8 & pattern)
104{
105 std::string p;
106 if (patterns.size() > 1)
107 p += '{';
108 bool first = true;
109 for (std::set<utf8>::const_iterator i = patterns.begin(); i != patterns.end(); ++i)
110 {
111 std::string tmp;
112 // run for the checking it does
113 checked_globish_to_regex((*i)(), tmp);
114 if (!first)
115 p += ',';
116 first = false;
117 p += (*i)();
118 }
119 if (patterns.size() > 1)
120 p += '}';
121 pattern = utf8(p);
122}
123
124globish_matcher::globish_matcher(utf8 const & include_pat, utf8 const & exclude_pat)
125{
126 std::string re;
127 checked_globish_to_regex(include_pat(), re);
128 r_inc = re;
129 checked_globish_to_regex(exclude_pat(), re);
130 r_exc = re;
131}
132
133bool
134globish_matcher::operator()(std::string const & s)
135{
136 // regex_match may throw a std::runtime_error, if the regex turns out to be
137 // really pathological
138 bool inc_match = boost::regex_match(s, r_inc);
139 bool exc_match = boost::regex_match(s, r_exc);
140 bool result = inc_match && !exc_match;
141 L(F("matching '%s' against '%s' excluding '%s': %s, %s: %s\n")
142 % s % r_inc % r_exc
143 % (inc_match ? "included" : "not included")
144 % (exc_match ? "excluded" : "not excluded")
145 % (result ? "matches" : "does not match"));
146 return result;
147}
148
149#ifdef BUILD_UNIT_TESTS
150#include "unit_tests.hh"
151
152static void
153checked_globish_to_regex_test()
154{
155 std::string pat;
156
157 checked_globish_to_regex("*", pat);
158 BOOST_CHECK(pat == ".*");
159 checked_globish_to_regex("?", pat);
160 BOOST_CHECK(pat == ".");
161 checked_globish_to_regex("{a,b,c}d", pat);
162 BOOST_CHECK(pat == "(a|b|c)d");
163 checked_globish_to_regex("foo{a,{b,c},?*}d", pat);
164 BOOST_CHECK(pat == "foo(a|(b|c)|..*)d");
165 checked_globish_to_regex("\\a\\b\\|\\{\\*", pat);
166 BOOST_CHECK(pat == "ab\\|\\{\\*");
167 checked_globish_to_regex(".+$^{}", pat);
168 BOOST_CHECK(pat == "\\.\\+\\$\\^()");
169 checked_globish_to_regex(",", pat);
170 // we're very conservative about metacharacters, and quote all
171 // non-alphanumerics, hence the backslash
172 BOOST_CHECK(pat == "\\,");
173 checked_globish_to_regex("\\.\\+\\$\\^\\(\\)", pat);
174 BOOST_CHECK(pat == "\\.\\+\\$\\^\\(\\)");
175
176 BOOST_CHECK_THROW(checked_globish_to_regex("foo\\", pat), informative_failure);
177 BOOST_CHECK_THROW(checked_globish_to_regex("{foo", pat), informative_failure);
178 BOOST_CHECK_THROW(checked_globish_to_regex("{foo,bar{baz,quux}", pat), informative_failure);
179 BOOST_CHECK_THROW(checked_globish_to_regex("foo}", pat), informative_failure);
180 BOOST_CHECK_THROW(checked_globish_to_regex("foo,bar{baz,quux}}", pat), informative_failure);
181 BOOST_CHECK_THROW(checked_globish_to_regex("{{{{{{{{{{a,b},c},d},e},f},g},h},i},j},k}", pat), informative_failure);
182}
183
184static void
185combine_and_check_globish_test()
186{
187 std::set<utf8> s;
188 s.insert(utf8("a"));
189 s.insert(utf8("b"));
190 s.insert(utf8("c"));
191 utf8 combined;
192 combine_and_check_globish(s, combined);
193 BOOST_CHECK(combined() == "{a,b,c}");
194}
195
196static void
197globish_matcher_test()
198{
199 {
200 globish_matcher m(utf8("{a,b}?*\\*|"), utf8("*c*"));
201 BOOST_CHECK(m("aq*|"));
202 BOOST_CHECK(m("bq*|"));
203 BOOST_CHECK(!m("bc*|"));
204 BOOST_CHECK(!m("bq|"));
205 BOOST_CHECK(!m("b*|"));
206 BOOST_CHECK(!m(""));
207 }
208 {
209 globish_matcher m(utf8("{a,\\\\,b*}"), utf8("*c*"));
210 BOOST_CHECK(m("a"));
211 BOOST_CHECK(!m("ab"));
212 BOOST_CHECK(m("\\"));
213 BOOST_CHECK(!m("\\\\"));
214 BOOST_CHECK(m("b"));
215 BOOST_CHECK(m("bfoobar"));
216 BOOST_CHECK(!m("bfoobarcfoobar"));
217 }
218 {
219 globish_matcher m(utf8("*"), utf8(""));
220 BOOST_CHECK(m("foo"));
221 BOOST_CHECK(m(""));
222 }
223 {
224 globish_matcher m(utf8("{foo}"), utf8(""));
225 BOOST_CHECK(m("foo"));
226 BOOST_CHECK(!m("bar"));
227 }
228}
229
230
231void add_globish_tests(test_suite * suite)
232{
233 I(suite);
234 suite->add(BOOST_TEST_CASE(&checked_globish_to_regex_test));
235 suite->add(BOOST_TEST_CASE(&combine_and_check_globish_test));
236 suite->add(BOOST_TEST_CASE(&globish_matcher_test));
237}
238
239#endif // BUILD_UNIT_TESTS

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status