monotone

monotone Mtn Source Tree

Root/uri.cc

1// Copyright (C) 2006 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "sanity.hh"
12#include "uri.hh"
13
14using std::string;
15typedef string::size_type stringpos;
16
17static void
18parse_authority(string const & in, uri & u)
19{
20 L(FL("matched URI authority: '%s'") % in);
21
22 stringpos p = 0;
23
24 // First, there might be a user: one or more non-@ characters followed
25 // by an @.
26 stringpos user_end = in.find('@', p);
27 if (user_end != 0 && user_end < in.size())
28 {
29 u.user.assign(in, 0, user_end);
30 p = user_end + 1;
31 L(FL("matched URI user: '%s'") % u.user);
32 }
33
34 // The next thing must either be an ipv6 address, which has the form
35 // \[ [0-9A-Za-z:]+ \] and we discard the square brackets, or some other
36 // sort of hostname, [^:]+. (A host-part can be terminated by /, ?, or #
37 // as well as :, but our caller has taken care of that.)
38 if (p < in.size() && in.at(p) == '[')
39 {
40 p++;
41 stringpos ipv6_end = in.find(']', p);
42 N(ipv6_end != string::npos,
43 F("IPv6 address in URI has no closing ']'"));
44
45 u.host.assign(in, p, ipv6_end - p);
46 p = ipv6_end + 1;
47 L(FL("matched URI host (IPv6 address): '%s'") % u.host);
48 }
49 else
50 {
51 stringpos host_end = in.find(':', p);
52 u.host.assign(in, p, host_end - p);
53 p = host_end;
54 L(FL("matched URI host: '%s'") % u.host);
55 }
56
57 // Finally, if the host-part was ended by a colon, there is a port number
58 // following, which must consist entirely of digits.
59 if (p < in.size() && in.at(p) == ':')
60 {
61 p++;
62 N(p < in.size(),
63 F("explicit port-number specification in URI has no digits"));
64
65 N(in.find_first_not_of("0123456789", p) == string::npos,
66 F("explicit port-number specification in URI contains nondigits"));
67
68 u.port.assign(in, p, string::npos);
69 L(FL("matched URI port: '%s'") % u.port);
70 }
71}
72
73void
74parse_uri(string const & in, uri & u)
75{
76 u.scheme.clear();
77 u.user.clear();
78 u.host.clear();
79 u.port.clear();
80 u.path.clear();
81 u.query.clear();
82 u.fragment.clear();
83
84 stringpos p = 0;
85
86 // This is a simplified URI grammar. It does the basics.
87
88 // First there may be a scheme: one or more characters which are not
89 // ":/?#", followed by a colon.
90 stringpos scheme_end = in.find_first_of(":/?#", p);
91
92 if (scheme_end != 0 && scheme_end < in.size() && in.at(scheme_end) == ':')
93 {
94 u.scheme.assign(in, p, scheme_end - p);
95 p = scheme_end + 1;
96 L(FL("matched URI scheme: '%s'") % u.scheme);
97 }
98
99 // Next, there may be an authority: "//" followed by zero or more
100 // characters which are not "/?#".
101
102 if (p + 1 < in.size() && in.at(p) == '/' && in.at(p+1) == '/')
103 {
104 p += 2;
105 stringpos authority_end = in.find_first_of("/?#", p);
106 if (authority_end != p)
107 {
108 parse_authority(string(in, p, authority_end - p), u);
109 p = authority_end;
110 }
111 if (p >= in.size())
112 return;
113 }
114
115 // Next, a path: zero or more characters which are not "?#".
116 {
117 stringpos path_end = in.find_first_of("?#", p);
118 u.path.assign(in, p, path_end - p);
119 p = path_end;
120 L(FL("matched URI path: '%s'") % u.path);
121 if (p >= in.size())
122 return;
123 }
124
125 // Next, perhaps a query: "?" followed by zero or more characters
126 // which are not "#".
127 if (in.at(p) == '?')
128 {
129 p++;
130 stringpos query_end = in.find('#', p);
131 u.query.assign(in, p, query_end - p);
132 p = query_end;
133 L(FL("matched URI query: '%s'") % u.query);
134 if (p >= in.size())
135 return;
136 }
137
138 // Finally, if there is a '#', then whatever comes after it in the string
139 // is a fragment identifier.
140 if (in.at(p) == '#')
141 {
142 u.fragment.assign(in, p + 1, string::npos);
143 L(FL("matched URI fragment: '%s'") % u.fragment);
144 }
145}
146
147string
148urldecode(string const & in)
149{
150 string out;
151
152 for (string::const_iterator i = in.begin(); i != in.end(); ++i)
153 {
154 if (*i != '%')
155 out += *i;
156 else
157 {
158 char d1, d2;
159 ++i;
160 E(i != in.end(), F("Bad URLencoded string '%s'") % in);
161 d1 = *i;
162 ++i;
163 E(i != in.end(), F("Bad URLencoded string '%s'") % in);
164 d2 = *i;
165
166 char c = 0;
167 switch(d1)
168 {
169 case '0': c += 0; break;
170 case '1': c += 1; break;
171 case '2': c += 2; break;
172 case '3': c += 3; break;
173 case '4': c += 4; break;
174 case '5': c += 5; break;
175 case '6': c += 6; break;
176 case '7': c += 7; break;
177 case '8': c += 8; break;
178 case '9': c += 9; break;
179 case 'a': case 'A': c += 10; break;
180 case 'b': case 'B': c += 11; break;
181 case 'c': case 'C': c += 12; break;
182 case 'd': case 'D': c += 13; break;
183 case 'e': case 'E': c += 14; break;
184 case 'f': case 'F': c += 15; break;
185 default: E(false, F("Bad URLencoded string '%s'") % in);
186 }
187 c *= 16;
188 switch(d2)
189 {
190 case '0': c += 0; break;
191 case '1': c += 1; break;
192 case '2': c += 2; break;
193 case '3': c += 3; break;
194 case '4': c += 4; break;
195 case '5': c += 5; break;
196 case '6': c += 6; break;
197 case '7': c += 7; break;
198 case '8': c += 8; break;
199 case '9': c += 9; break;
200 case 'a': case 'A': c += 10; break;
201 case 'b': case 'B': c += 11; break;
202 case 'c': case 'C': c += 12; break;
203 case 'd': case 'D': c += 13; break;
204 case 'e': case 'E': c += 14; break;
205 case 'f': case 'F': c += 15; break;
206 default: E(false, F("Bad URLencoded string '%s'") % in);
207 }
208 out += c;
209 }
210 }
211
212 return out;
213}
214
215
216#ifdef BUILD_UNIT_TESTS
217#include "unit_tests.hh"
218
219static void
220test_one_uri(string scheme,
221 string user,
222 string ipv6_host,
223 string normal_host,
224 string port,
225 string path,
226 string query,
227 string fragment)
228{
229 string built;
230
231 if (!scheme.empty())
232 built += scheme + ':';
233
234 string host;
235
236 if (! ipv6_host.empty())
237 {
238 I(normal_host.empty());
239 host += '[';
240 host += (ipv6_host + ']');
241 }
242 else
243 host = normal_host;
244
245 if (! (user.empty()
246 && host.empty()
247 && port.empty()))
248 {
249 built += "//";
250
251 if (! user.empty())
252built += (user + '@');
253
254 if (! host.empty())
255built += host;
256
257 if (! port.empty())
258{
259 built += ':';
260 built += port;
261}
262 }
263
264 if (! path.empty())
265 {
266 I(path[0] == '/');
267 built += path;
268 }
269
270 if (! query.empty())
271 {
272 built += '?';
273 built += query;
274 }
275
276 if (! fragment.empty())
277 {
278 built += '#';
279 built += fragment;
280 }
281
282 L(FL("testing parse of URI '%s'") % built);
283 uri u;
284 UNIT_TEST_CHECK_NOT_THROW(parse_uri(built, u), informative_failure);
285 UNIT_TEST_CHECK(u.scheme == scheme);
286 UNIT_TEST_CHECK(u.user == user);
287 if (!normal_host.empty())
288 UNIT_TEST_CHECK(u.host == normal_host);
289 else
290 UNIT_TEST_CHECK(u.host == ipv6_host);
291 UNIT_TEST_CHECK(u.port == port);
292 UNIT_TEST_CHECK(u.path == path);
293 UNIT_TEST_CHECK(u.query == query);
294 UNIT_TEST_CHECK(u.fragment == fragment);
295}
296
297UNIT_TEST(uri, basic)
298{
299 test_one_uri("ssh", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
300 test_one_uri("ssh", "graydon", "", "venge.net", "", "/tmp/foo.mtn", "", "");
301 test_one_uri("ssh", "", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
302 test_one_uri("ssh", "", "", "venge.net", "", "/tmp/foo.mtn", "", "");
303 test_one_uri("ssh", "", "fe:00:01::04:21", "", "", "/tmp/foo.mtn", "", "");
304 test_one_uri("file", "", "", "", "", "/tmp/foo.mtn", "", "");
305 test_one_uri("", "", "", "", "", "/tmp/foo.mtn", "", "");
306 test_one_uri("http", "graydon", "", "venge.net", "8080", "/foo.cgi", "branch=foo", "tip");
307 test_one_uri("http", "graydon", "", "192.168.0.104", "8080", "/foo.cgi", "branch=foo", "tip");
308 test_one_uri("http", "graydon", "fe:00:01::04:21", "", "8080", "/foo.cgi", "branch=foo", "tip");
309}
310
311UNIT_TEST(uri, bizarre)
312{
313 test_one_uri("", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
314 test_one_uri("", "", "", "", "", "/graydon@venge.net:22/tmp/foo.mtn", "", "");
315 test_one_uri("ssh", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
316 test_one_uri("ssh", "", "", "", "", "/graydon@venge.net:22/tmp/foo.mtn", "", "");
317}
318
319UNIT_TEST(uri, invalid)
320{
321 uri u;
322
323 UNIT_TEST_CHECK_THROW(parse_uri("http://[f3:03:21/foo/bar", u), informative_failure);
324 UNIT_TEST_CHECK_THROW(parse_uri("http://example.com:/foo/bar", u), informative_failure);
325 UNIT_TEST_CHECK_THROW(parse_uri("http://example.com:1a4/foo/bar", u), informative_failure);
326}
327
328UNIT_TEST(uri, urldecode)
329{
330 UNIT_TEST_CHECK(urldecode("foo%20bar") == "foo bar");
331 UNIT_TEST_CHECK(urldecode("%61") == "a");
332 UNIT_TEST_CHECK_THROW(urldecode("%xx"), informative_failure);
333 UNIT_TEST_CHECK_THROW(urldecode("%"), informative_failure);
334 UNIT_TEST_CHECK_THROW(urldecode("%5"), informative_failure);
335}
336
337#endif // BUILD_UNIT_TESTS
338
339// Local Variables:
340// mode: C++
341// fill-column: 76
342// c-file-style: "gnu"
343// indent-tabs-mode: nil
344// End:
345// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status