monotone

monotone Mtn Source Tree

Root/uri.cc

// Copyright (C) 2006 Graydon Hoare <graydon@pobox.com>
//
// This program is made available under the GNU GPL version 2.0 or
// greater. See the accompanying file COPYING for details.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE.
9
10#include "base.hh"
11#include "sanity.hh"
12#include "uri.hh"
13
using std::string;

// Type used throughout this file for offsets into, and lengths of, the
// URI text being parsed; it is what string::find() and friends return.
typedef string::size_type stringpos;
16
17static void
18parse_authority(string const & in, uri & u)
19{
20 L(FL("matched URI authority: '%s'") % in);
21
22 stringpos p = 0;
23
24 // First, there might be a user: one or more non-@ characters followed
25 // by an @.
26 stringpos user_end = in.find('@', p);
27 if (user_end != 0 && user_end < in.size())
28 {
29 u.user.assign(in, 0, user_end);
30 p = user_end + 1;
31 L(FL("matched URI user: '%s'") % u.user);
32 }
33
34 // The next thing must either be an ipv6 address, which has the form
35 // \[ [0-9A-Za-z:]+ \] and we discard the square brackets, or some other
36 // sort of hostname, [^:]+. (A host-part can be terminated by /, ?, or #
37 // as well as :, but our caller has taken care of that.)
38 if (p < in.size() && in.at(p) == '[')
39 {
40 p++;
41 stringpos ipv6_end = in.find(']', p);
42 N(ipv6_end != string::npos,
43 F("IPv6 address in URI has no closing ']'"));
44
45 u.host.assign(in, p, ipv6_end - p);
46 p = ipv6_end + 1;
47 L(FL("matched URI host (IPv6 address): '%s'") % u.host);
48 }
49 else
50 {
51 stringpos host_end = in.find(':', p);
52 u.host.assign(in, p, host_end - p);
53 p = host_end;
54 L(FL("matched URI host: '%s'") % u.host);
55 }
56
57 // Finally, if the host-part was ended by a colon, there is a port number
58 // following, which must consist entirely of digits.
59 if (p < in.size() && in.at(p) == ':')
60 {
61 p++;
62 N(p < in.size(),
63 F("explicit port-number specification in URI has no digits"));
64
65 N(in.find_first_not_of("0123456789", p) == string::npos,
66 F("explicit port-number specification in URI contains nondigits"));
67
68 u.port.assign(in, p, string::npos);
69 L(FL("matched URI port: '%s'") % u.port);
70 }
71}
72
73void
74parse_uri(string const & in, uri & u)
75{
76 u.scheme.clear();
77 u.user.clear();
78 u.host.clear();
79 u.port.clear();
80 u.path.clear();
81 u.query.clear();
82 u.fragment.clear();
83
84 stringpos p = 0;
85
86 // This is a simplified URI grammar. It does the basics.
87
88 // First there may be a scheme: one or more characters which are not
89 // ":/?#", followed by a colon.
90 stringpos scheme_end = in.find_first_of(":/?#", p);
91
92 if (scheme_end != 0 && scheme_end < in.size() && in.at(scheme_end) == ':')
93 {
94 u.scheme.assign(in, p, scheme_end - p);
95 p = scheme_end + 1;
96 L(FL("matched URI scheme: '%s'") % u.scheme);
97 }
98
99 // Next, there may be an authority: "//" followed by zero or more
100 // characters which are not "/?#".
101
102 if (p + 1 < in.size() && in.at(p) == '/' && in.at(p+1) == '/')
103 {
104 p += 2;
105 stringpos authority_end = in.find_first_of("/?#", p);
106 if (authority_end != p)
107 {
108 parse_authority(string(in, p, authority_end - p), u);
109 p = authority_end;
110 }
111 if (p >= in.size())
112 return;
113 }
114
115 // Next, a path: zero or more characters which are not "?#".
116 {
117 stringpos path_end = in.find_first_of("?#", p);
118 u.path.assign(in, p, path_end - p);
119 p = path_end;
120 L(FL("matched URI path: '%s'") % u.path);
121 if (p >= in.size())
122 return;
123 }
124
125 // Next, perhaps a query: "?" followed by zero or more characters
126 // which are not "#".
127 if (in.at(p) == '?')
128 {
129 p++;
130 stringpos query_end = in.find('#', p);
131 u.query.assign(in, p, query_end - p);
132 p = query_end;
133 L(FL("matched URI query: '%s'") % u.query);
134 if (p >= in.size())
135 return;
136 }
137
138 // Finally, if there is a '#', then whatever comes after it in the string
139 // is a fragment identifier.
140 if (in.at(p) == '#')
141 {
142 u.fragment.assign(in, p + 1, string::npos);
143 L(FL("matched URI fragment: '%s'") % u.fragment);
144 }
145}
146
147#ifdef BUILD_UNIT_TESTS
148#include "unit_tests.hh"
149
150static void
151test_one_uri(string scheme,
152 string user,
153 string ipv6_host,
154 string normal_host,
155 string port,
156 string path,
157 string query,
158 string fragment)
159{
160 string built;
161
162 if (!scheme.empty())
163 built += scheme + ':';
164
165 string host;
166
167 if (! ipv6_host.empty())
168 {
169 I(normal_host.empty());
170 host += '[';
171 host += (ipv6_host + ']');
172 }
173 else
174 host = normal_host;
175
176 if (! (user.empty()
177 && host.empty()
178 && port.empty()))
179 {
180 built += "//";
181
182 if (! user.empty())
183built += (user + '@');
184
185 if (! host.empty())
186built += host;
187
188 if (! port.empty())
189{
190 built += ':';
191 built += port;
192}
193 }
194
195 if (! path.empty())
196 {
197 I(path[0] == '/');
198 built += path;
199 }
200
201 if (! query.empty())
202 {
203 built += '?';
204 built += query;
205 }
206
207 if (! fragment.empty())
208 {
209 built += '#';
210 built += fragment;
211 }
212
213 L(FL("testing parse of URI '%s'") % built);
214 uri u;
215 UNIT_TEST_CHECK_NOT_THROW(parse_uri(built, u), informative_failure);
216 UNIT_TEST_CHECK(u.scheme == scheme);
217 UNIT_TEST_CHECK(u.user == user);
218 if (!normal_host.empty())
219 UNIT_TEST_CHECK(u.host == normal_host);
220 else
221 UNIT_TEST_CHECK(u.host == ipv6_host);
222 UNIT_TEST_CHECK(u.port == port);
223 UNIT_TEST_CHECK(u.path == path);
224 UNIT_TEST_CHECK(u.query == query);
225 UNIT_TEST_CHECK(u.fragment == fragment);
226}
227
228UNIT_TEST(uri, basic)
229{
230 test_one_uri("ssh", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
231 test_one_uri("ssh", "graydon", "", "venge.net", "", "/tmp/foo.mtn", "", "");
232 test_one_uri("ssh", "", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
233 test_one_uri("ssh", "", "", "venge.net", "", "/tmp/foo.mtn", "", "");
234 test_one_uri("ssh", "", "fe:00:01::04:21", "", "", "/tmp/foo.mtn", "", "");
235 test_one_uri("file", "", "", "", "", "/tmp/foo.mtn", "", "");
236 test_one_uri("", "", "", "", "", "/tmp/foo.mtn", "", "");
237 test_one_uri("http", "graydon", "", "venge.net", "8080", "/foo.cgi", "branch=foo", "tip");
238 test_one_uri("http", "graydon", "", "192.168.0.104", "8080", "/foo.cgi", "branch=foo", "tip");
239 test_one_uri("http", "graydon", "fe:00:01::04:21", "", "8080", "/foo.cgi", "branch=foo", "tip");
240}
241
242UNIT_TEST(uri, bizarre)
243{
244 test_one_uri("", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
245 test_one_uri("", "", "", "", "", "/graydon@venge.net:22/tmp/foo.mtn", "", "");
246 test_one_uri("ssh", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", "");
247 test_one_uri("ssh", "", "", "", "", "/graydon@venge.net:22/tmp/foo.mtn", "", "");
248}
249
250UNIT_TEST(uri, invalid)
251{
252 uri u;
253
254 UNIT_TEST_CHECK_THROW(parse_uri("http://[f3:03:21/foo/bar", u), informative_failure);
255 UNIT_TEST_CHECK_THROW(parse_uri("http://example.com:/foo/bar", u), informative_failure);
256 UNIT_TEST_CHECK_THROW(parse_uri("http://example.com:1a4/foo/bar", u), informative_failure);
257}
258
259#endif // BUILD_UNIT_TESTS
260
// Local Variables:
// mode: C++
// fill-column: 76
// c-file-style: "gnu"
// indent-tabs-mode: nil
// End:
// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status