monotone

monotone Mtn Source Tree

Root/uri.cc

1// Copyright (C) 2006 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "pcrewrap.hh"
12#include "sanity.hh"
13#include "uri.hh"
14#include <vector>
15#include <algorithm>
16
17using std::string;
18using std::vector;
19typedef string::size_type stringpos;
20
21void
22parse_uri(string const & in, uri_t & uri, origin::type made_from)
23{
24 uri.clear();
25
26 // this is a little tweak to recognize paths as authorities
27 string modified = in;
28 pcre::regex has_scheme("^\\w[\\w\\d\\+\\-\\.]*:[^\\d]+", origin::internal);
29 if (!has_scheme.match(in, made_from))
30 {
31 L(FL("prepending pseudo scheme and authority marker"));
32 modified = "ZZZ://" + in;
33 }
34
35 // RFC 3986, Appendix B
36 pcre::regex matcher("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?",
37 origin::internal);
38 vector<string> matches;
39 E(matcher.match(modified, made_from, matches), made_from,
40 F("unable to parse URI '%s'") % in);
41
42 I(matches.size() == 10);
43
44 //
45 // scheme matching
46 //
47 if (matches[2] != "ZZZ")
48 {
49 uri.scheme.assign(matches[2]);
50 std::transform(uri.scheme.begin(), uri.scheme.end(), uri.scheme.begin(), ::tolower);
51 L(FL("matched URI scheme: '%s'") % uri.scheme);
52 }
53
54 //
55 // host and port matching
56 //
57 if (!matches[4].empty())
58 {
59 L(FL("parsing host and optional port of '%s'") % matches[4]);
60
61 // we do not allow non-bracketed IPv6, since
62 // host matches like "abc:123" cannot be distinguished
63 pcre::regex hostlike("^(([^@]+)@)?(([^:\\[\\]]+)|\\[([:0-9a-fA-F]+)\\])(:(\\d*))?$",
64 origin::internal);
65 vector<string> hostlike_matches;
66
67 E(hostlike.match(matches[4], made_from, hostlike_matches), made_from,
68 F("unable to parse host of URI '%s'") % in);
69
70 if (!hostlike_matches[2].empty())
71 {
72 uri.user.assign(hostlike_matches[2]);
73 L(FL("matched URI user: '%s'") % uri.user);
74 }
75
76 if (!hostlike_matches[4].empty())
77 {
78 uri.host.assign(hostlike_matches[4]);
79
80 }
81 else
82 if (!hostlike_matches[5].empty())
83 {
84 // for IPv6 we discard the square brackets
85 uri.host.assign(hostlike_matches[5]);
86 }
87 else
88 I(false);
89
90 std::transform(uri.host.begin(), uri.host.end(), uri.host.begin(), ::tolower);
91 L(FL("matched URI host: '%s'") % uri.host);
92
93 if (!hostlike_matches[7].empty())
94 {
95 uri.port.assign(hostlike_matches[7]);
96 L(FL("matched URI port: '%s'") % uri.port);
97 }
98 }
99
100 //
101 // path matching
102 //
103 if (!matches[5].empty())
104 {
105 // FIXME: we do not
106 // - remove dot components ("/./" and "/../")
107 // - check whether the path of authority-less URIs do not start with "//"
108 // - convert the path in "scheme:host/:foo" to "./:foo"
109 uri.path.assign(urldecode(matches[5], made_from));
110 L(FL("matched URI path: '%s'") % uri.path);
111 }
112
113 //
114 // query matching
115 //
116 if (!matches[7].empty())
117 {
118 // FIXME: the query string is not broken up at this point
119 // and therefor cannot be urldecoded without possible side effects
120 uri.query.assign(matches[7]);
121 L(FL("matched URI query: '%s'") % uri.query);
122 }
123
124 //
125 // fragment matching
126 //
127 if (!matches[9].empty())
128 {
129 uri.fragment.assign(urldecode(matches[9], made_from));
130 L(FL("matched URI fragment: '%s'") % uri.fragment);
131 }
132}
133
134string
135urldecode(string const & in, origin::type made_from)
136{
137 string out;
138
139 for (string::const_iterator i = in.begin(); i != in.end(); ++i)
140 {
141 if (*i == '+')
142 out += ' ';
143 else
144 if (*i != '%')
145 out += *i;
146 else
147 {
148 char d1, d2;
149 ++i;
150 E(i != in.end(), made_from, F("Bad URLencoded string '%s'") % in);
151 d1 = *i;
152 ++i;
153 E(i != in.end(), made_from, F("Bad URLencoded string '%s'") % in);
154 d2 = *i;
155
156 char c = 0;
157 switch(d1)
158 {
159 case '0': c += 0; break;
160 case '1': c += 1; break;
161 case '2': c += 2; break;
162 case '3': c += 3; break;
163 case '4': c += 4; break;
164 case '5': c += 5; break;
165 case '6': c += 6; break;
166 case '7': c += 7; break;
167 case '8': c += 8; break;
168 case '9': c += 9; break;
169 case 'a': case 'A': c += 10; break;
170 case 'b': case 'B': c += 11; break;
171 case 'c': case 'C': c += 12; break;
172 case 'd': case 'D': c += 13; break;
173 case 'e': case 'E': c += 14; break;
174 case 'f': case 'F': c += 15; break;
175 default: E(false, made_from, F("Bad URLencoded string '%s'") % in);
176 }
177 c *= 16;
178 switch(d2)
179 {
180 case '0': c += 0; break;
181 case '1': c += 1; break;
182 case '2': c += 2; break;
183 case '3': c += 3; break;
184 case '4': c += 4; break;
185 case '5': c += 5; break;
186 case '6': c += 6; break;
187 case '7': c += 7; break;
188 case '8': c += 8; break;
189 case '9': c += 9; break;
190 case 'a': case 'A': c += 10; break;
191 case 'b': case 'B': c += 11; break;
192 case 'c': case 'C': c += 12; break;
193 case 'd': case 'D': c += 13; break;
194 case 'e': case 'E': c += 14; break;
195 case 'f': case 'F': c += 15; break;
196 default: E(false, made_from, F("Bad URLencoded string '%s'") % in);
197 }
198 out += c;
199 }
200 }
201
202 return out;
203}
204
205
206
207// Local Variables:
208// mode: C++
209// fill-column: 76
210// c-file-style: "gnu"
211// indent-tabs-mode: nil
212// End:
213// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status