1 | // Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>␊ |
2 | //␊ |
3 | // This program is made available under the GNU GPL version 2.0 or␊ |
4 | // greater. See the accompanying file COPYING for details.␊ |
5 | //␊ |
6 | // This program is distributed WITHOUT ANY WARRANTY; without even the␊ |
7 | // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR␊ |
8 | // PURPOSE.␊ |
9 | ␊ |
10 | #include "base.hh"␊ |
11 | #include <algorithm>␊ |
12 | #include <cctype>␊ |
13 | #include <functional>␊ |
14 | #include <iterator>␊ |
15 | #include <sstream>␊ |
16 | #include <vector>␊ |
17 | ␊ |
18 | ␊ |
19 | #include <boost/tokenizer.hpp>␊ |
20 | #include <boost/scoped_array.hpp>␊ |
21 | ␊ |
22 | #include "botan/botan.h"␊ |
23 | #include "botan/gzip.h"␊ |
24 | #include "botan/sha160.h"␊ |
25 | ␊ |
26 | #include "cleanup.hh"␊ |
27 | #include "constants.hh"␊ |
28 | #include "sanity.hh"␊ |
29 | #include "transforms.hh"␊ |
30 | #include "simplestring_xform.hh"␊ |
31 | #include "vocab.hh"␊ |
32 | #include "xdelta.hh"␊ |
33 | ␊ |
34 | using std::string;␊ |
35 | ␊ |
36 | using boost::scoped_array;␊ |
37 | ␊ |
38 | // this file contans various sorts of string transformations. each␊ |
39 | // transformation should be self-explanatory from its type signature. see␊ |
40 | // transforms.hh for the summary.␊ |
41 | ␊ |
42 | // NB this file uses very "value-centric" functional approach; even though␊ |
43 | // many of the underlying transformations are "stream-centric" and the␊ |
44 | // underlying libraries (eg. crypto++) are stream oriented. this will␊ |
45 | // probably strike some people as contemptably inefficient, since it means␊ |
46 | // that occasionally 1, 2, or even 3 copies of an entire file will wind up␊ |
47 | // in memory at once. I am taking this approach for 3 reasons: first, I␊ |
48 | // want the type system to help me and value types are much easier to work␊ |
49 | // with than stream types. second, it is *much* easier to debug a program␊ |
50 | // that operates on values than streams, and correctness takes precedence␊ |
51 | // over all other features of this program. third, this is a peer-to-peer␊ |
52 | // sort of program for small-ish source-code text files, not a fileserver,␊ |
53 | // and is memory-limited anyways (for example, storing things in sqlite␊ |
54 | // requires they be able to fit in memory). you're hopefully not going to␊ |
55 | // be dealing with hundreds of users hammering on locks and memory␊ |
56 | // concurrently.␊ |
57 | //␊ |
58 | // if future analysis proves these assumptions wrong, feel free to revisit␊ |
59 | // the matter, but bring strong evidence along with you that the stream␊ |
60 | // paradigm "must" be used. this program is intended for source code␊ |
61 | // control and I make no bones about it.␊ |
62 | ␊ |
63 | NORETURN(static inline void error_in_transform(Botan::Exception & e));␊ |
64 | ␊ |
65 | static inline void␊ |
66 | error_in_transform(Botan::Exception & e)␊ |
67 | {␊ |
68 | // why do people make up their own out-of-memory exceptions?␊ |
69 | if (typeid(e) == typeid(Botan::Memory_Exhaustion))␊ |
70 | throw std::bad_alloc();␊ |
71 | ␊ |
72 | // these classes can all indicate data corruption␊ |
73 | else if (typeid(e) == typeid(Botan::Encoding_Error)␊ |
74 | || typeid(e) == typeid(Botan::Decoding_Error)␊ |
75 | || typeid(e) == typeid(Botan::Stream_IO_Error)␊ |
76 | || typeid(e) == typeid(Botan::Integrity_Failure))␊ |
77 | {␊ |
78 | // clean up the what() string a little: throw away the␊ |
79 | // "botan: TYPE: " part...␊ |
80 | string w(e.what());␊ |
81 | string::size_type pos = w.find(':');␊ |
82 | pos = w.find(':', pos+1);␊ |
83 | w = string(w.begin() + pos + 2, w.end());␊ |
84 | ␊ |
85 | // ... downcase the rest of it and replace underscores with spaces.␊ |
86 | for (string::iterator p = w.begin(); p != w.end(); p++)␊ |
87 | if (::isupper(*p))␊ |
88 | *p = ::tolower(*p);␊ |
89 | else if (*p == '_')␊ |
90 | *p = ' ';␊ |
91 | ␊ |
92 | E(false,␊ |
93 | F("%s\n"␊ |
94 | "this may be due to a memory glitch, data corruption during\n"␊ |
95 | "a network transfer, corruption of your database or workspace,\n"␊ |
96 | "or a bug in monotone. if the error persists, please contact\n"␊ |
97 | "%s for assistance.\n")␊ |
98 | % w % PACKAGE_BUGREPORT);␊ |
99 | }␊ |
100 | else␊ |
101 | throw;␊ |
102 | ␊ |
103 | I(false); // can't get here␊ |
104 | }␊ |
105 | ␊ |
106 | // worker function for the visible functions below␊ |
107 | namespace {␊ |
108 | template<typename XFM> string xform(XFM * x, string const & in)␊ |
109 | {␊ |
110 | string out;␊ |
111 | try␊ |
112 | {␊ |
113 | Botan::Pipe pipe(x);␊ |
114 | pipe.process_msg(in);␊ |
115 | out = pipe.read_all_as_string();␊ |
116 | }␊ |
117 | catch (Botan::Exception & e)␊ |
118 | {␊ |
119 | error_in_transform(e);␊ |
120 | }␊ |
121 | return out;␊ |
122 | }␊ |
123 | }␊ |
124 | ␊ |
125 | // full specializations for the usable cases of xform<XFM>()␊ |
126 | // use extra error checking in base64 and hex decoding␊ |
127 | #define SPECIALIZE_XFORM(T, carg) \␊ |
128 | template<> string xform<T>(string const &in) \␊ |
129 | { return xform(new T(carg), in); }␊ |
130 | ␊ |
131 | SPECIALIZE_XFORM(Botan::Base64_Encoder,);␊ |
132 | SPECIALIZE_XFORM(Botan::Base64_Decoder, Botan::IGNORE_WS);␊ |
133 | SPECIALIZE_XFORM(Botan::Hex_Encoder, Botan::Hex_Encoder::Lowercase);␊ |
134 | SPECIALIZE_XFORM(Botan::Hex_Decoder, Botan::IGNORE_WS);␊ |
135 | SPECIALIZE_XFORM(Botan::Gzip_Compression,);␊ |
136 | SPECIALIZE_XFORM(Botan::Gzip_Decompression,);␊ |
137 | ␊ |
138 | template <typename T>␊ |
139 | void pack(T const & in, base64< gzip<T> > & out)␊ |
140 | {␊ |
141 | string tmp;␊ |
142 | tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::␊ |
143 | ␊ |
144 | try␊ |
145 | {␊ |
146 | Botan::Pipe pipe(new Botan::Gzip_Compression(),␊ |
147 | new Botan::Base64_Encoder);␊ |
148 | pipe.process_msg(in());␊ |
149 | tmp = pipe.read_all_as_string();␊ |
150 | out = base64< gzip<T> >(tmp);␊ |
151 | }␊ |
152 | catch (Botan::Exception & e)␊ |
153 | {␊ |
154 | error_in_transform(e);␊ |
155 | }␊ |
156 | }␊ |
157 | ␊ |
158 | template <typename T>␊ |
159 | void unpack(base64< gzip<T> > const & in, T & out)␊ |
160 | {␊ |
161 | string tmp;␊ |
162 | tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::␊ |
163 | ␊ |
164 | try␊ |
165 | {␊ |
166 | Botan::Pipe pipe(new Botan::Base64_Decoder(),␊ |
167 | new Botan::Gzip_Decompression());␊ |
168 | pipe.process_msg(in());␊ |
169 | tmp = pipe.read_all_as_string();␊ |
170 | out = T(tmp);␊ |
171 | }␊ |
172 | catch (Botan::Exception & e)␊ |
173 | {␊ |
174 | error_in_transform(e);␊ |
175 | }␊ |
176 | }␊ |
177 | ␊ |
178 | // specialise them␊ |
179 | template void pack<data>(data const &, base64< gzip<data> > &);␊ |
180 | template void pack<delta>(delta const &, base64< gzip<delta> > &);␊ |
181 | template void unpack<data>(base64< gzip<data> > const &, data &);␊ |
182 | template void unpack<delta>(base64< gzip<delta> > const &, delta &);␊ |
183 | ␊ |
184 | // diffing and patching␊ |
185 | ␊ |
186 | void␊ |
187 | diff(data const & olddata,␊ |
188 | data const & newdata,␊ |
189 | delta & del)␊ |
190 | {␊ |
191 | string unpacked;␊ |
192 | compute_delta(olddata(), newdata(), unpacked);␊ |
193 | del = delta(unpacked);␊ |
194 | }␊ |
195 | ␊ |
196 | void␊ |
197 | patch(data const & olddata,␊ |
198 | delta const & del,␊ |
199 | data & newdata)␊ |
200 | {␊ |
201 | string result;␊ |
202 | apply_delta(olddata(), del(), result);␊ |
203 | newdata = data(result);␊ |
204 | }␊ |
205 | ␊ |
206 | // identifier (a.k.a. sha1 signature) calculation␊ |
207 | ␊ |
208 | void␊ |
209 | calculate_ident(data const & dat,␊ |
210 | hexenc<id> & ident)␊ |
211 | {␊ |
212 | try␊ |
213 | {␊ |
214 | Botan::Pipe p(new Botan::Hash_Filter("SHA-160"),␊ |
215 | new Botan::Hex_Encoder(Botan::Hex_Encoder::Lowercase));␊ |
216 | p.process_msg(dat());␊ |
217 | ident = hexenc<id>(p.read_all_as_string());␊ |
218 | }␊ |
219 | catch (Botan::Exception & e)␊ |
220 | {␊ |
221 | error_in_transform(e);␊ |
222 | }␊ |
223 | }␊ |
224 | ␊ |
225 | void␊ |
226 | calculate_ident(file_data const & dat,␊ |
227 | file_id & ident)␊ |
228 | {␊ |
229 | hexenc<id> tmp;␊ |
230 | calculate_ident(dat.inner(), tmp);␊ |
231 | ident = file_id(tmp);␊ |
232 | }␊ |
233 | ␊ |
234 | void␊ |
235 | calculate_ident(manifest_data const & dat,␊ |
236 | manifest_id & ident)␊ |
237 | {␊ |
238 | hexenc<id> tmp;␊ |
239 | calculate_ident(dat.inner(), tmp);␊ |
240 | ident = manifest_id(tmp);␊ |
241 | }␊ |
242 | ␊ |
243 | void␊ |
244 | calculate_ident(revision_data const & dat,␊ |
245 | revision_id & ident)␊ |
246 | {␊ |
247 | hexenc<id> tmp;␊ |
248 | calculate_ident(dat.inner(), tmp);␊ |
249 | ident = revision_id(tmp);␊ |
250 | }␊ |
251 | ␊ |
252 | string␊ |
253 | canonical_base64(string const & s)␊ |
254 | {␊ |
255 | try␊ |
256 | {␊ |
257 | Botan::Pipe pipe(new Botan::Base64_Decoder(),␊ |
258 | new Botan::Base64_Encoder());␊ |
259 | pipe.process_msg(s);␊ |
260 | return pipe.read_all_as_string();␊ |
261 | }␊ |
262 | catch (Botan::Exception & e)␊ |
263 | {␊ |
264 | error_in_transform(e);␊ |
265 | }␊ |
266 | }␊ |
267 | ␊ |
268 | ␊ |
269 | #ifdef BUILD_UNIT_TESTS␊ |
270 | #include "unit_tests.hh"␊ |
271 | #include <stdlib.h>␊ |
272 | ␊ |
273 | UNIT_TEST(transform, enc)␊ |
274 | {␊ |
275 | data d2, d1("the rain in spain");␊ |
276 | gzip<data> gzd1, gzd2;␊ |
277 | base64< gzip<data> > bgzd;␊ |
278 | encode_gzip(d1, gzd1);␊ |
279 | encode_base64(gzd1, bgzd);␊ |
280 | decode_base64(bgzd, gzd2);␊ |
281 | UNIT_TEST_CHECK(gzd2 == gzd1);␊ |
282 | decode_gzip(gzd2, d2);␊ |
283 | UNIT_TEST_CHECK(d2 == d1);␊ |
284 | }␊ |
285 | ␊ |
286 | UNIT_TEST(transform, rdiff)␊ |
287 | {␊ |
288 | data dat1(string("the first day of spring\nmakes me want to sing\n"));␊ |
289 | data dat2(string("the first day of summer\nis a major bummer\n"));␊ |
290 | delta del;␊ |
291 | diff(dat1, dat2, del);␊ |
292 | ␊ |
293 | data dat3;␊ |
294 | patch(dat1, del, dat3);␊ |
295 | UNIT_TEST_CHECK(dat3 == dat2);␊ |
296 | }␊ |
297 | ␊ |
298 | UNIT_TEST(transform, calculate_ident)␊ |
299 | {␊ |
300 | data input(string("the only blender which can be turned into the most powerful vaccum cleaner"));␊ |
301 | hexenc<id> output;␊ |
302 | string ident("86e03bdb3870e2a207dfd0dcbfd4c4f2e3bc97bd");␊ |
303 | calculate_ident(input, output);␊ |
304 | UNIT_TEST_CHECK(output() == ident);␊ |
305 | }␊ |
306 | ␊ |
307 | UNIT_TEST(transform, corruption_check)␊ |
308 | {␊ |
309 | data input(string("i'm so fragile, fragile when you're here"));␊ |
310 | gzip<data> gzd;␊ |
311 | encode_gzip(input, gzd);␊ |
312 | ␊ |
313 | // fake a single-bit error␊ |
314 | string gzs = gzd();␊ |
315 | string::iterator i = gzs.begin();␊ |
316 | while (*i != '+')␊ |
317 | i++;␊ |
318 | *i = 'k';␊ |
319 | ␊ |
320 | gzip<data> gzbad(gzs);␊ |
321 | data output;␊ |
322 | UNIT_TEST_CHECK_THROW(decode_gzip(gzbad, output), informative_failure);␊ |
323 | }␊ |
324 | ␊ |
325 | #endif // BUILD_UNIT_TESTS␊ |
326 | ␊ |
327 | // Local Variables:␊ |
328 | // mode: C++␊ |
329 | // fill-column: 76␊ |
330 | // c-file-style: "gnu"␊ |
331 | // indent-tabs-mode: nil␊ |
332 | // End:␊ |
333 | // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:␊ |