monotone

monotone Mtn Source Tree

Root/transforms.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <iterator>
12#include "botan_pipe_cache.hh"
13#include "botan/botan.h"
14#include "botan/sha160.h"
15#include "gzip.hh"
16
17#include "transforms.hh"
18#include "xdelta.hh"
19#include "char_classifiers.hh"
20
21using std::string;
22using Botan::Pipe;
23using Botan::Base64_Encoder;
24using Botan::Base64_Decoder;
25using Botan::Hex_Encoder;
26using Botan::Hex_Decoder;
27using Botan::Gzip_Compression;
28using Botan::Gzip_Decompression;
29using Botan::Hash_Filter;
30
// this file contains various sorts of string transformations. each
// transformation should be self-explanatory from its type signature. see
// transforms.hh for the summary.
34
35// NB this file uses very "value-centric" functional approach; even though
36// many of the underlying transformations are "stream-centric" and the
37// underlying libraries (eg. crypto++) are stream oriented. this will
// probably strike some people as contemptibly inefficient, since it means
39// that occasionally 1, 2, or even 3 copies of an entire file will wind up
40// in memory at once. I am taking this approach for 3 reasons: first, I
41// want the type system to help me and value types are much easier to work
42// with than stream types. second, it is *much* easier to debug a program
43// that operates on values than streams, and correctness takes precedence
44// over all other features of this program. third, this is a peer-to-peer
45// sort of program for small-ish source-code text files, not a fileserver,
46// and is memory-limited anyways (for example, storing things in sqlite
47// requires they be able to fit in memory). you're hopefully not going to
48// be dealing with hundreds of users hammering on locks and memory
49// concurrently.
50//
51// if future analysis proves these assumptions wrong, feel free to revisit
52// the matter, but bring strong evidence along with you that the stream
53// paradigm "must" be used. this program is intended for source code
54// control and I make no bones about it.
55
56NORETURN(static inline void error_in_transform(Botan::Exception & e));
57
58static inline void
59error_in_transform(Botan::Exception & e)
60{
61 // these classes can all indicate data corruption
62 if (typeid(e) == typeid(Botan::Encoding_Error)
63 || typeid(e) == typeid(Botan::Decoding_Error)
64 || typeid(e) == typeid(Botan::Stream_IO_Error)
65 || typeid(e) == typeid(Botan::Integrity_Failure))
66 {
67 // clean up the what() string a little: throw away the
68 // "botan: TYPE: " part...
69 string w(e.what());
70 string::size_type pos = w.find(':');
71 pos = w.find(':', pos+1);
72 w = string(w.begin() + pos + 2, w.end());
73
74 // ... downcase the rest of it and replace underscores with spaces.
75 for (string::iterator p = w.begin(); p != w.end(); p++)
76 {
77 *p = to_lower(*p);
78 if (*p == '_')
79 *p = ' ';
80 }
81
82 E(false,
83 F("%s\n"
84 "this may be due to a memory glitch, data corruption during\n"
85 "a network transfer, corruption of your database or workspace,\n"
86 "or a bug in monotone. if the error persists, please contact\n"
87 "%s for assistance.\n")
88 % w % PACKAGE_BUGREPORT);
89 }
90 else
91 throw;
92
93 I(false); // can't get here
94}
95
96// full specializations for the usable cases of xform<XFM>()
97// use extra error checking in base64 and hex decoding
98#define SPECIALIZE_XFORM(T, carg) \
99 template<> string xform<T>(string const & in) \
100 { \
101 string out; \
102 try \
103 { \
104 static cached_botan_pipe pipe(new Pipe(new T(carg))); \
105 /* this might actually be a problem here */ \
106 I(pipe->message_count() < Pipe::LAST_MESSAGE); \
107 pipe->process_msg(in); \
108 out = pipe->read_all_as_string(Pipe::LAST_MESSAGE); \
109 } \
110 catch (Botan::Exception & e) \
111 { \
112 error_in_transform(e); \
113 } \
114 return out; \
115 }
116
117SPECIALIZE_XFORM(Base64_Encoder,);
118SPECIALIZE_XFORM(Base64_Decoder, Botan::IGNORE_WS);
119//SPECIALIZE_XFORM(Hex_Encoder, Hex_Encoder::Lowercase);
120template<> string xform<Botan::Hex_Encoder>(string const & in)
121{
122 string out;
123 out.reserve(in.size()<<1);
124 for (string::const_iterator i = in.begin();
125 i != in.end(); ++i)
126 {
127 int h = (*i>>4) & 0x0f;
128 if (h < 10)
129 out.push_back(h + '0');
130 else
131 out.push_back(h + 'a' - 10);
132 int l = *i & 0x0f;
133 if (l < 10)
134 out.push_back(l + '0');
135 else
136 out.push_back(l + 'a' - 10);
137 }
138 return out;
139}
140//SPECIALIZE_XFORM(Hex_Decoder, Botan::IGNORE_WS);
141template<> string xform<Botan::Hex_Decoder>(string const & in)
142{
143 string out;
144 out.reserve(in.size()>>1);
145 bool high(true);
146 int o = 0;
147 for (string::const_iterator i = in.begin();
148 i != in.end(); ++i)
149 {
150 int c = *i;
151 if (c >= '0' && c <= '9')
152 {
153 o += (c - '0');
154 }
155 else if (c >= 'a' && c <= 'f')
156 {
157 o += (c - 'a' + 10);
158 }
159 else if (c >= 'A' && c <= 'F')
160 {
161 o += (c - 'A' + 10);
162 }
163 else if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
164 {
165 continue;
166 }
167 else // garbage
168 {
169 try
170 {
171 throw Botan::Decoding_Error(string("invalid hex character '") + (char)c + "'");
172 }
173 catch(Botan::Exception & e)
174 {
175 error_in_transform(e);
176 }
177 }
178 if (high)
179 {
180 o <<= 4;
181 }
182 else
183 {
184 out.push_back(o);
185 o = 0;
186 }
187 high = !high;
188 }
189 if (!high)
190 { // Hex string wasn't a whole number of bytes
191 //I(false); // Drop the last char (!!)
192 }
193 return out;
194}
// gzip compression and decompression: the filters are constructed with
// no argument (the macro's carg parameter is left empty).
SPECIALIZE_XFORM(Gzip_Compression,);
SPECIALIZE_XFORM(Gzip_Decompression,);
197
198template <typename T>
199void pack(T const & in, base64< gzip<T> > & out)
200{
201 string tmp;
202 tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::
203
204 try
205 {
206 static cached_botan_pipe pipe(new Pipe(new Gzip_Compression,
207 new Base64_Encoder));
208 pipe->process_msg(in());
209 tmp = pipe->read_all_as_string(Pipe::LAST_MESSAGE);
210 out = base64< gzip<T> >(tmp);
211 }
212 catch (Botan::Exception & e)
213 {
214 error_in_transform(e);
215 }
216}
217
218template <typename T>
219void unpack(base64< gzip<T> > const & in, T & out)
220{
221 try
222 {
223 static cached_botan_pipe pipe(new Pipe(new Base64_Decoder,
224 new Gzip_Decompression));
225 pipe->process_msg(in());
226 out = T(pipe->read_all_as_string(Pipe::LAST_MESSAGE));
227 }
228 catch (Botan::Exception & e)
229 {
230 error_in_transform(e);
231 }
232}
233
// explicitly instantiate pack() and unpack() for the two payload types
// they are used with here, data and delta.
template void pack<data>(data const &, base64< gzip<data> > &);
template void pack<delta>(delta const &, base64< gzip<delta> > &);
template void unpack<data>(base64< gzip<data> > const &, data &);
template void unpack<delta>(base64< gzip<delta> > const &, delta &);
239
240
241// identifier (a.k.a. sha1 signature) calculation
242
243void
244calculate_ident(data const & dat,
245 id & ident)
246{
247 try
248 {
249 static cached_botan_pipe p(new Pipe(new Hash_Filter("SHA-160")));
250 p->process_msg(dat());
251 ident = id(p->read_all_as_string(Pipe::LAST_MESSAGE));
252 }
253 catch (Botan::Exception & e)
254 {
255 error_in_transform(e);
256 }
257}
258
259void
260calculate_ident(file_data const & dat,
261 file_id & ident)
262{
263 id tmp;
264 calculate_ident(dat.inner(), tmp);
265 ident = file_id(tmp);
266}
267
268void
269calculate_ident(manifest_data const & dat,
270 manifest_id & ident)
271{
272 id tmp;
273 calculate_ident(dat.inner(), tmp);
274 ident = manifest_id(tmp);
275}
276
277void
278calculate_ident(revision_data const & dat,
279 revision_id & ident)
280{
281 id tmp;
282 calculate_ident(dat.inner(), tmp);
283 ident = revision_id(tmp);
284}
285
286#ifdef BUILD_UNIT_TESTS
287#include "unit_tests.hh"
288#include <stdlib.h>
289
290UNIT_TEST(transform, enc)
291{
292 data d2, d1("the rain in spain");
293 gzip<data> gzd1, gzd2;
294 base64< gzip<data> > bgzd;
295 encode_gzip(d1, gzd1);
296 bgzd = encode_base64(gzd1);
297 gzd2 = decode_base64(bgzd);
298 UNIT_TEST_CHECK(gzd2 == gzd1);
299 decode_gzip(gzd2, d2);
300 UNIT_TEST_CHECK(d2 == d1);
301}
302
303UNIT_TEST(transform, rdiff)
304{
305 data dat1(string("the first day of spring\nmakes me want to sing\n"));
306 data dat2(string("the first day of summer\nis a major bummer\n"));
307 delta del;
308 diff(dat1, dat2, del);
309
310 data dat3;
311 patch(dat1, del, dat3);
312 UNIT_TEST_CHECK(dat3 == dat2);
313}
314
315UNIT_TEST(transform, calculate_ident)
316{
317 data input(string("the only blender which can be turned into the most powerful vaccum cleaner"));
318 id output;
319 string ident("86e03bdb3870e2a207dfd0dcbfd4c4f2e3bc97bd");
320 calculate_ident(input, output);
321 UNIT_TEST_CHECK(output() == decode_hexenc(ident));
322}
323
324UNIT_TEST(transform, corruption_check)
325{
326 data input(string("i'm so fragile, fragile when you're here"));
327 gzip<data> gzd;
328 encode_gzip(input, gzd);
329
330 // fake a single-bit error
331 string gzs = gzd();
332 string::iterator i = gzs.begin();
333 while (*i != '+')
334 i++;
335 *i = 'k';
336
337 gzip<data> gzbad(gzs);
338 data output;
339 UNIT_TEST_CHECK_THROW(decode_gzip(gzbad, output), informative_failure);
340}
341
342#endif // BUILD_UNIT_TESTS
343
344// Local Variables:
345// mode: C++
346// fill-column: 76
347// c-file-style: "gnu"
348// indent-tabs-mode: nil
349// End:
350// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status