monotone

monotone Mtn Source Tree

Root/transforms.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <algorithm>
12#include <cctype>
13#include <functional>
14#include <iterator>
15#include <sstream>
16#include "vector.hh"
17
18
19#include <boost/tokenizer.hpp>
20#include <boost/scoped_array.hpp>
21
22#include "botan/botan.h"
23#include "botan/sha160.h"
24#include "gzip.hh"
25
26#include "cleanup.hh"
27#include "constants.hh"
28#include "sanity.hh"
29#include "transforms.hh"
30#include "simplestring_xform.hh"
31#include "vocab.hh"
32#include "xdelta.hh"
33
34using std::string;
35
36using boost::scoped_array;
37
38// this file contans various sorts of string transformations. each
39// transformation should be self-explanatory from its type signature. see
40// transforms.hh for the summary.
41
42// NB this file uses very "value-centric" functional approach; even though
43// many of the underlying transformations are "stream-centric" and the
44// underlying libraries (eg. crypto++) are stream oriented. this will
45// probably strike some people as contemptably inefficient, since it means
46// that occasionally 1, 2, or even 3 copies of an entire file will wind up
47// in memory at once. I am taking this approach for 3 reasons: first, I
48// want the type system to help me and value types are much easier to work
49// with than stream types. second, it is *much* easier to debug a program
50// that operates on values than streams, and correctness takes precedence
51// over all other features of this program. third, this is a peer-to-peer
52// sort of program for small-ish source-code text files, not a fileserver,
53// and is memory-limited anyways (for example, storing things in sqlite
54// requires they be able to fit in memory). you're hopefully not going to
55// be dealing with hundreds of users hammering on locks and memory
56// concurrently.
57//
58// if future analysis proves these assumptions wrong, feel free to revisit
59// the matter, but bring strong evidence along with you that the stream
60// paradigm "must" be used. this program is intended for source code
61// control and I make no bones about it.
62
63NORETURN(static inline void error_in_transform(Botan::Exception & e));
64
65static inline void
66error_in_transform(Botan::Exception & e)
67{
68 // why do people make up their own out-of-memory exceptions?
69 if (typeid(e) == typeid(Botan::Memory_Exhaustion))
70 throw std::bad_alloc();
71
72 // these classes can all indicate data corruption
73 else if (typeid(e) == typeid(Botan::Encoding_Error)
74 || typeid(e) == typeid(Botan::Decoding_Error)
75 || typeid(e) == typeid(Botan::Stream_IO_Error)
76 || typeid(e) == typeid(Botan::Integrity_Failure))
77 {
78 // clean up the what() string a little: throw away the
79 // "botan: TYPE: " part...
80 string w(e.what());
81 string::size_type pos = w.find(':');
82 pos = w.find(':', pos+1);
83 w = string(w.begin() + pos + 2, w.end());
84
85 // ... downcase the rest of it and replace underscores with spaces.
86 for (string::iterator p = w.begin(); p != w.end(); p++)
87 if (::isupper(*p))
88 *p = ::tolower(*p);
89 else if (*p == '_')
90 *p = ' ';
91
92 E(false,
93 F("%s\n"
94 "this may be due to a memory glitch, data corruption during\n"
95 "a network transfer, corruption of your database or workspace,\n"
96 "or a bug in monotone. if the error persists, please contact\n"
97 "%s for assistance.\n")
98 % w % PACKAGE_BUGREPORT);
99 }
100 else
101 throw;
102
103 I(false); // can't get here
104}
105
106// worker function for the visible functions below
107namespace {
108template<typename XFM> string xform(XFM * x, string const & in)
109{
110 string out;
111 try
112 {
113 Botan::Pipe pipe(x);
114 pipe.process_msg(in);
115 out = pipe.read_all_as_string();
116 }
117 catch (Botan::Exception & e)
118 {
119 error_in_transform(e);
120 }
121 return out;
122}
123}
124
125// full specializations for the usable cases of xform<XFM>()
126// use extra error checking in base64 and hex decoding
127#define SPECIALIZE_XFORM(T, carg) \
128 template<> string xform<T>(string const &in) \
129 { return xform(new T(carg), in); }
130
131SPECIALIZE_XFORM(Botan::Base64_Encoder,);
132SPECIALIZE_XFORM(Botan::Base64_Decoder, Botan::IGNORE_WS);
133SPECIALIZE_XFORM(Botan::Hex_Encoder, Botan::Hex_Encoder::Lowercase);
134SPECIALIZE_XFORM(Botan::Hex_Decoder, Botan::IGNORE_WS);
135SPECIALIZE_XFORM(Botan::Gzip_Compression,);
136SPECIALIZE_XFORM(Botan::Gzip_Decompression,);
137
138template <typename T>
139void pack(T const & in, base64< gzip<T> > & out)
140{
141 string tmp;
142 tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::
143
144 try
145 {
146 Botan::Pipe pipe(new Botan::Gzip_Compression(),
147 new Botan::Base64_Encoder);
148 pipe.process_msg(in());
149 tmp = pipe.read_all_as_string();
150 out = base64< gzip<T> >(tmp);
151 }
152 catch (Botan::Exception & e)
153 {
154 error_in_transform(e);
155 }
156}
157
158template <typename T>
159void unpack(base64< gzip<T> > const & in, T & out)
160{
161 string tmp;
162 tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::
163
164 try
165 {
166 Botan::Pipe pipe(new Botan::Base64_Decoder(),
167 new Botan::Gzip_Decompression());
168 pipe.process_msg(in());
169 tmp = pipe.read_all_as_string();
170 out = T(tmp);
171 }
172 catch (Botan::Exception & e)
173 {
174 error_in_transform(e);
175 }
176}
177
178// specialise them
179template void pack<data>(data const &, base64< gzip<data> > &);
180template void pack<delta>(delta const &, base64< gzip<delta> > &);
181template void unpack<data>(base64< gzip<data> > const &, data &);
182template void unpack<delta>(base64< gzip<delta> > const &, delta &);
183
184// diffing and patching
185
186void
187diff(data const & olddata,
188 data const & newdata,
189 delta & del)
190{
191 string unpacked;
192 compute_delta(olddata(), newdata(), unpacked);
193 del = delta(unpacked);
194}
195
196void
197patch(data const & olddata,
198 delta const & del,
199 data & newdata)
200{
201 string result;
202 apply_delta(olddata(), del(), result);
203 newdata = data(result);
204}
205
206// identifier (a.k.a. sha1 signature) calculation
207
208void
209calculate_ident(data const & dat,
210 hexenc<id> & ident)
211{
212 try
213 {
214 Botan::Pipe p(new Botan::Hash_Filter("SHA-160"),
215 new Botan::Hex_Encoder(Botan::Hex_Encoder::Lowercase));
216 p.process_msg(dat());
217 ident = hexenc<id>(p.read_all_as_string());
218 }
219 catch (Botan::Exception & e)
220 {
221 error_in_transform(e);
222 }
223}
224
225void
226calculate_ident(file_data const & dat,
227 file_id & ident)
228{
229 hexenc<id> tmp;
230 calculate_ident(dat.inner(), tmp);
231 ident = file_id(tmp);
232}
233
234void
235calculate_ident(manifest_data const & dat,
236 manifest_id & ident)
237{
238 hexenc<id> tmp;
239 calculate_ident(dat.inner(), tmp);
240 ident = manifest_id(tmp);
241}
242
243void
244calculate_ident(revision_data const & dat,
245 revision_id & ident)
246{
247 hexenc<id> tmp;
248 calculate_ident(dat.inner(), tmp);
249 ident = revision_id(tmp);
250}
251
252string
253canonical_base64(string const & s)
254{
255 try
256 {
257 Botan::Pipe pipe(new Botan::Base64_Decoder(),
258 new Botan::Base64_Encoder());
259 pipe.process_msg(s);
260 return pipe.read_all_as_string();
261 }
262 catch (Botan::Exception & e)
263 {
264 error_in_transform(e);
265 }
266}
267
268
269#ifdef BUILD_UNIT_TESTS
270#include "unit_tests.hh"
271#include <stdlib.h>
272
273UNIT_TEST(transform, enc)
274{
275 data d2, d1("the rain in spain");
276 gzip<data> gzd1, gzd2;
277 base64< gzip<data> > bgzd;
278 encode_gzip(d1, gzd1);
279 encode_base64(gzd1, bgzd);
280 decode_base64(bgzd, gzd2);
281 UNIT_TEST_CHECK(gzd2 == gzd1);
282 decode_gzip(gzd2, d2);
283 UNIT_TEST_CHECK(d2 == d1);
284}
285
286UNIT_TEST(transform, rdiff)
287{
288 data dat1(string("the first day of spring\nmakes me want to sing\n"));
289 data dat2(string("the first day of summer\nis a major bummer\n"));
290 delta del;
291 diff(dat1, dat2, del);
292
293 data dat3;
294 patch(dat1, del, dat3);
295 UNIT_TEST_CHECK(dat3 == dat2);
296}
297
298UNIT_TEST(transform, calculate_ident)
299{
300 data input(string("the only blender which can be turned into the most powerful vaccum cleaner"));
301 hexenc<id> output;
302 string ident("86e03bdb3870e2a207dfd0dcbfd4c4f2e3bc97bd");
303 calculate_ident(input, output);
304 UNIT_TEST_CHECK(output() == ident);
305}
306
307UNIT_TEST(transform, corruption_check)
308{
309 data input(string("i'm so fragile, fragile when you're here"));
310 gzip<data> gzd;
311 encode_gzip(input, gzd);
312
313 // fake a single-bit error
314 string gzs = gzd();
315 string::iterator i = gzs.begin();
316 while (*i != '+')
317 i++;
318 *i = 'k';
319
320 gzip<data> gzbad(gzs);
321 data output;
322 UNIT_TEST_CHECK_THROW(decode_gzip(gzbad, output), informative_failure);
323}
324
325#endif // BUILD_UNIT_TESTS
326
327// Local Variables:
328// mode: C++
329// fill-column: 76
330// c-file-style: "gnu"
331// indent-tabs-mode: nil
332// End:
333// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status