monotone

monotone Mtn Source Tree

Root/transforms.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include <algorithm>
11#include <cctype>
12#include <functional>
13#include <iterator>
14#include <sstream>
15#include <string>
16#include <vector>
17#include <wchar.h>
18
19#include <boost/tokenizer.hpp>
20#include <boost/scoped_array.hpp>
21
22#include "botan/botan.h"
23#include "botan/gzip.h"
24#include "botan/sha160.h"
25
26#include "cleanup.hh"
27#include "constants.hh"
28#include "sanity.hh"
29#include "transforms.hh"
30#include "simplestring_xform.hh"
31#include "vocab.hh"
32#include "xdelta.hh"
33
34using std::string;
35
36using boost::scoped_array;
37
38// this file contans various sorts of string transformations. each
39// transformation should be self-explanatory from its type signature. see
40// transforms.hh for the summary.
41
42// NB this file uses very "value-centric" functional approach; even though
43// many of the underlying transformations are "stream-centric" and the
44// underlying libraries (eg. crypto++) are stream oriented. this will
45// probably strike some people as contemptably inefficient, since it means
46// that occasionally 1, 2, or even 3 copies of an entire file will wind up
47// in memory at once. I am taking this approach for 3 reasons: first, I
48// want the type system to help me and value types are much easier to work
49// with than stream types. second, it is *much* easier to debug a program
50// that operates on values than streams, and correctness takes precedence
51// over all other features of this program. third, this is a peer-to-peer
52// sort of program for small-ish source-code text files, not a fileserver,
53// and is memory-limited anyways (for example, storing things in sqlite
54// requires they be able to fit in memory). you're hopefully not going to
55// be dealing with hundreds of users hammering on locks and memory
56// concurrently.
57//
58// if future analysis proves these assumptions wrong, feel free to revisit
59// the matter, but bring strong evidence along with you that the stream
60// paradigm "must" be used. this program is intended for source code
61// control and I make no bones about it.
62
63// the generic function
64template<typename XFM> string xform(string const & in)
65{
66 string out;
67 Botan::Pipe pipe(new XFM());
68 pipe.process_msg(in);
69 out = pipe.read_all_as_string();
70 return out;
71}
72
73// specialize it
74template string xform<Botan::Base64_Encoder>(string const &);
75template string xform<Botan::Base64_Decoder>(string const &);
76template string xform<Botan::Hex_Encoder>(string const &);
77template string xform<Botan::Hex_Decoder>(string const &);
78template string xform<Botan::Gzip_Compression>(string const &);
79template string xform<Botan::Gzip_Decompression>(string const &);
80
81// for use in hexenc encoding
82
83static inline void
84encode_hexenc_inner(string::const_iterator i,
85 string::const_iterator end,
86 char *out)
87{
88 static char const *tab = "0123456789abcdef";
89 for (; i != end; ++i)
90 {
91 *out++ = tab[(*i >> 4) & 0xf];
92 *out++ = tab[*i & 0xf];
93 }
94}
95
96
97string encode_hexenc(string const & in)
98{
99 if (LIKELY(in.size() == constants::idlen / 2))
100 {
101 char buf[constants::idlen];
102 encode_hexenc_inner(in.begin(), in.end(), buf);
103 return string(buf, constants::idlen);
104 }
105 else
106 {
107 scoped_array<char> buf(new char[in.size() * 2]);
108 encode_hexenc_inner(in.begin(), in.end(), buf.get());
109 return string(buf.get(), in.size() *2);
110 }
111}
112
113static inline char
114decode_hex_char(char c)
115{
116 if (c >= '0' && c <= '9')
117 return c - '0';
118 else
119 {
120 I(c >= 'a' && c <= 'f');
121 return c - 'a' + 10;
122 }
123}
124
125static inline void
126decode_hexenc_inner(string::const_iterator i,
127 string::const_iterator end,
128 char *out)
129{
130 for (; i != end; ++i)
131 {
132 char t = decode_hex_char(*i++);
133 t <<= 4;
134 t |= decode_hex_char(*i);
135 *out++ = t;
136 }
137}
138
139string decode_hexenc(string const & in)
140{
141
142 I(in.size() % 2 == 0);
143 if (LIKELY(in.size() == constants::idlen))
144 {
145 char buf[constants::idlen / 2];
146 decode_hexenc_inner(in.begin(), in.end(), buf);
147 return string(buf, constants::idlen / 2);
148 }
149 else
150 {
151 scoped_array<char> buf(new char[in.size() / 2]);
152 decode_hexenc_inner(in.begin(), in.end(), buf.get());
153 return string(buf.get(), in.size() / 2);
154 }
155}
156
157template <typename T>
158void pack(T const & in, base64< gzip<T> > & out)
159{
160 string tmp;
161 tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::
162
163 Botan::Pipe pipe(new Botan::Gzip_Compression(), new Botan::Base64_Encoder);
164 pipe.process_msg(in());
165 tmp = pipe.read_all_as_string();
166 out = tmp;
167}
168
169template <typename T>
170void unpack(base64< gzip<T> > const & in, T & out)
171{
172 string tmp;
173 tmp.reserve(in().size()); // FIXME: do some benchmarking and make this a constant::
174
175 Botan::Pipe pipe(new Botan::Base64_Decoder(), new Botan::Gzip_Decompression());
176 pipe.process_msg(in());
177 tmp = pipe.read_all_as_string();
178
179 out = tmp;
180}
181
182// specialise them
183template void pack<data>(data const &, base64< gzip<data> > &);
184template void pack<delta>(delta const &, base64< gzip<delta> > &);
185template void unpack<data>(base64< gzip<data> > const &, data &);
186template void unpack<delta>(base64< gzip<delta> > const &, delta &);
187
188// diffing and patching
189
190void
191diff(data const & olddata,
192 data const & newdata,
193 delta & del)
194{
195 string unpacked;
196 compute_delta(olddata(), newdata(), unpacked);
197 del = delta(unpacked);
198}
199
200void
201patch(data const & olddata,
202 delta const & del,
203 data & newdata)
204{
205 string result;
206 apply_delta(olddata(), del(), result);
207 newdata = result;
208}
209
210// identifier (a.k.a. sha1 signature) calculation
211
212void
213calculate_ident(data const & dat,
214 hexenc<id> & ident)
215{
216 Botan::Pipe p(new Botan::Hash_Filter("SHA-160"));
217 p.process_msg(dat());
218
219 id ident_decoded(p.read_all_as_string());
220 encode_hexenc(ident_decoded, ident);
221}
222
223void
224calculate_ident(base64< gzip<data> > const & dat,
225 hexenc<id> & ident)
226{
227 gzip<data> data_decoded;
228 data data_decompressed;
229 decode_base64(dat, data_decoded);
230 decode_gzip(data_decoded, data_decompressed);
231 calculate_ident(data_decompressed, ident);
232}
233
234void
235calculate_ident(file_data const & dat,
236 file_id & ident)
237{
238 hexenc<id> tmp;
239 calculate_ident(dat.inner(), tmp);
240 ident = tmp;
241}
242
243void
244calculate_ident(revision_data const & dat,
245 revision_id & ident)
246{
247 hexenc<id> tmp;
248 calculate_ident(dat.inner(), tmp);
249 ident = tmp;
250}
251
252void
253calculate_ident(roster_data const & dat,
254 roster_id & ident)
255{
256 hexenc<id> tmp;
257 calculate_ident(dat.inner(), tmp);
258 ident = tmp;
259}
260
261string
262canonical_base64(string const & s)
263{
264 return xform<Botan::Base64_Encoder>
265 (xform<Botan::Base64_Decoder>(s));
266}
267
268
269#ifdef BUILD_UNIT_TESTS
270#include "unit_tests.hh"
271#include <stdlib.h>
272
273static void
274enc_test()
275{
276 data d2, d1("the rain in spain");
277 gzip<data> gzd1, gzd2;
278 base64< gzip<data> > bgzd;
279 encode_gzip(d1, gzd1);
280 encode_base64(gzd1, bgzd);
281 decode_base64(bgzd, gzd2);
282 BOOST_CHECK(gzd2 == gzd1);
283 decode_gzip(gzd2, d2);
284 BOOST_CHECK(d2 == d1);
285}
286
287static void
288rdiff_test()
289{
290 data dat1(string("the first day of spring\nmakes me want to sing\n"));
291 data dat2(string("the first day of summer\nis a major bummer\n"));
292 delta del;
293 diff(dat1, dat2, del);
294
295 data dat3;
296 patch(dat1, del, dat3);
297 BOOST_CHECK(dat3 == dat2);
298}
299
300static void
301calculate_ident_test()
302{
303 data input(string("the only blender which can be turned into the most powerful vaccum cleaner"));
304 hexenc<id> output;
305 string ident("86e03bdb3870e2a207dfd0dcbfd4c4f2e3bc97bd");
306 calculate_ident(input, output);
307 BOOST_CHECK(output() == ident);
308}
309
310void
311add_transform_tests(test_suite * suite)
312{
313 I(suite);
314 suite->add(BOOST_TEST_CASE(&enc_test));
315 suite->add(BOOST_TEST_CASE(&rdiff_test));
316 suite->add(BOOST_TEST_CASE(&calculate_ident_test));
317}
318
319#endif // BUILD_UNIT_TESTS
320
321// Local Variables:
322// mode: C++
323// fill-column: 76
324// c-file-style: "gnu"
325// indent-tabs-mode: nil
326// End:
327// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status