monotone

monotone Mtn Source Tree

Root/xdelta.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10// this file implements the xdelta algorithm, produces and consumes simple
11// copy/insert binary patches with the following structure:
12//
13// patch := (copy|insert)*
14// copy := 'C', ' ', pos=uint, ' ', len=uint, '\n'
15// insert := 'I', ' ', len=uint, '\n', payload=(byte x len), '\n'
16//
17// this means you can generally read the patch if you print it on stdout,
18// when it applies to text, but it can also apply to any binary, so the
19// hunk payload itself might look awful. I made it semi-ascii only to make
20// it slightly easier to debug; you really shouldn't read it normally. it's
21// a strict format with minimal checking, so it must be transport-encoded
22// to avoid whitespace munging.
23//
24// if you want to *read* a patch, you will like unidiff format much better.
25// take a look in diff_patch.(cc|hh) for a nice interface to that.
26
27#include "base.hh"
28#include <algorithm>
29#include <vector>
30#include <set>
31#include <sstream>
32
33#include <boost/shared_ptr.hpp>
34#include <boost/version.hpp>
35
36#include "adler32.hh"
37#include "hash_map.hh"
38#include "numeric_vocab.hh"
39#include "sanity.hh"
40#include "xdelta.hh"
41
42using std::make_pair;
43using std::min;
44using std::ostream;
45using std::ostringstream;
46using std::pair;
47using std::set;
48using std::string;
49using std::vector;
50using std::memcmp;
51using std::lower_bound;
52
53using boost::shared_ptr;
54
55struct identity {size_t operator()(u32 const & v) const { return static_cast<size_t>(v);}};
56typedef pair<string::size_type, string::size_type> extent;
57typedef hashmap::hash_map<u32, extent> match_table;
58
59struct
60insn
61{
62 insn(char c) : code(insert), pos(0), len(0), payload("") { payload += c; }
63 insn(string s) : code(insert), pos(0), len(s.size()), payload(s) {}
64 insn(u32 p, u32 l) : code(copy), pos(p), len(l) {}
65 enum { insert, copy } code;
66 u32 pos, len;
67 string payload;
68};
69
70ostream & operator<<(ostream & ost, insn const & i)
71{
72 if (i.code == insn::insert)
73 {
74 ost << "I " << i.payload.size() << '\n';
75 ost.write(i.payload.data(), i.payload.size());
76 ost << '\n';
77 }
78 else
79 ost << "C " << i.pos << ' ' << i.len << '\n';
80 return ost;
81}
82
83
84static inline void
85init_match_table(string const & a,
86 const string::size_type blocksz,
87 match_table & tab)
88{
89 string::size_type sz = a.size();
90 for (string::size_type i = 0; i < sz; i += blocksz)
91 {
92 string::size_type step = ((i + blocksz) >= sz) ? (sz - i) : blocksz;
93 u32 sum = adler32(reinterpret_cast<u8 const *>(a.data() + i), step).sum();
94 if (tab.find(sum) == tab.end())
95 tab.insert(make_pair(sum, make_pair(i, step)));
96 }
97 return;
98}
99
100
101static inline bool
102find_match(match_table const & matches,
103 vector<insn> & delta,
104 adler32 const & rolling,
105 string const & a,
106 string const & b,
107 string::size_type bpos,
108 string::size_type & apos,
109 string::size_type & alen,
110 string::size_type & badvance)
111{
112 u32 sum = rolling.sum();
113 match_table::const_iterator e = matches.find(sum);
114
115 // maybe we haven't seen it at all?
116 if (e == matches.end())
117 return false;
118
119 string::size_type tpos = e->second.first;
120 string::size_type tlen = e->second.second;
121
122 I(tpos < a.size());
123 I(tpos + tlen <= a.size());
124
125 // maybe it's a false match?
126 if (memcmp(a.data() + tpos, b.data() + bpos, tlen) != 0)
127 return false;
128
129 apos = tpos;
130
131 // see if we can extend our match forwards
132 string::const_iterator ai = a.begin() + apos + tlen;
133 string::const_iterator ae = a.end();
134 string::const_iterator bi = b.begin() + bpos + tlen;
135 string::const_iterator be = b.end();
136
137 while ((ai != ae)
138 && (bi != be)
139 && (*ai == *bi))
140 {
141 ++tlen;
142 ++ai;
143 ++bi;
144 }
145
146 alen = tlen;
147 badvance = tlen;
148
149 // see if we can extend backwards into a previous insert hunk
150 if (! delta.empty() && delta.back().code == insn::insert)
151 {
152 while (apos > 0
153 && bpos > 0
154 && a[apos - 1] == b[bpos - 1]
155 && !delta.back().payload.empty())
156 {
157 I(a[apos - 1] == *(delta.back().payload.rbegin()));
158 I(delta.back().payload.size() > 0);
159 delta.back().payload.resize(delta.back().payload.size() - 1);
160 --apos;
161 --bpos;
162 ++alen;
163 // the significant thing here is that we do not move
164 // 'badvance' forward, just alen.
165 }
166
167 // if we've extended back to consume the *entire* insert,
168 // let's do away with it altogether.
169 if (delta.back().payload.empty())
170 {
171 delta.pop_back();
172 }
173 }
174
175 I(memcmp(a.data() + apos, b.data() + bpos, alen) == 0);
176 return true;
177}
178
179static inline void
180insert_insn(vector<insn> & delta, char c)
181{
182 if (delta.empty() || delta.back().code == insn::copy)
183 delta.push_back(insn(c));
184 else
185 {
186 // design in gcc 3.3 and 4.0 STL expands the string one character
187 // at a time when appending single characters ?!
188 // see libstdc++5-3.3-dev 3.3.5-13: basic_string.h:471, calling with
189 // size_type(1), then basic_string.tcc:717 reserving one more byte
190 // if needed.
191 // see libstdc++6-4.0-dev 4.0.3-3: basic_string.h:770 calling push_back
192 // then basic_string.h: 849 again adding 1 to the length.
193 if (delta.back().payload.capacity() == delta.back().payload.size())
194 // standard amortized constant rule
195 delta.back().payload.reserve(delta.back().payload.size() * 2);
196 delta.back().payload += c;
197 }
198}
199
200
201static inline void
202copy_insn(vector<insn> & delta, string::size_type i, string::size_type matchlen)
203{
204 delta.push_back(insn(i, matchlen));
205}
206
207
208static void
209compute_delta_insns(string const & a,
210 string const & b,
211 vector<insn> & delta)
212{
213 static const string::size_type blocksz = 64;
214 match_table matches;
215 init_match_table(a, blocksz, matches);
216
217 if (b.size() < blocksz)
218 {
219 for (string::size_type i = 0; i < b.size(); ++i)
220 insert_insn(delta, b[i]);
221 return;
222 }
223
224 adler32 rolling(reinterpret_cast<u8 const *>(b.data()), blocksz);
225
226 for (string::size_type
227 sz = b.size(),
228 lo = 0;
229 lo < sz; )
230 {
231 string::size_type apos = 0, alen = 1, badvance = 1;
232
233 bool found_match = find_match(matches, delta, rolling, a, b, lo, apos, alen, badvance);
234
235 // There are basically three cases:
236 // 1) advance by 1 (common, no match found)
237 // 2) advance by >blocksz (semi-common, usual case when a match is found)
238 // 3) advance by <blocksz (rare, unusual case when a match is found)
239 // in case (2), all of the rolling checksum data will be entirely replaced, so
240 // we can do a fast skip forward.
241 if (found_match)
242 {
243 copy_insn(delta, apos, alen);
244 u32 save_lo = lo;
245 if (badvance <= blocksz)
246 {
247 string::size_type next = lo;
248 I(next < b.size() && (lo + badvance - 1) < b.size());
249 for (; next < lo + badvance; ++next)
250 {
251 rolling.out(static_cast<u8>(b[next]));
252 if (next + blocksz < b.size())
253 rolling.in(static_cast<u8>(b[next + blocksz]));
254 }
255 lo = next;
256 }
257
258 // Skip advancement is always correct; however, for a small
259 // increment it is more expensive than incremental advancement
260 // Cost of doing a in() + out() is roughly the same as doing a
261 // replace_with for 1 character, so if we are advancing more than
262 // blocksz/2, it will be better to do a replacement than an
263 // incremental advance. The out could be more expensive because
264 // it does a multiply, but for now, ignore this; it turns out that
265 // advancements in the range of [2..blocksz-1] are actually really
266 // rare.
267 if (badvance >= blocksz/2)
268 {
269 u32 new_lo = save_lo + badvance;
270 u32 new_hi = new_lo + blocksz;
271 if (new_hi > b.size())
272 {
273 new_hi = b.size();
274 }
275 I(new_lo <= new_hi);
276 rolling.replace_with(reinterpret_cast<u8 const *>(b.data() + new_lo), new_hi-new_lo);
277 lo = new_lo;
278 }
279 }
280 else
281 {
282 I(apos + alen <= a.size());
283 I(alen == 1);
284 I(alen < blocksz);
285 I(lo < b.size());
286 insert_insn(delta, b[lo]);
287 rolling.out(static_cast<u8>(b[lo]));
288 if (lo + blocksz < b.size())
289 {
290 rolling.in(static_cast<u8>(b[lo+blocksz]));
291 }
292 ++lo;
293 }
294 }
295}
296
297void
298write_delta_insns(vector<insn> const & delta_insns,
299 string & delta)
300{
301 delta.clear();
302 ostringstream oss;
303 for (vector<insn>::const_iterator i = delta_insns.begin();
304 i != delta_insns.end(); ++i)
305 {
306 oss << *i;
307 }
308 delta = oss.str();
309}
310
311void
312compute_delta(string const & a,
313 string const & b,
314 string & delta)
315{
316 vector<insn> delta_insns;
317
318 // FIXME: in theory you can do empty files and empty deltas; write some
319 // tests to be sure you're doing it right, and in any case implement the
320 // necessary logic directly in this function, don't bother doing an
321 // xdelta. several places of the xdelta code prefer assertions which are
322 // only true with non-empty chunks anyways.
323
324 if (a.size() == 0 && b.size() != 0)
325 delta_insns.push_back(insn(b));
326 else if (a.size() != 0 && b.size() == 0)
327 delta_insns.push_back(insn(0, 0));
328 else if (a == b)
329 delta_insns.push_back(insn(0, a.size()));
330 else
331 {
332 I(a.size() > 0);
333 I(b.size() > 0);
334
335 L(FL("computing binary delta instructions"));
336 compute_delta_insns(a, b, delta_insns);
337 L(FL("computed binary delta instructions"));
338 }
339
340 ostringstream oss;
341 for (vector<insn>::const_iterator i = delta_insns.begin();
342 i != delta_insns.end(); ++i)
343 {
344 oss << *i;
345 }
346 delta = oss.str();
347}
348
349struct
350simple_applicator
351 : public delta_applicator
352{
353 string src;
354 string dst;
355 virtual ~simple_applicator () {}
356 virtual void begin(string const & base)
357 {
358 src = base;
359 dst.clear();
360 }
361 virtual void next()
362 {
363 swap(src,dst);
364 dst.clear();
365 }
366 virtual void finish(string & out)
367 {
368 out = src;
369 }
370
371 virtual void copy(string::size_type pos, string::size_type len)
372 {
373 dst.append(src, pos, len);
374 }
375 virtual void insert(string const & str)
376 {
377 dst.append(str);
378 }
379};
380
381inline string::size_type
382read_num(string::const_iterator &i,
383 string::const_iterator e)
384{
385 string::size_type n = 0;
386
387 while (i != e && *i == ' ')
388 ++i;
389
390 while (i != e && *i >= '0' && *i <= '9')
391 {
392 n *= 10;
393 n += static_cast<size_t>(*i - '0');
394 ++i;
395 }
396 return n;
397}
398
399void
400apply_delta(shared_ptr<delta_applicator> da,
401 string const & delta)
402{
403 string::const_iterator i = delta.begin();
404 while (i != delta.end() && (*i == 'I' || *i == 'C'))
405 {
406 if (*i == 'I')
407 {
408 ++i;
409 I(i != delta.end());
410 string::size_type len = read_num(i, delta.end());
411 I(i != delta.end());
412 I(*i == '\n');
413 ++i;
414 I(i != delta.end());
415 I((i - delta.begin()) + len <= delta.size());
416 if (len > 0)
417 {
418 string tmp(i, i+len);
419 da->insert(tmp);
420 }
421 i += len;
422 }
423 else
424 {
425 I(*i == 'C');
426 ++i;
427 I(i != delta.end());
428 string::size_type pos = read_num(i, delta.end());
429 I(i != delta.end());
430 string::size_type len = read_num(i, delta.end());
431 if (len != 0)
432 da->copy(pos, len);
433 }
434 I(i != delta.end());
435 I(*i == '\n');
436 ++i;
437 }
438 I(i == delta.end());
439}
440
441void
442apply_delta(string const & a,
443 string const & delta,
444 string & b)
445{
446 shared_ptr<delta_applicator> da(new simple_applicator());
447 da->begin(a);
448 apply_delta(da, delta);
449 da->next();
450 da->finish(b);
451}
452
453// piecewise-applicator stuff follows (warning: ugly)
454
455typedef string::size_type version_pos;
456typedef string::size_type piece_pos;
457typedef string::size_type length;
458typedef unsigned long piece_id;
459
460struct chunk
461{
462 length len; // how many chars in this chunk
463 piece_id piece; // which piece to take chars from
464 version_pos vpos; // position in the current version
465 piece_pos ppos; // position in piece to take chars from
466
467 chunk (length ln, piece_id p, version_pos vp, piece_pos pp) :
468 len(ln), piece(p), vpos(vp), ppos(pp)
469 {}
470
471 chunk subchunk(version_pos vp,
472 length ln,
473 length offset) const
474 {
475 I(ppos + offset >= ppos);
476 I(ppos + offset + ln <= ppos + len);
477
478 chunk c = *this;
479 c.len = ln;
480 c.vpos = vp;
481 c.ppos += offset;
482 return c;
483 }
484};
485
486typedef vector<chunk> version_spec;
487
488struct
489piece_table
490{
491 vector<string> pieces;
492
493 void clear()
494 {
495 pieces.clear();
496 }
497
498 piece_id insert(string const & p)
499 {
500 pieces.push_back(p);
501 return pieces.size() - 1;
502 }
503
504 void append(string & targ, piece_id p, piece_pos pp, length ln)
505 {
506 I(p < pieces.size());
507 targ.append(pieces[p], pp, ln);
508 }
509
510 void build(version_spec const & in, string & out)
511 {
512 out.clear();
513 unsigned out_len = 0;
514 for (version_spec::const_iterator i = in.begin(); i != in.end(); ++i)
515 out_len += i->len;
516 out.reserve(out_len);
517 for (version_spec::const_iterator i = in.begin(); i != in.end(); ++i)
518 append(out, i->piece, i->ppos, i->len);
519 }
520};
521
522static void
523apply_insert(piece_table & p, version_spec & out, string const & str)
524{
525 piece_id piece = p.insert(str);
526 version_pos vpos = 0;
527 if (!out.empty())
528 vpos = out.back().vpos + out.back().len;
529 out.push_back(chunk(str.size(), piece, vpos, 0));
530}
531
532struct
533chunk_less_than
534{
535 bool operator()(chunk const & ch1, chunk const & ch2) const
536 {
537 // nb: ch.vpos + ch.len is the 0-based index of the first element *not*
538 // included in ch; thus we measure against ch.len - 1.
539// I(ch1.len > 0);
540 return (ch1.vpos + ch1.len - 1) < ch2.vpos;
541 }
542};
543
544static void
545apply_copy(version_spec const & in, version_spec & out,
546 version_pos src_vpos, length src_len)
547{
548 //
549 // this is a little tricky because there's *4* different extents
550 // we're talking about at any time:
551 //
552 //
553 // - the 'src' extent, which is 1 or more chunks in the previous version.
554 // its address in the previous version is given in terms of a version_pos
555 // + length value.
556 //
557 // - the 'dst' extent, which is 1 chunk in the new version. its address
558 // in the new version is given in terms of a version_pos + length value.
559 //
560 // - the portion of a piece referenced by the src extent, which we're
561 // selecting a subset of. this is given in terms of a piece_pos + length
562 // value, against a particular piece.
563 //
564 // - the portion of a piece going into the dst extent, which is the
565 // selected subset. this is given in terms of a piece_pos + length value,
566 // against a particular piece.
567 //
568
569 version_pos src_final = src_vpos + src_len;
570 version_pos dst_vpos = 0;
571 if (!out.empty())
572 dst_vpos = out.back().vpos + out.back().len;
573 version_pos dst_final = dst_vpos + src_len;
574 chunk src_bounding_chunk(0,0,src_vpos,0);
575 version_spec::const_iterator lo = lower_bound(in.begin(),
576 in.end(),
577 src_bounding_chunk,
578 chunk_less_than());
579 for ( ; src_len > 0; ++lo)
580 {
581 I(lo != in.end());
582
583 // now we are iterating over src extents which cover the current dst
584 // extent. we found these src extents by calling lower_bound,
585 // above. note, this entire function is called once per dst extent.
586 //
587 // there's two possible arrangements of spanning src extents:
588 //
589 // [ src extent 1 ][ src extent 2 ]
590 // [ ... dst extent .. ]
591 //
592 // or
593 //
594 // [ ... src extent ... ]
595 // [ ... dst extent .. ]
596 //
597 // the following arithmetic should bite off the lowest chunk of
598 // either of these two scenarios, append it to the dst version
599 // vector, and advance the 2 pos' and 1 len value appropriately.
600
601 version_pos src_end = min ((src_vpos + src_len), (lo->vpos + lo->len));
602 version_pos offset = src_vpos - lo->vpos;
603 length seglen = src_end - src_vpos;
604
605 I(seglen > 0);
606 I(src_vpos >= lo->vpos);
607 I(src_vpos + seglen <= lo->vpos + lo->len);
608
609 out.push_back(lo->subchunk(dst_vpos, seglen, offset));
610 src_vpos += seglen;
611 dst_vpos += seglen;
612 I(src_len >= seglen);
613 src_len -= seglen;
614 I(out.back().vpos + out.back().len == dst_vpos);
615 }
616
617 I(src_vpos == src_final);
618 I(dst_vpos == dst_final);
619 I(src_len == 0);
620}
621
622
623struct
624piecewise_applicator
625 : public delta_applicator
626{
627 piece_table pt;
628 shared_ptr<version_spec> src;
629 shared_ptr<version_spec> dst;
630
631 piecewise_applicator() :
632 src(new version_spec()),
633 dst(new version_spec())
634 {}
635
636 virtual ~piecewise_applicator () {}
637
638 virtual void begin(string const & base)
639 {
640 pt.clear();
641 piece_id piece = pt.insert(base);
642 src->clear();
643 src->push_back(chunk(base.size(), piece, 0, 0));
644 dst->clear();
645 }
646
647 virtual void next()
648 {
649 swap(src,dst);
650 dst->clear();
651 }
652
653 virtual void finish(string & out)
654 {
655 out.clear();
656 pt.build(*src, out);
657 }
658
659 virtual void copy(string::size_type pos, string::size_type len)
660 {
661 apply_copy(*src, *dst, pos, len);
662 }
663
664 virtual void insert(string const & str)
665 {
666 apply_insert(pt, *dst, str);
667 }
668};
669
670
671// these just hide our implementation types from outside
672
673shared_ptr<delta_applicator>
674new_simple_applicator()
675{
676 return shared_ptr<delta_applicator>(new simple_applicator());
677}
678
679shared_ptr<delta_applicator>
680new_piecewise_applicator()
681{
682 return shared_ptr<delta_applicator>(new piecewise_applicator());
683}
684
685
686// inversion
687
688struct copied_extent
689{
690 copied_extent(string::size_type op,
691 string::size_type np,
692 string::size_type len)
693 : old_pos(op),
694 new_pos(np),
695 len(len)
696 {}
697 string::size_type old_pos;
698 string::size_type new_pos;
699 string::size_type len;
700 bool operator<(copied_extent const & other) const
701 {
702 return (old_pos < other.old_pos) ||
703 (old_pos == other.old_pos && len > other.len);
704 }
705};
706
707struct
708inverse_delta_writing_applicator :
709 public delta_applicator
710{
711 string const & old;
712 set<copied_extent> copied_extents;
713 string::size_type new_pos;
714
715 inverse_delta_writing_applicator(string const & old)
716 : old(old),
717 new_pos(0)
718 {}
719
720 virtual void begin(string const & base) {}
721 virtual void next() {}
722 virtual void finish(string & out)
723 {
724 // We are trying to write a delta instruction stream which
725 // produces 'old' from 'new'. We don't care what was in 'new',
726 // because we're only going to copy some parts forwards, and we
727 // already know which parts: those in the table. Our table lists
728 // extents which were copied in order that they appear in 'old'.
729 //
730 // When we run into a section of 'old' which isn't in the table,
731 // we have to emit an insert instruction for the gap.
732
733 string::size_type old_pos = 0;
734 out.clear();
735 vector<insn> delta_insns;
736
737 for (set<copied_extent>::iterator i = copied_extents.begin();
738 i != copied_extents.end(); ++i)
739 {
740 // It is possible that this extent left a gap after the
741 // previously copied extent; in this case we wish to pad
742 // the intermediate space with an insert.
743 while (old_pos < i->old_pos)
744 {
745 I(old_pos < old.size());
746 // Don't worry, adjacent inserts are merged.
747 insert_insn(delta_insns, old.at(old_pos++));
748 }
749
750 // It is also possible that this extent *overlapped* the
751 // previously copied extent; in this case we wish to subtract
752 // the overlap from the inverse copy.
753
754 string::size_type overlap = 0;
755 if (i->old_pos < old_pos)
756 overlap = old_pos - i->old_pos;
757
758 if (i->len <= overlap)
759 continue;
760
761 I(i->len > overlap);
762 copy_insn(delta_insns, i->new_pos + overlap, i->len - overlap);
763 old_pos += (i->len - overlap);
764 }
765 while (old_pos < old.size())
766 insert_insn(delta_insns, old.at(old_pos++));
767
768 write_delta_insns(delta_insns, out);
769 }
770
771 virtual void copy(string::size_type old_pos,
772 string::size_type len)
773 {
774 I(old_pos < old.size());
775 copied_extents.insert(copied_extent(old_pos, new_pos, len));
776 new_pos += len;
777 }
778
779 virtual void insert(string const & str)
780 {
781 new_pos += str.size();
782 }
783};
784
785
786void
787invert_xdelta(string const & old_str,
788 string const & delta,
789 string & delta_inverse)
790{
791 shared_ptr<delta_applicator> da(new inverse_delta_writing_applicator(old_str));
792 apply_delta(da, delta);
793 da->finish(delta_inverse);
794}
795
796
797#ifdef BUILD_UNIT_TESTS
798
799#include "unit_tests.hh"
800
801string
802apply_via_normal(string const & base, string const & delta)
803{
804 string tmp;
805 apply_delta(base, delta, tmp);
806 return tmp;
807}
808
809string
810apply_via_piecewise(string const & base, string const & delta)
811{
812 shared_ptr<delta_applicator> appl = new_piecewise_applicator();
813 appl->begin(base);
814 apply_delta(appl, delta);
815 appl->next();
816 string tmp;
817 appl->finish(tmp);
818 return tmp;
819}
820
821void
822spin(string a, string b)
823{
824 string ab, ba;
825 compute_delta(a, b, ab);
826 compute_delta(b, a, ba);
827 UNIT_TEST_CHECK(a == apply_via_normal(b, ba));
828 UNIT_TEST_CHECK(a == apply_via_piecewise(b, ba));
829 UNIT_TEST_CHECK(b == apply_via_normal(a, ab));
830 UNIT_TEST_CHECK(b == apply_via_piecewise(a, ab));
831 string ab_inverted, ba_inverted;
832 invert_xdelta(a, ab, ab_inverted);
833 invert_xdelta(b, ba, ba_inverted);
834 UNIT_TEST_CHECK(a == apply_via_normal(b, ab_inverted));
835 UNIT_TEST_CHECK(a == apply_via_piecewise(b, ab_inverted));
836 UNIT_TEST_CHECK(b == apply_via_normal(a, ba_inverted));
837 UNIT_TEST_CHECK(b == apply_via_piecewise(a, ba_inverted));
838}
839
840UNIT_TEST(xdelta, simple_cases)
841{
842 L(FL("empty/empty"));
843 spin("", "");
844 L(FL("empty/short"));
845 spin("", "a");
846 L(FL("empty/longer"));
847 spin("", "asdfasdf");
848 L(FL("two identical strings"));
849 spin("same string", "same string");
850}
851
852#ifdef WIN32
853#define BOOST_NO_STDC_NAMESPACE
854#endif
855#include <boost/random.hpp>
856
857boost::mt19937 xdelta_prng;
858
859#if BOOST_VERSION >= 103100
860boost::uniform_smallint<char> xdelta_chargen('a', 'z');
861boost::uniform_smallint<size_t> xdelta_sizegen(1024, 65536);
862boost::uniform_smallint<size_t> xdelta_editgen(3, 10);
863boost::uniform_smallint<size_t> xdelta_lengen(1, 256);
864#define PRNG xdelta_prng
865#else
866boost::uniform_smallint<boost::mt19937, char> xdelta_chargen(xdelta_prng, 'a', 'z');
867boost::uniform_smallint<boost::mt19937, size_t> xdelta_sizegen(xdelta_prng, 1024, 65536);
868boost::uniform_smallint<boost::mt19937, size_t> xdelta_editgen(xdelta_prng, 3, 10);
869boost::uniform_smallint<boost::mt19937, size_t> xdelta_lengen(xdelta_prng, 1, 256);
870#define PRNG
871#endif
872
873void
874xdelta_random_string(string & str)
875{
876 size_t sz = xdelta_sizegen(PRNG);
877 str.clear();
878 str.reserve(sz);
879 while (sz-- > 0)
880 {
881 str += xdelta_chargen(PRNG);
882 }
883}
884
885void
886xdelta_randomly_insert(string & str)
887{
888 size_t nedits = xdelta_editgen(PRNG);
889 while (nedits > 0)
890 {
891 size_t pos = xdelta_sizegen(PRNG) % str.size();
892 size_t len = xdelta_lengen(PRNG);
893 if (pos+len >= str.size())
894 continue;
895 string tmp;
896 tmp.reserve(len);
897 for (size_t i = 0; i < len; ++i)
898 tmp += xdelta_chargen(PRNG);
899 str.insert(pos, tmp);
900 nedits--;
901 }
902}
903
904void
905xdelta_randomly_change(string & str)
906{
907 size_t nedits = xdelta_editgen(PRNG);
908 while (nedits > 0)
909 {
910 size_t pos = xdelta_sizegen(PRNG) % str.size();
911 size_t len = xdelta_lengen(PRNG);
912 if (pos+len >= str.size())
913 continue;
914 for (size_t i = 0; i < len; ++i)
915 str[pos+i] = xdelta_chargen(PRNG);
916 nedits--;
917 }
918}
919
920void
921xdelta_randomly_delete(string & str)
922{
923 size_t nedits = xdelta_editgen(PRNG);
924 while (nedits > 0)
925 {
926 size_t pos = xdelta_sizegen(PRNG) % str.size();
927 size_t len = xdelta_lengen(PRNG);
928 if (pos+len >= str.size())
929 continue;
930 str.erase(pos, len);
931 --nedits;
932 }
933}
934
935UNIT_TEST(xdelta, random_simple_delta)
936{
937 for (int i = 0; i < 100; ++i)
938 {
939 string a, b;
940 xdelta_random_string(a);
941 b = a;
942 xdelta_randomly_change(b);
943 xdelta_randomly_insert(b);
944 xdelta_randomly_delete(b);
945 spin(a, b);
946 }
947}
948
949UNIT_TEST(xdelta, random_piecewise_delta)
950{
951 for (int i = 0; i < 50; ++i)
952 {
953 string prev, next, got;
954 xdelta_random_string(prev);
955 shared_ptr<delta_applicator> appl = new_piecewise_applicator();
956 appl->begin(prev);
957 for (int j = 0; j < 5; ++j)
958 {
959 appl->finish(got);
960 UNIT_TEST_CHECK(got == prev);
961 next = prev;
962 xdelta_randomly_change(next);
963 xdelta_randomly_insert(next);
964 xdelta_randomly_delete(next);
965 string delta;
966 compute_delta(prev, next, delta);
967 apply_delta(appl, delta);
968 appl->next();
969 prev = next;
970 }
971 appl->finish(got);
972 UNIT_TEST_CHECK(got == prev);
973 }
974}
975
976UNIT_TEST(xdelta, rolling_sanity_check)
977{
978 const unsigned testbufsize = 512;
979 static const string::size_type blocksz = 64;
980 char testbuf[testbufsize];
981
982 for(unsigned i = 0; i < testbufsize; ++i)
983 {
984 testbuf[i] = xdelta_chargen(PRNG);
985 }
986 for(unsigned advanceby = 0; advanceby < testbufsize; ++advanceby)
987 {
988 adler32 incremental(reinterpret_cast<u8 const *>(testbuf), blocksz);
989 for(unsigned i = 0; i < advanceby; ++i)
990 {
991 incremental.out(static_cast<u8>(testbuf[i]));
992 if ((i + blocksz) < testbufsize)
993 {
994 incremental.in(static_cast<u8>(testbuf[i+blocksz]));
995 }
996 }
997 adler32 skip(reinterpret_cast<u8 const *>(testbuf), blocksz);
998 u32 new_lo = advanceby;
999 u32 new_hi = new_lo + blocksz;
1000 if (new_hi > testbufsize)
1001 {
1002 new_hi = testbufsize;
1003 }
1004 skip.replace_with(reinterpret_cast<u8 const *>(testbuf + new_lo), new_hi - new_lo);
1005
1006 UNIT_TEST_CHECK(skip.sum() == incremental.sum());
1007 }
1008 L(FL("rolling sanity check passed"));
1009}
1010
1011#endif // BUILD_UNIT_TESTS
1012
1013// Local Variables:
1014// mode: C++
1015// fill-column: 76
1016// c-file-style: "gnu"
1017// indent-tabs-mode: nil
1018// End:
1019// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status