monotone

monotone Mtn Source Tree

Root/xdelta.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10// this file implements the xdelta algorithm, produces and consumes simple
11// copy/insert binary patches with the following structure:
12//
13// patch := (copy|insert)*
14// copy := 'C', ' ', pos=uint, ' ', len=uint, '\n'
15// insert := 'I', ' ', len=uint, '\n', payload=(byte x len), '\n'
16//
17// this means you can generally read the patch if you print it on stdout,
18// when it applies to text, but it can also apply to any binary, so the
19// hunk payload itself might look awful. I made it semi-ascii only to make
20// it slightly easier to debug; you really shouldn't read it normally. it's
21// a strict format with minimal checking, so it must be transport-encoded
22// to avoid whitespace munging.
23//
24// if you want to *read* a patch, you will like unidiff format much better.
25// take a look in diff_patch.(cc|hh) for a nice interface to that.
26
27#include "base.hh"
28#include <algorithm>
29#include "vector.hh"
30#include <set>
31#include <sstream>
32#include <cstring> // memcmp
33
34#include <boost/shared_ptr.hpp>
35#include <boost/version.hpp>
36
37#include "adler32.hh"
38#include "hash_map.hh"
39#include "numeric_vocab.hh"
40#include "sanity.hh"
41#include "xdelta.hh"
42
43using std::make_pair;
44using std::min;
45using std::ostream;
46using std::ostringstream;
47using std::pair;
48using std::set;
49using std::string;
50using std::vector;
51using std::memcmp;
52using std::lower_bound;
53
54using boost::shared_ptr;
55
56struct identity {size_t operator()(u32 const & v) const { return static_cast<size_t>(v);}};
57typedef pair<string::size_type, string::size_type> extent;
58typedef hashmap::hash_map<u32, extent> match_table;
59
60struct
61insn
62{
63 insn(char c) : code(insert), pos(0), len(0), payload("") { payload += c; }
64 insn(string s) : code(insert), pos(0), len(s.size()), payload(s) {}
65 insn(u32 p, u32 l) : code(copy), pos(p), len(l) {}
66 enum { insert, copy } code;
67 u32 pos, len;
68 string payload;
69};
70
71ostream & operator<<(ostream & ost, insn const & i)
72{
73 if (i.code == insn::insert)
74 {
75 ost << "I " << i.payload.size() << '\n';
76 ost.write(i.payload.data(), i.payload.size());
77 ost << '\n';
78 }
79 else
80 ost << "C " << i.pos << ' ' << i.len << '\n';
81 return ost;
82}
83
84
85static inline void
86init_match_table(string const & a,
87 const string::size_type blocksz,
88 match_table & tab)
89{
90 string::size_type sz = a.size();
91 for (string::size_type i = 0; i < sz; i += blocksz)
92 {
93 string::size_type step = ((i + blocksz) >= sz) ? (sz - i) : blocksz;
94 u32 sum = adler32(reinterpret_cast<u8 const *>(a.data() + i), step).sum();
95 if (tab.find(sum) == tab.end())
96 tab.insert(make_pair(sum, make_pair(i, step)));
97 }
98 return;
99}
100
101
102static inline bool
103find_match(match_table const & matches,
104 vector<insn> & delta,
105 adler32 const & rolling,
106 string const & a,
107 string const & b,
108 string::size_type bpos,
109 string::size_type & apos,
110 string::size_type & alen,
111 string::size_type & badvance)
112{
113 u32 sum = rolling.sum();
114 match_table::const_iterator e = matches.find(sum);
115
116 // maybe we haven't seen it at all?
117 if (e == matches.end())
118 return false;
119
120 string::size_type tpos = e->second.first;
121 string::size_type tlen = e->second.second;
122
123 I(tpos < a.size());
124 I(tpos + tlen <= a.size());
125
126 // maybe it's a false match?
127 if (memcmp(a.data() + tpos, b.data() + bpos, tlen) != 0)
128 return false;
129
130 apos = tpos;
131
132 // see if we can extend our match forwards
133 string::const_iterator ai = a.begin() + apos + tlen;
134 string::const_iterator ae = a.end();
135 string::const_iterator bi = b.begin() + bpos + tlen;
136 string::const_iterator be = b.end();
137
138 while ((ai != ae)
139 && (bi != be)
140 && (*ai == *bi))
141 {
142 ++tlen;
143 ++ai;
144 ++bi;
145 }
146
147 alen = tlen;
148 badvance = tlen;
149
150 // see if we can extend backwards into a previous insert hunk
151 if (! delta.empty() && delta.back().code == insn::insert)
152 {
153 while (apos > 0
154 && bpos > 0
155 && a[apos - 1] == b[bpos - 1]
156 && !delta.back().payload.empty())
157 {
158 I(a[apos - 1] == *(delta.back().payload.rbegin()));
159 I(delta.back().payload.size() > 0);
160 delta.back().payload.resize(delta.back().payload.size() - 1);
161 --apos;
162 --bpos;
163 ++alen;
164 // the significant thing here is that we do not move
165 // 'badvance' forward, just alen.
166 }
167
168 // if we've extended back to consume the *entire* insert,
169 // let's do away with it altogether.
170 if (delta.back().payload.empty())
171 {
172 delta.pop_back();
173 }
174 }
175
176 I(memcmp(a.data() + apos, b.data() + bpos, alen) == 0);
177 return true;
178}
179
180static inline void
181insert_insn(vector<insn> & delta, char c)
182{
183 if (delta.empty() || delta.back().code == insn::copy)
184 delta.push_back(insn(c));
185 else
186 {
187 // design in gcc 3.3 and 4.0 STL expands the string one character
188 // at a time when appending single characters ?!
189 // see libstdc++5-3.3-dev 3.3.5-13: basic_string.h:471, calling with
190 // size_type(1), then basic_string.tcc:717 reserving one more byte
191 // if needed.
192 // see libstdc++6-4.0-dev 4.0.3-3: basic_string.h:770 calling push_back
193 // then basic_string.h: 849 again adding 1 to the length.
194 if (delta.back().payload.capacity() == delta.back().payload.size())
195 // standard amortized constant rule
196 delta.back().payload.reserve(delta.back().payload.size() * 2);
197 delta.back().payload += c;
198 }
199}
200
201
202static inline void
203copy_insn(vector<insn> & delta, string::size_type i, string::size_type matchlen)
204{
205 delta.push_back(insn(i, matchlen));
206}
207
208
209static void
210compute_delta_insns(string const & a,
211 string const & b,
212 vector<insn> & delta)
213{
214 static const string::size_type blocksz = 64;
215 match_table matches;
216 init_match_table(a, blocksz, matches);
217
218 if (b.size() < blocksz)
219 {
220 for (string::size_type i = 0; i < b.size(); ++i)
221 insert_insn(delta, b[i]);
222 return;
223 }
224
225 adler32 rolling(reinterpret_cast<u8 const *>(b.data()), blocksz);
226
227 for (string::size_type
228 sz = b.size(),
229 lo = 0;
230 lo < sz; )
231 {
232 string::size_type apos = 0, alen = 1, badvance = 1;
233
234 bool found_match = find_match(matches, delta, rolling, a, b, lo, apos, alen, badvance);
235
236 // There are basically three cases:
237 // 1) advance by 1 (common, no match found)
238 // 2) advance by >blocksz (semi-common, usual case when a match is found)
239 // 3) advance by <blocksz (rare, unusual case when a match is found)
240 // in case (2), all of the rolling checksum data will be entirely replaced, so
241 // we can do a fast skip forward.
242 if (found_match)
243 {
244 copy_insn(delta, apos, alen);
245 u32 save_lo = lo;
246 if (badvance <= blocksz)
247 {
248 string::size_type next = lo;
249 I(next < b.size() && (lo + badvance - 1) < b.size());
250 for (; next < lo + badvance; ++next)
251 {
252 rolling.out(static_cast<u8>(b[next]));
253 if (next + blocksz < b.size())
254 rolling.in(static_cast<u8>(b[next + blocksz]));
255 }
256 lo = next;
257 }
258
259 // Skip advancement is always correct; however, for a small
260 // increment it is more expensive than incremental advancement
261 // Cost of doing a in() + out() is roughly the same as doing a
262 // replace_with for 1 character, so if we are advancing more than
263 // blocksz/2, it will be better to do a replacement than an
264 // incremental advance. The out could be more expensive because
265 // it does a multiply, but for now, ignore this; it turns out that
266 // advancements in the range of [2..blocksz-1] are actually really
267 // rare.
268 if (badvance >= blocksz/2)
269 {
270 u32 new_lo = save_lo + badvance;
271 u32 new_hi = new_lo + blocksz;
272 if (new_hi > b.size())
273 {
274 new_hi = b.size();
275 }
276 I(new_lo <= new_hi);
277 rolling.replace_with(reinterpret_cast<u8 const *>(b.data() + new_lo), new_hi-new_lo);
278 lo = new_lo;
279 }
280 }
281 else
282 {
283 I(apos + alen <= a.size());
284 I(alen == 1);
285 I(alen < blocksz);
286 I(lo < b.size());
287 insert_insn(delta, b[lo]);
288 rolling.out(static_cast<u8>(b[lo]));
289 if (lo + blocksz < b.size())
290 {
291 rolling.in(static_cast<u8>(b[lo+blocksz]));
292 }
293 ++lo;
294 }
295 }
296}
297
298void
299write_delta_insns(vector<insn> const & delta_insns,
300 string & delta)
301{
302 delta.clear();
303 ostringstream oss;
304 for (vector<insn>::const_iterator i = delta_insns.begin();
305 i != delta_insns.end(); ++i)
306 {
307 oss << *i;
308 }
309 delta = oss.str();
310}
311
312void
313compute_delta(string const & a,
314 string const & b,
315 string & delta)
316{
317 vector<insn> delta_insns;
318
319 // FIXME: in theory you can do empty files and empty deltas; write some
320 // tests to be sure you're doing it right, and in any case implement the
321 // necessary logic directly in this function, don't bother doing an
322 // xdelta. several places of the xdelta code prefer assertions which are
323 // only true with non-empty chunks anyways.
324
325 if (a.size() == 0 && b.size() != 0)
326 delta_insns.push_back(insn(b));
327 else if (a.size() != 0 && b.size() == 0)
328 delta_insns.push_back(insn(0, 0));
329 else if (a == b)
330 delta_insns.push_back(insn(0, a.size()));
331 else
332 {
333 I(a.size() > 0);
334 I(b.size() > 0);
335
336 L(FL("computing binary delta instructions"));
337 compute_delta_insns(a, b, delta_insns);
338 L(FL("computed binary delta instructions"));
339 }
340
341 ostringstream oss;
342 for (vector<insn>::const_iterator i = delta_insns.begin();
343 i != delta_insns.end(); ++i)
344 {
345 oss << *i;
346 }
347 delta = oss.str();
348}
349
350struct
351simple_applicator
352 : public delta_applicator
353{
354 string src;
355 string dst;
356 virtual ~simple_applicator () {}
357 virtual void begin(string const & base)
358 {
359 src = base;
360 dst.clear();
361 }
362 virtual void next()
363 {
364 swap(src,dst);
365 dst.clear();
366 }
367 virtual void finish(string & out)
368 {
369 out = src;
370 }
371
372 virtual void copy(string::size_type pos, string::size_type len)
373 {
374 dst.append(src, pos, len);
375 }
376 virtual void insert(string const & str)
377 {
378 dst.append(str);
379 }
380};
381
382inline string::size_type
383read_num(string::const_iterator &i,
384 string::const_iterator e)
385{
386 string::size_type n = 0;
387
388 while (i != e && *i == ' ')
389 ++i;
390
391 while (i != e && *i >= '0' && *i <= '9')
392 {
393 n *= 10;
394 n += static_cast<size_t>(*i - '0');
395 ++i;
396 }
397 return n;
398}
399
400void
401apply_delta(shared_ptr<delta_applicator> da,
402 string const & delta)
403{
404 string::const_iterator i = delta.begin();
405 while (i != delta.end() && (*i == 'I' || *i == 'C'))
406 {
407 if (*i == 'I')
408 {
409 ++i;
410 I(i != delta.end());
411 string::size_type len = read_num(i, delta.end());
412 I(i != delta.end());
413 I(*i == '\n');
414 ++i;
415 I(i != delta.end());
416 I((i - delta.begin()) + len <= delta.size());
417 if (len > 0)
418 {
419 string tmp(i, i+len);
420 da->insert(tmp);
421 }
422 i += len;
423 }
424 else
425 {
426 I(*i == 'C');
427 ++i;
428 I(i != delta.end());
429 string::size_type pos = read_num(i, delta.end());
430 I(i != delta.end());
431 string::size_type len = read_num(i, delta.end());
432 if (len != 0)
433 da->copy(pos, len);
434 }
435 I(i != delta.end());
436 I(*i == '\n');
437 ++i;
438 }
439 I(i == delta.end());
440}
441
442void
443apply_delta(string const & a,
444 string const & delta,
445 string & b)
446{
447 shared_ptr<delta_applicator> da(new simple_applicator());
448 da->begin(a);
449 apply_delta(da, delta);
450 da->next();
451 da->finish(b);
452}
453
454
455// diffing and patching
456
457void
458diff(data const & olddata,
459 data const & newdata,
460 delta & del)
461{
462 string unpacked;
463 compute_delta(olddata(), newdata(), unpacked);
464 del = delta(unpacked);
465}
466
467void
468patch(data const & olddata,
469 delta const & del,
470 data & newdata)
471{
472 string result;
473 apply_delta(olddata(), del(), result);
474 newdata = data(result);
475}
476
477
478// piecewise-applicator stuff follows (warning: ugly)
479
480typedef string::size_type version_pos;
481typedef string::size_type piece_pos;
482typedef string::size_type length;
483typedef unsigned long piece_id;
484
485struct chunk
486{
487 length len; // how many chars in this chunk
488 piece_id piece; // which piece to take chars from
489 version_pos vpos; // position in the current version
490 piece_pos ppos; // position in piece to take chars from
491
492 chunk (length ln, piece_id p, version_pos vp, piece_pos pp) :
493 len(ln), piece(p), vpos(vp), ppos(pp)
494 {}
495
496 chunk subchunk(version_pos vp,
497 length ln,
498 length offset) const
499 {
500 I(ppos + offset >= ppos);
501 I(ppos + offset + ln <= ppos + len);
502
503 chunk c = *this;
504 c.len = ln;
505 c.vpos = vp;
506 c.ppos += offset;
507 return c;
508 }
509};
510
511typedef vector<chunk> version_spec;
512
513struct
514piece_table
515{
516 vector<string> pieces;
517
518 void clear()
519 {
520 pieces.clear();
521 }
522
523 piece_id insert(string const & p)
524 {
525 pieces.push_back(p);
526 return pieces.size() - 1;
527 }
528
529 void append(string & targ, piece_id p, piece_pos pp, length ln)
530 {
531 I(p < pieces.size());
532 targ.append(pieces[p], pp, ln);
533 }
534
535 void build(version_spec const & in, string & out)
536 {
537 out.clear();
538 unsigned out_len = 0;
539 for (version_spec::const_iterator i = in.begin(); i != in.end(); ++i)
540 out_len += i->len;
541 out.reserve(out_len);
542 for (version_spec::const_iterator i = in.begin(); i != in.end(); ++i)
543 append(out, i->piece, i->ppos, i->len);
544 }
545};
546
547static void
548apply_insert(piece_table & p, version_spec & out, string const & str)
549{
550 piece_id piece = p.insert(str);
551 version_pos vpos = 0;
552 if (!out.empty())
553 vpos = out.back().vpos + out.back().len;
554 out.push_back(chunk(str.size(), piece, vpos, 0));
555}
556
557struct
558chunk_less_than
559{
560 bool operator()(chunk const & ch1, chunk const & ch2) const
561 {
562 // nb: ch.vpos + ch.len is the 0-based index of the first element *not*
563 // included in ch; thus we measure against ch.len - 1.
564// I(ch1.len > 0);
565 return (ch1.vpos + ch1.len - 1) < ch2.vpos;
566 }
567};
568
569static void
570apply_copy(version_spec const & in, version_spec & out,
571 version_pos src_vpos, length src_len)
572{
573 //
574 // this is a little tricky because there's *4* different extents
575 // we're talking about at any time:
576 //
577 //
578 // - the 'src' extent, which is 1 or more chunks in the previous version.
579 // its address in the previous version is given in terms of a version_pos
580 // + length value.
581 //
582 // - the 'dst' extent, which is 1 chunk in the new version. its address
583 // in the new version is given in terms of a version_pos + length value.
584 //
585 // - the portion of a piece referenced by the src extent, which we're
586 // selecting a subset of. this is given in terms of a piece_pos + length
587 // value, against a particular piece.
588 //
589 // - the portion of a piece going into the dst extent, which is the
590 // selected subset. this is given in terms of a piece_pos + length value,
591 // against a particular piece.
592 //
593
594 version_pos src_final = src_vpos + src_len;
595 version_pos dst_vpos = 0;
596 if (!out.empty())
597 dst_vpos = out.back().vpos + out.back().len;
598 version_pos dst_final = dst_vpos + src_len;
599 chunk src_bounding_chunk(0,0,src_vpos,0);
600 version_spec::const_iterator lo = lower_bound(in.begin(),
601 in.end(),
602 src_bounding_chunk,
603 chunk_less_than());
604 for ( ; src_len > 0; ++lo)
605 {
606 I(lo != in.end());
607
608 // now we are iterating over src extents which cover the current dst
609 // extent. we found these src extents by calling lower_bound,
610 // above. note, this entire function is called once per dst extent.
611 //
612 // there's two possible arrangements of spanning src extents:
613 //
614 // [ src extent 1 ][ src extent 2 ]
615 // [ ... dst extent .. ]
616 //
617 // or
618 //
619 // [ ... src extent ... ]
620 // [ ... dst extent .. ]
621 //
622 // the following arithmetic should bite off the lowest chunk of
623 // either of these two scenarios, append it to the dst version
624 // vector, and advance the 2 pos' and 1 len value appropriately.
625
626 version_pos src_end = min ((src_vpos + src_len), (lo->vpos + lo->len));
627 version_pos offset = src_vpos - lo->vpos;
628 length seglen = src_end - src_vpos;
629
630 I(seglen > 0);
631 I(src_vpos >= lo->vpos);
632 I(src_vpos + seglen <= lo->vpos + lo->len);
633
634 out.push_back(lo->subchunk(dst_vpos, seglen, offset));
635 src_vpos += seglen;
636 dst_vpos += seglen;
637 I(src_len >= seglen);
638 src_len -= seglen;
639 I(out.back().vpos + out.back().len == dst_vpos);
640 }
641
642 I(src_vpos == src_final);
643 I(dst_vpos == dst_final);
644 I(src_len == 0);
645}
646
647
648struct
649piecewise_applicator
650 : public delta_applicator
651{
652 piece_table pt;
653 shared_ptr<version_spec> src;
654 shared_ptr<version_spec> dst;
655
656 piecewise_applicator() :
657 src(new version_spec()),
658 dst(new version_spec())
659 {}
660
661 virtual ~piecewise_applicator () {}
662
663 virtual void begin(string const & base)
664 {
665 pt.clear();
666 piece_id piece = pt.insert(base);
667 src->clear();
668 src->push_back(chunk(base.size(), piece, 0, 0));
669 dst->clear();
670 }
671
672 virtual void next()
673 {
674 swap(src,dst);
675 dst->clear();
676 }
677
678 virtual void finish(string & out)
679 {
680 out.clear();
681 pt.build(*src, out);
682 }
683
684 virtual void copy(string::size_type pos, string::size_type len)
685 {
686 apply_copy(*src, *dst, pos, len);
687 }
688
689 virtual void insert(string const & str)
690 {
691 apply_insert(pt, *dst, str);
692 }
693};
694
695
696// these just hide our implementation types from outside
697
698shared_ptr<delta_applicator>
699new_simple_applicator()
700{
701 return shared_ptr<delta_applicator>(new simple_applicator());
702}
703
704shared_ptr<delta_applicator>
705new_piecewise_applicator()
706{
707 return shared_ptr<delta_applicator>(new piecewise_applicator());
708}
709
710
711// inversion
712
713struct copied_extent
714{
715 copied_extent(string::size_type op,
716 string::size_type np,
717 string::size_type len)
718 : old_pos(op),
719 new_pos(np),
720 len(len)
721 {}
722 string::size_type old_pos;
723 string::size_type new_pos;
724 string::size_type len;
725 bool operator<(copied_extent const & other) const
726 {
727 return (old_pos < other.old_pos) ||
728 (old_pos == other.old_pos && len > other.len);
729 }
730};
731
732struct
733inverse_delta_writing_applicator :
734 public delta_applicator
735{
736 string const & old;
737 set<copied_extent> copied_extents;
738 string::size_type new_pos;
739
740 inverse_delta_writing_applicator(string const & old)
741 : old(old),
742 new_pos(0)
743 {}
744
745 virtual void begin(string const & base) {}
746 virtual void next() {}
747 virtual void finish(string & out)
748 {
749 // We are trying to write a delta instruction stream which
750 // produces 'old' from 'new'. We don't care what was in 'new',
751 // because we're only going to copy some parts forwards, and we
752 // already know which parts: those in the table. Our table lists
753 // extents which were copied in order that they appear in 'old'.
754 //
755 // When we run into a section of 'old' which isn't in the table,
756 // we have to emit an insert instruction for the gap.
757
758 string::size_type old_pos = 0;
759 out.clear();
760 vector<insn> delta_insns;
761
762 for (set<copied_extent>::iterator i = copied_extents.begin();
763 i != copied_extents.end(); ++i)
764 {
765 // It is possible that this extent left a gap after the
766 // previously copied extent; in this case we wish to pad
767 // the intermediate space with an insert.
768 while (old_pos < i->old_pos)
769 {
770 I(old_pos < old.size());
771 // Don't worry, adjacent inserts are merged.
772 insert_insn(delta_insns, old.at(old_pos++));
773 }
774
775 // It is also possible that this extent *overlapped* the
776 // previously copied extent; in this case we wish to subtract
777 // the overlap from the inverse copy.
778
779 string::size_type overlap = 0;
780 if (i->old_pos < old_pos)
781 overlap = old_pos - i->old_pos;
782
783 if (i->len <= overlap)
784 continue;
785
786 I(i->len > overlap);
787 copy_insn(delta_insns, i->new_pos + overlap, i->len - overlap);
788 old_pos += (i->len - overlap);
789 }
790 while (old_pos < old.size())
791 insert_insn(delta_insns, old.at(old_pos++));
792
793 write_delta_insns(delta_insns, out);
794 }
795
796 virtual void copy(string::size_type old_pos,
797 string::size_type len)
798 {
799 I(old_pos < old.size());
800 copied_extents.insert(copied_extent(old_pos, new_pos, len));
801 new_pos += len;
802 }
803
804 virtual void insert(string const & str)
805 {
806 new_pos += str.size();
807 }
808};
809
810
811void
812invert_xdelta(string const & old_str,
813 string const & delta,
814 string & delta_inverse)
815{
816 shared_ptr<delta_applicator> da(new inverse_delta_writing_applicator(old_str));
817 apply_delta(da, delta);
818 da->finish(delta_inverse);
819}
820
821
822#ifdef BUILD_UNIT_TESTS
823
824#include "unit_tests.hh"
825
826string
827apply_via_normal(string const & base, string const & delta)
828{
829 string tmp;
830 apply_delta(base, delta, tmp);
831 return tmp;
832}
833
834string
835apply_via_piecewise(string const & base, string const & delta)
836{
837 shared_ptr<delta_applicator> appl = new_piecewise_applicator();
838 appl->begin(base);
839 apply_delta(appl, delta);
840 appl->next();
841 string tmp;
842 appl->finish(tmp);
843 return tmp;
844}
845
846void
847spin(string a, string b)
848{
849 string ab, ba;
850 compute_delta(a, b, ab);
851 compute_delta(b, a, ba);
852 UNIT_TEST_CHECK(a == apply_via_normal(b, ba));
853 UNIT_TEST_CHECK(a == apply_via_piecewise(b, ba));
854 UNIT_TEST_CHECK(b == apply_via_normal(a, ab));
855 UNIT_TEST_CHECK(b == apply_via_piecewise(a, ab));
856 string ab_inverted, ba_inverted;
857 invert_xdelta(a, ab, ab_inverted);
858 invert_xdelta(b, ba, ba_inverted);
859 UNIT_TEST_CHECK(a == apply_via_normal(b, ab_inverted));
860 UNIT_TEST_CHECK(a == apply_via_piecewise(b, ab_inverted));
861 UNIT_TEST_CHECK(b == apply_via_normal(a, ba_inverted));
862 UNIT_TEST_CHECK(b == apply_via_piecewise(a, ba_inverted));
863}
864
865UNIT_TEST(xdelta, simple_cases)
866{
867 L(FL("empty/empty"));
868 spin("", "");
869 L(FL("empty/short"));
870 spin("", "a");
871 L(FL("empty/longer"));
872 spin("", "asdfasdf");
873 L(FL("two identical strings"));
874 spin("same string", "same string");
875}
876
877#ifdef WIN32
878#define BOOST_NO_STDC_NAMESPACE
879#endif
880#include <boost/random.hpp>
881
882boost::mt19937 xdelta_prng;
883
884#if BOOST_VERSION >= 103100
885boost::uniform_smallint<char> xdelta_chargen('a', 'z');
886boost::uniform_smallint<size_t> xdelta_sizegen(1024, 65536);
887boost::uniform_smallint<size_t> xdelta_editgen(3, 10);
888boost::uniform_smallint<size_t> xdelta_lengen(1, 256);
889#define PRNG xdelta_prng
890#else
891boost::uniform_smallint<boost::mt19937, char> xdelta_chargen(xdelta_prng, 'a', 'z');
892boost::uniform_smallint<boost::mt19937, size_t> xdelta_sizegen(xdelta_prng, 1024, 65536);
893boost::uniform_smallint<boost::mt19937, size_t> xdelta_editgen(xdelta_prng, 3, 10);
894boost::uniform_smallint<boost::mt19937, size_t> xdelta_lengen(xdelta_prng, 1, 256);
895#define PRNG
896#endif
897
898void
899xdelta_random_string(string & str)
900{
901 size_t sz = xdelta_sizegen(PRNG);
902 str.clear();
903 str.reserve(sz);
904 while (sz-- > 0)
905 {
906 str += xdelta_chargen(PRNG);
907 }
908}
909
910void
911xdelta_randomly_insert(string & str)
912{
913 size_t nedits = xdelta_editgen(PRNG);
914 while (nedits > 0)
915 {
916 size_t pos = xdelta_sizegen(PRNG) % str.size();
917 size_t len = xdelta_lengen(PRNG);
918 if (pos+len >= str.size())
919 continue;
920 string tmp;
921 tmp.reserve(len);
922 for (size_t i = 0; i < len; ++i)
923 tmp += xdelta_chargen(PRNG);
924 str.insert(pos, tmp);
925 nedits--;
926 }
927}
928
929void
930xdelta_randomly_change(string & str)
931{
932 size_t nedits = xdelta_editgen(PRNG);
933 while (nedits > 0)
934 {
935 size_t pos = xdelta_sizegen(PRNG) % str.size();
936 size_t len = xdelta_lengen(PRNG);
937 if (pos+len >= str.size())
938 continue;
939 for (size_t i = 0; i < len; ++i)
940 str[pos+i] = xdelta_chargen(PRNG);
941 nedits--;
942 }
943}
944
945void
946xdelta_randomly_delete(string & str)
947{
948 size_t nedits = xdelta_editgen(PRNG);
949 while (nedits > 0)
950 {
951 size_t pos = xdelta_sizegen(PRNG) % str.size();
952 size_t len = xdelta_lengen(PRNG);
953 if (pos+len >= str.size())
954 continue;
955 str.erase(pos, len);
956 --nedits;
957 }
958}
959
960UNIT_TEST(xdelta, random_simple_delta)
961{
962 for (int i = 0; i < 100; ++i)
963 {
964 string a, b;
965 xdelta_random_string(a);
966 b = a;
967 xdelta_randomly_change(b);
968 xdelta_randomly_insert(b);
969 xdelta_randomly_delete(b);
970 spin(a, b);
971 }
972}
973
974UNIT_TEST(xdelta, random_piecewise_delta)
975{
976 for (int i = 0; i < 50; ++i)
977 {
978 string prev, next, got;
979 xdelta_random_string(prev);
980 shared_ptr<delta_applicator> appl = new_piecewise_applicator();
981 appl->begin(prev);
982 for (int j = 0; j < 5; ++j)
983 {
984 appl->finish(got);
985 UNIT_TEST_CHECK(got == prev);
986 next = prev;
987 xdelta_randomly_change(next);
988 xdelta_randomly_insert(next);
989 xdelta_randomly_delete(next);
990 string delta;
991 compute_delta(prev, next, delta);
992 apply_delta(appl, delta);
993 appl->next();
994 prev = next;
995 }
996 appl->finish(got);
997 UNIT_TEST_CHECK(got == prev);
998 }
999}
1000
1001UNIT_TEST(xdelta, rolling_sanity_check)
1002{
1003 const unsigned testbufsize = 512;
1004 static const string::size_type blocksz = 64;
1005 char testbuf[testbufsize];
1006
1007 for(unsigned i = 0; i < testbufsize; ++i)
1008 {
1009 testbuf[i] = xdelta_chargen(PRNG);
1010 }
1011 for(unsigned advanceby = 0; advanceby < testbufsize; ++advanceby)
1012 {
1013 adler32 incremental(reinterpret_cast<u8 const *>(testbuf), blocksz);
1014 for(unsigned i = 0; i < advanceby; ++i)
1015 {
1016 incremental.out(static_cast<u8>(testbuf[i]));
1017 if ((i + blocksz) < testbufsize)
1018 {
1019 incremental.in(static_cast<u8>(testbuf[i+blocksz]));
1020 }
1021 }
1022 adler32 skip(reinterpret_cast<u8 const *>(testbuf), blocksz);
1023 u32 new_lo = advanceby;
1024 u32 new_hi = new_lo + blocksz;
1025 if (new_hi > testbufsize)
1026 {
1027 new_hi = testbufsize;
1028 }
1029 skip.replace_with(reinterpret_cast<u8 const *>(testbuf + new_lo), new_hi - new_lo);
1030
1031 UNIT_TEST_CHECK(skip.sum() == incremental.sum());
1032 }
1033 L(FL("rolling sanity check passed"));
1034}
1035
1036#endif // BUILD_UNIT_TESTS
1037
1038// Local Variables:
1039// mode: C++
1040// fill-column: 76
1041// c-file-style: "gnu"
1042// indent-tabs-mode: nil
1043// End:
1044// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status