monotone

monotone Mtn Source Tree

Root/xdelta.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10// this file implements the xdelta algorithm, produces and consumes simple
11// copy/insert binary patches with the following structure:
12//
13// patch := (copy|insert)*
14// copy := 'C', ' ', pos=uint, ' ', len=uint, '\n'
15// insert := 'I', ' ', len=uint, '\n', payload=(byte x len), '\n'
16//
17// this means you can generally read the patch if you print it on stdout,
18// when it applies to text, but it can also apply to any binary, so the
19// hunk payload itself might look awful. I made it semi-ascii only to make
20// it slightly easier to debug; you really shouldn't read it normally. it's
21// a strict format with minimal checking, so it must be transport-encoded
22// to avoid whitespace munging.
23//
24// if you want to *read* a patch, you will like unidiff format much better.
25// take a look in diff_patch.(cc|hh) for a nice interface to that.
26
27#include <algorithm>
28#include <vector>
29#include <set>
30#include <string>
31#include <sstream>
32
33#include <boost/shared_ptr.hpp>
34#include <boost/version.hpp>
35
36#include "adler32.hh"
37#include "hash_map.hh"
38#include "numeric_vocab.hh"
39#include "sanity.hh"
40#include "xdelta.hh"
41
42using std::make_pair;
43using std::min;
44using std::ostream;
45using std::ostringstream;
46using std::pair;
47using std::set;
48using std::string;
49using std::vector;
50using std::memcmp;
51using std::lower_bound;
52
53using boost::shared_ptr;
54
55struct identity {size_t operator()(u32 const & v) const { return static_cast<size_t>(v);}};
56typedef pair<string::size_type, string::size_type> extent;
57typedef hashmap::hash_map<u32, extent> match_table;
58
59struct
60insn
61{
62 insn(char c) : code(insert), pos(0), len(0), payload("") { payload += c; }
63 insn(string s) : code(insert), pos(0), len(s.size()), payload(s) {}
64 insn(u32 p, u32 l) : code(copy), pos(p), len(l) {}
65 enum { insert, copy } code;
66 u32 pos, len;
67 string payload;
68};
69
70ostream & operator<<(ostream & ost, insn const & i)
71{
72 if (i.code == insn::insert)
73 {
74 ost << "I " << i.payload.size() << '\n';
75 ost.write(i.payload.data(), i.payload.size());
76 ost << '\n';
77 }
78 else
79 ost << "C " << i.pos << ' ' << i.len << '\n';
80 return ost;
81}
82
83
84static inline void
85init_match_table(string const & a,
86 const string::size_type blocksz,
87 match_table & tab)
88{
89 string::size_type sz = a.size();
90 for (string::size_type i = 0; i < sz; i += blocksz)
91 {
92 string::size_type step = ((i + blocksz) >= sz) ? (sz - i) : blocksz;
93 u32 sum = adler32(reinterpret_cast<u8 const *>(a.data() + i), step).sum();
94 if (tab.find(sum) == tab.end())
95 tab.insert(make_pair(sum, make_pair(i, step)));
96 }
97 return;
98}
99
100
101static inline bool
102find_match(match_table const & matches,
103 vector<insn> & delta,
104 adler32 const & rolling,
105 string const & a,
106 string const & b,
107 string::size_type bpos,
108 string::size_type & apos,
109 string::size_type & alen,
110 string::size_type & badvance)
111{
112 u32 sum = rolling.sum();
113 match_table::const_iterator e = matches.find(sum);
114
115 // maybe we haven't seen it at all?
116 if (e == matches.end())
117 return false;
118
119 string::size_type tpos = e->second.first;
120 string::size_type tlen = e->second.second;
121
122 I(tpos < a.size());
123 I(tpos + tlen <= a.size());
124
125 // maybe it's a false match?
126 if (memcmp(a.data() + tpos, b.data() + bpos, tlen) != 0)
127 return false;
128
129 apos = tpos;
130
131 // see if we can extend our match forwards
132 string::const_iterator ai = a.begin() + apos + tlen;
133 string::const_iterator ae = a.end();
134 string::const_iterator bi = b.begin() + bpos + tlen;
135 string::const_iterator be = b.end();
136
137 while ((ai != ae)
138 && (bi != be)
139 && (*ai == *bi))
140 {
141 ++tlen;
142 ++ai;
143 ++bi;
144 }
145
146 alen = tlen;
147 badvance = tlen;
148
149 // see if we can extend backwards into a previous insert hunk
150 if (! delta.empty() && delta.back().code == insn::insert)
151 {
152 while (apos > 0
153 && bpos > 0
154 && a[apos - 1] == b[bpos - 1]
155 && !delta.back().payload.empty())
156 {
157 I(a[apos - 1] == *(delta.back().payload.rbegin()));
158 I(delta.back().payload.size() > 0);
159 delta.back().payload.resize(delta.back().payload.size() - 1);
160 --apos;
161 --bpos;
162 ++alen;
163 // the significant thing here is that we do not move
164 // 'badvance' forward, just alen.
165 }
166
167 // if we've extended back to consume the *entire* insert,
168 // let's do away with it altogether.
169 if (delta.back().payload.empty())
170 {
171 delta.pop_back();
172 }
173 }
174
175 I(memcmp(a.data() + apos, b.data() + bpos, alen) == 0);
176 return true;
177}
178
179static inline void
180insert_insn(vector<insn> & delta, char c)
181{
182 if (delta.empty() || delta.back().code == insn::copy)
183 {
184 delta.push_back(insn(c));
185 }
186 else
187 {
188 // design in gcc 3.3 and 4.0 STL expands the string one character
189 // at a time when appending single characters ?!
190 // see libstdc++5-3.3-dev 3.3.5-13: basic_string.h:471, calling with
191 // size_type(1), then basic_string.tcc:717 reserving one more byte
192 // if needed.
193 // see libstdc++6-4.0-dev 4.0.3-3: basic_string.h:770 calling push_back
194 // then basic_string.h: 849 again adding 1 to the length.
195 if (delta.back().payload.capacity() == delta.back().payload.size())
196 {
197 // standard amortized constant rule
198 delta.back().payload.reserve(delta.back().payload.size() * 2);
199 }
200 delta.back().payload += c;
201 }
202}
203
204
205static inline void
206copy_insn(vector<insn> & delta, string::size_type i, string::size_type matchlen)
207{
208 delta.push_back(insn(i, matchlen));
209}
210
211
212static void
213compute_delta_insns(string const & a,
214 string const & b,
215 vector<insn> & delta)
216{
217 static const string::size_type blocksz = 64;
218 match_table matches;
219 init_match_table(a, blocksz, matches);
220
221 if (b.size() < blocksz)
222 {
223 for (string::size_type i = 0; i < b.size(); ++i)
224 insert_insn(delta, b[i]);
225 return;
226 }
227
228 adler32 rolling(reinterpret_cast<u8 const *>(b.data()), blocksz);
229
230 for (string::size_type
231 sz = b.size(),
232 lo = 0;
233 lo < sz; )
234 {
235 string::size_type apos = 0, alen = 1, badvance = 1;
236
237 bool found_match = find_match(matches, delta, rolling, a, b, lo, apos, alen, badvance);
238
239 // There are basically three cases:
240 // 1) advance by 1 (common, no match found)
241 // 2) advance by >blocksz (semi-common, usual case when a match is found)
242 // 3) advance by <blocksz (rare, unusual case when a match is found)
243 // in case (2), all of the rolling checksum data will be entirely replaced, so
244 // we can do a fast skip forward.
245 if (found_match)
246 {
247 copy_insn(delta, apos, alen);
248 u32 save_lo = lo;
249 if (badvance <= blocksz)
250 {
251 string::size_type next = lo;
252 I(next < b.size() && (lo + badvance - 1) < b.size());
253 for (; next < lo + badvance; ++next)
254 {
255 rolling.out(static_cast<u8>(b[next]));
256 if (next + blocksz < b.size())
257 rolling.in(static_cast<u8>(b[next + blocksz]));
258 }
259 lo = next;
260 }
261
262 // EricAnderson: I think that >= would also be correct, because it
263 // should replace all blocksz characters in the rolling checksum,
264 // but it seemed safer to use > which is easily seen as correct.
265 if (badvance > blocksz)
266 {
267 u32 new_lo = save_lo + badvance;
268 u32 new_hi = new_lo + blocksz;
269 if (new_hi > b.size())
270 {
271 new_hi = b.size();
272 }
273 I(new_lo <= new_hi);
274 I(lo + blocksz < new_lo);
275 // if you want to verify that the skip forward advance is
276 // working properly, then you can modify the if test above
277 // to read true || badvance <= blocksz so that we always
278 // move the checksum forward by rolling, and uncomment the
279 // line below that saves the checksum and the one after
280 // that verifies that the skip-forward checksum matches
281 // the roll-forward checksum.
282 // u32 save_sum = rolling.sum();
283 rolling.replace_with(reinterpret_cast<u8 const *>(b.data() + new_lo), new_hi-new_lo);
284 // I(rolling.sum() == save_sum);
285 lo = new_lo;
286 }
287 }
288 else
289 {
290 I(apos + alen <= a.size());
291 I(alen == 1);
292 I(alen < blocksz);
293 I(lo < b.size());
294 insert_insn(delta, b[lo]);
295 rolling.out(static_cast<u8>(b[lo]));
296 if (lo + blocksz < b.size())
297 {
298 rolling.in(static_cast<u8>(b[lo+blocksz]));
299 }
300 ++lo;
301 }
302 }
303}
304
305void
306write_delta_insns(vector<insn> const & delta_insns,
307 string & delta)
308{
309 delta.clear();
310 ostringstream oss;
311 for (vector<insn>::const_iterator i = delta_insns.begin();
312 i != delta_insns.end(); ++i)
313 {
314 oss << *i;
315 }
316 delta = oss.str();
317}
318
319void
320compute_delta(string const & a,
321 string const & b,
322 string & delta)
323{
324 vector<insn> delta_insns;
325
326 static bool widen_checked = false;
327 if (!widen_checked)
328 {
329 // This code just verifies that there isn't something going on where the
330 // widen is required for some weird compiler that incorrectly sign
331 // extends unsigned constants when casting to a wider type
332 for (u32 i = 0; i < 256; ++i)
333 {
334 u8 v = (u8)(i & 0xFF);
335 u32 v_w = widen<u32,u8>(v);
336 I(v_w == i);
337 I((static_cast<u32>(v)) == i);
338 }
339 widen_checked = true;
340 }
341
342 // FIXME: in theory you can do empty files and empty deltas; write some
343 // tests to be sure you're doing it right, and in any case implement the
344 // necessary logic directly in this function, don't bother doing an
345 // xdelta. several places of the xdelta code prefer assertions which are
346 // only true with non-empty chunks anyways.
347
348 if (a == b)
349 {
350 delta.clear();
351 return;
352 }
353
354 if (a.size() == 0 && b.size() != 0)
355 delta_insns.push_back(insn(b));
356 else if (a.size() != 0 && b.size() == 0)
357 delta_insns.push_back(insn(0, 0));
358 else
359 {
360 I(a.size() > 0);
361 I(b.size() > 0);
362
363 L(FL("computing binary delta instructions"));
364 compute_delta_insns(a, b, delta_insns);
365 L(FL("computed binary delta instructions"));
366 }
367
368 ostringstream oss;
369 for (vector<insn>::const_iterator i = delta_insns.begin();
370 i != delta_insns.end(); ++i)
371 {
372 oss << *i;
373 }
374 delta = oss.str();
375}
376
377struct
378simple_applicator
379 : public delta_applicator
380{
381 string src;
382 string dst;
383 virtual ~simple_applicator () {}
384 virtual void begin(string const & base)
385 {
386 src = base;
387 dst.clear();
388 }
389 virtual void next()
390 {
391 swap(src,dst);
392 dst.clear();
393 }
394 virtual void finish(string & out)
395 {
396 out = src;
397 }
398
399 virtual void copy(string::size_type pos, string::size_type len)
400 {
401 dst.append(src, pos, len);
402 }
403 virtual void insert(string const & str)
404 {
405 dst.append(str);
406 }
407};
408
409inline string::size_type
410read_num(string::const_iterator &i,
411 string::const_iterator e)
412{
413 string::size_type n = 0;
414
415 while (i != e && *i == ' ')
416 ++i;
417
418 while (i != e && *i >= '0' && *i <= '9')
419 {
420 n *= 10;
421 n += static_cast<size_t>(*i - '0');
422 ++i;
423 }
424 return n;
425}
426
427void
428apply_delta(shared_ptr<delta_applicator> da,
429 string const & delta)
430{
431 string::const_iterator i = delta.begin();
432 while (i != delta.end() && (*i == 'I' || *i == 'C'))
433 {
434 if (*i == 'I')
435 {
436 ++i;
437 I(i != delta.end());
438 string::size_type len = read_num(i, delta.end());
439 I(i != delta.end());
440 I(*i == '\n');
441 ++i;
442 I(i != delta.end());
443 I((i - delta.begin()) + len <= delta.size());
444 if (len > 0)
445 {
446 string tmp(i, i+len);
447 da->insert(tmp);
448 }
449 i += len;
450 }
451 else
452 {
453 I(*i == 'C');
454 ++i;
455 I(i != delta.end());
456 string::size_type pos = read_num(i, delta.end());
457 I(i != delta.end());
458 string::size_type len = read_num(i, delta.end());
459 if (len != 0)
460 da->copy(pos, len);
461 }
462 I(i != delta.end());
463 I(*i == '\n');
464 ++i;
465 }
466 I(i == delta.end());
467}
468
469void
470apply_delta(string const & a,
471 string const & delta,
472 string & b)
473{
474 shared_ptr<delta_applicator> da(new simple_applicator());
475 da->begin(a);
476 apply_delta(da, delta);
477 da->next();
478 da->finish(b);
479}
480
481struct
482size_accumulating_delta_applicator :
483 public delta_applicator
484{
485 u64 & sz;
486 size_accumulating_delta_applicator(u64 & s) : sz(s) {}
487 virtual void begin(string const & base) {}
488 virtual void next() {}
489 virtual void finish(string & out) {}
490
491 virtual void copy(string::size_type pos,
492 string::size_type len)
493 { sz += len; }
494 virtual void insert(string const & str)
495 { sz += str.size(); }
496};
497
498
499u64
500measure_delta_target_size(string const & delta)
501{
502 u64 sz = 0;
503 shared_ptr<delta_applicator> da(new size_accumulating_delta_applicator(sz));
504 apply_delta(da, delta);
505 return sz;
506}
507
508
509
510// piecewise-applicator stuff follows (warning: ugly)
511
512typedef string::size_type version_pos;
513typedef string::size_type piece_pos;
514typedef string::size_type length;
515typedef unsigned long piece_id;
516
517struct chunk
518{
519 length len; // how many chars in this chunk
520 piece_id piece; // which piece to take chars from
521 version_pos vpos; // position in the current version
522 piece_pos ppos; // position in piece to take chars from
523
524 chunk (length ln, piece_id p, version_pos vp, piece_pos pp) :
525 len(ln), piece(p), vpos(vp), ppos(pp)
526 {}
527
528 chunk subchunk(version_pos vp,
529 length ln,
530 length offset) const
531 {
532 I(ppos + offset >= ppos);
533 I(ppos + offset + ln <= ppos + len);
534
535 chunk c = *this;
536 c.len = ln;
537 c.vpos = vp;
538 c.ppos += offset;
539 return c;
540 }
541};
542
543typedef vector<chunk> version_spec;
544
545struct
546piece_table
547{
548 vector<string> pieces;
549
550 void clear()
551 {
552 pieces.clear();
553 }
554
555 piece_id insert(string const & p)
556 {
557 pieces.push_back(p);
558 return pieces.size() - 1;
559 }
560
561 void append(string & targ, piece_id p, piece_pos pp, length ln)
562 {
563 I(p < pieces.size());
564 targ.append(pieces[p], pp, ln);
565 }
566
567 void build(version_spec const & in, string & out)
568 {
569 out.clear();
570 unsigned out_len = 0;
571 for (version_spec::const_iterator i = in.begin();
572 i != in.end(); ++i)
573 {
574 out_len += i->len;
575 }
576 out.reserve(out_len);
577 for (version_spec::const_iterator i = in.begin();
578 i != in.end(); ++i)
579 {
580 append(out, i->piece, i->ppos, i->len);
581 }
582 }
583};
584
585static void
586apply_insert(piece_table & p, version_spec & out, string const & str)
587{
588 piece_id piece = p.insert(str);
589 version_pos vpos = 0;
590 if (!out.empty())
591 vpos = out.back().vpos + out.back().len;
592 out.push_back(chunk(str.size(), piece, vpos, 0));
593}
594
595struct
596chunk_less_than
597{
598 bool operator()(chunk const & ch1, chunk const & ch2) const
599 {
600 // nb: ch.vpos + ch.len is the 0-based index of the first element *not*
601 // included in ch; thus we measure against ch.len - 1.
602// I(ch1.len > 0);
603 return (ch1.vpos + ch1.len - 1) < ch2.vpos;
604 }
605};
606
607static void
608apply_copy(version_spec const & in, version_spec & out,
609 version_pos src_vpos, length src_len)
610{
611 //
612 // this is a little tricky because there's *4* different extents
613 // we're talking about at any time:
614 //
615 //
616 // - the 'src' extent, which is 1 or more chunks in the previous version.
617 // its address in the previous version is given in terms of a version_pos
618 // + length value.
619 //
620 // - the 'dst' extent, which is 1 chunk in the new version. its address
621 // in the new version is given in terms of a version_pos + length value.
622 //
623 // - the portion of a piece referenced by the src extent, which we're
624 // selecting a subset of. this is given in terms of a piece_pos + length
625 // value, against a particular piece.
626 //
627 // - the portion of a piece going into the dst extent, which is the
628 // selected subset. this is given in terms of a piece_pos + length value,
629 // against a particular piece.
630 //
631
632 version_pos src_final = src_vpos + src_len;
633 version_pos dst_vpos = 0;
634 if (!out.empty())
635 dst_vpos = out.back().vpos + out.back().len;
636 version_pos dst_final = dst_vpos + src_len;
637 chunk src_bounding_chunk(0,0,src_vpos,0);
638 version_spec::const_iterator lo = lower_bound(in.begin(),
639 in.end(),
640 src_bounding_chunk,
641 chunk_less_than());
642 for ( ; src_len > 0; ++lo)
643 {
644 I(lo != in.end());
645
646 // now we are iterating over src extents which cover the current dst
647 // extent. we found these src extents by calling lower_bound,
648 // above. note, this entire function is called once per dst extent.
649 //
650 // there's two possible arrangements of spanning src extents:
651 //
652 // [ src extent 1 ][ src extent 2 ]
653 // [ ... dst extent .. ]
654 //
655 // or
656 //
657 // [ ... src extent ... ]
658 // [ ... dst extent .. ]
659 //
660 // the following arithmetic should bite off the lowest chunk of
661 // either of these two scenarios, append it to the dst version
662 // vector, and advance the 2 pos' and 1 len value appropriately.
663
664 version_pos src_end = min ((src_vpos + src_len), (lo->vpos + lo->len));
665 version_pos offset = src_vpos - lo->vpos;
666 length seglen = src_end - src_vpos;
667
668 I(seglen > 0);
669 I(src_vpos >= lo->vpos);
670 I(src_vpos + seglen <= lo->vpos + lo->len);
671
672 out.push_back(lo->subchunk(dst_vpos, seglen, offset));
673 src_vpos += seglen;
674 dst_vpos += seglen;
675 I(src_len >= seglen);
676 src_len -= seglen;
677 I(out.back().vpos + out.back().len == dst_vpos);
678 }
679
680 I(src_vpos == src_final);
681 I(dst_vpos == dst_final);
682 I(src_len == 0);
683}
684
685
686struct
687piecewise_applicator
688 : public delta_applicator
689{
690 piece_table pt;
691 shared_ptr<version_spec> src;
692 shared_ptr<version_spec> dst;
693
694 piecewise_applicator() :
695 src(new version_spec()),
696 dst(new version_spec())
697 {}
698
699 virtual ~piecewise_applicator () {}
700
701 virtual void begin(string const & base)
702 {
703 pt.clear();
704 piece_id piece = pt.insert(base);
705 src->clear();
706 src->push_back(chunk(base.size(), piece, 0, 0));
707 dst->clear();
708 }
709
710 virtual void next()
711 {
712 swap(src,dst);
713 dst->clear();
714 }
715
716 virtual void finish(string & out)
717 {
718 out.clear();
719 pt.build(*src, out);
720 }
721
722 virtual void copy(string::size_type pos, string::size_type len)
723 {
724 apply_copy(*src, *dst, pos, len);
725 }
726
727 virtual void insert(string const & str)
728 {
729 apply_insert(pt, *dst, str);
730 }
731};
732
733
734// these just hide our implementation types from outside
735
736shared_ptr<delta_applicator>
737new_simple_applicator()
738{
739 return shared_ptr<delta_applicator>(new simple_applicator());
740}
741
742shared_ptr<delta_applicator>
743new_piecewise_applicator()
744{
745 return shared_ptr<delta_applicator>(new piecewise_applicator());
746}
747
748
749// inversion
750
751struct copied_extent
752{
753 copied_extent(string::size_type op,
754string::size_type np,
755string::size_type len)
756 : old_pos(op),
757 new_pos(np),
758 len(len)
759 {}
760 string::size_type old_pos;
761 string::size_type new_pos;
762 string::size_type len;
763 bool operator<(copied_extent const & other) const
764 {
765 return (old_pos < other.old_pos) ||
766 (old_pos == other.old_pos && len > other.len);
767 }
768};
769
770struct
771inverse_delta_writing_applicator :
772 public delta_applicator
773{
774 string const & old;
775 set<copied_extent> copied_extents;
776 string::size_type new_pos;
777
778 inverse_delta_writing_applicator(string const & old)
779 : old(old),
780 new_pos(0)
781 {}
782
783 virtual void begin(string const & base) {}
784 virtual void next() {}
785 virtual void finish(string & out)
786 {
787 // We are trying to write a delta instruction stream which
788 // produces 'old' from 'new'. We don't care what was in 'new',
789 // because we're only going to copy some parts forwards, and we
790 // already know which parts: those in the table. Our table lists
791 // extents which were copied in order that they appear in 'old'.
792 //
793 // When we run into a section of 'old' which isn't in the table,
794 // we have to emit an insert instruction for the gap.
795
796 string::size_type old_pos = 0;
797 out.clear();
798 vector<insn> delta_insns;
799
800 for (set<copied_extent>::iterator i = copied_extents.begin();
801 i != copied_extents.end(); ++i)
802 {
803// It is possible that this extent left a gap after the
804// previously copied extent; in this case we wish to pad
805// the intermediate space with an insert.
806while (old_pos < i->old_pos)
807 {
808 I(old_pos < old.size());
809 // Don't worry, adjacent inserts are merged.
810 insert_insn(delta_insns, old.at(old_pos++));
811 }
812
813// It is also possible that this extent *overlapped* the
814// previously copied extent; in this case we wish to subtract
815// the overlap from the inverse copy.
816
817string::size_type overlap = 0;
818if (i->old_pos < old_pos)
819 overlap = old_pos - i->old_pos;
820
821if (i->len <= overlap)
822 continue;
823
824I(i->len > overlap);
825copy_insn(delta_insns, i->new_pos + overlap, i->len - overlap);
826old_pos += (i->len - overlap);
827 }
828 while (old_pos < old.size())
829 insert_insn(delta_insns, old.at(old_pos++));
830
831 write_delta_insns(delta_insns, out);
832 }
833
834 virtual void copy(string::size_type old_pos,
835 string::size_type len)
836 {
837 I(old_pos < old.size());
838 copied_extents.insert(copied_extent(old_pos, new_pos, len));
839 new_pos += len;
840 }
841
842 virtual void insert(string const & str)
843 {
844 new_pos += str.size();
845 }
846};
847
848
849void
850invert_xdelta(string const & old_str,
851 string const & delta,
852 string & delta_inverse)
853{
854 shared_ptr<delta_applicator> da(new inverse_delta_writing_applicator(old_str));
855 apply_delta(da, delta);
856 da->finish(delta_inverse);
857}
858
859
860#ifdef BUILD_UNIT_TESTS
861
862#include "unit_tests.hh"
863#ifdef WIN32
864#define BOOST_NO_STDC_NAMESPACE
865#endif
866#include <boost/random.hpp>
867
868boost::mt19937 xdelta_prng;
869
870#if BOOST_VERSION >= 103100
871boost::uniform_smallint<char> xdelta_chargen('a', 'z');
872boost::uniform_smallint<size_t> xdelta_sizegen(1024, 65536);
873boost::uniform_smallint<size_t> xdelta_editgen(3, 10);
874boost::uniform_smallint<size_t> xdelta_lengen(1, 256);
875#define PRNG xdelta_prng
876#else
877boost::uniform_smallint<boost::mt19937, char> xdelta_chargen(xdelta_prng, 'a', 'z');
878boost::uniform_smallint<boost::mt19937, size_t> xdelta_sizegen(xdelta_prng, 1024, 65536);
879boost::uniform_smallint<boost::mt19937, size_t> xdelta_editgen(xdelta_prng, 3, 10);
880boost::uniform_smallint<boost::mt19937, size_t> xdelta_lengen(xdelta_prng, 1, 256);
881#define PRNG
882#endif
883
884void
885xdelta_random_string(string & str)
886{
887 size_t sz = xdelta_sizegen(PRNG);
888 str.clear();
889 str.reserve(sz);
890 while (sz-- > 0)
891 {
892 str += xdelta_chargen(PRNG);
893 }
894}
895
896void
897xdelta_randomly_insert(string & str)
898{
899 size_t nedits = xdelta_editgen(PRNG);
900 while (nedits > 0)
901 {
902 size_t pos = xdelta_sizegen(PRNG) % str.size();
903 size_t len = xdelta_lengen(PRNG);
904 if (pos+len >= str.size())
905 continue;
906 string tmp;
907 tmp.reserve(len);
908 for (size_t i = 0; i < len; ++i)
909 tmp += xdelta_chargen(PRNG);
910 str.insert(pos, tmp);
911 nedits--;
912 }
913}
914
915void
916xdelta_randomly_change(string & str)
917{
918 size_t nedits = xdelta_editgen(PRNG);
919 while (nedits > 0)
920 {
921 size_t pos = xdelta_sizegen(PRNG) % str.size();
922 size_t len = xdelta_lengen(PRNG);
923 if (pos+len >= str.size())
924 continue;
925 for (size_t i = 0; i < len; ++i)
926 str[pos+i] = xdelta_chargen(PRNG);
927 nedits--;
928 }
929}
930
931void
932xdelta_randomly_delete(string & str)
933{
934 size_t nedits = xdelta_editgen(PRNG);
935 while (nedits > 0)
936 {
937 size_t pos = xdelta_sizegen(PRNG) % str.size();
938 size_t len = xdelta_lengen(PRNG);
939 if (pos+len >= str.size())
940 continue;
941 str.erase(pos, len);
942 --nedits;
943 }
944}
945
946void
947xdelta_random_simple_delta_test()
948{
949 for (int i = 0; i < 100; ++i)
950 {
951 string a, b, fdel, rdel, c, d;
952 xdelta_random_string(a);
953 b = a;
954 xdelta_randomly_change(b);
955 xdelta_randomly_insert(b);
956 xdelta_randomly_delete(b);
957 compute_delta(a, b, fdel);
958 compute_delta(b, a, rdel);
959 L(FL("src %d, dst %d, fdel %d, rdel %d")
960 % a.size() % b.size()% fdel.size() % rdel.size()) ;
961 if (fdel.size() == 0)
962 {
963 L(FL("confirming src == dst and rdel == 0"));
964 BOOST_CHECK(a == b);
965 BOOST_CHECK(rdel.size() == 0);
966 }
967 else
968 {
969 apply_delta(a, fdel, c);
970 apply_delta(b, rdel, d);
971 L(FL("confirming dst1 %d, dst2 %d") % c.size() % d.size());
972 BOOST_CHECK(b == c);
973 BOOST_CHECK(a == d);
974 }
975 }
976}
977
978void
979add_xdelta_tests(test_suite * suite)
980{
981 I(suite);
982 suite->add(BOOST_TEST_CASE(&xdelta_random_simple_delta_test));
983}
984
985#endif // BUILD_UNIT_TESTS
986
987// Local Variables:
988// mode: C++
989// fill-column: 76
990// c-file-style: "gnu"
991// indent-tabs-mode: nil
992// End:
993// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status