
// copyright (C) 2002, 2003 graydon hoare <graydon@pobox.com>
// all rights reserved.
// licensed to the public under the terms of the GNU GPL (>= 2)
// see the file COPYING for details

#include <algorithm>
#include <iterator>
#include <vector>
#include <string>
#include <iostream>

#include "config.h"
#include "diff_patch.hh"
#include "interner.hh"
#include "lcs.hh"
#include "manifest.hh"
#include "packet.hh"
#include "sanity.hh"
#include "transforms.hh"
#include "vocab.hh"

using namespace std;
bool guess_binary(string const & s)
{
  // these do not occur in ASCII text files
  // FIXME: this heuristic is (a) crap and (b) hardcoded. fix both these.
  // the table is built with an explicit length because it begins with
  // NUL; passing the literal straight to find_first_of would treat it
  // as an empty C string and never match anything.
  static string const binary_chars("\x00\x01\x02\x03\x04\x05\x06\x0e\x0f"
                                   "\x10\x11\x12\x13\x14\x15\x16\x17\x18"
                                   "\x19\x1a\x1c\x1d\x1e\x1f", 24);
  return s.find_first_of(binary_chars) != string::npos;
}

//
// a 3-way merge works like this:
//
//            /----> right
//    ancestor
//            \----> left
//
// first you compute the edit list "EDITS(ancestor,left)".
//
// then you make an offset table "leftpos" which describes positions in
// "ancestor" as they map to "left"; that is, for 0 < apos <
// ancestor.size(), we have
//
//   left[leftpos[apos]] == ancestor[apos]
//
// you do this by walking through the edit list and either jumping the
// current index ahead an extra position, on an insert, or remaining still,
// on a delete. on an insert *or* a delete, you push the current index back
// onto the leftpos array.
//
// next you compute the edit list "EDITS(ancestor,right)".
//
// you then go through this edit list applying the edits to left, rather
// than ancestor, and using the table leftpos to map the position of each
// edit to an appropriate spot in left. this means you walk a "curr_left"
// index through the edits, and for each edit e:
//
// - if e is a delete (and e.pos is a position in ancestor)
//   - increment curr_left without copying anything to "merged"
//
// - if e is an insert (and e.pos is a position in right)
//   - copy right[e.pos] to "merged"
//   - leave curr_left alone
//
// - when advancing to apos (and apos is a position in ancestor)
//   - copy left[curr_left] to merged while curr_left < leftpos[apos]
//
//
// the practical upshot is that you apply the delta from ancestor->right
// to the adjusted contexts in left, producing something vaguely like
// the concatenation of delta(ancestor,left) :: delta(ancestor,right).
//
// NB: this is, as far as I can tell, what diff3 does. I don't think I'm
// infringing on anyone's fancy patents here.
//

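// as a concrete illustration (an example, not taken from the code below):
// given
//
//    ancestor = [ a, b, c ]
//    left     = [ a, x, b, c ]   (x inserted after a)
//    right    = [ a, b, d ]      (c changed to d)
//
// the ancestor->right edits "delete c, insert d" are replayed against
// left, with the edit position shifted one line down by leftpos to
// account for x, yielding merged = [ a, x, b, d ].
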
typedef enum { preserved = 0, deleted = 1, changed = 2 } edit_t;
static char const * etab[3] =
  {
    "preserved",
    "deleted",
    "changed"
  };

struct extent
{
  extent(size_t p, size_t l, edit_t t)
    : pos(p), len(l), type(t)
  {}
  size_t pos;
  size_t len;
  edit_t type;
};

void calculate_extents(vector<long> const & a_b_edits,
                       vector<long> const & b,
                       vector<long> & prefix,
                       vector<extent> & extents,
                       vector<long> & suffix,
                       size_t const a_len,
                       interner<long> & intern)
{
  extents.reserve(a_len * 2);

  size_t a_pos = 0, b_pos = 0;

  for (vector<long>::const_iterator i = a_b_edits.begin();
       i != a_b_edits.end(); ++i)
    {
      // L(F("edit: %d") % *i);
      if (*i < 0)
        {
          // negative elements code the negation of the one-based index into A
          // of the element to be deleted
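          // (for example, *i == -3 codes the deletion of A[2],
          // counting from zero)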
          size_t a_deleted = (-1 - *i);

          // fill positions out to the deletion point
          while (a_pos < a_deleted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(F("  -- delete at A-pos %d (B-pos = %d)\n") % a_deleted % b_pos);

          // skip the deleted line
          a_pos++;
          extents.push_back(extent(b_pos, 0, deleted));
        }
      else
        {
          // positive elements code the one-based index into B of the element to
          // be inserted
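          // (for example, *i == 3 codes the insertion of B[2],
          // counting from zero)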
          size_t b_inserted = (*i - 1);

          // fill positions out to the insertion point
          while (b_pos < b_inserted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(F("  -- insert at B-pos %d (A-pos = %d) : '%s'\n")
          //   % b_inserted % a_pos % intern.lookup(b.at(b_inserted)));

          // record that there was an insertion, but a_pos did not move.
          if ((b_pos == 0 && extents.empty())
              || (b_pos == prefix.size()))
            {
              prefix.push_back(b.at(b_pos));
            }
          else if (a_len == a_pos)
            {
              suffix.push_back(b.at(b_pos));
            }
          else
            {
              // make the insertion
              extents.back().type = changed;
              extents.back().len++;
            }
          b_pos++;
        }
    }

  while (extents.size() < a_len)
    extents.push_back(extent(b_pos++, 1, preserved));
}
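
// to illustrate (a worked example, not part of the original source):
// given A = [ a, b, c ], B = [ a, x, c ] and the edit script [ -2, 2 ]
// ("delete the 2nd element of A, insert the 2nd element of B"), the
// function above produces
//
//    extents = [ (pos 0, len 1, preserved),
//                (pos 1, len 1, changed),
//                (pos 2, len 1, preserved) ]
//
// the delete and the insert at the same spot collapse into a single
// "changed" extent mapping A[1] to B[1].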

void normalize_extents(vector<extent> & a_b_map,
                       vector<long> const & a,
                       vector<long> const & b)
{
  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && (a_b_map.at(j-1).type == preserved)
                 && (a_b_map.at(j).type == changed)
                 && (a.at(j) == a.at(j-1))
                 && (b.at(a_b_map.at(j-1).pos) ==
                     b.at(a_b_map.at(j).pos + a_b_map.at(j).len - 1)))
            {

              //
              // Coming into this loop, we've identified this situation:
              //
              //      A                              B
              //   --------                  ----------------
              //   j-1: foo  --preserved-->  mapped[j-1]: foo
              //   j:   foo  --changed---->  mapped[ j ]: bar
              //                                          foo
              //
              // The normalization we want to perform is to move all
              // "changed" extents to the earliest possible position which
              // still causes the same B image to be produced.
              //
              //      A                              B
              //   --------                  ----------------
              //   j-1: foo  --changed---->  mapped[j-1]: foo
              //                                          bar
              //   j:   foo  --preserved-->  mapped[ j ]: foo
              //

              // This code is almost untested, because we've only been able to
              // find one test case that exercises it! Maybe this text will
              // make us look bad when/if someone discovers it, but better to
              // have a bad reputation and good software than vice-versa.
              W(F("You've found files that trigger a strange edge-case of the merge logic.\n"));
              W(F("We believe it works, but don't have many tests; so, if you could send us\n"));
              W(F("a note, and, if possible, the files that triggered it for inclusion in our\n"));
              W(F("test suite, we'd appreciate it. Address: " PACKAGE_BUGREPORT ". Thanks!\n"));

              L(F("exchanging preserved extent [%d+%d] with changed extent [%d+%d]\n")
                % a_b_map.at(j-1).pos
                % a_b_map.at(j-1).len
                % a_b_map.at(j).pos
                % a_b_map.at(j).len);

              swap(a_b_map.at(j-1).len, a_b_map.at(j).len);
              swap(a_b_map.at(j-1).type, a_b_map.at(j).type);
              --j;
            }
        }
    }

  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && a_b_map.at(j).type == changed
                 && a_b_map.at(j-1).type == changed
                 && a_b_map.at(j).len > 1
                 && a_b_map.at(j-1).pos + a_b_map.at(j-1).len == a_b_map.at(j).pos)
            {
              // step 1: move a chunk from this insert extent to its
              // predecessor
              size_t piece = a_b_map.at(j).len - 1;
              // L(F("moving change piece of len %d from pos %d to pos %d\n")
              //   % piece
              //   % a_b_map.at(j).pos
              //   % a_b_map.at(j-1).pos);
              a_b_map.at(j).len = 1;
              a_b_map.at(j).pos += piece;
              a_b_map.at(j-1).len += piece;

              // step 2: if this extent (now of length 1) has become a "changed"
              // extent identical to its previous state, switch it to a "preserved"
              // extent.
              if (b.at(a_b_map.at(j).pos) == a.at(j))
                {
                  // L(F("changing normalized 'changed' extent at %d to 'preserved'\n")
                  //   % a_b_map.at(j).pos);
                  a_b_map.at(j).type = preserved;
                }
              --j;
            }
        }
    }
}
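
// for instance (an illustrative example, not from the original file):
// two adjacent "changed" extents (pos 0, len 2) and (pos 2, len 2) are
// rebalanced by the second pass into (pos 0, len 3) and (pos 3, len 1);
// if B[3] then equals the corresponding A element, the now-trivial
// second extent is downgraded to "preserved".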


void merge_extents(vector<extent> const & a_b_map,
                   vector<extent> const & a_c_map,
                   vector<long> const & b,
                   vector<long> const & c,
                   interner<long> const & in,
                   vector<long> & merged)
{
  I(a_b_map.size() == a_c_map.size());

  vector<extent>::const_iterator i = a_b_map.begin();
  vector<extent>::const_iterator j = a_c_map.begin();
  merged.reserve(a_b_map.size() * 2);

  //   for (; i != a_b_map.end(); ++i, ++j)
  //     {
  //       L(F("trying to merge: [%s %d %d] vs. [%s %d %d] \n")
  //         % etab[i->type] % i->pos % i->len
  //         % etab[j->type] % j->pos % j->len);
  //     }

  //   i = a_b_map.begin();
  //   j = a_c_map.begin();

  for (; i != a_b_map.end(); ++i, ++j)
    {

      //       L(F("trying to merge: [%s %d %d] vs. [%s %d %d] \n")
      //         % etab[i->type] % i->pos % i->len
      //         % etab[j->type] % j->pos % j->len);

      // mutual, identical preserves / inserts / changes
      if (((i->type == changed && j->type == changed)
           || (i->type == preserved && j->type == preserved))
          && i->len == j->len)
        {
          for (size_t k = 0; k < i->len; ++k)
            {
              if (b.at(i->pos + k) != c.at(j->pos + k))
                {
                  L(F("conflicting edits: %s %d[%d] '%s' vs. %s %d[%d] '%s'\n")
                    % etab[i->type] % i->pos % k % in.lookup(b.at(i->pos + k))
                    % etab[j->type] % j->pos % k % in.lookup(c.at(j->pos + k)));
                  throw conflict();
                }
              merged.push_back(b.at(i->pos + k));
            }
        }

      // mutual or single-edge deletes
      else if ((i->type == deleted && j->type == deleted)
               || (i->type == deleted && j->type == preserved)
               || (i->type == preserved && j->type == deleted))
        {
          // do nothing
        }

      // single-edge insert / changes
      else if (i->type == changed && j->type == preserved)
        for (size_t k = 0; k < i->len; ++k)
          merged.push_back(b.at(i->pos + k));

      else if (i->type == preserved && j->type == changed)
        for (size_t k = 0; k < j->len; ++k)
          merged.push_back(c.at(j->pos + k));

      else
        {
          L(F("conflicting edits: [%s %d %d] vs. [%s %d %d]\n")
            % etab[i->type] % i->pos % i->len
            % etab[j->type] % j->pos % j->len);
          throw conflict();
        }

      //       if (merged.empty())
      //         L(F(" --> EMPTY\n"));
      //       else
      //         L(F(" --> [%d]: %s\n") % (merged.size() - 1) % in.lookup(merged.back()));
    }
}
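
// as a sketch of the matching logic above: if extent k is "changed" on
// the B side and "preserved" on the C side, the merged text takes the B
// lines; if both sides changed extent k, the edit is accepted only when
// B and C agree line-for-line, and any disagreement throws conflict().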


void merge_via_edit_scripts(vector<string> const & ancestor,
                            vector<string> const & left,
                            vector<string> const & right,
                            vector<string> & merged)
{
  vector<long> anc_interned;
  vector<long> left_interned, right_interned;
  vector<long> left_edits, right_edits;
  vector<long> left_prefix, right_prefix;
  vector<long> left_suffix, right_suffix;
  vector<extent> left_extents, right_extents;
  vector<long> merged_interned;
  interner<long> in;

  //   for (int i = 0; i < std::min(std::min(left.size(), right.size()), ancestor.size()); ++i)
  //     {
  //       std::cerr << "[" << i << "] " << left[i] << " " << ancestor[i] << " " << right[i] << endl;
  //     }

  anc_interned.reserve(ancestor.size());
  for (vector<string>::const_iterator i = ancestor.begin();
       i != ancestor.end(); ++i)
    anc_interned.push_back(in.intern(*i));

  left_interned.reserve(left.size());
  for (vector<string>::const_iterator i = left.begin();
       i != left.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(right.size());
  for (vector<string>::const_iterator i = right.begin();
       i != right.end(); ++i)
    right_interned.push_back(in.intern(*i));

  L(F("calculating left edit script on %d -> %d lines\n")
    % anc_interned.size() % left_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              left_interned.begin(), left_interned.end(),
              std::min(ancestor.size(), left.size()),
              left_edits);

  L(F("calculating right edit script on %d -> %d lines\n")
    % anc_interned.size() % right_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              right_interned.begin(), right_interned.end(),
              std::min(ancestor.size(), right.size()),
              right_edits);

  L(F("calculating left extents on %d edits\n") % left_edits.size());
  calculate_extents(left_edits, left_interned,
                    left_prefix, left_extents, left_suffix,
                    anc_interned.size(), in);

  L(F("calculating right extents on %d edits\n") % right_edits.size());
  calculate_extents(right_edits, right_interned,
                    right_prefix, right_extents, right_suffix,
                    anc_interned.size(), in);

  L(F("normalizing %d right extents\n") % right_extents.size());
  normalize_extents(right_extents, anc_interned, right_interned);

  L(F("normalizing %d left extents\n") % left_extents.size());
  normalize_extents(left_extents, anc_interned, left_interned);


  if ((!right_prefix.empty()) && (!left_prefix.empty()))
    {
      L(F("conflicting prefixes\n"));
      throw conflict();
    }

  if ((!right_suffix.empty()) && (!left_suffix.empty()))
    {
      L(F("conflicting suffixes\n"));
      throw conflict();
    }

  L(F("merging %d left, %d right extents\n")
    % left_extents.size() % right_extents.size());

  copy(left_prefix.begin(), left_prefix.end(), back_inserter(merged_interned));
  copy(right_prefix.begin(), right_prefix.end(), back_inserter(merged_interned));

  merge_extents(left_extents, right_extents,
                left_interned, right_interned,
                in, merged_interned);

  copy(left_suffix.begin(), left_suffix.end(), back_inserter(merged_interned));
  copy(right_suffix.begin(), right_suffix.end(), back_inserter(merged_interned));

  merged.reserve(merged_interned.size());
  for (vector<long>::const_iterator i = merged_interned.begin();
       i != merged_interned.end(); ++i)
    merged.push_back(in.lookup(*i));
}


bool merge3(vector<string> const & ancestor,
            vector<string> const & left,
            vector<string> const & right,
            vector<string> & merged)
{
  try
    {
      merge_via_edit_scripts(ancestor, left, right, merged);
    }
  catch(conflict & c)
    {
      L(F("conflict detected. no merge.\n"));
      return false;
    }
  return true;
}


merge_provider::merge_provider(app_state & app,
                               manifest_map const & anc_man,
                               manifest_map const & left_man,
                               manifest_map const & right_man)
  : app(app), anc_man(anc_man), left_man(left_man), right_man(right_man)
{}

void merge_provider::record_merge(file_id const & left_ident,
                                  file_id const & right_ident,
                                  file_id const & merged_ident,
                                  file_data const & left_data,
                                  file_data const & merged_data)
{
  L(F("recording successful merge of %s <-> %s into %s\n")
    % left_ident % right_ident % merged_ident);

  base64< gzip<delta> > merge_delta;
  transaction_guard guard(app.db);

  diff(left_data.inner(), merged_data.inner(), merge_delta);
  packet_db_writer dbw(app);
  dbw.consume_file_delta(left_ident, merged_ident, file_delta(merge_delta));
  guard.commit();
}

void merge_provider::get_version(file_path const & path,
                                 file_id const & ident,
                                 file_data & dat)
{
  app.db.get_file_version(ident, dat);
}

std::string merge_provider::get_file_encoding(file_path const & path,
                                              manifest_map const & man)
{
  std::string enc;
  if (get_attribute_from_db(path, encoding_attribute, man, enc, app))
    return enc;
  else
    return default_encoding;
}

bool merge_provider::try_to_merge_files(file_path const & anc_path,
                                        file_path const & left_path,
                                        file_path const & right_path,
                                        file_path const & merged_path,
                                        file_id const & ancestor_id,
                                        file_id const & left_id,
                                        file_id const & right_id,
                                        file_id & merged_id)
{
  // This version of try_to_merge_files should only be called when there is a
  // real merge3 to perform.
  I(!null_id(ancestor_id));
  I(!null_id(left_id));
  I(!null_id(right_id));

  L(F("trying to merge %s <-> %s (ancestor: %s)\n")
    % left_id % right_id % ancestor_id);

  if (left_id == right_id)
    {
      L(F("files are identical\n"));
      merged_id = left_id;
      return true;
    }

  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;
  string left_encoding, anc_encoding, right_encoding;
  vector<string> left_lines, ancestor_lines, right_lines, merged_lines;

  this->get_version(left_path, left_id, left_data);
  this->get_version(anc_path, ancestor_id, ancestor_data);
  this->get_version(right_path, right_id, right_data);

  left_encoding = this->get_file_encoding(left_path, left_man);
  anc_encoding = this->get_file_encoding(anc_path, anc_man);
  right_encoding = this->get_file_encoding(right_path, right_man);

  unpack(left_data.inner(), left_unpacked);
  unpack(ancestor_data.inner(), ancestor_unpacked);
  unpack(right_data.inner(), right_unpacked);

  split_into_lines(left_unpacked(), left_encoding, left_lines);
  split_into_lines(ancestor_unpacked(), anc_encoding, ancestor_lines);
  split_into_lines(right_unpacked(), right_encoding, right_lines);

  if (merge3(ancestor_lines,
             left_lines,
             right_lines,
             merged_lines))
    {
      hexenc<id> tmp_id;
      base64< gzip<data> > packed_merge;
      string tmp;

      L(F("internal 3-way merged ok\n"));
      join_lines(merged_lines, tmp);
      calculate_ident(data(tmp), tmp_id);
      file_id merged_fid(tmp_id);
      pack(data(tmp), packed_merge);

      merged_id = merged_fid;
      record_merge(left_id, right_id, merged_fid,
                   left_data, packed_merge);

      return true;
    }

  P(F("help required for 3-way merge\n"));
  P(F("[ancestor] %s\n") % anc_path);
  P(F("[    left] %s\n") % left_path);
  P(F("[   right] %s\n") % right_path);
  P(F("[  merged] %s\n") % merged_path);

  if (app.lua.hook_merge3(anc_path, left_path, right_path, merged_path,
                          ancestor_unpacked, left_unpacked,
                          right_unpacked, merged_unpacked))
    {
      hexenc<id> tmp_id;
      base64< gzip<data> > packed_merge;

      L(F("lua merge3 hook merged ok\n"));
      calculate_ident(merged_unpacked, tmp_id);
      file_id merged_fid(tmp_id);
      pack(merged_unpacked, packed_merge);

      merged_id = merged_fid;
      record_merge(left_id, right_id, merged_fid,
                   left_data, packed_merge);
      return true;
    }

  return false;
}

bool merge_provider::try_to_merge_files(file_path const & left_path,
                                        file_path const & right_path,
                                        file_path const & merged_path,
                                        file_id const & left_id,
                                        file_id const & right_id,
                                        file_id & merged_id)
{
  I(!null_id(left_id));
  I(!null_id(right_id));

  file_data left_data, right_data;
  data left_unpacked, right_unpacked, merged_unpacked;

  L(F("trying to merge %s <-> %s\n")
    % left_id % right_id);

  if (left_id == right_id)
    {
      L(F("files are identical\n"));
      merged_id = left_id;
      return true;
    }

  this->get_version(left_path, left_id, left_data);
  this->get_version(right_path, right_id, right_data);

  unpack(left_data.inner(), left_unpacked);
  unpack(right_data.inner(), right_unpacked);

  P(F("help required for 2-way merge\n"));
  P(F("[   left] %s\n") % left_path);
  P(F("[  right] %s\n") % right_path);
  P(F("[ merged] %s\n") % merged_path);

  if (app.lua.hook_merge2(left_path, right_path, merged_path,
                          left_unpacked, right_unpacked, merged_unpacked))
    {
      hexenc<id> tmp_id;
      base64< gzip<data> > packed_merge;

      L(F("lua merge2 hook merged ok\n"));
      calculate_ident(merged_unpacked, tmp_id);
      file_id merged_fid(tmp_id);
      pack(merged_unpacked, packed_merge);

      merged_id = merged_fid;
      record_merge(left_id, right_id, merged_fid,
                   left_data, packed_merge);
      return true;
    }

  return false;
}


// during the "update" command, the only real differences from merging
// are that we take our right versions from the filesystem, not the db,
// and we only record the merges in a transient, in-memory table.

update_merge_provider::update_merge_provider(app_state & app,
                                             manifest_map const & anc_man,
                                             manifest_map const & left_man,
                                             manifest_map const & right_man)
  : merge_provider(app, anc_man, left_man, right_man) {}

void update_merge_provider::record_merge(file_id const & left_id,
                                         file_id const & right_id,
                                         file_id const & merged_id,
                                         file_data const & left_data,
                                         file_data const & merged_data)
{
  L(F("temporarily recording merge of %s <-> %s into %s\n")
    % left_id % right_id % merged_id);
  I(temporary_store.find(merged_id) == temporary_store.end());
  temporary_store.insert(make_pair(merged_id, merged_data));
}

void update_merge_provider::get_version(file_path const & path,
                                        file_id const & ident,
                                        file_data & dat)
{
  if (app.db.file_version_exists(ident))
    app.db.get_file_version(ident, dat);
  else
    {
      base64< gzip<data> > tmp;
      file_id fid;
      N(file_exists(path),
        F("file %s does not exist in working copy") % path);
      read_localized_data(path, tmp, app.lua);
      calculate_ident(tmp, fid);
      N(fid == ident,
        F("file %s in working copy has id %s, wanted %s")
        % path % fid % ident);
      dat = tmp;
    }
}

std::string update_merge_provider::get_file_encoding(file_path const & path,
                                                     manifest_map const & man)
{
  std::string enc;
  if (get_attribute_from_working_copy(path, encoding_attribute, enc))
    return enc;
  else if (get_attribute_from_db(path, encoding_attribute, man, enc, app))
    return enc;
  else
    return default_encoding;
}


// the remaining part of this file just handles printing out various
// diff formats for the case where someone wants to *read* a diff
// rather than apply it.

struct hunk_consumer
{
  virtual void flush_hunk(size_t pos) = 0;
  virtual void advance_to(size_t newpos) = 0;
  virtual void insert_at(size_t b_pos) = 0;
  virtual void delete_at(size_t a_pos) = 0;
  virtual ~hunk_consumer() {}
};
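
// to sketch the protocol: for lines1 = [ a, b, c ], lines2 = [ a, x, c ]
// and LCS = [ a, c ], walk_hunk_consumer (below) issues
//
//    cons.advance_to(1);  // common prefix "a" ends
//    cons.delete_at(1);   // "b" leaves lines1
//    cons.insert_at(1);   // "x" enters lines2
//    cons.flush_hunk(3);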

void walk_hunk_consumer(vector<long> const & lcs,
                        vector<long> const & lines1,
                        vector<long> const & lines2,
                        hunk_consumer & cons)
{

  size_t a = 0, b = 0;
  if (lcs.begin() == lcs.end())
    {
      // degenerate case: files have nothing in common
      cons.advance_to(0);
      while (a < lines1.size())
        cons.delete_at(a++);
      while (b < lines2.size())
        cons.insert_at(b++);
      cons.flush_hunk(a);
    }
  else
    {
      // normal case: files have something in common
      for (vector<long>::const_iterator i = lcs.begin();
           i != lcs.end(); ++i, ++a, ++b)
        {
          if (idx(lines1, a) == *i && idx(lines2, b) == *i)
            continue;

          cons.advance_to(a);
          while (idx(lines1, a) != *i)
            cons.delete_at(a++);
          while (idx(lines2, b) != *i)
            cons.insert_at(b++);
        }
      if (b < lines2.size())
        {
          cons.advance_to(a);
          while (b < lines2.size())
            cons.insert_at(b++);
        }
      if (a < lines1.size())
        {
          cons.advance_to(a);
          while (a < lines1.size())
            cons.delete_at(a++);
        }
      cons.flush_hunk(a);
    }
}

struct unidiff_hunk_writer : public hunk_consumer
{
  vector<string> const & a;
  vector<string> const & b;
  size_t ctx;
  ostream & ost;
  size_t a_begin, b_begin, a_len, b_len;
  long skew;
  vector<string> hunk;
  unidiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost);
  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  virtual ~unidiff_hunk_writer() {}
};

unidiff_hunk_writer::unidiff_hunk_writer(vector<string> const & a,
                                         vector<string> const & b,
                                         size_t ctx,
                                         ostream & ost)
  : a(a), b(b), ctx(ctx), ost(ost),
    a_begin(0), b_begin(0),
    a_len(0), b_len(0), skew(0)
{}

void unidiff_hunk_writer::insert_at(size_t b_pos)
{
  b_len++;
  hunk.push_back(string("+") + b[b_pos]);
}

void unidiff_hunk_writer::delete_at(size_t a_pos)
{
  a_len++;
  hunk.push_back(string("-") + a[a_pos]);
}

void unidiff_hunk_writer::flush_hunk(size_t pos)
{
  if (hunk.size() > 0)
    {
      // insert trailing context
      size_t a_pos = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (a_pos + i < a.size()); ++i)
        {
          hunk.push_back(string(" ") + a[a_pos + i]);
          a_len++;
          b_len++;
        }

      // write hunk to stream
      ost << "@@ -" << a_begin+1;
      if (a_len > 1)
        ost << "," << a_len;

      ost << " +" << b_begin+1;
      if (b_len > 1)
        ost << "," << b_len;
      ost << " @@" << endl;

      copy(hunk.begin(), hunk.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  hunk.clear();
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
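
// the header written above is the standard unified-diff form: for
// example "@@ -9,3 +9,9 @@" announces a hunk covering 3 lines starting
// at line 9 of the old file and 9 lines starting at line 9 of the new
// file (the ",len" part is omitted when the length is 1).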

void unidiff_hunk_writer::advance_to(size_t newpos)
{
  if (a_begin + a_len + (2 * ctx) < newpos)
    {
      flush_hunk(newpos);

      // insert new leading context
      if (newpos - ctx < a.size())
        {
          for (size_t i = ctx; i > 0; --i)
            {
              // don't reach back past the start of the file
              if (i > newpos)
                continue;
              hunk.push_back(string(" ") + a[newpos - i]);
              a_begin--; a_len++;
              b_begin--; b_len++;
            }
        }
    }
  else
    {
      // pad intermediate context
      while (a_begin + a_len < newpos)
        {
          hunk.push_back(string(" ") + a[a_begin + a_len]);
          a_len++;
          b_len++;
        }
    }
}

struct cxtdiff_hunk_writer : public hunk_consumer
{
  vector<string> const & a;
  vector<string> const & b;
  size_t ctx;
  ostream & ost;
  size_t a_begin, b_begin, a_len, b_len;
  long skew;
  vector<size_t> inserts;
  vector<size_t> deletes;
  vector<string> from_file;
  vector<string> to_file;
  bool have_insertions;
  bool have_deletions;
  cxtdiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost);
  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  void flush_pending_mods();
  virtual ~cxtdiff_hunk_writer() {}
};

cxtdiff_hunk_writer::cxtdiff_hunk_writer(vector<string> const & a,
                                         vector<string> const & b,
                                         size_t ctx,
                                         ostream & ost)
  : a(a), b(b), ctx(ctx), ost(ost),
    a_begin(0), b_begin(0),
    a_len(0), b_len(0), skew(0),
    have_insertions(false), have_deletions(false)
{}

void cxtdiff_hunk_writer::insert_at(size_t b_pos)
{
  inserts.push_back(b_pos);
  have_insertions = true;
}

void cxtdiff_hunk_writer::delete_at(size_t a_pos)
{
  deletes.push_back(a_pos);
  have_deletions = true;
}

void cxtdiff_hunk_writer::flush_hunk(size_t pos)
{
  flush_pending_mods();

  if (have_deletions || have_insertions)
    {
      // insert trailing context
      size_t ctx_start = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < a.size()); ++i)
        {
          from_file.push_back(string("  ") + a[ctx_start + i]);
          a_len++;
        }

      ctx_start = b_begin + b_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < b.size()); ++i)
        {
          to_file.push_back(string("  ") + b[ctx_start + i]);
          b_len++;
        }

      ost << "***************" << endl;

      ost << "*** " << (a_begin + 1) << "," << (a_begin + a_len) << " ****" << endl;
      if (have_deletions)
        copy(from_file.begin(), from_file.end(), ostream_iterator<string>(ost, "\n"));

      ost << "--- " << (b_begin + 1) << "," << (b_begin + b_len) << " ----" << endl;
      if (have_insertions)
        copy(to_file.begin(), to_file.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  to_file.clear();
  from_file.clear();
  have_insertions = false;
  have_deletions = false;
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
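
// the hunk header written above follows the standard context-diff form:
// for example "*** 9,11 ****" and "--- 9,17 ----" bracket the old-file
// and new-file line ranges of one hunk, each range given as first,last
// in one-based line numbers.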

void cxtdiff_hunk_writer::flush_pending_mods()
{
  // nothing to flush?
  if (inserts.empty() && deletes.empty())
    return;

  string prefix;

  // if we have just insertions to flush, prefix them with "+"; if
  // just deletions, prefix with "-"; if both, prefix with "!"
  if (inserts.empty() && !deletes.empty())
    prefix = "-";
  else if (deletes.empty() && !inserts.empty())
    prefix = "+";
  else
    prefix = "!";

  for (vector<size_t>::const_iterator i = deletes.begin();
       i != deletes.end(); ++i)
    {
      from_file.push_back(prefix + string(" ") + a[*i]);
      a_len++;
    }
  for (vector<size_t>::const_iterator i = inserts.begin();
       i != inserts.end(); ++i)
    {
      to_file.push_back(prefix + string(" ") + b[*i]);
      b_len++;
    }

  // clear pending mods
  inserts.clear();
  deletes.clear();
}

void cxtdiff_hunk_writer::advance_to(size_t newpos)
{
  if (a_begin + a_len + (2 * ctx) < newpos)
    {
      flush_hunk(newpos);

      // insert new leading context
      if (newpos - ctx < a.size())
        {
          for (size_t i = ctx; i > 0; --i)
            {
              // don't reach back past the start of the file
              if (i > newpos)
                continue;

              // note that context diffs prefix common text with two
              // spaces, whereas unified diffs use a single space
              from_file.push_back(string("  ") + a[newpos - i]);
              to_file.push_back(string("  ") + a[newpos - i]);
              a_begin--; a_len++;
              b_begin--; b_len++;
            }
        }
    }
  else
    {
      flush_pending_mods();
      // pad intermediate context
      while (a_begin + a_len < newpos)
        {
          from_file.push_back(string("  ") + a[a_begin + a_len]);
          to_file.push_back(string("  ") + a[a_begin + a_len]);
          a_len++;
          b_len++;
        }
    }
}

void make_diff(string const & filename1,
               string const & filename2,
               vector<string> const & lines1,
               vector<string> const & lines2,
               ostream & ost,
               diff_type type)
{
  vector<long> left_interned;
  vector<long> right_interned;
  vector<long> lcs;

  interner<long> in;

  left_interned.reserve(lines1.size());
  for (vector<string>::const_iterator i = lines1.begin();
       i != lines1.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(lines2.size());
  for (vector<string>::const_iterator i = lines2.begin();
       i != lines2.end(); ++i)
    right_interned.push_back(in.intern(*i));

  lcs.reserve(std::min(lines1.size(), lines2.size()));
  longest_common_subsequence(left_interned.begin(), left_interned.end(),
                             right_interned.begin(), right_interned.end(),
                             std::min(lines1.size(), lines2.size()),
                             back_inserter(lcs));

  switch (type)
    {
    case unified_diff:
      {
        ost << "--- " << filename1 << endl;
        ost << "+++ " << filename2 << endl;

        unidiff_hunk_writer hunks(lines1, lines2, 3, ost);
        walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
        break;
      }
    case context_diff:
      {
        ost << "*** " << filename1 << endl;
        ost << "--- " << filename2 << endl;

        cxtdiff_hunk_writer hunks(lines1, lines2, 3, ost);
        walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
        break;
      }
    }
}

#ifdef BUILD_UNIT_TESTS
#include "unit_tests.hh"
#include "transforms.hh"
#include <boost/lexical_cast.hpp>
#include <sstream>
#include "randomfile.hh"

using boost::lexical_cast;

static void dump_incorrect_merge(vector<string> const & expected,
                                 vector<string> const & got,
                                 string const & prefix)
{
  size_t mx = expected.size();
  if (mx < got.size())
    mx = got.size();
  for (size_t i = 0; i < mx; ++i)
    {
      cerr << "bad merge: " << i << " [" << prefix << "]\t";

      if (i < expected.size())
        cerr << "[" << expected[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      if (i < got.size())
        cerr << "[" << got[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      cerr << endl;
    }
}

// regression blockers go here
static void unidiff_append_test()
{
  string src(string("#include \"hello.h\"\n")
             + "\n"
             + "void say_hello()\n"
             + "{\n"
             + "  printf(\"hello, world\\n\");\n"
             + "}\n"
             + "\n"
             + "int main()\n"
             + "{\n"
             + "  say_hello();\n"
             + "}\n");

  string dst(string("#include \"hello.h\"\n")
             + "\n"
             + "void say_hello()\n"
             + "{\n"
             + "  printf(\"hello, world\\n\");\n"
             + "}\n"
             + "\n"
             + "int main()\n"
             + "{\n"
             + "  say_hello();\n"
             + "}\n"
             + "\n"
             + "void say_goodbye()\n"
             + "{\n"
             + "  printf(\"goodbye\\n\");\n"
             + "}\n"
             + "\n");

  string ud(string("--- hello.c\n")
            + "+++ hello.c\n"
            + "@@ -9,3 +9,9 @@\n"
            + " {\n"
            + "   say_hello();\n"
            + " }\n"
            + "+\n"
            + "+void say_goodbye()\n"
            + "+{\n"
            + "+  printf(\"goodbye\\n\");\n"
            + "+}\n"
            + "+\n");

  vector<string> src_lines, dst_lines;
  split_into_lines(src, src_lines);
  split_into_lines(dst, dst_lines);
  stringstream sst;
  make_diff("hello.c", "hello.c", src_lines, dst_lines, sst, unified_diff);
  BOOST_CHECK(sst.str() == ud);
}


// high tech randomizing test

static void randomizing_merge_test()
{
  for (int i = 0; i < 30; ++i)
    {
      vector<string> anc, d1, d2, m1, m2, gm;

      file_randomizer::build_random_fork(anc, d1, d2, gm,
                                         i * 1023, (10 + 2 * i));

      BOOST_CHECK(merge3(anc, d1, d2, m1));
      if (gm != m1)
        dump_incorrect_merge(gm, m1, "random_merge 1");
      BOOST_CHECK(gm == m1);

      BOOST_CHECK(merge3(anc, d2, d1, m2));
      if (gm != m2)
        dump_incorrect_merge(gm, m2, "random_merge 2");
      BOOST_CHECK(gm == m2);
    }
}


// old boring tests

static void merge_prepend_test()
{
  BOOST_CHECKPOINT("prepend test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  for (int i = 0; i < 10; ++i)
    {
      anc.push_back(lexical_cast<string>(i));
      d1.push_back(lexical_cast<string>(i));
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_prepend 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_prepend 2");
  BOOST_CHECK(gm == m2);
}


static void merge_append_test()
{
  BOOST_CHECKPOINT("append test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 0; i < 10; ++i)
    anc.push_back(lexical_cast<string>(i));

  d1 = anc;
  d2 = anc;
  gm = anc;

  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_append 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_append 2");
  BOOST_CHECK(gm == m2);
}

static void merge_additions_test()
{
  BOOST_CHECKPOINT("additions test");
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc1("I like oatmeal\nI don't like spam\nI like orange juice\nI like toast");
  string confl("I like oatmeal\nI don't like tuna\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like orange juice\nI don't like tuna\nI like toast");
  string good_merge("I like oatmeal\nI don't like spam\nI like orange juice\nI don't like tuna\nI like toast");
  vector<string> anc, d1, cf, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc1, d1);
  split_into_lines(confl, cf);
  split_into_lines(desc2, d2);
  split_into_lines(good_merge, gm);

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_addition 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_addition 2");
  BOOST_CHECK(gm == m2);

  BOOST_CHECK(!merge3(anc, d1, cf, m1));
}

static void merge_deletions_test()
{
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like toast");

  vector<string> anc, d1, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc2, d2);
  d1 = anc;
  gm = d2;

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_deletion 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_deletion 2");
  BOOST_CHECK(gm == m2);
}


void add_diff_patch_tests(test_suite * suite)
{
  I(suite);
  suite->add(BOOST_TEST_CASE(&unidiff_append_test));
  suite->add(BOOST_TEST_CASE(&merge_prepend_test));
  suite->add(BOOST_TEST_CASE(&merge_append_test));
  suite->add(BOOST_TEST_CASE(&merge_additions_test));
  suite->add(BOOST_TEST_CASE(&merge_deletions_test));
  suite->add(BOOST_TEST_CASE(&randomizing_merge_test));
}


#endif // BUILD_UNIT_TESTS