monotone

monotone Mtn Source Tree

Root/diff_patch.cc

1// copyright (C) 2002, 2003 graydon hoare <graydon@pobox.com>
2// all rights reserved.
3// licensed to the public under the terms of the GNU GPL (>= 2)
4// see the file COPYING for details
5
6#include <algorithm>
7#include <iterator>
8#include <vector>
9#include <string>
10#include <iostream>
11
12#include "config.h"
13#include "diff_patch.hh"
14#include "interner.hh"
15#include "lcs.hh"
16#include "manifest.hh"
17#include "packet.hh"
18#include "sanity.hh"
19#include "transforms.hh"
20#include "vocab.hh"
21
22using namespace std;
23
24//
25// a 3-way merge works like this:
26//
27// /----> right
28// ancestor
29// \----> left
30//
31// first you compute the edit list "EDITS(ancestor,left)".
32//
33// then you make an offset table "leftpos" which describes positions in
34// "ancestor" as they map to "left"; that is, for 0 < apos <
35// ancestor.size(), we have
36//
37// left[leftpos[apos]] == ancestor[apos]
38//
39// you do this by walking through the edit list and either jumping the
40// current index ahead an extra position, on an insert, or remaining still,
41// on a delete. on an insert *or* a delete, you push the current index back
42// onto the leftpos array.
43//
44// next you compute the edit list "EDITS(ancestor,right)".
45//
46// you then go through this edit list applying the edits to left, rather
47// than ancestor, and using the table leftpos to map the position of each
48// edit to an appropriate spot in left. this means you walk a "curr_left"
49// index through the edits, and for each edit e:
50//
51// - if e is a delete (and e.pos is a position in ancestor)
52// - increment curr_left without copying anything to "merged"
53//
54// - if e is an insert (and e.pos is a position in right)
55// - copy right[e.pos] to "merged"
56// - leave curr_left alone
57//
58// - when advancing to apos (and apos is a position in ancestor)
59// - copy left[curr_left] to merged while curr_left < leftpos[apos]
60//
61//
62// the practical upshot is that you apply the delta from ancestor->right
63// to the adjusted contexts in left, producing something vaguely like
64// the concatenation of delta(ancestor,left) :: delta(ancestor,right).
65//
66// NB: this is, as far as I can tell, what diff3 does. I don't think I'm
67// infringing on anyone's fancy patents here.
68//
69
// Classification of how a line of the ancestor maps into a descendant:
// copied verbatim, removed, or replaced/extended by an insertion.
typedef enum { preserved = 0, deleted = 1, changed = 2 } edit_t;

// Human-readable names for edit_t values, indexed by the enum, used in
// log messages.  String literals are const data: binding them to plain
// 'char *' is deprecated (and ill-formed in C++11), so keep the whole
// table const.
static char const * const etab[3] =
  {
    "preserved",
    "deleted",
    "changed"
  };
77
// An extent describes how a contiguous run of lines in a descendant file
// relates to one line of the ancestor: where the run starts, how many
// descendant lines it covers, and whether the ancestor line was
// preserved, deleted, or changed.
struct extent
{
  extent(size_t p, size_t l, edit_t t)
    : pos(p), len(l), type(t)
  {}
  size_t pos;   // starting position of the run in the descendant
  size_t len;   // number of descendant lines covered (0 for a pure delete)
  edit_t type;  // how the ancestor line maps into the descendant
};
87
// Given the edit script "a_b_edits" taking A to B (as produced by
// edit_script()), build one extent per line of A recording where, and
// how, that line appears in B.  Insertions in B that precede the first
// line of A accumulate in "prefix"; insertions past the end of A
// accumulate in "suffix"; any other insertion is folded into the
// preceding extent, which becomes "changed".
void calculate_extents(vector<long, QA(long)> const & a_b_edits,
                       vector<long, QA(long)> const & b,
                       vector<long, QA(long)> & prefix,
                       vector<extent> & extents,
                       vector<long, QA(long)> & suffix,
                       size_t const a_len,
                       interner<long> & intern)
{
  extents.reserve(a_len * 2);

  // a_pos / b_pos walk the two files in step as we replay the edits
  size_t a_pos = 0, b_pos = 0;

  for (vector<long, QA(long)>::const_iterator i = a_b_edits.begin();
       i != a_b_edits.end(); ++i)
    {
      // L(F("edit: %d") % *i);
      if (*i < 0)
        {
          // negative elements code the negation of the one-based index into A
          // of the element to be deleted
          size_t a_deleted = (-1 - *i);

          // fill positions out to the deletion point
          while (a_pos < a_deleted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(F(" -- delete at A-pos %d (B-pos = %d)\n") % a_deleted % b_pos);

          // skip the deleted line: zero-length extent, b_pos unmoved
          a_pos++;
          extents.push_back(extent(b_pos, 0, deleted));
        }
      else
        {
          // positive elements code the one-based index into B of the element to
          // be inserted
          size_t b_inserted = (*i - 1);

          // fill positions out to the insertion point
          while (b_pos < b_inserted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(F(" -- insert at B-pos %d (A-pos = %d) : '%s'\n")
          //   % b_inserted % a_pos % intern.lookup(b.at(b_inserted)));

          // record that there was an insertion, but a_pos did not move.
          if ((b_pos == 0 && extents.empty())
              || (b_pos == prefix.size()))
            {
              // insertion before any mapped line of A: collect in prefix
              prefix.push_back(b.at(b_pos));
            }
          else if (a_len == a_pos)
            {
              // insertion after the last line of A: collect in suffix
              suffix.push_back(b.at(b_pos));
            }
          else
            {
              // make the insertion: grow the preceding extent and mark it
              // changed
              extents.back().type = changed;
              extents.back().len++;
            }
          b_pos++;
        }
    }

  // the edit script is done; the rest of A maps straight across to B
  while (extents.size() < a_len)
    extents.push_back(extent(b_pos++, 1, preserved));
}
162
// Rewrite the extent map a_b_map (for edit A -> B) into a canonical form
// so that equivalent edits computed on different sides of a merge line up
// extent-for-extent.  Two passes: first, slide "changed" blocks upward
// past "preserved" lines when the boundary line matches; second, push the
// bulk of each run of adjacent "changed" extents into its predecessor,
// demoting any now-trivial extent back to "preserved".
void normalize_extents(vector<extent> & a_b_map,
                       vector<long, QA(long)> const & a,
                       vector<long, QA(long)> const & b)
{
  // pass 1: shift changed blocks to the earliest possible position
  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && (a_b_map.at(j-1).type == preserved)
                 && (a_b_map.at(j).type == changed)
                 && (a.at(j) == b.at(a_b_map.at(j).pos + a_b_map.at(j).len - 1)))
            {
              // This is implied by (a_b_map.at(j-1).type == preserved)
              I(a.at(j-1) == b.at(a_b_map.at(j-1).pos));

              // Coming into loop we have:
              //                     i
              //  z --pres--> z 0
              //  o --pres--> o 1
              //  a --chng--> a 2   The important thing here is that 'a' in
              //              t     the LHS matches with ...
              //              u
              //              v
              //              a     ... the a on the RHS here. Hence we can
              //  q --pres--> q 3   'shift' the entire 'changed' block
              //  e --chng--> d 4   upwards, leaving a 'preserved' line
              //  g --pres--> g 5   'a'->'a'
              //
              //  Want to end up with:
              //                     i
              //  z --pres--> z 0
              //  o --chng--> o 1
              //              a
              //              t
              //              u
              //              v
              //  a --pres--> a 2
              //  q --pres--> q 3
              //  e --chng--> d 4
              //  g --pres--> g 5
              //
              // Now all the 'changed' extents are normalised to the
              // earliest possible position.

              L(F("exchanging preserved extent [%d+%d] with changed extent [%d+%d]\n")
                % a_b_map.at(j-1).pos
                % a_b_map.at(j-1).len
                % a_b_map.at(j).pos
                % a_b_map.at(j).len);

              // swap len/type but keep each extent's pos: the block moves
              // up one ancestor line per iteration
              swap(a_b_map.at(j-1).len, a_b_map.at(j).len);
              swap(a_b_map.at(j-1).type, a_b_map.at(j).type);
              --j;
            }
        }
    }

  // pass 2: coalesce adjacent changed extents into the earliest one
  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && a_b_map.at(j).type == changed
                 && a_b_map.at(j-1).type == changed
                 && a_b_map.at(j).len > 1
                 && a_b_map.at(j-1).pos + a_b_map.at(j-1).len == a_b_map.at(j).pos)
            {
              // step 1: move a chunk from this insert extent to its
              // predecessor
              size_t piece = a_b_map.at(j).len - 1;
              // L(F("moving change piece of len %d from pos %d to pos %d\n")
              //   % piece
              //   % a_b_map.at(j).pos
              //   % a_b_map.at(j-1).pos);
              a_b_map.at(j).len = 1;
              a_b_map.at(j).pos += piece;
              a_b_map.at(j-1).len += piece;

              // step 2: if this extent (now of length 1) has become a "changed"
              // extent identical to its previous state, switch it to a "preserved"
              // extent.
              if (b.at(a_b_map.at(j).pos) == a.at(j))
                {
                  // L(F("changing normalized 'changed' extent at %d to 'preserved'\n")
                  //   % a_b_map.at(j).pos);
                  a_b_map.at(j).type = preserved;
                }
              --j;
            }
        }
    }
}
258
259
// Walk the two (equal-length, one entry per ancestor line) extent maps in
// lockstep and emit the merged interned-line sequence.  Identical edits on
// both sides pass through; an edit on one side with "preserved" on the
// other takes the edited side; any other combination is a conflict.
// Throws conflict() when the sides disagree.
void merge_extents(vector<extent> const & a_b_map,
                   vector<extent> const & a_c_map,
                   vector<long, QA(long)> const & b,
                   vector<long, QA(long)> const & c,
                   interner<long> const & in,
                   vector<long, QA(long)> & merged)
{
  // both maps describe the same ancestor, so they must be the same length
  I(a_b_map.size() == a_c_map.size());

  vector<extent>::const_iterator i = a_b_map.begin();
  vector<extent>::const_iterator j = a_c_map.begin();
  merged.reserve(a_b_map.size() * 2);

  //   for (; i != a_b_map.end(); ++i, ++j)
  //     {

  //       L(F("trying to merge: [%s %d %d] vs. [%s %d %d] \n")
  //       % etab[i->type] % i->pos % i->len
  //       % etab[j->type] % j->pos % j->len);
  //     }

  //   i = a_b_map.begin();
  //   j = a_c_map.begin();

  for (; i != a_b_map.end(); ++i, ++j)
    {

      //       L(F("trying to merge: [%s %d %d] vs. [%s %d %d] \n")
      //       % etab[i->type] % i->pos % i->len
      //       % etab[j->type] % j->pos % j->len);

      // mutual, identical preserves / inserts / changes
      if (((i->type == changed && j->type == changed)
           || (i->type == preserved && j->type == preserved))
          && i->len == j->len)
        {
          // same shape on both sides -- the contents must also agree
          for (size_t k = 0; k < i->len; ++k)
            {
              if (b.at(i->pos + k) != c.at(j->pos + k))
                {
                  L(F("conflicting edits: %s %d[%d] '%s' vs. %s %d[%d] '%s'\n")
                    % etab[i->type] % i->pos % k % in.lookup(b.at(i->pos + k))
                    % etab[j->type] % j->pos % k % in.lookup(c.at(j->pos + k)));
                  throw conflict();
                }
              merged.push_back(b.at(i->pos + k));
            }
        }

      // mutual or single-edge deletes
      else if ((i->type == deleted && j->type == deleted)
               || (i->type == deleted && j->type == preserved)
               || (i->type == preserved && j->type == deleted))
        {
          // do nothing
        }

      // single-edge insert / changes: take the changed side verbatim
      else if (i->type == changed && j->type == preserved)
        for (size_t k = 0; k < i->len; ++k)
          merged.push_back(b.at(i->pos + k));

      else if (i->type == preserved && j->type == changed)
        for (size_t k = 0; k < j->len; ++k)
          merged.push_back(c.at(j->pos + k));

      else
        {
          // any remaining combination (e.g. changed vs. deleted, or
          // changed vs. changed with differing lengths) cannot be merged
          L(F("conflicting edits: [%s %d %d] vs. [%s %d %d]\n")
            % etab[i->type] % i->pos % i->len
            % etab[j->type] % j->pos % j->len);
          throw conflict();
        }

      //       if (merged.empty())
      //       L(F(" --> EMPTY\n"));
      //       else
      //       L(F(" --> [%d]: %s\n") % (merged.size() - 1) % in.lookup(merged.back()));
    }
}
340
341
// Perform the full 3-way merge pipeline on line vectors: intern all lines,
// compute ancestor->left and ancestor->right edit scripts, convert each to
// normalized extent maps, then merge the two maps.  The merged result (plus
// any single-sided prefix/suffix insertions) is written to "merged".
// Throws conflict() on prefix, suffix, or extent conflicts.
void merge_via_edit_scripts(vector<string> const & ancestor,
                            vector<string> const & left,
                            vector<string> const & right,
                            vector<string> & merged)
{
  vector<long, QA(long)> anc_interned;
  vector<long, QA(long)> left_interned, right_interned;
  vector<long, QA(long)> left_edits, right_edits;
  vector<long, QA(long)> left_prefix, right_prefix;
  vector<long, QA(long)> left_suffix, right_suffix;
  vector<extent> left_extents, right_extents;
  vector<long, QA(long)> merged_interned;
  interner<long> in;

  //   for (int i = 0; i < std::min(std::min(left.size(), right.size()), ancestor.size()); ++i)
  //     {
  //       std::cerr << "[" << i << "] " << left[i] << " " << ancestor[i] << " " << right[i] << endl;
  //     }

  // intern every line through the same interner so equal lines compare
  // as equal longs in the LCS / edit-script machinery
  anc_interned.reserve(ancestor.size());
  for (vector<string>::const_iterator i = ancestor.begin();
       i != ancestor.end(); ++i)
    anc_interned.push_back(in.intern(*i));

  left_interned.reserve(left.size());
  for (vector<string>::const_iterator i = left.begin();
       i != left.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(right.size());
  for (vector<string>::const_iterator i = right.begin();
       i != right.end(); ++i)
    right_interned.push_back(in.intern(*i));

  L(F("calculating left edit script on %d -> %d lines\n")
    % anc_interned.size() % left_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              left_interned.begin(), left_interned.end(),
              std::min(ancestor.size(), left.size()),
              left_edits);

  L(F("calculating right edit script on %d -> %d lines\n")
    % anc_interned.size() % right_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              right_interned.begin(), right_interned.end(),
              std::min(ancestor.size(), right.size()),
              right_edits);

  L(F("calculating left extents on %d edits\n") % left_edits.size());
  calculate_extents(left_edits, left_interned,
                    left_prefix, left_extents, left_suffix,
                    anc_interned.size(), in);

  L(F("calculating right extents on %d edits\n") % right_edits.size());
  calculate_extents(right_edits, right_interned,
                    right_prefix, right_extents, right_suffix,
                    anc_interned.size(), in);

  // normalize both maps so equivalent edits line up extent-for-extent
  L(F("normalizing %d right extents\n") % right_extents.size());
  normalize_extents(right_extents, anc_interned, right_interned);

  L(F("normalizing %d left extents\n") % left_extents.size());
  normalize_extents(left_extents, anc_interned, left_interned);

  // insertions before the first / after the last ancestor line can only
  // be merged when at most one side made them
  if ((!right_prefix.empty()) && (!left_prefix.empty()))
    {
      L(F("conflicting prefixes\n"));
      throw conflict();
    }

  if ((!right_suffix.empty()) && (!left_suffix.empty()))
    {
      L(F("conflicting suffixes\n"));
      throw conflict();
    }

  L(F("merging %d left, %d right extents\n")
    % left_extents.size() % right_extents.size());

  // at most one of these two is non-empty (checked above)
  copy(left_prefix.begin(), left_prefix.end(), back_inserter(merged_interned));
  copy(right_prefix.begin(), right_prefix.end(), back_inserter(merged_interned));

  merge_extents(left_extents, right_extents,
                left_interned, right_interned,
                in, merged_interned);

  copy(left_suffix.begin(), left_suffix.end(), back_inserter(merged_interned));
  copy(right_suffix.begin(), right_suffix.end(), back_inserter(merged_interned));

  // translate the interned result back into real lines
  merged.reserve(merged_interned.size());
  for (vector<long, QA(long)>::const_iterator i = merged_interned.begin();
       i != merged_interned.end(); ++i)
    merged.push_back(in.lookup(*i));
}
439
440
441bool merge3(vector<string> const & ancestor,
442 vector<string> const & left,
443 vector<string> const & right,
444 vector<string> & merged)
445{
446 try
447 {
448 merge_via_edit_scripts(ancestor, left, right, merged);
449 }
450 catch(conflict & c)
451 {
452 L(F("conflict detected. no merge.\n"));
453 return false;
454 }
455 return true;
456}
457
458
// Bind the provider to the application state and to the three manifests
// (ancestor, left, right) whose file attributes drive merge decisions.
merge_provider::merge_provider(app_state & app,
                               manifest_map const & anc_man,
                               manifest_map const & left_man,
                               manifest_map const & right_man)
  : app(app), anc_man(anc_man), left_man(left_man), right_man(right_man)
{}
465
// Persist a successful merge: compute deltas to the merged version and
// write them to the database as file-delta packets in one transaction.
void merge_provider::record_merge(file_id const & left_ident,
                                  file_id const & right_ident,
                                  file_id const & merged_ident,
                                  file_data const & left_data,
                                  file_data const & merged_data)
{
  L(F("recording successful merge of %s <-> %s into %s\n")
    % left_ident % right_ident % merged_ident);

  delta left_delta, right_delta;
  transaction_guard guard(app.db);

  diff(left_data.inner(), merged_data.inner(), left_delta);
  // NOTE(review): the "right" delta is also computed from left_data (the
  // right version's data is not a parameter here), so both deltas are
  // identical.  Verify this is intentional -- the delta stored for
  // right_ident -> merged_ident arguably should be built from the right
  // version's contents.
  diff(left_data.inner(), merged_data.inner(), right_delta);
  packet_db_writer dbw(app);
  dbw.consume_file_delta (left_ident, merged_ident, file_delta(left_delta));
  dbw.consume_file_delta (right_ident, merged_ident, file_delta(right_delta));
  guard.commit();
}
485
// Fetch a file version by id straight from the database; "path" is unused
// here but the overridable signature lets subclasses (see
// update_merge_provider) read from the working copy instead.
void merge_provider::get_version(file_path const & path,
                                 file_id const & ident,
                                 file_data & dat)
{
  app.db.get_file_version(ident, dat);
}
492
493std::string merge_provider::get_file_encoding(file_path const & path,
494 manifest_map const & man)
495{
496 std::string enc;
497 if (get_attribute_from_db(path, encoding_attribute, man, enc, app))
498 return enc;
499 else
500 return default_encoding;
501}
502
503bool merge_provider::attribute_manual_merge(file_path const & path,
504 manifest_map const & man)
505{
506 std::string mmf;
507 if (get_attribute_from_db(path, manual_merge_attribute, man, mmf, app))
508 {
509 return mmf == std::string("true");
510 }
511 else
512 return false; // default: enable auto merge
513}
514
// 3-way merge of left and right given their common ancestor.  First tries
// monotone's internal line-based merge3 (skipped when either side carries
// the manual-merge attribute); failing that, falls back to the user's lua
// merge3 hook.  On success records the merge and sets merged_id; returns
// false when no merge could be produced.
bool merge_provider::try_to_merge_files(file_path const & anc_path,
                                        file_path const & left_path,
                                        file_path const & right_path,
                                        file_path const & merged_path,
                                        file_id const & ancestor_id,
                                        file_id const & left_id,
                                        file_id const & right_id,
                                        file_id & merged_id)
{
  // This version of try_to_merge_files should only be called when there is a
  // real merge3 to perform.
  I(!null_id(ancestor_id));
  I(!null_id(left_id));
  I(!null_id(right_id));

  L(F("trying to merge %s <-> %s (ancestor: %s)\n")
    % left_id % right_id % ancestor_id);

  // trivial case: both sides already agree
  if (left_id == right_id)
    {
      L(F("files are identical\n"));
      merged_id = left_id;
      return true;
    }

  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;

  // get_version is virtual: the db-backed or working-copy-backed provider
  // supplies the bytes
  this->get_version(left_path, left_id, left_data);
  this->get_version(anc_path, ancestor_id, ancestor_data);
  this->get_version(right_path, right_id, right_data);

  left_unpacked = left_data.inner();
  ancestor_unpacked = ancestor_data.inner();
  right_unpacked = right_data.inner();

  if (!attribute_manual_merge(left_path, left_man) &&
      !attribute_manual_merge(right_path, right_man))
    {
      // both files mergeable by monotone internal algorithm, try to merge
      // note: the ancestor is not considered for manual merging. Forcing the
      // user to merge manually just because of an ancestor mistakenly marked
      // manual seems too harsh
      string left_encoding, anc_encoding, right_encoding;
      left_encoding = this->get_file_encoding(left_path, left_man);
      anc_encoding = this->get_file_encoding(anc_path, anc_man);
      right_encoding = this->get_file_encoding(right_path, right_man);

      vector<string> left_lines, ancestor_lines, right_lines, merged_lines;
      split_into_lines(left_unpacked(), left_encoding, left_lines);
      split_into_lines(ancestor_unpacked(), anc_encoding, ancestor_lines);
      split_into_lines(right_unpacked(), right_encoding, right_lines);

      if (merge3(ancestor_lines,
                 left_lines,
                 right_lines,
                 merged_lines))
        {
          hexenc<id> tmp_id;
          file_data merge_data;
          string tmp;

          L(F("internal 3-way merged ok\n"));
          // re-join, hash, and record the merged text
          join_lines(merged_lines, tmp);
          calculate_ident(data(tmp), tmp_id);
          file_id merged_fid(tmp_id);
          merge_data = file_data(tmp);

          merged_id = merged_fid;
          record_merge(left_id, right_id, merged_fid,
                       left_data, merge_data);

          return true;
        }
    }

  // internal merge failed or was disallowed: ask the user via lua hook
  P(F("help required for 3-way merge\n"
      "[ancestor] %s\n"
      "[    left] %s\n"
      "[   right] %s\n"
      "[  merged] %s\n")
    % anc_path
    % left_path
    % right_path
    % merged_path);

  if (app.lua.hook_merge3(anc_path, left_path, right_path, merged_path,
                          ancestor_unpacked, left_unpacked,
                          right_unpacked, merged_unpacked))
    {
      hexenc<id> tmp_id;
      file_data merge_data;

      L(F("lua merge3 hook merged ok\n"));
      calculate_ident(merged_unpacked, tmp_id);
      file_id merged_fid(tmp_id);
      merge_data = file_data(merged_unpacked);

      merged_id = merged_fid;
      record_merge(left_id, right_id, merged_fid,
                   left_data, merge_data);
      return true;
    }

  return false;
}
621
// 2-way merge (no common ancestor available).  There is no internal
// algorithm for this case: unless the files are identical, the user's lua
// merge2 hook must resolve it.  On success records the merge and sets
// merged_id; returns false otherwise.
bool merge_provider::try_to_merge_files(file_path const & left_path,
                                        file_path const & right_path,
                                        file_path const & merged_path,
                                        file_id const & left_id,
                                        file_id const & right_id,
                                        file_id & merged_id)
{
  I(!null_id(left_id));
  I(!null_id(right_id));

  file_data left_data, right_data;
  data left_unpacked, right_unpacked, merged_unpacked;

  L(F("trying to merge %s <-> %s\n")
    % left_id % right_id);

  // trivial case: both sides already agree
  if (left_id == right_id)
    {
      L(F("files are identical\n"));
      merged_id = left_id;
      return true;
    }

  this->get_version(left_path, left_id, left_data);
  this->get_version(right_path, right_id, right_data);

  left_unpacked = left_data.inner();
  right_unpacked = right_data.inner();

  P(F("help required for 2-way merge\n"
      "[    left] %s\n"
      "[   right] %s\n"
      "[  merged] %s\n")
    % left_path
    % right_path
    % merged_path);

  if (app.lua.hook_merge2(left_path, right_path, merged_path,
                          left_unpacked, right_unpacked, merged_unpacked))
    {
      hexenc<id> tmp_id;
      file_data merge_data;

      L(F("lua merge2 hook merged ok\n"));
      calculate_ident(merged_unpacked, tmp_id);
      file_id merged_fid(tmp_id);
      merge_data = file_data(merged_unpacked);

      merged_id = merged_fid;
      record_merge(left_id, right_id, merged_fid,
                   left_data, merge_data);
      return true;
    }

  return false;
}
678
679
680// during the "update" command, the only real differences from merging
681// are that we take our right versions from the filesystem, not the db,
682// and we only record the merges in a transient, in-memory table.
683
// Same construction as merge_provider; the behavioral differences live in
// the overridden virtuals (working-copy-aware get_version and attribute
// lookups, in-memory record_merge).
update_merge_provider::update_merge_provider(app_state & app,
                                             manifest_map const & anc_man,
                                             manifest_map const & left_man,
                                             manifest_map const & right_man)
  : merge_provider(app, anc_man, left_man, right_man) {}
689
// During "update" merges must not be written to the database: remember the
// merged data in an in-memory table keyed by the merged file id instead.
void update_merge_provider::record_merge(file_id const & left_id,
                                         file_id const & right_id,
                                         file_id const & merged_id,
                                         file_data const & left_data,
                                         file_data const & merged_data)
{
  L(F("temporarily recording merge of %s <-> %s into %s\n")
    % left_id % right_id % merged_id);
  // each merged id should be recorded at most once per update
  I(temporary_store.find(merged_id) == temporary_store.end());
  temporary_store.insert(make_pair(merged_id, merged_data));
}
701
// Fetch a file version for an update merge: prefer the database, but fall
// back to reading the file from the working copy, verifying that the bytes
// on disk actually hash to the requested id.
void update_merge_provider::get_version(file_path const & path,
                                        file_id const & ident,
                                        file_data & dat)
{
  if (app.db.file_version_exists(ident))
    app.db.get_file_version(ident, dat);
  else
    {
      data tmp;
      file_id fid;
      require_path_is_file(path,
                           F("file '%s' does not exist in working copy") % path,
                           F("'%s' in working copy is a directory, not a file") % path);
      read_localized_data(path, tmp, app.lua);
      calculate_ident(tmp, fid);
      // the working copy must contain exactly the version we were asked for
      N(fid == ident,
        F("file %s in working copy has id %s, wanted %s")
        % path % fid % ident);
      dat = tmp;
    }
}
723
724std::string update_merge_provider::get_file_encoding(file_path const & path,
725 manifest_map const & man)
726{
727 std::string enc;
728 if (get_attribute_from_working_copy(path, encoding_attribute, enc))
729 return enc;
730 else if (get_attribute_from_db(path, encoding_attribute, man, enc, app))
731 return enc;
732 else
733 return default_encoding;
734}
735
736bool update_merge_provider::attribute_manual_merge(file_path const & path,
737 manifest_map const & man)
738{
739 std::string mmf;
740 if (get_attribute_from_working_copy(path, manual_merge_attribute, mmf))
741 return mmf == std::string("true");
742 else if (get_attribute_from_db(path, manual_merge_attribute, man, mmf, app))
743 return mmf == std::string("true");
744 else
745 return false; // default: enable auto merge
746}
747
748// the remaining part of this file just handles printing out various
749// diff formats for the case where someone wants to *read* a diff
750// rather than apply it.
751
// Callback interface fed by walk_hunk_consumer: receives the stream of
// insert/delete events derived from an LCS walk and groups them into
// printable hunks.
struct hunk_consumer
{
  // emit the pending hunk (if any) and restart bookkeeping at "pos"
  virtual void flush_hunk(size_t pos) = 0;
  // move to position "newpos" in the old file, flushing or padding context
  virtual void advance_to(size_t newpos) = 0;
  // line "b_pos" of the new file was inserted
  virtual void insert_at(size_t b_pos) = 0;
  // line "a_pos" of the old file was deleted
  virtual void delete_at(size_t a_pos) = 0;
  virtual ~hunk_consumer() {}
};
760
// Walk the longest common subsequence of two interned line vectors,
// reporting everything not in the LCS to "cons" as deletions (lines only
// in lines1) or insertions (lines only in lines2), and flushing complete
// hunks as it goes.
void walk_hunk_consumer(vector<long, QA(long)> const & lcs,
                        vector<long, QA(long)> const & lines1,
                        vector<long, QA(long)> const & lines2,
                        hunk_consumer & cons)
{

  size_t a = 0, b = 0;
  if (lcs.begin() == lcs.end())
    {
      // degenerate case: files have nothing in common
      cons.advance_to(0);
      while (a < lines1.size())
        cons.delete_at(a++);
      while (b < lines2.size())
        cons.insert_at(b++);
      cons.flush_hunk(a);
    }
  else
    {
      // normal case: files have something in common
      for (vector<long, QA(long)>::const_iterator i = lcs.begin();
           i != lcs.end(); ++i, ++a, ++b)
        {
          // both sides sit on the next common line: nothing to report
          if (idx(lines1, a) == *i && idx(lines2, b) == *i)
            continue;

          // report deletions/insertions until both sides reach the
          // common line *i
          cons.advance_to(a);
          while (idx(lines1,a) != *i)
            cons.delete_at(a++);
          while (idx(lines2,b) != *i)
            cons.insert_at(b++);
        }
      // trailing material after the last common line
      if (b < lines2.size())
        {
          cons.advance_to(a);
          while(b < lines2.size())
            cons.insert_at(b++);
        }
      if (a < lines1.size())
        {
          cons.advance_to(a);
          while(a < lines1.size())
            cons.delete_at(a++);
        }
      cons.flush_hunk(a);
    }
}
808
// hunk_consumer that renders hunks in unified diff ("diff -u") format.
struct unidiff_hunk_writer : public hunk_consumer
{
  vector<string> const & a;  // old file lines
  vector<string> const & b;  // new file lines
  size_t ctx;                // lines of context to show around changes
  ostream & ost;             // stream hunks are written to
  // start and length of the current hunk in each file
  size_t a_begin, b_begin, a_len, b_len;
  long skew;                 // cumulative (new length - old length) so far
  vector<string> hunk;       // pending hunk body, lines already prefixed
  unidiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost);
  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  virtual ~unidiff_hunk_writer() {}
};
828
// Begin at position 0 in both files with an empty pending hunk.
unidiff_hunk_writer::unidiff_hunk_writer(vector<string> const & a,
                                         vector<string> const & b,
                                         size_t ctx,
                                         ostream & ost)
: a(a), b(b), ctx(ctx), ost(ost),
  a_begin(0), b_begin(0),
  a_len(0), b_len(0), skew(0)
{}
837
838void unidiff_hunk_writer::insert_at(size_t b_pos)
839{
840 b_len++;
841 hunk.push_back(string("+") + b[b_pos]);
842}
843
844void unidiff_hunk_writer::delete_at(size_t a_pos)
845{
846 a_len++;
847 hunk.push_back(string("-") + a[a_pos]);
848}
849
// Emit the pending hunk (if non-empty) with its "@@ -a,+b @@" header and
// trailing context, then reset bookkeeping so the next hunk starts at
// "pos" (adjusted by the accumulated skew on the new-file side).
void unidiff_hunk_writer::flush_hunk(size_t pos)
{
  if (hunk.size() > 0)
    {
      // insert trailing context
      size_t a_pos = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (a_pos + i < a.size()); ++i)
        {
          hunk.push_back(string(" ") + a[a_pos + i]);
          a_len++;
          b_len++;
        }

      // write hunk to stream; unified format omits the ",len" part
      // when the length is 1, and uses "-0,0"/"+0,0" for empty sides
      if (a_len == 0)
        ost << "@@ -0,0";
      else
        {
          ost << "@@ -" << a_begin+1;
          if (a_len > 1)
            ost << "," << a_len;
        }

      if (b_len == 0)
        ost << " +0,0";
      else
        {
          ost << " +" << b_begin+1;
          if (b_len > 1)
            ost << "," << b_len;
        }
      ost << " @@" << endl;

      copy(hunk.begin(), hunk.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  hunk.clear();
  // skew tracks how far the new file has drifted from the old one
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
894
895void unidiff_hunk_writer::advance_to(size_t newpos)
896{
897 if (a_begin + a_len + (2 * ctx) < newpos)
898 {
899 flush_hunk(newpos);
900
901 // insert new leading context
902 if (newpos - ctx < a.size())
903 {
904 for (int i = ctx; i > 0; --i)
905 {
906 if (newpos - i < 0)
907 continue;
908 hunk.push_back(string(" ") + a[newpos - i]);
909 a_begin--; a_len++;
910 b_begin--; b_len++;
911 }
912 }
913 }
914 else
915 {
916 // pad intermediate context
917 while(a_begin + a_len < newpos)
918 {
919 hunk.push_back(string(" ") + a[a_begin + a_len]);
920 a_len++;
921 b_len++;
922 }
923 }
924}
925
// hunk_consumer that renders hunks in context diff ("diff -c") format.
// Unlike the unified writer, inserts and deletes are buffered separately
// (context diffs show old and new versions of a hunk in two sections).
struct cxtdiff_hunk_writer : public hunk_consumer
{
  vector<string> const & a;  // old file lines
  vector<string> const & b;  // new file lines
  size_t ctx;                // lines of context to show around changes
  ostream & ost;             // stream hunks are written to
  // start and length of the current hunk in each file
  size_t a_begin, b_begin, a_len, b_len;
  long skew;                 // cumulative (new length - old length) so far
  vector<size_t> inserts;    // pending inserted line numbers (in b)
  vector<size_t> deletes;    // pending deleted line numbers (in a)
  vector<string> from_file;  // old-file section of the pending hunk
  vector<string> to_file;    // new-file section of the pending hunk
  bool have_insertions;      // hunk contains at least one insertion
  bool have_deletions;       // hunk contains at least one deletion
  cxtdiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost);
  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  // move queued inserts/deletes into from_file/to_file with prefixes
  void flush_pending_mods();
  virtual ~cxtdiff_hunk_writer() {}
};
951
// Begin at position 0 in both files with no pending changes.
cxtdiff_hunk_writer::cxtdiff_hunk_writer(vector<string> const & a,
                                         vector<string> const & b,
                                         size_t ctx,
                                         ostream & ost)
  : a(a), b(b), ctx(ctx), ost(ost),
    a_begin(0), b_begin(0),
    a_len(0), b_len(0), skew(0),
    have_insertions(false), have_deletions(false)
{}
961
962void cxtdiff_hunk_writer::insert_at(size_t b_pos)
963{
964 inserts.push_back(b_pos);
965 have_insertions = true;
966}
967
968void cxtdiff_hunk_writer::delete_at(size_t a_pos)
969{
970 deletes.push_back(a_pos);
971 have_deletions = true;
972}
973
// Emit the pending hunk (if it contains any change) in context-diff form:
// a "***************" separator, the old-file section ("*** a,b ****",
// shown only when there are deletions), and the new-file section
// ("--- a,b ----", shown only when there are insertions).  Then reset
// bookkeeping so the next hunk starts at "pos".
void cxtdiff_hunk_writer::flush_hunk(size_t pos)
{
  // make sure queued inserts/deletes are materialized in the sections
  flush_pending_mods();

  if (have_deletions || have_insertions)
    {
      // insert trailing context
      size_t ctx_start = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < a.size()); ++i)
        {
          from_file.push_back(string("  ") + a[ctx_start + i]);
          a_len++;
        }

      ctx_start = b_begin + b_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < b.size()); ++i)
        {
          to_file.push_back(string("  ") + b[ctx_start + i]);
          b_len++;
        }

      ost << "***************" << endl;

      ost << "*** " << (a_begin + 1) << "," << (a_begin + a_len) << " ****" << endl;
      if (have_deletions)
        copy(from_file.begin(), from_file.end(), ostream_iterator<string>(ost, "\n"));

      ost << "--- " << (b_begin + 1) << "," << (b_begin + b_len) << " ----" << endl;
      if (have_insertions)
        copy(to_file.begin(), to_file.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  to_file.clear();
  from_file.clear();
  have_insertions = false;
  have_deletions = false;
  // skew tracks how far the new file has drifted from the old one
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
1017
1018void cxtdiff_hunk_writer::flush_pending_mods()
1019{
1020 // nothing to flush?
1021 if (inserts.empty() && deletes.empty())
1022 return;
1023
1024 string prefix;
1025
1026 // if we have just insertions to flush, prefix them with "+"; if
1027 // just deletions, prefix with "-"; if both, prefix with "!"
1028 if (inserts.empty() && !deletes.empty())
1029 prefix = "-";
1030 else if (deletes.empty() && !inserts.empty())
1031 prefix = "+";
1032 else
1033 prefix = "!";
1034
1035 for (vector<size_t>::const_iterator i = deletes.begin();
1036 i != deletes.end(); ++i)
1037 {
1038 from_file.push_back(prefix + string(" ") + a[*i]);
1039 a_len++;
1040 }
1041 for (vector<size_t>::const_iterator i = inserts.begin();
1042 i != inserts.end(); ++i)
1043 {
1044 to_file.push_back(prefix + string(" ") + b[*i]);
1045 b_len++;
1046 }
1047
1048 // clear pending mods
1049 inserts.clear();
1050 deletes.clear();
1051}
1052
1053void cxtdiff_hunk_writer::advance_to(size_t newpos)
1054{
1055 if (a_begin + a_len + (2 * ctx) < newpos)
1056 {
1057 flush_hunk(newpos);
1058
1059 // insert new leading context
1060 if (newpos - ctx < a.size())
1061 {
1062 for (int i = ctx; i > 0; --i)
1063 {
1064 if (newpos - i < 0)
1065 continue;
1066
1067 // note that context diffs prefix common text with two
1068 // spaces, whereas unified diffs use a single space
1069 from_file.push_back(string(" ") + a[newpos - i]);
1070 to_file.push_back(string(" ") + a[newpos - i]);
1071 a_begin--; a_len++;
1072 b_begin--; b_len++;
1073 }
1074 }
1075 }
1076 else
1077 {
1078 flush_pending_mods();
1079 // pad intermediate context
1080 while (a_begin + a_len < newpos)
1081 {
1082 from_file.push_back(string(" ") + a[a_begin + a_len]);
1083 to_file.push_back(string(" ") + a[a_begin + a_len]);
1084 a_len++;
1085 b_len++;
1086 }
1087 }
1088}
1089
// Render a textual diff of lines1 -> lines2 to "ost" in the requested
// format (unified or context), labeling the header with the file names and
// ids.  The LCS of the two interned line sequences drives a hunk_consumer
// that does the formatting.  external_diff is handled elsewhere; reaching
// "default" here is a bug.
void make_diff(string const & filename1,
               string const & filename2,
               file_id const & id1,
               file_id const & id2,
               vector<string> const & lines1,
               vector<string> const & lines2,
               ostream & ost,
               diff_type type)
{
  vector<long, QA(long)> left_interned;
  vector<long, QA(long)> right_interned;
  vector<long, QA(long)> lcs;

  interner<long> in;

  // intern both files through the same interner so identical lines
  // compare as identical longs
  left_interned.reserve(lines1.size());
  for (vector<string>::const_iterator i = lines1.begin();
       i != lines1.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(lines2.size());
  for (vector<string>::const_iterator i = lines2.begin();
       i != lines2.end(); ++i)
    right_interned.push_back(in.intern(*i));

  // the LCS can be at most as long as the shorter input
  lcs.reserve(std::min(lines1.size(),lines2.size()));
  longest_common_subsequence(left_interned.begin(), left_interned.end(),
                             right_interned.begin(), right_interned.end(),
                             std::min(lines1.size(), lines2.size()),
                             back_inserter(lcs));

  switch (type)
    {
      case unified_diff:
        {
          ost << "--- " << filename1 << "\t" << id1 << endl;
          ost << "+++ " << filename2 << "\t" << id2 << endl;

          unidiff_hunk_writer hunks(lines1, lines2, 3, ost);
          walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
          break;
        }
      case context_diff:
        {
          ost << "*** " << filename1 << "\t" << id1 << endl;
          ost << "--- " << filename2 << "\t" << id2 << endl;

          cxtdiff_hunk_writer hunks(lines1, lines2, 3, ost);
          walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
          break;
        }
      default:
        {
          // should never reach this; the external_diff type is not
          // handled by this function.
          I(false);
        }
    }
}
1149
1150#ifdef BUILD_UNIT_TESTS
1151#include "unit_tests.hh"
1152#include "transforms.hh"
1153#include <boost/lexical_cast.hpp>
1154#include "randomfile.hh"
1155
1156using boost::lexical_cast;
1157
// Print a line-by-line, side-by-side comparison of the expected
// merge result and the merge actually produced, for diagnosing
// failed merge tests.
//
// expected: the known-good merge output
// got:      the merge output under test
// prefix:   tag identifying which test/direction failed
// out:      destination stream; defaults to cerr so existing
//           three-argument callers are unchanged
static void dump_incorrect_merge(std::vector<std::string> const & expected,
                                 std::vector<std::string> const & got,
                                 std::string const & prefix,
                                 std::ostream & out = std::cerr)
{
  // walk to the longer of the two vectors so trailing lines present
  // on only one side are still shown (as [--nil--] on the other)
  std::size_t mx = std::max(expected.size(), got.size());
  for (std::size_t i = 0; i < mx; ++i)
    {
      out << "bad merge: " << i << " [" << prefix << "]\t";

      if (i < expected.size())
        out << "[" << expected[i] << "]\t";
      else
        out << "[--nil--]\t";

      if (i < got.size())
        out << "[" << got[i] << "]\t";
      else
        out << "[--nil--]\t";

      out << std::endl;
    }
}
1182
1183// regression blockers go here
1184static void unidiff_append_test()
1185{
1186 string src(string("#include \"hello.h\"\n")
1187 + "\n"
1188 + "void say_hello()\n"
1189 + "{\n"
1190 + " printf(\"hello, world\\n\");\n"
1191 + "}\n"
1192 + "\n"
1193 + "int main()\n"
1194 + "{\n"
1195 + " say_hello();\n"
1196 + "}\n");
1197
1198 string dst(string("#include \"hello.h\"\n")
1199 + "\n"
1200 + "void say_hello()\n"
1201 + "{\n"
1202 + " printf(\"hello, world\\n\");\n"
1203 + "}\n"
1204 + "\n"
1205 + "int main()\n"
1206 + "{\n"
1207 + " say_hello();\n"
1208 + "}\n"
1209 + "\n"
1210 + "void say_goodbye()\n"
1211 + "{\n"
1212 + " printf(\"goodbye\\n\");\n"
1213 + "}\n"
1214 + "\n");
1215
1216 string ud(string("--- hello.c\t0123456789abcdef0123456789abcdef01234567\n")
1217 + "+++ hello.c\tabcdef0123456789abcdef0123456789abcdef01\n"
1218 + "@@ -9,3 +9,9 @@\n"
1219 + " {\n"
1220 + " say_hello();\n"
1221 + " }\n"
1222 + "+\n"
1223 + "+void say_goodbye()\n"
1224 + "+{\n"
1225 + "+ printf(\"goodbye\\n\");\n"
1226 + "+}\n"
1227 + "+\n");
1228
1229 vector<string> src_lines, dst_lines;
1230 split_into_lines(src, src_lines);
1231 split_into_lines(dst, dst_lines);
1232 stringstream sst;
1233 make_diff("hello.c", "hello.c",
1234 file_id(id("0123456789abcdef0123456789abcdef01234567")),
1235 file_id(id("abcdef0123456789abcdef0123456789abcdef01")),
1236 src_lines, dst_lines, sst, unified_diff);
1237 cout << sst.str() << std::endl;
1238 BOOST_CHECK(sst.str() == ud);
1239}
1240
1241
1242// high tech randomizing test
1243
1244static void randomizing_merge_test()
1245{
1246 for (int i = 0; i < 30; ++i)
1247 {
1248 vector<string> anc, d1, d2, m1, m2, gm;
1249
1250 file_randomizer::build_random_fork(anc, d1, d2, gm,
1251 i * 1023, (10 + 2 * i));
1252
1253 BOOST_CHECK(merge3(anc, d1, d2, m1));
1254 if (gm != m1)
1255 dump_incorrect_merge (gm, m1, "random_merge 1");
1256 BOOST_CHECK(gm == m1);
1257
1258 BOOST_CHECK(merge3(anc, d2, d1, m2));
1259 if (gm != m2)
1260 dump_incorrect_merge (gm, m2, "random_merge 2");
1261 BOOST_CHECK(gm == m2);
1262 }
1263}
1264
1265
1266// old boring tests
1267
1268static void merge_prepend_test()
1269{
1270 BOOST_CHECKPOINT("prepend test");
1271 vector<string> anc, d1, d2, m1, m2, gm;
1272 for (int i = 10; i < 20; ++i)
1273 {
1274 d2.push_back(lexical_cast<string>(i));
1275 gm.push_back(lexical_cast<string>(i));
1276 }
1277
1278 for (int i = 0; i < 10; ++i)
1279 {
1280 anc.push_back(lexical_cast<string>(i));
1281 d1.push_back(lexical_cast<string>(i));
1282 d2.push_back(lexical_cast<string>(i));
1283 gm.push_back(lexical_cast<string>(i));
1284 }
1285
1286 BOOST_CHECK(merge3(anc, d1, d2, m1));
1287 if (gm != m1)
1288 dump_incorrect_merge (gm, m1, "merge_prepend 1");
1289 BOOST_CHECK(gm == m1);
1290
1291
1292 BOOST_CHECK(merge3(anc, d2, d1, m2));
1293 if (gm != m2)
1294 dump_incorrect_merge (gm, m2, "merge_prepend 2");
1295 BOOST_CHECK(gm == m2);
1296}
1297
1298
1299static void merge_append_test()
1300{
1301 BOOST_CHECKPOINT("append test");
1302 vector<string> anc, d1, d2, m1, m2, gm;
1303 for (int i = 0; i < 10; ++i)
1304 anc.push_back(lexical_cast<string>(i));
1305
1306 d1 = anc;
1307 d2 = anc;
1308 gm = anc;
1309
1310 for (int i = 10; i < 20; ++i)
1311 {
1312 d2.push_back(lexical_cast<string>(i));
1313 gm.push_back(lexical_cast<string>(i));
1314 }
1315
1316 BOOST_CHECK(merge3(anc, d1, d2, m1));
1317 if (gm != m1)
1318 dump_incorrect_merge (gm, m1, "merge_append 1");
1319 BOOST_CHECK(gm == m1);
1320
1321 BOOST_CHECK(merge3(anc, d2, d1, m2));
1322 if (gm != m2)
1323 dump_incorrect_merge (gm, m2, "merge_append 2");
1324 BOOST_CHECK(gm == m2);
1325
1326
1327}
1328
1329static void merge_additions_test()
1330{
1331 BOOST_CHECKPOINT("additions test");
1332 string ancestor("I like oatmeal\nI like orange juice\nI like toast");
1333 string desc1("I like oatmeal\nI don't like spam\nI like orange juice\nI like toast");
1334 string confl("I like oatmeal\nI don't like tuna\nI like orange juice\nI like toast");
1335 string desc2("I like oatmeal\nI like orange juice\nI don't like tuna\nI like toast");
1336 string good_merge("I like oatmeal\nI don't like spam\nI like orange juice\nI don't like tuna\nI like toast");
1337 vector<string> anc, d1, cf, d2, m1, m2, gm;
1338
1339 split_into_lines(ancestor, anc);
1340 split_into_lines(desc1, d1);
1341 split_into_lines(confl, cf);
1342 split_into_lines(desc2, d2);
1343 split_into_lines(good_merge, gm);
1344
1345 BOOST_CHECK(merge3(anc, d1, d2, m1));
1346 if (gm != m1)
1347 dump_incorrect_merge (gm, m1, "merge_addition 1");
1348 BOOST_CHECK(gm == m1);
1349
1350 BOOST_CHECK(merge3(anc, d2, d1, m2));
1351 if (gm != m2)
1352 dump_incorrect_merge (gm, m2, "merge_addition 2");
1353 BOOST_CHECK(gm == m2);
1354
1355 BOOST_CHECK(!merge3(anc, d1, cf, m1));
1356}
1357
1358static void merge_deletions_test()
1359{
1360 string ancestor("I like oatmeal\nI like orange juice\nI like toast");
1361 string desc2("I like oatmeal\nI like toast");
1362
1363 vector<string> anc, d1, d2, m1, m2, gm;
1364
1365 split_into_lines(ancestor, anc);
1366 split_into_lines(desc2, d2);
1367 d1 = anc;
1368 gm = d2;
1369
1370 BOOST_CHECK(merge3(anc, d1, d2, m1));
1371 if (gm != m1)
1372 dump_incorrect_merge (gm, m1, "merge_deletion 1");
1373 BOOST_CHECK(gm == m1);
1374
1375 BOOST_CHECK(merge3(anc, d2, d1, m2));
1376 if (gm != m2)
1377 dump_incorrect_merge (gm, m2, "merge_deletion 2");
1378 BOOST_CHECK(gm == m2);
1379}
1380
1381
1382void add_diff_patch_tests(test_suite * suite)
1383{
1384 I(suite);
1385 suite->add(BOOST_TEST_CASE(&unidiff_append_test));
1386 suite->add(BOOST_TEST_CASE(&merge_prepend_test));
1387 suite->add(BOOST_TEST_CASE(&merge_append_test));
1388 suite->add(BOOST_TEST_CASE(&merge_additions_test));
1389 suite->add(BOOST_TEST_CASE(&merge_deletions_test));
1390 suite->add(BOOST_TEST_CASE(&randomizing_merge_test));
1391}
1392
1393
1394#endif // BUILD_UNIT_TESTS

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status