// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
//
// This program is made available under the GNU GPL version 2.0 or
// greater. See the accompanying file COPYING for details.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE.


#include "base.hh"
#include <algorithm>
#include <iterator>
#include <map>
#include "vector.hh"

#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>
#include "diff_patch.hh"
#include "interner.hh"
#include "lcs.hh"
#include "roster.hh"
#include "safe_map.hh"
#include "sanity.hh"
#include "xdelta.hh"
#include "simplestring_xform.hh"
#include "vocab.hh"
#include "revision.hh"
#include "constants.hh"
#include "file_io.hh"
#include "pcrewrap.hh"
#include "lua_hooks.hh"
#include "database.hh"
#include "transforms.hh"

using std::make_pair;
using std::map;
using std::min;
using std::max;
using std::ostream;
using std::ostream_iterator;
using std::string;
using std::swap;
using std::vector;

using boost::shared_ptr;

//
// a 3-way merge works like this:
//
//            /----> right
//    ancestor
//            \----> left
//
// first you compute the edit list "EDITS(ancestor,left)".
//
// then you make an offset table "leftpos" which describes positions in
// "ancestor" as they map to "left"; that is, for 0 <= apos <
// ancestor.size(), we have
//
//   left[leftpos[apos]] == ancestor[apos]
//
// you do this by walking through the edit list and either jumping the
// current index ahead an extra position, on an insert, or remaining still,
// on a delete. on an insert *or* a delete, you push the current index back
// onto the leftpos array.
//
// next you compute the edit list "EDITS(ancestor,right)".
//
// you then go through this edit list applying the edits to left, rather
// than ancestor, and using the table leftpos to map the position of each
// edit to an appropriate spot in left. this means you walk a "curr_left"
// index through the edits, and for each edit e:
//
// - if e is a delete (and e.pos is a position in ancestor)
//   - increment curr_left without copying anything to "merged"
//
// - if e is an insert (and e.pos is a position in right)
//   - copy right[e.pos] to "merged"
//   - leave curr_left alone
//
// - when advancing to apos (and apos is a position in ancestor)
//   - copy left[curr_left] to merged while curr_left < leftpos[apos]
//
//
// the practical upshot is that you apply the delta from ancestor->right
// to the adjusted contexts in left, producing something vaguely like
// the concatenation of delta(ancestor,left) :: delta(ancestor,right).
//
// NB: this is, as far as I can tell, what diff3 does. I don't think I'm
// infringing on anyone's fancy patents here.
//
typedef enum { preserved = 0, deleted = 1, changed = 2 } edit_t;
static const char etab[3][10] =
  {
    "preserved",
    "deleted",
    "changed"
  };

struct extent
{
  extent(size_t p, size_t l, edit_t t)
    : pos(p), len(l), type(t)
  {}
  size_t pos;
  size_t len;
  edit_t type;
};

void calculate_extents(vector<long, QA(long)> const & a_b_edits,
                       vector<long, QA(long)> const & b,
                       vector<long, QA(long)> & prefix,
                       vector<extent> & extents,
                       vector<long, QA(long)> & suffix,
                       size_t const a_len,
                       interner<long> & intern)
{
  extents.reserve(a_len * 2);

  size_t a_pos = 0, b_pos = 0;

  for (vector<long, QA(long)>::const_iterator i = a_b_edits.begin();
       i != a_b_edits.end(); ++i)
    {
      // L(FL("edit: %d") % *i);
      if (*i < 0)
        {
          // negative elements code the negation of the one-based index into A
          // of the element to be deleted
          size_t a_deleted = (-1 - *i);

          // fill positions out to the deletion point
          while (a_pos < a_deleted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(FL(" -- delete at A-pos %d (B-pos = %d)") % a_deleted % b_pos);

          // skip the deleted line
          a_pos++;
          extents.push_back(extent(b_pos, 0, deleted));
        }
      else
        {
          // positive elements code the one-based index into B of the element to
          // be inserted
          size_t b_inserted = (*i - 1);

          // fill positions out to the insertion point
          while (b_pos < b_inserted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(FL(" -- insert at B-pos %d (A-pos = %d) : '%s'")
          //   % b_inserted % a_pos % intern.lookup(b.at(b_inserted)));

          // record that there was an insertion, but a_pos did not move.
          if ((b_pos == 0 && extents.empty())
              || (b_pos == prefix.size()))
            {
              prefix.push_back(b.at(b_pos));
            }
          else if (a_len == a_pos)
            {
              suffix.push_back(b.at(b_pos));
            }
          else
            {
              // make the insertion
              extents.back().type = changed;
              extents.back().len++;
            }
          b_pos++;
        }
    }

  while (extents.size() < a_len)
    extents.push_back(extent(b_pos++, 1, preserved));
}
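
// a worked example of the encoding above (illustrative only, not part
// of the build): for A = [x y z] and B = [x z w] the edit script is
// [-2, 3] -- delete the second element of A, insert the third element
// of B. Tracing calculate_extents over it yields one extent per A line,
//
//   [ (pos=0,len=1,preserved), (pos=1,len=0,deleted), (pos=1,len=1,preserved) ]
//
// plus suffix = [w], since the insertion lands after the last A line.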

void normalize_extents(vector<extent> & a_b_map,
                       vector<long, QA(long)> const & a,
                       vector<long, QA(long)> const & b)
{
  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && (a_b_map.at(j-1).type == preserved)
                 && (a_b_map.at(j).type == changed)
                 && (a.at(j) == b.at(a_b_map.at(j).pos + a_b_map.at(j).len - 1)))
            {
              // This is implied by (a_b_map.at(j-1).type == preserved)
              I(a.at(j-1) == b.at(a_b_map.at(j-1).pos));

              // Coming into loop we have:
              //                    i
              //   z --pres--> z    0
              //   o --pres--> o    1
              //   a --chng--> a    2   The important thing here is that 'a' in
              //               t        the LHS matches with ...
              //               u
              //               v
              //               a        ... the a on the RHS here. Hence we can
              //   q --pres--> q    3   'shift' the entire 'changed' block
              //   e --chng--> d    4   upwards, leaving a 'preserved' line
              //   g --pres--> g    5   'a'->'a'
              //
              // Want to end up with:
              //                    i
              //   z --pres--> z    0
              //   o --chng--> o    1
              //               a
              //               t
              //               u
              //               v
              //   a --pres--> a    2
              //   q --pres--> q    3
              //   e --chng--> d    4
              //   g --pres--> g    5
              //
              // Now all the 'changed' extents are normalised to the
              // earliest possible position.

              L(FL("exchanging preserved extent [%d+%d] with changed extent [%d+%d]")
                % a_b_map.at(j-1).pos
                % a_b_map.at(j-1).len
                % a_b_map.at(j).pos
                % a_b_map.at(j).len);

              swap(a_b_map.at(j-1).len, a_b_map.at(j).len);
              swap(a_b_map.at(j-1).type, a_b_map.at(j).type);

              // Adjust position of the later, preserved extent. It should
              // now point to the second 'a' in the above example.
              a_b_map.at(j).pos = a_b_map.at(j-1).pos + a_b_map.at(j-1).len;

              --j;
            }
        }
    }

  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && a_b_map.at(j).type == changed
                 && a_b_map.at(j-1).type == changed
                 && a_b_map.at(j).len > 1
                 && a_b_map.at(j-1).pos + a_b_map.at(j-1).len == a_b_map.at(j).pos)
            {
              // step 1: move a chunk from this insert extent to its
              // predecessor
              size_t piece = a_b_map.at(j).len - 1;
              // L(FL("moving change piece of len %d from pos %d to pos %d")
              //   % piece
              //   % a_b_map.at(j).pos
              //   % a_b_map.at(j-1).pos);
              a_b_map.at(j).len = 1;
              a_b_map.at(j).pos += piece;
              a_b_map.at(j-1).len += piece;

              // step 2: if this extent (now of length 1) has become a "changed"
              // extent identical to its previous state, switch it to a "preserved"
              // extent.
              if (b.at(a_b_map.at(j).pos) == a.at(j))
                {
                  // L(FL("changing normalized 'changed' extent at %d to 'preserved'")
                  //   % a_b_map.at(j).pos);
                  a_b_map.at(j).type = preserved;
                }
              --j;
            }
        }
    }
}
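
// a small example of the coalescing above (illustrative only): two
// adjacent changed extents [pos=0,len=2] and [pos=2,len=3] become
// [pos=0,len=4] and [pos=4,len=1]; if the surviving one-line extent
// then maps a line to itself (b.at(4) == a.at(j)), it is downgraded
// to 'preserved'.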


void merge_extents(vector<extent> const & a_b_map,
                   vector<extent> const & a_c_map,
                   vector<long, QA(long)> const & b,
                   vector<long, QA(long)> const & c,
                   interner<long> const & in,
                   vector<long, QA(long)> & merged)
{
  I(a_b_map.size() == a_c_map.size());

  vector<extent>::const_iterator i = a_b_map.begin();
  vector<extent>::const_iterator j = a_c_map.begin();
  merged.reserve(a_b_map.size() * 2);

  //   for (; i != a_b_map.end(); ++i, ++j)
  //     {

  //       L(FL("trying to merge: [%s %d %d] vs. [%s %d %d]")
  //         % etab[i->type] % i->pos % i->len
  //         % etab[j->type] % j->pos % j->len);
  //     }

  //   i = a_b_map.begin();
  //   j = a_c_map.begin();

  for (; i != a_b_map.end(); ++i, ++j)
    {

      // L(FL("trying to merge: [%s %d %d] vs. [%s %d %d]")
      //   % etab[i->type] % i->pos % i->len
      //   % etab[j->type] % j->pos % j->len);

      // mutual, identical preserves / inserts / changes
      if (((i->type == changed && j->type == changed)
           || (i->type == preserved && j->type == preserved))
          && i->len == j->len)
        {
          for (size_t k = 0; k < i->len; ++k)
            {
              if (b.at(i->pos + k) != c.at(j->pos + k))
                {
                  L(FL("conflicting edits: %s %d[%d] '%s' vs. %s %d[%d] '%s'")
                    % etab[i->type] % i->pos % k % in.lookup(b.at(i->pos + k))
                    % etab[j->type] % j->pos % k % in.lookup(c.at(j->pos + k)));
                  throw conflict();
                }
              merged.push_back(b.at(i->pos + k));
            }
        }

      // mutual or single-edge deletes
      else if ((i->type == deleted && j->type == deleted)
               || (i->type == deleted && j->type == preserved)
               || (i->type == preserved && j->type == deleted))
        {
          // do nothing
        }

      // single-edge insert / changes
      else if (i->type == changed && j->type == preserved)
        for (size_t k = 0; k < i->len; ++k)
          merged.push_back(b.at(i->pos + k));

      else if (i->type == preserved && j->type == changed)
        for (size_t k = 0; k < j->len; ++k)
          merged.push_back(c.at(j->pos + k));

      else
        {
          L(FL("conflicting edits: [%s %d %d] vs. [%s %d %d]")
            % etab[i->type] % i->pos % i->len
            % etab[j->type] % j->pos % j->len);
          throw conflict();
        }

      // if (merged.empty())
      //   L(FL(" --> EMPTY"));
      // else
      //   L(FL(" --> [%d]: %s") % (merged.size() - 1) % in.lookup(merged.back()));
    }
}
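
// for reference, a summary of how the cases above resolve each pair of
// extents (left extent down the side, right extent across the top);
// this is a restatement of the code, not part of the build:
//
//               preserved    deleted     changed
//   preserved   copy         drop        take right
//   deleted     drop         drop        conflict
//   changed     take left    conflict    copy if identical, else conflict
//
// (the 'copy' cases also require equal lengths and matching text;
// anything else throws conflict())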


void merge_via_edit_scripts(vector<string> const & ancestor,
                            vector<string> const & left,
                            vector<string> const & right,
                            vector<string> & merged)
{
  vector<long, QA(long)> anc_interned;
  vector<long, QA(long)> left_interned, right_interned;
  vector<long, QA(long)> left_edits, right_edits;
  vector<long, QA(long)> left_prefix, right_prefix;
  vector<long, QA(long)> left_suffix, right_suffix;
  vector<extent> left_extents, right_extents;
  vector<long, QA(long)> merged_interned;
  interner<long> in;

  //   for (int i = 0; i < min(min(left.size(), right.size()), ancestor.size()); ++i)
  //     {
  //       cerr << '[' << i << "] " << left[i] << ' ' << ancestor[i] << ' ' << right[i] << '\n';
  //     }

  anc_interned.reserve(ancestor.size());
  for (vector<string>::const_iterator i = ancestor.begin();
       i != ancestor.end(); ++i)
    anc_interned.push_back(in.intern(*i));

  left_interned.reserve(left.size());
  for (vector<string>::const_iterator i = left.begin();
       i != left.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(right.size());
  for (vector<string>::const_iterator i = right.begin();
       i != right.end(); ++i)
    right_interned.push_back(in.intern(*i));

  L(FL("calculating left edit script on %d -> %d lines")
    % anc_interned.size() % left_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              left_interned.begin(), left_interned.end(),
              min(ancestor.size(), left.size()),
              left_edits);

  L(FL("calculating right edit script on %d -> %d lines")
    % anc_interned.size() % right_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              right_interned.begin(), right_interned.end(),
              min(ancestor.size(), right.size()),
              right_edits);

  L(FL("calculating left extents on %d edits") % left_edits.size());
  calculate_extents(left_edits, left_interned,
                    left_prefix, left_extents, left_suffix,
                    anc_interned.size(), in);

  L(FL("calculating right extents on %d edits") % right_edits.size());
  calculate_extents(right_edits, right_interned,
                    right_prefix, right_extents, right_suffix,
                    anc_interned.size(), in);

  L(FL("normalizing %d right extents") % right_extents.size());
  normalize_extents(right_extents, anc_interned, right_interned);

  L(FL("normalizing %d left extents") % left_extents.size());
  normalize_extents(left_extents, anc_interned, left_interned);


  if ((!right_prefix.empty()) && (!left_prefix.empty()))
    {
      L(FL("conflicting prefixes"));
      throw conflict();
    }

  if ((!right_suffix.empty()) && (!left_suffix.empty()))
    {
      L(FL("conflicting suffixes"));
      throw conflict();
    }

  L(FL("merging %d left, %d right extents")
    % left_extents.size() % right_extents.size());

  copy(left_prefix.begin(), left_prefix.end(), back_inserter(merged_interned));
  copy(right_prefix.begin(), right_prefix.end(), back_inserter(merged_interned));

  merge_extents(left_extents, right_extents,
                left_interned, right_interned,
                in, merged_interned);

  copy(left_suffix.begin(), left_suffix.end(), back_inserter(merged_interned));
  copy(right_suffix.begin(), right_suffix.end(), back_inserter(merged_interned));

  merged.reserve(merged_interned.size());
  for (vector<long, QA(long)>::const_iterator i = merged_interned.begin();
       i != merged_interned.end(); ++i)
    merged.push_back(in.lookup(*i));
}


bool merge3(vector<string> const & ancestor,
            vector<string> const & left,
            vector<string> const & right,
            vector<string> & merged)
{
  try
    {
      merge_via_edit_scripts(ancestor, left, right, merged);
    }
  catch(conflict &)
    {
      L(FL("conflict detected. no merge."));
      return false;
    }
  return true;
}
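
// usage sketch (illustrative; the unit tests at the bottom of this
// file exercise exactly this pattern):
//
//   vector<string> anc, left, right, merged;
//   split_into_lines(ancestor_text, anc);
//   split_into_lines(left_text, left);
//   split_into_lines(right_text, right);
//   if (!merge3(anc, left, right, merged))
//     { /* conflict: fall back to a user-driven merge */ }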


///////////////////////////////////////////////////////////////////////////
// content_merge_database_adaptor
///////////////////////////////////////////////////////////////////////////


content_merge_database_adaptor::content_merge_database_adaptor(database & db,
                                                               revision_id const & left,
                                                               revision_id const & right,
                                                               marking_map const & left_mm,
                                                               marking_map const & right_mm)
  : db(db), left_rid (left), right_rid (right), left_mm(left_mm), right_mm(right_mm)
{
  // FIXME: possibly refactor to run this lazily, as we don't
  // need to find common ancestors if we're never actually
  // called on to do content merging.
  find_common_ancestor_for_merge(db, left, right, lca);
}

void
content_merge_database_adaptor::record_merge(file_id const & left_ident,
                                             file_id const & right_ident,
                                             file_id const & merged_ident,
                                             file_data const & left_data,
                                             file_data const & right_data,
                                             file_data const & merged_data)
{
  L(FL("recording successful merge of %s <-> %s into %s")
    % left_ident
    % right_ident
    % merged_ident);

  transaction_guard guard(db);

  if (!(left_ident == merged_ident))
    {
      delta left_delta;
      diff(left_data.inner(), merged_data.inner(), left_delta);
      db.put_file_version(left_ident, merged_ident, file_delta(left_delta));
    }
  if (!(right_ident == merged_ident))
    {
      delta right_delta;
      diff(right_data.inner(), merged_data.inner(), right_delta);
      db.put_file_version(right_ident, merged_ident, file_delta(right_delta));
    }
  guard.commit();
}

void
content_merge_database_adaptor::cache_roster(revision_id const & rid,
                                             boost::shared_ptr<roster_t const> roster)
{
  safe_insert(rosters, make_pair(rid, roster));
}

static void
load_and_cache_roster(database & db, revision_id const & rid,
                      map<revision_id, shared_ptr<roster_t const> > & rmap,
                      shared_ptr<roster_t const> & rout)
{
  map<revision_id, shared_ptr<roster_t const> >::const_iterator i = rmap.find(rid);
  if (i != rmap.end())
    rout = i->second;
  else
    {
      cached_roster cr;
      db.get_roster(rid, cr);
      safe_insert(rmap, make_pair(rid, cr.first));
      rout = cr.first;
    }
}

void
content_merge_database_adaptor::get_ancestral_roster(node_id nid,
                                                     revision_id & rid,
                                                     shared_ptr<roster_t const> & anc)
{
  // Given a file, if the lca is nonzero and its roster contains the file,
  // then we use its roster. Otherwise we use the roster at the file's
  // birth revision, which is the "per-file worst case" lca.

  // Begin by loading any non-empty file lca roster
  rid = lca;
  if (!lca.inner()().empty())
    load_and_cache_roster(db, lca, rosters, anc);

  // If there is no LCA, or the LCA's roster doesn't contain the file,
  // then use the file's birth roster.
  if (!anc || !anc->has_node(nid))
    {
      marking_map::const_iterator lmm = left_mm.find(nid);
      marking_map::const_iterator rmm = right_mm.find(nid);

      MM(left_mm);
      MM(right_mm);

      if (lmm == left_mm.end())
        {
          I(rmm != right_mm.end());
          rid = rmm->second.birth_revision;
        }
      else if (rmm == right_mm.end())
        {
          I(lmm != left_mm.end());
          rid = lmm->second.birth_revision;
        }
      else
        {
          I(lmm->second.birth_revision == rmm->second.birth_revision);
          rid = lmm->second.birth_revision;
        }

      load_and_cache_roster(db, rid, rosters, anc);
    }
  I(anc);
}

void
content_merge_database_adaptor::get_version(file_id const & ident,
                                            file_data & dat) const
{
  db.get_file_version(ident, dat);
}


///////////////////////////////////////////////////////////////////////////
// content_merge_workspace_adaptor
///////////////////////////////////////////////////////////////////////////

void
content_merge_workspace_adaptor::cache_roster(revision_id const & rid,
                                              boost::shared_ptr<roster_t const> roster)
{
  rosters.insert(std::make_pair(rid, roster));
}

void
content_merge_workspace_adaptor::record_merge(file_id const & left_id,
                                              file_id const & right_id,
                                              file_id const & merged_id,
                                              file_data const & left_data,
                                              file_data const & right_data,
                                              file_data const & merged_data)
{
  L(FL("temporarily recording merge of %s <-> %s into %s")
    % left_id
    % right_id
    % merged_id);
  // this is an insert instead of a safe_insert because it is perfectly
  // legal (though rare) to have multiple merges resolve to the same file
  // contents.
  temporary_store.insert(make_pair(merged_id, merged_data));
}

void
content_merge_workspace_adaptor::get_ancestral_roster(node_id nid,
                                                      revision_id & rid,
                                                      shared_ptr<roster_t const> & anc)
{
  // Begin by loading any non-empty file lca roster
  if (base->has_node(nid))
    {
      rid = lca;
      anc = base;
    }
  else
    {
      marking_map::const_iterator lmm = left_mm.find(nid);
      marking_map::const_iterator rmm = right_mm.find(nid);

      MM(left_mm);
      MM(right_mm);

      if (lmm == left_mm.end())
        {
          I(rmm != right_mm.end());
          rid = rmm->second.birth_revision;
        }
      else if (rmm == right_mm.end())
        {
          I(lmm != left_mm.end());
          rid = lmm->second.birth_revision;
        }
      else
        {
          I(lmm->second.birth_revision == rmm->second.birth_revision);
          rid = lmm->second.birth_revision;
        }

      load_and_cache_roster(db, rid, rosters, anc);
    }
  I(anc);
}

void
content_merge_workspace_adaptor::get_version(file_id const & ident,
                                             file_data & dat) const
{
  map<file_id,file_data>::const_iterator i = temporary_store.find(ident);
  if (i != temporary_store.end())
    dat = i->second;
  else if (db.file_version_exists(ident))
    db.get_file_version(ident, dat);
  else
    {
      data tmp;
      file_id fid;
      map<file_id, file_path>::const_iterator i = content_paths.find(ident);
      I(i != content_paths.end());

      require_path_is_file(i->second,
                           F("file '%s' does not exist in workspace") % i->second,
                           F("'%s' in workspace is a directory, not a file") % i->second);
      read_data(i->second, tmp);
      calculate_ident(file_data(tmp), fid);
      E(fid == ident,
        F("file %s in workspace has id %s, wanted %s")
        % i->second
        % fid
        % ident);
      dat = file_data(tmp);
    }
}


///////////////////////////////////////////////////////////////////////////
// content_merge_checkout_adaptor
///////////////////////////////////////////////////////////////////////////

void
content_merge_checkout_adaptor::record_merge(file_id const & left_ident,
                                             file_id const & right_ident,
                                             file_id const & merged_ident,
                                             file_data const & left_data,
                                             file_data const & right_data,
                                             file_data const & merged_data)
{
  I(false);
}

void
content_merge_checkout_adaptor::get_ancestral_roster(node_id nid,
                                                     revision_id & rid,
                                                     shared_ptr<roster_t const> & anc)
{
  I(false);
}

void
content_merge_checkout_adaptor::get_version(file_id const & ident,
                                            file_data & dat) const
{
  db.get_file_version(ident, dat);
}


///////////////////////////////////////////////////////////////////////////
// content_merger
///////////////////////////////////////////////////////////////////////////

string
content_merger::get_file_encoding(file_path const & path,
                                  roster_t const & ros)
{
  attr_value v;
  if (ros.get_attr(path, attr_key(constants::encoding_attribute), v))
    return v();
  return constants::default_encoding;
}

bool
content_merger::attribute_manual_merge(file_path const & path,
                                       roster_t const & ros)
{
  attr_value v;
  if (ros.get_attr(path, attr_key(constants::manual_merge_attribute), v)
      && v() == "true")
    return true;
  return false; // default: enable auto merge
}

bool
content_merger::try_auto_merge(file_path const & anc_path,
                               file_path const & left_path,
                               file_path const & right_path,
                               file_path const & merged_path,
                               file_id const & ancestor_id,
                               file_id const & left_id,
                               file_id const & right_id,
                               file_id & merged_id)
{
  // This version of try_to_merge_files should only be called when there is a
  // real merge3 to perform.
  I(!null_id(ancestor_id));
  I(!null_id(left_id));
  I(!null_id(right_id));

  L(FL("trying auto merge '%s' %s <-> %s (ancestor: %s)")
    % merged_path
    % left_id
    % right_id
    % ancestor_id);

  if (left_id == right_id)
    {
      L(FL("files are identical"));
      merged_id = left_id;
      return true;
    }

  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;

  adaptor.get_version(left_id, left_data);
  adaptor.get_version(ancestor_id, ancestor_data);
  adaptor.get_version(right_id, right_data);

  left_unpacked = left_data.inner();
  ancestor_unpacked = ancestor_data.inner();
  right_unpacked = right_data.inner();

  if (!attribute_manual_merge(left_path, left_ros) &&
      !attribute_manual_merge(right_path, right_ros))
    {
      // both files mergeable by monotone internal algorithm, try to merge
      // note: the ancestor is not considered for manual merging. Forcing the
      // user to merge manually just because of an ancestor mistakenly marked
      // manual seems too harsh
      string left_encoding, anc_encoding, right_encoding;
      left_encoding = this->get_file_encoding(left_path, left_ros);
      anc_encoding = this->get_file_encoding(anc_path, anc_ros);
      right_encoding = this->get_file_encoding(right_path, right_ros);

      vector<string> left_lines, ancestor_lines, right_lines, merged_lines;
      split_into_lines(left_unpacked(), left_encoding, left_lines);
      split_into_lines(ancestor_unpacked(), anc_encoding, ancestor_lines);
      split_into_lines(right_unpacked(), right_encoding, right_lines);

      if (merge3(ancestor_lines, left_lines, right_lines, merged_lines))
        {
          file_id tmp_id;
          file_data merge_data;
          string tmp;

          L(FL("internal 3-way merged ok"));
          join_lines(merged_lines, tmp);
          merge_data = file_data(tmp);
          calculate_ident(merge_data, merged_id);

          adaptor.record_merge(left_id, right_id, merged_id,
                               left_data, right_data, merge_data);

          return true;
        }
    }

  return false;
}

bool
content_merger::try_user_merge(file_path const & anc_path,
                               file_path const & left_path,
                               file_path const & right_path,
                               file_path const & merged_path,
                               file_id const & ancestor_id,
                               file_id const & left_id,
                               file_id const & right_id,
                               file_id & merged_id)
{
  // This version of try_to_merge_files should only be called when there is a
  // real merge3 to perform.
  I(!null_id(ancestor_id));
  I(!null_id(left_id));
  I(!null_id(right_id));

  L(FL("trying user merge '%s' %s <-> %s (ancestor: %s)")
    % merged_path
    % left_id
    % right_id
    % ancestor_id);

  if (left_id == right_id)
    {
      L(FL("files are identical"));
      merged_id = left_id;
      return true;
    }

  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;

  adaptor.get_version(left_id, left_data);
  adaptor.get_version(ancestor_id, ancestor_data);
  adaptor.get_version(right_id, right_data);

  left_unpacked = left_data.inner();
  ancestor_unpacked = ancestor_data.inner();
  right_unpacked = right_data.inner();

  P(F("help required for 3-way merge\n"
      "[ancestor] %s\n"
      "[    left] %s\n"
      "[   right] %s\n"
      "[  merged] %s")
    % anc_path
    % left_path
    % right_path
    % merged_path);

  if (lua.hook_merge3(anc_path, left_path, right_path, merged_path,
                      ancestor_unpacked, left_unpacked,
                      right_unpacked, merged_unpacked))
    {
      file_data merge_data(merged_unpacked);

      L(FL("lua merge3 hook merged ok"));
      calculate_ident(merge_data, merged_id);

      adaptor.record_merge(left_id, right_id, merged_id,
                           left_data, right_data, merge_data);
      return true;
    }

  return false;
}

// the remaining part of this file just handles printing out various
// diff formats for the case where someone wants to *read* a diff
// rather than apply it.

struct hunk_consumer
{
  vector<string> const & a;
  vector<string> const & b;
  size_t ctx;
  ostream & ost;
  boost::scoped_ptr<pcre::regex const> encloser_re;
  size_t a_begin, b_begin, a_len, b_len;
  long skew;

  vector<string>::const_reverse_iterator encloser_last_match;
  vector<string>::const_reverse_iterator encloser_last_search;

  virtual void flush_hunk(size_t pos) = 0;
  virtual void advance_to(size_t newpos) = 0;
  virtual void insert_at(size_t b_pos) = 0;
  virtual void delete_at(size_t a_pos) = 0;
  virtual void find_encloser(size_t pos, string & encloser);
  virtual ~hunk_consumer() {}
  hunk_consumer(vector<string> const & a,
                vector<string> const & b,
                size_t ctx,
                ostream & ost,
                string const & encloser_pattern)
    : a(a), b(b), ctx(ctx), ost(ost), encloser_re(0),
      a_begin(0), b_begin(0), a_len(0), b_len(0), skew(0),
      encloser_last_match(a.rend()), encloser_last_search(a.rend())
  {
    if (encloser_pattern != "")
      encloser_re.reset(new pcre::regex(encloser_pattern));
  }
};

/* Find, and write to ENCLOSER, the nearest line before POS which matches
   ENCLOSER_PATTERN. We remember the last line scanned and the last line
   matched, to avoid duplicating effort. */

void
hunk_consumer::find_encloser(size_t pos, string & encloser)
{
  typedef vector<string>::const_reverse_iterator riter;

  // Precondition: encloser_last_search <= pos <= a.size().
  I(pos <= a.size());
  // static_cast<> to silence compiler unsigned vs. signed comparison
  // warning, after first making sure that the static_cast is safe.
  I(a.rend() - encloser_last_search >= 0);
  I(pos >= static_cast<size_t>(a.rend() - encloser_last_search));

  if (!encloser_re)
    return;

  riter last = encloser_last_search;
  riter i = riter(a.begin() + pos);

  encloser_last_search = i;

  // i is a reverse_iterator, so this loop goes backward through the vector.
  for (; i != last; i++)
    if (encloser_re->match(*i))
      {
        encloser_last_match = i;
        break;
      }

  if (encloser_last_match == a.rend())
    return;

  L(FL("find_encloser: from %u matching %d, \"%s\"")
    % pos % (a.rend() - encloser_last_match) % *encloser_last_match);

  // the number 40 is chosen to match GNU diff. it could safely be
  // increased up to about 60 without overflowing the standard
  // terminal width.
  encloser = string(" ") + (*encloser_last_match).substr(0, 40);
}

void walk_hunk_consumer(vector<long, QA(long)> const & lcs,
                        vector<long, QA(long)> const & lines1,
                        vector<long, QA(long)> const & lines2,
                        hunk_consumer & cons)
{

  size_t a = 0, b = 0;
  if (lcs.begin() == lcs.end())
    {
      // degenerate case: files have nothing in common
      cons.advance_to(0);
      while (a < lines1.size())
        cons.delete_at(a++);
      while (b < lines2.size())
        cons.insert_at(b++);
      cons.flush_hunk(a);
    }
  else
    {
      // normal case: files have something in common
      for (vector<long, QA(long)>::const_iterator i = lcs.begin();
           i != lcs.end(); ++i, ++a, ++b)
        {
          if (idx(lines1, a) == *i && idx(lines2, b) == *i)
            continue;

          cons.advance_to(a);
          while (idx(lines1,a) != *i)
            cons.delete_at(a++);
          while (idx(lines2,b) != *i)
            cons.insert_at(b++);
        }
      if (a < lines1.size())
        {
          cons.advance_to(a);
          while(a < lines1.size())
            cons.delete_at(a++);
        }
      if (b < lines2.size())
        {
          cons.advance_to(a);
          while(b < lines2.size())
            cons.insert_at(b++);
        }
      cons.flush_hunk(a);
    }
}
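
// to make the protocol above concrete (illustrative only): diffing
// lines1 = [p q r] against lines2 = [p x r], with lcs = [p r], drives
// the consumer as
//
//   advance_to(1); delete_at(1); insert_at(1); flush_hunk(3);
//
// i.e. context up to the divergence, the paired delete/insert, then a
// final flush once the tail of the lcs has been consumed.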

struct unidiff_hunk_writer : public hunk_consumer
{
  vector<string> hunk;

  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  virtual ~unidiff_hunk_writer() {}
  unidiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost,
                      string const & encloser_pattern)
    : hunk_consumer(a, b, ctx, ost, encloser_pattern)
  {}
};

void unidiff_hunk_writer::insert_at(size_t b_pos)
{
  b_len++;
  hunk.push_back(string("+") + b[b_pos]);
}

void unidiff_hunk_writer::delete_at(size_t a_pos)
{
  a_len++;
  hunk.push_back(string("-") + a[a_pos]);
}

void unidiff_hunk_writer::flush_hunk(size_t pos)
{
  if (!hunk.empty())
    {
      // insert trailing context
      size_t a_pos = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (a_pos + i < a.size()); ++i)
        {
          hunk.push_back(string(" ") + a[a_pos + i]);
          a_len++;
          b_len++;
        }

      // write hunk to stream
      if (a_len == 0)
        ost << "@@ -0,0";
      else
        {
          ost << "@@ -" << a_begin+1;
          if (a_len > 1)
            ost << ',' << a_len;
        }

      if (b_len == 0)
        ost << " +0,0";
      else
        {
          ost << " +" << b_begin+1;
          if (b_len > 1)
            ost << ',' << b_len;
        }

      {
        string encloser;
        ptrdiff_t first_mod = 0;
        vector<string>::const_iterator i;
        for (i = hunk.begin(); i != hunk.end(); i++)
          if ((*i)[0] != ' ')
            {
              first_mod = i - hunk.begin();
              break;
            }

        find_encloser(a_begin + first_mod, encloser);
        ost << " @@" << encloser << '\n';
      }
      copy(hunk.begin(), hunk.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  hunk.clear();
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
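
// continuing the walk_hunk_consumer example above with this writer and
// ctx = 3, the files [p q r] / [p x r] come out as (illustrative):
//
//   @@ -1,3 +1,3 @@
//    p
//   -q
//   +x
//    r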

void unidiff_hunk_writer::advance_to(size_t newpos)
{
  if (a_begin + a_len + (2 * ctx) < newpos || hunk.empty())
    {
      flush_hunk(newpos);

      // insert new leading context
      for (size_t p = max(ctx, newpos) - ctx;
           p < min(a.size(), newpos); ++p)
        {
          hunk.push_back(string(" ") + a[p]);
          a_begin--; a_len++;
          b_begin--; b_len++;
        }
    }
  else
    {
      // pad intermediate context
      while(a_begin + a_len < newpos)
        {
          hunk.push_back(string(" ") + a[a_begin + a_len]);
          a_len++;
          b_len++;
        }
    }
}

struct cxtdiff_hunk_writer : public hunk_consumer
{
  // For context diffs, we have to queue up calls to insert_at/delete_at
  // until we hit an advance_to, so that we can get the tags right: an
  // unpaired insert gets a + in the left margin, an unpaired delete a -,
  // but if they are paired, they both get !. Hence, we have both the
  // 'inserts' and 'deletes' queues of line numbers, and the 'from_file' and
  // 'to_file' queues of line strings.
  vector<size_t> inserts;
  vector<size_t> deletes;
  vector<string> from_file;
  vector<string> to_file;
  bool have_insertions;
  bool have_deletions;

  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  void flush_pending_mods();
  virtual ~cxtdiff_hunk_writer() {}
  cxtdiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost,
                      string const & encloser_pattern)
    : hunk_consumer(a, b, ctx, ost, encloser_pattern),
      have_insertions(false), have_deletions(false)
  {}
};

void cxtdiff_hunk_writer::insert_at(size_t b_pos)
{
  inserts.push_back(b_pos);
  have_insertions = true;
}

void cxtdiff_hunk_writer::delete_at(size_t a_pos)
{
  deletes.push_back(a_pos);
  have_deletions = true;
}

void cxtdiff_hunk_writer::flush_hunk(size_t pos)
{
  flush_pending_mods();

  if (have_deletions || have_insertions)
    {
      // insert trailing context
      size_t ctx_start = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < a.size()); ++i)
        {
          from_file.push_back(string("  ") + a[ctx_start + i]);
          a_len++;
        }

      ctx_start = b_begin + b_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < b.size()); ++i)
        {
          to_file.push_back(string("  ") + b[ctx_start + i]);
          b_len++;
        }

      {
        string encloser;
        ptrdiff_t first_insert = b_len;
        ptrdiff_t first_delete = a_len;
        vector<string>::const_iterator i;

        if (have_deletions)
          for (i = from_file.begin(); i != from_file.end(); i++)
            if ((*i)[0] != ' ')
              {
                first_delete = i - from_file.begin();
                break;
              }
        if (have_insertions)
          for (i = to_file.begin(); i != to_file.end(); i++)
            if ((*i)[0] != ' ')
              {
                first_insert = i - to_file.begin();
                break;
              }

        find_encloser(a_begin + min(first_insert, first_delete),
                      encloser);

        ost << "***************" << encloser << '\n';
      }

      ost << "*** " << (a_begin + 1) << ',' << (a_begin + a_len) << " ****\n";
      if (have_deletions)
        copy(from_file.begin(), from_file.end(), ostream_iterator<string>(ost, "\n"));

      ost << "--- " << (b_begin + 1) << ',' << (b_begin + b_len) << " ----\n";
      if (have_insertions)
        copy(to_file.begin(), to_file.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  to_file.clear();
  from_file.clear();
  have_insertions = false;
  have_deletions = false;
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}

void cxtdiff_hunk_writer::flush_pending_mods()
{
  // nothing to flush?
  if (inserts.empty() && deletes.empty())
    return;

  string prefix;

  // if we have just insertions to flush, prefix them with "+"; if
  // just deletions, prefix with "-"; if both, prefix with "!"
  if (inserts.empty() && !deletes.empty())
    prefix = "-";
  else if (deletes.empty() && !inserts.empty())
    prefix = "+";
  else
    prefix = "!";

  for (vector<size_t>::const_iterator i = deletes.begin();
       i != deletes.end(); ++i)
    {
      from_file.push_back(prefix + string(" ") + a[*i]);
      a_len++;
    }
  for (vector<size_t>::const_iterator i = inserts.begin();
       i != inserts.end(); ++i)
    {
      to_file.push_back(prefix + string(" ") + b[*i]);
      b_len++;
    }

  // clear pending mods
  inserts.clear();
  deletes.clear();
}
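
// e.g. (illustrative): a paired delete+insert flushed together marks
// both lines with '!', so a one-line replacement renders as
//
//   ! old line     (in the *** section)
//   ! new line     (in the --- section)
//
// while an unpaired delete or insert would be marked '-' or '+'.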

void cxtdiff_hunk_writer::advance_to(size_t newpos)
{
  // We must first flush out pending mods because otherwise our calculation
  // of whether we need to generate a new hunk header will be way off.
  // It is correct (i.e. consistent with diff(1)) to reset the +/-/!
  // generation algorithm between sub-components of a single hunk.
  flush_pending_mods();

  if (a_begin + a_len + (2 * ctx) < newpos)
    {
      flush_hunk(newpos);

      // insert new leading context
      if (newpos - ctx < a.size())
        {
          for (size_t i = ctx; i > 0; --i)
            {
              // The original test was (newpos - i < 0), but since newpos
              // is size_t (unsigned), it will never go negative. Testing
              // that newpos is smaller than i is the same test, really.
              if (newpos < i)
                continue;

              // note that context diffs prefix common text with two
              // spaces, whereas unified diffs use a single space
              from_file.push_back(string("  ") + a[newpos - i]);
              to_file.push_back(string("  ") + a[newpos - i]);
              a_begin--; a_len++;
              b_begin--; b_len++;
            }
        }
    }
  else
    // pad intermediate context
    while (a_begin + a_len < newpos)
      {
        from_file.push_back(string("  ") + a[a_begin + a_len]);
        to_file.push_back(string("  ") + a[a_begin + a_len]);
        a_len++;
        b_len++;
      }
}

void
make_diff(string const & filename1,
          string const & filename2,
          file_id const & id1,
          file_id const & id2,
          data const & data1,
          data const & data2,
          ostream & ost,
          diff_type type,
          string const & pattern)
{
  if (guess_binary(data1()) || guess_binary(data2()))
    {
      ost << "# " << filename2 << " is binary\n";
      return;
    }

  vector<string> lines1, lines2;
  split_into_lines(data1(), lines1, true);
  split_into_lines(data2(), lines2, true);

  vector<long, QA(long)> left_interned;
  vector<long, QA(long)> right_interned;
  vector<long, QA(long)> lcs;

  interner<long> in;

  left_interned.reserve(lines1.size());
  for (vector<string>::const_iterator i = lines1.begin();
       i != lines1.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(lines2.size());
  for (vector<string>::const_iterator i = lines2.begin();
       i != lines2.end(); ++i)
    right_interned.push_back(in.intern(*i));

  lcs.reserve(min(lines1.size(),lines2.size()));
  longest_common_subsequence(left_interned.begin(), left_interned.end(),
                             right_interned.begin(), right_interned.end(),
                             min(lines1.size(), lines2.size()),
                             back_inserter(lcs));

  // The existence of various hacky diff parsers in the world somewhat
  // constrains what output we can use. Here are some notes on how various
  // tools interpret the header lines of a diff file:
  //
  // interdiff/filterdiff (patchutils):
  //   Attempt to parse a timestamp after each whitespace. If they succeed,
  //   then they take the filename as everything up to the whitespace they
  //   succeeded at, and the timestamp as everything after. If they fail,
  //   then they take the filename to be everything up to the first
  //   whitespace. Have hardcoded that /dev/null and timestamps at the
  //   epoch (in any timezone) indicate a file that did not exist.
  //
  //   filterdiff filters on the first filename line. interdiff matches on
  //   the first filename line.
  // PatchReader perl library (used by Bugzilla):
  //   Takes the filename to be everything up to the first tab; requires
  //   that there be a tab. Determines the filename based on the first
  //   filename line.
  // diffstat:
  //   Can handle pretty much everything; tries to read up to the first tab
  //   to get the filename. Knows that "/dev/null", "", and anything
  //   beginning "/tmp/" are meaningless. Uses the second filename line.
  // patch:
  //   If there is a tab, considers everything up to that tab to be the
  //   filename. If there is not a tab, considers everything up to the
  //   first whitespace to be the filename.
  //
  //   Contains comment: 'If the [file]name is "/dev/null", ignore the name
  //   and mark the file as being nonexistent. The name "/dev/null" appears
  //   in patches regardless of how NULL_DEVICE is spelled.' Also detects
  //   timestamps at the epoch as indicating that a file does not exist.
  //
  //   Uses the first filename line as the target, unless it is /dev/null or
  //   has an epoch timestamp in which case it uses the second.
  // trac:
  //   Anything up to the first whitespace, or end of line, is considered
  //   filename. Does not care about timestamp. Uses the shorter of the
  //   two filenames as the filename (!).
  //
  // Conclusions:
  // -- You must have a tab, both to prevent PatchReader blowing up, and
  //    to make it possible to have filenames with spaces in them.
  //    (Filenames with tabs in them are always impossible to properly
  //    express; FIXME what should be done if one occurs?)
  // -- What comes after that tab matters not at all, though it probably
  //    shouldn't look like a timestamp, or have any trailing part that
  //    looks like a timestamp, unless it really is a timestamp. Simply
  //    having a trailing tab should work fine.
  // -- If you need to express that some file does not exist, you should
  //    use /dev/null as the path. patch(1) goes so far as to claim that
  //    this is part of the diff format definition.
  // -- If you want your patches to actually _work_ with patch(1), then
  //    renames are basically hopeless (you can do them by hand _after_
  //    running patch), adds work so long as the first line says either
  //    the new file's name or "/dev/null", nothing else, and deletes work
  //    if the new file name is "/dev/null", nothing else. (ATM we don't
  //    write out patches for deletes anyway.)
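
  // To illustrate, the unified-diff branch below emits a header pair of
  // the shape (filename chosen for illustration, with the literal tab
  // required above, and the file ids standing in for timestamps):
  //
  //   --- foo.cc<TAB><id1>
  //   +++ foo.cc<TAB><id2>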
  switch (type)
    {
      case unified_diff:
      {
        ost << "--- " << filename1 << '\t'
            << id1 << '\n';
        ost << "+++ " << filename2 << '\t'
            << id2 << '\n';

        unidiff_hunk_writer hunks(lines1, lines2, 3, ost, pattern);
        walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
        break;
      }
      case context_diff:
      {
        ost << "*** " << filename1 << '\t'
            << id1 << '\n';
        ost << "--- " << filename2 << '\t'
            << id2 << '\n';

        cxtdiff_hunk_writer hunks(lines1, lines2, 3, ost, pattern);
        walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
        break;
      }
      default:
      {
        // should never reach this; the external_diff type is not
        // handled by this function.
        I(false);
      }
    }
}

#ifdef BUILD_UNIT_TESTS
#include "unit_tests.hh"
#include "lexical_cast.hh"
#include "randomfile.hh"

using std::cerr;
using std::cout;
using std::stringstream;

using boost::lexical_cast;

static void dump_incorrect_merge(vector<string> const & expected,
                                 vector<string> const & got,
                                 string const & prefix)
{
  size_t mx = expected.size();
  if (mx < got.size())
    mx = got.size();
  for (size_t i = 0; i < mx; ++i)
    {
      cerr << "bad merge: " << i << " [" << prefix << "]\t";

      if (i < expected.size())
        cerr << '[' << expected[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      if (i < got.size())
        cerr << '[' << got[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      cerr << '\n';
    }
}

// high tech randomizing test
UNIT_TEST(diff_patch, randomizing_merge)
{
  randomizer rng;
  for (int i = 0; i < 30; ++i)
    {
      vector<string> anc, d1, d2, m1, m2, gm;

      file_randomizer::build_random_fork(anc, d1, d2, gm, (10 + 2 * i), rng);

      UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
      if (gm != m1)
        dump_incorrect_merge(gm, m1, "random_merge 1");
      UNIT_TEST_CHECK(gm == m1);

      UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
      if (gm != m2)
        dump_incorrect_merge(gm, m2, "random_merge 2");
      UNIT_TEST_CHECK(gm == m2);
    }
}


// old boring tests
UNIT_TEST(diff_patch, merge_prepend)
{
  UNIT_TEST_CHECKPOINT("prepend test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  for (int i = 0; i < 10; ++i)
    {
      anc.push_back(lexical_cast<string>(i));
      d1.push_back(lexical_cast<string>(i));
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_prepend 1");
  UNIT_TEST_CHECK(gm == m1);


  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_prepend 2");
  UNIT_TEST_CHECK(gm == m2);
}

UNIT_TEST(diff_patch, merge_append)
{
  UNIT_TEST_CHECKPOINT("append test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 0; i < 10; ++i)
    anc.push_back(lexical_cast<string>(i));

  d1 = anc;
  d2 = anc;
  gm = anc;

  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_append 1");
  UNIT_TEST_CHECK(gm == m1);

  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_append 2");
  UNIT_TEST_CHECK(gm == m2);


}

UNIT_TEST(diff_patch, merge_additions)
{
  UNIT_TEST_CHECKPOINT("additions test");
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc1("I like oatmeal\nI don't like spam\nI like orange juice\nI like toast");
  string confl("I like oatmeal\nI don't like tuna\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like orange juice\nI don't like tuna\nI like toast");
  string good_merge("I like oatmeal\nI don't like spam\nI like orange juice\nI don't like tuna\nI like toast");
  vector<string> anc, d1, cf, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc1, d1);
  split_into_lines(confl, cf);
  split_into_lines(desc2, d2);
  split_into_lines(good_merge, gm);

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_addition 1");
  UNIT_TEST_CHECK(gm == m1);

  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_addition 2");
  UNIT_TEST_CHECK(gm == m2);

  UNIT_TEST_CHECK(!merge3(anc, d1, cf, m1));
}

UNIT_TEST(diff_patch, merge_deletions)
{
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like toast");

  vector<string> anc, d1, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc2, d2);
  d1 = anc;
  gm = d2;

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge(gm, m1, "merge_deletion 1");
  UNIT_TEST_CHECK(gm == m1);

  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge(gm, m2, "merge_deletion 2");
  UNIT_TEST_CHECK(gm == m2);
}

#endif // BUILD_UNIT_TESTS

// Local Variables:
// mode: C++
// fill-column: 76
// c-file-style: "gnu"
// indent-tabs-mode: nil
// End:
// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s: