diff_patch.cc

// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
//
// This program is made available under the GNU GPL version 2.0 or
// greater. See the accompanying file COPYING for details.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE.


#include "base.hh"
#include <algorithm>
#include <iterator>
#include <map>
#include "vector.hh"

#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>
#include "diff_patch.hh"
#include "interner.hh"
#include "lcs.hh"
#include "roster.hh"
#include "safe_map.hh"
#include "sanity.hh"
#include "transforms.hh"
#include "simplestring_xform.hh"
#include "vocab.hh"
#include "revision.hh"
#include "constants.hh"
#include "file_io.hh"
#include "app_state.hh"
#include "pcrewrap.hh"

using std::make_pair;
using std::map;
using std::min;
using std::max;
using std::ostream;
using std::ostream_iterator;
using std::string;
using std::swap;
using std::vector;

using boost::shared_ptr;
//
// a 3-way merge works like this:
//
//            /----> right
//    ancestor
//            \----> left
//
// first you compute the edit list "EDITS(ancestor,left)".
//
// then you make an offset table "leftpos" which describes positions in
// "ancestor" as they map to "left"; that is, for 0 <= apos <
// ancestor.size(), we have
//
//   left[leftpos[apos]] == ancestor[apos]
//
// you do this by walking through the edit list and either jumping the
// current index ahead an extra position, on an insert, or remaining still,
// on a delete. on an insert *or* a delete, you push_back the current index
// onto the leftpos array.
//
// next you compute the edit list "EDITS(ancestor,right)".
//
// you then go through this edit list applying the edits to left, rather
// than ancestor, and using the table leftpos to map the position of each
// edit to an appropriate spot in left. this means you walk a "curr_left"
// index through the edits, and for each edit e:
//
// - if e is a delete (and e.pos is a position in ancestor)
//   - increment curr_left without copying anything to "merged"
//
// - if e is an insert (and e.pos is a position in right)
//   - copy right[e.pos] to "merged"
//   - leave curr_left alone
//
// - when advancing to apos (and apos is a position in ancestor)
//   - copy left[curr_left] to merged while curr_left < leftpos[apos]
//
//
// the practical upshot is that you apply the delta from ancestor->right
// to the adjusted contexts in left, producing something vaguely like
// the concatenation of delta(ancestor,left) :: delta(ancestor,right).
//
// NB: this is, as far as I can tell, what diff3 does. I don't think I'm
// infringing on anyone's fancy patents here.
//
91
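// Illustrative example (added for exposition, not in the original
// sources): suppose ancestor = [A, B, C], left = [A, X, B, C] (X
// inserted), and right = [A, B, D] (C changed to D). Then leftpos =
// [0, 2, 3], since B and C each shifted down one position in left.
// Applying EDITS(ancestor,right) -- delete C, insert D -- through
// leftpos copies left[0..2] (A, X, B), skips the deleted C, and
// appends D, giving the merge [A, X, B, D]: both sides' deltas,
// applied once each.
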
typedef enum { preserved = 0, deleted = 1, changed = 2 } edit_t;
static const char etab[3][10] =
  {
    "preserved",
    "deleted",
    "changed"
  };

struct extent
{
  extent(size_t p, size_t l, edit_t t)
    : pos(p), len(l), type(t)
  {}
  size_t pos;
  size_t len;
  edit_t type;
};

void calculate_extents(vector<long, QA(long)> const & a_b_edits,
                       vector<long, QA(long)> const & b,
                       vector<long, QA(long)> & prefix,
                       vector<extent> & extents,
                       vector<long, QA(long)> & suffix,
                       size_t const a_len,
                       interner<long> & intern)
{
  extents.reserve(a_len * 2);

  size_t a_pos = 0, b_pos = 0;

  for (vector<long, QA(long)>::const_iterator i = a_b_edits.begin();
       i != a_b_edits.end(); ++i)
    {
      // L(FL("edit: %d") % *i);
      if (*i < 0)
        {
          // negative elements code the negation of the one-based index into A
          // of the element to be deleted
          size_t a_deleted = (-1 - *i);

          // fill positions out to the deletion point
          while (a_pos < a_deleted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(FL(" -- delete at A-pos %d (B-pos = %d)") % a_deleted % b_pos);

          // skip the deleted line
          a_pos++;
          extents.push_back(extent(b_pos, 0, deleted));
        }
      else
        {
          // positive elements code the one-based index into B of the element to
          // be inserted
          size_t b_inserted = (*i - 1);

          // fill positions out to the insertion point
          while (b_pos < b_inserted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(FL(" -- insert at B-pos %d (A-pos = %d) : '%s'")
          //   % b_inserted % a_pos % intern.lookup(b.at(b_inserted)));

          // record that there was an insertion, but a_pos did not move.
          if ((b_pos == 0 && extents.empty())
              || (b_pos == prefix.size()))
            {
              prefix.push_back(b.at(b_pos));
            }
          else if (a_len == a_pos)
            {
              suffix.push_back(b.at(b_pos));
            }
          else
            {
              // make the insertion
              extents.back().type = changed;
              extents.back().len++;
            }
          b_pos++;
        }
    }

  while (extents.size() < a_len)
    extents.push_back(extent(b_pos++, 1, preserved));
}
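
// Illustrative example (added for exposition): for A = [x, y, z] and
// B = [x, q, z], the edit script is [-2, 2] -- delete the 2nd element
// of A, insert the 2nd element of B. calculate_extents() turns that
// into one extent per A line: (pos 0, len 1, preserved), (pos 1, len 1,
// changed), (pos 2, len 1, preserved), i.e. A[1] maps to the changed B
// range starting at B[1]. Insertions before the first match accumulate
// in "prefix"; insertions past the end of A accumulate in "suffix".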
184
void normalize_extents(vector<extent> & a_b_map,
                       vector<long, QA(long)> const & a,
                       vector<long, QA(long)> const & b)
{
  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && (a_b_map.at(j-1).type == preserved)
                 && (a_b_map.at(j).type == changed)
                 && (a.at(j) == b.at(a_b_map.at(j).pos + a_b_map.at(j).len - 1)))
            {
              // This is implied by (a_b_map.at(j-1).type == preserved)
              I(a.at(j-1) == b.at(a_b_map.at(j-1).pos));

              // Coming into loop we have:
              //                  i
              //  z --pres-->  z   0
              //  o --pres-->  o   1
              //  a --chng-->  a   2   The important thing here is that 'a' in
              //  t                    the LHS matches with ...
              //  u
              //  v
              //  a                    ... the a on the RHS here. Hence we can
              //  q --pres-->  q   3   'shift' the entire 'changed' block
              //  e --chng-->  d   4   upwards, leaving a 'preserved' line
              //  g --pres-->  g   5   'a'->'a'
              //
              // Want to end up with:
              //                  i
              //  z --pres-->  z   0
              //  o --chng-->  o   1
              //  a
              //  t
              //  u
              //  v
              //  a --pres-->  a   2
              //  q --pres-->  q   3
              //  e --chng-->  d   4
              //  g --pres-->  g   5
              //
              // Now all the 'changed' extents are normalised to the
              // earliest possible position.

              L(FL("exchanging preserved extent [%d+%d] with changed extent [%d+%d]")
                % a_b_map.at(j-1).pos
                % a_b_map.at(j-1).len
                % a_b_map.at(j).pos
                % a_b_map.at(j).len);

              swap(a_b_map.at(j-1).len, a_b_map.at(j).len);
              swap(a_b_map.at(j-1).type, a_b_map.at(j).type);

              // Adjust position of the later, preserved extent. It should
              // now point to the second 'a' in the above example.
              a_b_map.at(j).pos = a_b_map.at(j-1).pos + a_b_map.at(j-1).len;

              --j;
            }
        }
    }

  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && a_b_map.at(j).type == changed
                 && a_b_map.at(j-1).type == changed
                 && a_b_map.at(j).len > 1
                 && a_b_map.at(j-1).pos + a_b_map.at(j-1).len == a_b_map.at(j).pos)
            {
              // step 1: move a chunk from this insert extent to its
              // predecessor
              size_t piece = a_b_map.at(j).len - 1;
              // L(FL("moving change piece of len %d from pos %d to pos %d")
              //   % piece
              //   % a_b_map.at(j).pos
              //   % a_b_map.at(j-1).pos);
              a_b_map.at(j).len = 1;
              a_b_map.at(j).pos += piece;
              a_b_map.at(j-1).len += piece;

              // step 2: if this extent (now of length 1) has become a "changed"
              // extent identical to its previous state, switch it to a "preserved"
              // extent.
              if (b.at(a_b_map.at(j).pos) == a.at(j))
                {
                  // L(FL("changing normalized 'changed' extent at %d to 'preserved'")
                  //   % a_b_map.at(j).pos);
                  a_b_map.at(j).type = preserved;
                }
              --j;
            }
        }
    }
}


void merge_extents(vector<extent> const & a_b_map,
                   vector<extent> const & a_c_map,
                   vector<long, QA(long)> const & b,
                   vector<long, QA(long)> const & c,
                   interner<long> const & in,
                   vector<long, QA(long)> & merged)
{
  I(a_b_map.size() == a_c_map.size());

  vector<extent>::const_iterator i = a_b_map.begin();
  vector<extent>::const_iterator j = a_c_map.begin();
  merged.reserve(a_b_map.size() * 2);

  // for (; i != a_b_map.end(); ++i, ++j)
  //   {
  //     L(FL("trying to merge: [%s %d %d] vs. [%s %d %d]")
  //       % etab[i->type] % i->pos % i->len
  //       % etab[j->type] % j->pos % j->len);
  //   }

  // i = a_b_map.begin();
  // j = a_c_map.begin();

  for (; i != a_b_map.end(); ++i, ++j)
    {

      // L(FL("trying to merge: [%s %d %d] vs. [%s %d %d]")
      //   % etab[i->type] % i->pos % i->len
      //   % etab[j->type] % j->pos % j->len);

      // mutual, identical preserves / inserts / changes
      if (((i->type == changed && j->type == changed)
           || (i->type == preserved && j->type == preserved))
          && i->len == j->len)
        {
          for (size_t k = 0; k < i->len; ++k)
            {
              if (b.at(i->pos + k) != c.at(j->pos + k))
                {
                  L(FL("conflicting edits: %s %d[%d] '%s' vs. %s %d[%d] '%s'")
                    % etab[i->type] % i->pos % k % in.lookup(b.at(i->pos + k))
                    % etab[j->type] % j->pos % k % in.lookup(c.at(j->pos + k)));
                  throw conflict();
                }
              merged.push_back(b.at(i->pos + k));
            }
        }

      // mutual or single-edge deletes
      else if ((i->type == deleted && j->type == deleted)
               || (i->type == deleted && j->type == preserved)
               || (i->type == preserved && j->type == deleted))
        {
          // do nothing
        }

      // single-edge insert / changes
      else if (i->type == changed && j->type == preserved)
        for (size_t k = 0; k < i->len; ++k)
          merged.push_back(b.at(i->pos + k));

      else if (i->type == preserved && j->type == changed)
        for (size_t k = 0; k < j->len; ++k)
          merged.push_back(c.at(j->pos + k));

      else
        {
          L(FL("conflicting edits: [%s %d %d] vs. [%s %d %d]")
            % etab[i->type] % i->pos % i->len
            % etab[j->type] % j->pos % j->len);
          throw conflict();
        }

      // if (merged.empty())
      //   L(FL(" --> EMPTY"));
      // else
      //   L(FL(" --> [%d]: %s") % (merged.size() - 1) % in.lookup(merged.back()));
    }
}
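
// Illustrative summary (added for exposition) of how merge_extents()
// combines each pair of per-ancestor-line extents above:
//
//              |  preserved  |  deleted  |  changed
//   -----------+-------------+-----------+------------------------------
//   preserved  |  keep line  |  drop     |  take the changed side
//   deleted    |  drop       |  drop     |  conflict
//   changed    |  take chng  |  conflict |  keep if equal length and
//              |             |           |  identical text, else conflict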


void merge_via_edit_scripts(vector<string> const & ancestor,
                            vector<string> const & left,
                            vector<string> const & right,
                            vector<string> & merged)
{
  vector<long, QA(long)> anc_interned;
  vector<long, QA(long)> left_interned, right_interned;
  vector<long, QA(long)> left_edits, right_edits;
  vector<long, QA(long)> left_prefix, right_prefix;
  vector<long, QA(long)> left_suffix, right_suffix;
  vector<extent> left_extents, right_extents;
  vector<long, QA(long)> merged_interned;
  interner<long> in;

  // for (int i = 0; i < min(min(left.size(), right.size()), ancestor.size()); ++i)
  //   {
  //     cerr << '[' << i << "] " << left[i] << ' ' << ancestor[i] << ' ' << right[i] << '\n';
  //   }

  anc_interned.reserve(ancestor.size());
  for (vector<string>::const_iterator i = ancestor.begin();
       i != ancestor.end(); ++i)
    anc_interned.push_back(in.intern(*i));

  left_interned.reserve(left.size());
  for (vector<string>::const_iterator i = left.begin();
       i != left.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(right.size());
  for (vector<string>::const_iterator i = right.begin();
       i != right.end(); ++i)
    right_interned.push_back(in.intern(*i));

  L(FL("calculating left edit script on %d -> %d lines")
    % anc_interned.size() % left_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              left_interned.begin(), left_interned.end(),
              min(ancestor.size(), left.size()),
              left_edits);

  L(FL("calculating right edit script on %d -> %d lines")
    % anc_interned.size() % right_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              right_interned.begin(), right_interned.end(),
              min(ancestor.size(), right.size()),
              right_edits);

  L(FL("calculating left extents on %d edits") % left_edits.size());
  calculate_extents(left_edits, left_interned,
                    left_prefix, left_extents, left_suffix,
                    anc_interned.size(), in);

  L(FL("calculating right extents on %d edits") % right_edits.size());
  calculate_extents(right_edits, right_interned,
                    right_prefix, right_extents, right_suffix,
                    anc_interned.size(), in);

  L(FL("normalizing %d right extents") % right_extents.size());
  normalize_extents(right_extents, anc_interned, right_interned);

  L(FL("normalizing %d left extents") % left_extents.size());
  normalize_extents(left_extents, anc_interned, left_interned);


  if ((!right_prefix.empty()) && (!left_prefix.empty()))
    {
      L(FL("conflicting prefixes"));
      throw conflict();
    }

  if ((!right_suffix.empty()) && (!left_suffix.empty()))
    {
      L(FL("conflicting suffixes"));
      throw conflict();
    }

  L(FL("merging %d left, %d right extents")
    % left_extents.size() % right_extents.size());

  copy(left_prefix.begin(), left_prefix.end(), back_inserter(merged_interned));
  copy(right_prefix.begin(), right_prefix.end(), back_inserter(merged_interned));

  merge_extents(left_extents, right_extents,
                left_interned, right_interned,
                in, merged_interned);

  copy(left_suffix.begin(), left_suffix.end(), back_inserter(merged_interned));
  copy(right_suffix.begin(), right_suffix.end(), back_inserter(merged_interned));

  merged.reserve(merged_interned.size());
  for (vector<long, QA(long)>::const_iterator i = merged_interned.begin();
       i != merged_interned.end(); ++i)
    merged.push_back(in.lookup(*i));
}


bool merge3(vector<string> const & ancestor,
            vector<string> const & left,
            vector<string> const & right,
            vector<string> & merged)
{
  try
    {
      merge_via_edit_scripts(ancestor, left, right, merged);
    }
  catch(conflict &)
    {
      L(FL("conflict detected. no merge."));
      return false;
    }
  return true;
}
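
// A minimal usage sketch of merge3() (illustrative only; this helper is
// not part of monotone and is never called). merge3() returns false and
// leaves "merged" untouched when the two sides conflict.
#ifdef BUILD_UNIT_TESTS
static bool
merge3_usage_sketch()
{
  vector<string> anc, left, right, merged;
  anc.push_back("one");
  anc.push_back("two");

  left = anc;
  left.insert(left.begin(), "zero");  // left prepends a line

  right = anc;
  right.push_back("three");           // right appends a line

  // the edits are disjoint, so the merge succeeds with
  // merged == ["zero", "one", "two", "three"]
  return merge3(anc, left, right, merged) && merged.size() == 4;
}
#endif // BUILD_UNIT_TESTS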


///////////////////////////////////////////////////////////////////////////
// content_merge_database_adaptor
///////////////////////////////////////////////////////////////////////////


content_merge_database_adaptor::content_merge_database_adaptor(app_state & app,
                                                               revision_id const & left,
                                                               revision_id const & right,
                                                               marking_map const & left_mm,
                                                               marking_map const & right_mm)
  : app(app), left_mm(left_mm), right_mm(right_mm)
{
  // FIXME: possibly refactor to run this lazily, as we don't
  // need to find common ancestors if we're never actually
  // called on to do content merging.
  find_common_ancestor_for_merge(left, right, lca, app);
}

void
content_merge_database_adaptor::record_merge(file_id const & left_ident,
                                             file_id const & right_ident,
                                             file_id const & merged_ident,
                                             file_data const & left_data,
                                             file_data const & right_data,
                                             file_data const & merged_data)
{
  L(FL("recording successful merge of %s <-> %s into %s")
    % left_ident % right_ident % merged_ident);

  transaction_guard guard(app.db);

  if (!(left_ident == merged_ident))
    {
      delta left_delta;
      diff(left_data.inner(), merged_data.inner(), left_delta);
      app.db.put_file_version(left_ident, merged_ident, file_delta(left_delta));
    }
  if (!(right_ident == merged_ident))
    {
      delta right_delta;
      diff(right_data.inner(), merged_data.inner(), right_delta);
      app.db.put_file_version(right_ident, merged_ident, file_delta(right_delta));
    }
  guard.commit();
}

static void
load_and_cache_roster(revision_id const & rid,
                      map<revision_id, shared_ptr<roster_t const> > & rmap,
                      shared_ptr<roster_t const> & rout,
                      app_state & app)
{
  map<revision_id, shared_ptr<roster_t const> >::const_iterator i = rmap.find(rid);
  if (i != rmap.end())
    rout = i->second;
  else
    {
      database::cached_roster cr;
      app.db.get_roster(rid, cr);
      safe_insert(rmap, make_pair(rid, cr.first));
      rout = cr.first;
    }
}

void
content_merge_database_adaptor::get_ancestral_roster(node_id nid,
                                                     revision_id & rid,
                                                     shared_ptr<roster_t const> & anc)
{
  // Given a file, if the lca is nonzero and its roster contains the file,
  // then we use its roster. Otherwise we use the roster at the file's
  // birth revision, which is the "per-file worst case" lca.

  // Begin by loading the LCA roster, if the LCA is non-null.
  rid = lca;
  if (!lca.inner()().empty())
    load_and_cache_roster(lca, rosters, anc, app);

  // If there is no LCA, or the LCA's roster doesn't contain the file,
  // then use the file's birth roster.
  if (!anc || !anc->has_node(nid))
    {
      marking_map::const_iterator lmm = left_mm.find(nid);
      marking_map::const_iterator rmm = right_mm.find(nid);

      MM(left_mm);
      MM(right_mm);

      if (lmm == left_mm.end())
        {
          I(rmm != right_mm.end());
          rid = rmm->second.birth_revision;
        }
      else if (rmm == right_mm.end())
        {
          I(lmm != left_mm.end());
          rid = lmm->second.birth_revision;
        }
      else
        {
          I(lmm->second.birth_revision == rmm->second.birth_revision);
          rid = lmm->second.birth_revision;
        }

      load_and_cache_roster(rid, rosters, anc, app);
    }
  I(anc);
}

void
content_merge_database_adaptor::get_version(file_id const & ident,
                                            file_data & dat) const
{
  app.db.get_file_version(ident, dat);
}


///////////////////////////////////////////////////////////////////////////
// content_merge_workspace_adaptor
///////////////////////////////////////////////////////////////////////////


void
content_merge_workspace_adaptor::cache_roster(revision_id const & rid,
                                              boost::shared_ptr<roster_t const> roster)
{
  rosters.insert(std::make_pair(rid, roster));
}

void
content_merge_workspace_adaptor::record_merge(file_id const & left_id,
                                              file_id const & right_id,
                                              file_id const & merged_id,
                                              file_data const & left_data,
                                              file_data const & right_data,
                                              file_data const & merged_data)
{
  L(FL("temporarily recording merge of %s <-> %s into %s")
    % left_id % right_id % merged_id);
  // this is an insert instead of a safe_insert because it is perfectly
  // legal (though rare) to have multiple merges resolve to the same file
  // contents.
  temporary_store.insert(make_pair(merged_id, merged_data));
}

void
content_merge_workspace_adaptor::get_ancestral_roster(node_id nid,
                                                      revision_id & rid,
                                                      shared_ptr<roster_t const> & anc)
{
  // Begin with the base roster, which plays the role of the LCA here,
  // if it contains the file.
  if (base->has_node(nid))
    {
      rid = lca;
      anc = base;
    }
  else
    {
      marking_map::const_iterator lmm = left_mm.find(nid);
      marking_map::const_iterator rmm = right_mm.find(nid);

      MM(left_mm);
      MM(right_mm);

      if (lmm == left_mm.end())
        {
          I(rmm != right_mm.end());
          rid = rmm->second.birth_revision;
        }
      else if (rmm == right_mm.end())
        {
          I(lmm != left_mm.end());
          rid = lmm->second.birth_revision;
        }
      else
        {
          I(lmm->second.birth_revision == rmm->second.birth_revision);
          rid = lmm->second.birth_revision;
        }

      load_and_cache_roster(rid, rosters, anc, app);
    }
  I(anc);
}

void
content_merge_workspace_adaptor::get_version(file_id const & ident,
                                             file_data & dat) const
{
  map<file_id,file_data>::const_iterator i = temporary_store.find(ident);
  if (i != temporary_store.end())
    dat = i->second;
  else if (app.db.file_version_exists(ident))
    app.db.get_file_version(ident, dat);
  else
    {
      data tmp;
      file_id fid;
      map<file_id, file_path>::const_iterator i = content_paths.find(ident);
      I(i != content_paths.end());

      require_path_is_file(i->second,
                           F("file '%s' does not exist in workspace") % i->second,
                           F("'%s' in workspace is a directory, not a file") % i->second);
      read_data(i->second, tmp);
      calculate_ident(file_data(tmp), fid);
      E(fid == ident,
        F("file %s in workspace has id %s, wanted %s")
        % i->second % fid % ident);
      dat = file_data(tmp);
    }
}


///////////////////////////////////////////////////////////////////////////
// content_merge_checkout_adaptor
///////////////////////////////////////////////////////////////////////////

void
content_merge_checkout_adaptor::record_merge(file_id const & left_ident,
                                             file_id const & right_ident,
                                             file_id const & merged_ident,
                                             file_data const & left_data,
                                             file_data const & right_data,
                                             file_data const & merged_data)
{
  I(false);
}

void
content_merge_checkout_adaptor::get_ancestral_roster(node_id nid,
                                                     revision_id & rid,
                                                     shared_ptr<roster_t const> & anc)
{
  I(false);
}

void
content_merge_checkout_adaptor::get_version(file_id const & ident,
                                            file_data & dat) const
{
  app.db.get_file_version(ident, dat);
}


///////////////////////////////////////////////////////////////////////////
// content_merger
///////////////////////////////////////////////////////////////////////////

content_merger::content_merger(app_state & app,
                               roster_t const & anc_ros,
                               roster_t const & left_ros,
                               roster_t const & right_ros,
                               content_merge_adaptor & adaptor)
  : app(app),
    anc_ros(anc_ros),
    left_ros(left_ros),
    right_ros(right_ros),
    adaptor(adaptor)
{}

string
content_merger::get_file_encoding(file_path const & path,
                                  roster_t const & ros)
{
  attr_value v;
  if (ros.get_attr(path, attr_key(constants::encoding_attribute), v))
    return v();
  return constants::default_encoding;
}

bool
content_merger::attribute_manual_merge(file_path const & path,
                                       roster_t const & ros)
{
  attr_value v;
  if (ros.get_attr(path, attr_key(constants::manual_merge_attribute), v)
      && v() == "true")
    return true;
  return false; // default: enable auto merge
}
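
// Usage note (added for exposition): the attribute read above is
// monotone's per-file manual-merge flag. A user would typically set it
// on files that must never be line-merged, e.g. binaries, with
// something like:
//
//   mtn attr set images/logo.png mtn:manual_merge true
//
// (the path is hypothetical), after which try_auto_merge() below
// declines and the caller falls through to try_user_merge().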

bool
content_merger::try_auto_merge(file_path const & anc_path,
                               file_path const & left_path,
                               file_path const & right_path,
                               file_path const & merged_path,
                               file_id const & ancestor_id,
                               file_id const & left_id,
                               file_id const & right_id,
                               file_id & merged_id)
{
  // This should only be called when there is a real merge3 to perform.
  I(!null_id(ancestor_id));
  I(!null_id(left_id));
  I(!null_id(right_id));

  L(FL("trying auto merge '%s' %s <-> %s (ancestor: %s)")
    % merged_path % left_id % right_id % ancestor_id);

  if (left_id == right_id)
    {
      L(FL("files are identical"));
      merged_id = left_id;
      return true;
    }

  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;

  adaptor.get_version(left_id, left_data);
  adaptor.get_version(ancestor_id, ancestor_data);
  adaptor.get_version(right_id, right_data);

  left_unpacked = left_data.inner();
  ancestor_unpacked = ancestor_data.inner();
  right_unpacked = right_data.inner();

  if (!attribute_manual_merge(left_path, left_ros) &&
      !attribute_manual_merge(right_path, right_ros))
    {
      // both files are mergeable by monotone's internal algorithm, so try
      // to merge. note: the ancestor is not checked for the manual-merge
      // attribute; forcing the user to merge manually just because an
      // ancestor was mistakenly marked manual seems too harsh.
      string left_encoding, anc_encoding, right_encoding;
      left_encoding = this->get_file_encoding(left_path, left_ros);
      anc_encoding = this->get_file_encoding(anc_path, anc_ros);
      right_encoding = this->get_file_encoding(right_path, right_ros);

      vector<string> left_lines, ancestor_lines, right_lines, merged_lines;
      split_into_lines(left_unpacked(), left_encoding, left_lines);
      split_into_lines(ancestor_unpacked(), anc_encoding, ancestor_lines);
      split_into_lines(right_unpacked(), right_encoding, right_lines);

      if (merge3(ancestor_lines, left_lines, right_lines, merged_lines))
        {
          hexenc<id> tmp_id;
          file_data merge_data;
          string tmp;

          L(FL("internal 3-way merged ok"));
          join_lines(merged_lines, tmp);
          calculate_ident(data(tmp), tmp_id);
          file_id merged_fid(tmp_id);
          merge_data = file_data(tmp);

          merged_id = merged_fid;
          adaptor.record_merge(left_id, right_id, merged_fid,
                               left_data, right_data, merge_data);

          return true;
        }
    }

  return false;
}

bool
content_merger::try_user_merge(file_path const & anc_path,
                               file_path const & left_path,
                               file_path const & right_path,
                               file_path const & merged_path,
                               file_id const & ancestor_id,
                               file_id const & left_id,
                               file_id const & right_id,
                               file_id & merged_id)
{
  // This should only be called when there is a real merge3 to perform.
  I(!null_id(ancestor_id));
  I(!null_id(left_id));
  I(!null_id(right_id));

  L(FL("trying user merge '%s' %s <-> %s (ancestor: %s)")
    % merged_path % left_id % right_id % ancestor_id);

  if (left_id == right_id)
    {
      L(FL("files are identical"));
      merged_id = left_id;
      return true;
    }

  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;

  adaptor.get_version(left_id, left_data);
  adaptor.get_version(ancestor_id, ancestor_data);
  adaptor.get_version(right_id, right_data);

  left_unpacked = left_data.inner();
  ancestor_unpacked = ancestor_data.inner();
  right_unpacked = right_data.inner();

  P(F("help required for 3-way merge\n"
      "[ancestor] %s\n"
      "[    left] %s\n"
      "[   right] %s\n"
      "[  merged] %s")
    % anc_path
    % left_path
    % right_path
    % merged_path);

  if (app.lua.hook_merge3(anc_path, left_path, right_path, merged_path,
                          ancestor_unpacked, left_unpacked,
                          right_unpacked, merged_unpacked))
    {
      hexenc<id> tmp_id;
      file_data merge_data;

      L(FL("lua merge3 hook merged ok"));
      calculate_ident(merged_unpacked, tmp_id);
      file_id merged_fid(tmp_id);
      merge_data = file_data(merged_unpacked);

      merged_id = merged_fid;
      adaptor.record_merge(left_id, right_id, merged_fid,
                           left_data, right_data, merge_data);
      return true;
    }

  return false;
}

// the remaining part of this file just handles printing out various
// diff formats for the case where someone wants to *read* a diff
// rather than apply it.

struct hunk_consumer
{
  vector<string> const & a;
  vector<string> const & b;
  size_t ctx;
  ostream & ost;
  boost::scoped_ptr<pcre::regex const> encloser_re;
  size_t a_begin, b_begin, a_len, b_len;
  long skew;

  vector<string>::const_reverse_iterator encloser_last_match;
  vector<string>::const_reverse_iterator encloser_last_search;

  virtual void flush_hunk(size_t pos) = 0;
  virtual void advance_to(size_t newpos) = 0;
  virtual void insert_at(size_t b_pos) = 0;
  virtual void delete_at(size_t a_pos) = 0;
  virtual void find_encloser(size_t pos, string & encloser);
  virtual ~hunk_consumer() {}
  hunk_consumer(vector<string> const & a,
                vector<string> const & b,
                size_t ctx,
                ostream & ost,
                string const & encloser_pattern)
    : a(a), b(b), ctx(ctx), ost(ost), encloser_re(0),
      a_begin(0), b_begin(0), a_len(0), b_len(0), skew(0),
      encloser_last_match(a.rend()), encloser_last_search(a.rend())
  {
    if (encloser_pattern != "")
      encloser_re.reset(new pcre::regex(encloser_pattern));
  }
};

/* Find, and write to ENCLOSER, the nearest line before POS which matches
   ENCLOSER_PATTERN.  We remember the last line scanned and the last match,
   to avoid duplicating effort.  */

void
hunk_consumer::find_encloser(size_t pos, string & encloser)
{
  typedef vector<string>::const_reverse_iterator riter;

  // Precondition: encloser_last_search <= pos <= a.size().
  I(pos <= a.size());
  // static_cast<> to silence compiler unsigned vs. signed comparison
  // warning, after first making sure that the static_cast is safe.
  I(a.rend() - encloser_last_search >= 0);
  I(pos >= static_cast<size_t>(a.rend() - encloser_last_search));

  if (!encloser_re)
    return;

  riter last = encloser_last_search;
  riter i = riter(a.begin() + pos);

  encloser_last_search = i;

  // i is a reverse_iterator, so this loop goes backward through the vector.
  for (; i != last; i++)
    if (encloser_re->match(*i))
      {
        encloser_last_match = i;
        break;
      }

  if (encloser_last_match == a.rend())
    return;

  L(FL("find_encloser: from %u matching %d, \"%s\"")
    % pos % (a.rend() - encloser_last_match) % *encloser_last_match);

  // the number 40 is chosen to match GNU diff.  it could safely be
  // increased up to about 60 without overflowing the standard
  // terminal width.
  encloser = string(" ") + (*encloser_last_match).substr(0, 40);
}
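
// Illustrative note (added for exposition): the encloser line is what
// puts a function name after the hunk header, like GNU diff -p. A
// pattern such as "^[[:alnum:]$_]" (hypothetical here; the real pattern
// comes from the caller) matches any line starting flush-left with an
// identifier character, so for C-like code the nearest preceding
// function-definition line gets appended, e.g.:
//
//   @@ -994,6 +994,7 @@ void walk_hunk_consumer(vector<long, ...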

void walk_hunk_consumer(vector<long, QA(long)> const & lcs,
                        vector<long, QA(long)> const & lines1,
                        vector<long, QA(long)> const & lines2,
                        hunk_consumer & cons)
{

  size_t a = 0, b = 0;
  if (lcs.begin() == lcs.end())
    {
      // degenerate case: files have nothing in common
      cons.advance_to(0);
      while (a < lines1.size())
        cons.delete_at(a++);
      while (b < lines2.size())
        cons.insert_at(b++);
      cons.flush_hunk(a);
    }
  else
    {
      // normal case: files have something in common
      for (vector<long, QA(long)>::const_iterator i = lcs.begin();
           i != lcs.end(); ++i, ++a, ++b)
        {
          if (idx(lines1, a) == *i && idx(lines2, b) == *i)
            continue;

          cons.advance_to(a);
          while (idx(lines1,a) != *i)
            cons.delete_at(a++);
          while (idx(lines2,b) != *i)
            cons.insert_at(b++);
        }
      if (b < lines2.size())
        {
          cons.advance_to(a);
          while(b < lines2.size())
            cons.insert_at(b++);
        }
      if (a < lines1.size())
        {
          cons.advance_to(a);
          while(a < lines1.size())
            cons.delete_at(a++);
        }
      cons.flush_hunk(a);
    }
}
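
// Illustrative trace (added for exposition): for lines1 = [x, y, z] and
// lines2 = [x, q, z], the LCS is [x, z] and the walk above emits
//
//   advance_to(1); delete_at(1); insert_at(1); flush_hunk(3);
//
// context handling lives in advance_to(); each consumer below turns the
// delete/insert calls into its own textual format.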

struct unidiff_hunk_writer : public hunk_consumer
{
  vector<string> hunk;

  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  virtual ~unidiff_hunk_writer() {}
  unidiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost,
                      string const & encloser_pattern)
    : hunk_consumer(a, b, ctx, ost, encloser_pattern)
  {}
};

void unidiff_hunk_writer::insert_at(size_t b_pos)
{
  b_len++;
  hunk.push_back(string("+") + b[b_pos]);
}

void unidiff_hunk_writer::delete_at(size_t a_pos)
{
  a_len++;
  hunk.push_back(string("-") + a[a_pos]);
}

void unidiff_hunk_writer::flush_hunk(size_t pos)
{
  if (hunk.size() > 0)
    {
      // insert trailing context
      size_t a_pos = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (a_pos + i < a.size()); ++i)
        {
          hunk.push_back(string(" ") + a[a_pos + i]);
          a_len++;
          b_len++;
        }

      // write hunk to stream
      if (a_len == 0)
        ost << "@@ -0,0";
      else
        {
          ost << "@@ -" << a_begin+1;
          if (a_len > 1)
            ost << ',' << a_len;
        }

      if (b_len == 0)
        ost << " +0,0";
      else
        {
          ost << " +" << b_begin+1;
          if (b_len > 1)
            ost << ',' << b_len;
        }

      {
        string encloser;
        ptrdiff_t first_mod = 0;
        vector<string>::const_iterator i;
        for (i = hunk.begin(); i != hunk.end(); i++)
          if ((*i)[0] != ' ')
            {
              first_mod = i - hunk.begin();
              break;
            }

        find_encloser(a_begin + first_mod, encloser);
        ost << " @@" << encloser << '\n';
      }
      copy(hunk.begin(), hunk.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  hunk.clear();
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
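
// Illustrative output (added for exposition): continuing the
// [x, y, z] -> [x, q, z] example with ctx = 3, unidiff_hunk_writer
// produces
//
//   @@ -1,3 +1,3 @@
//    x
//   -y
//   +q
//    z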

void unidiff_hunk_writer::advance_to(size_t newpos)
{
  if (a_begin + a_len + (2 * ctx) < newpos || hunk.empty())
    {
      flush_hunk(newpos);

      // insert new leading context
      for (size_t p = max(ctx, newpos) - ctx;
           p < min(a.size(), newpos); ++p)
        {
          hunk.push_back(string(" ") + a[p]);
          a_begin--; a_len++;
          b_begin--; b_len++;
        }
    }
  else
    {
      // pad intermediate context
      while(a_begin + a_len < newpos)
        {
          hunk.push_back(string(" ") + a[a_begin + a_len]);
          a_len++;
          b_len++;
        }
    }
}

struct cxtdiff_hunk_writer : public hunk_consumer
{
  // For context diffs, we have to queue up calls to insert_at/delete_at
  // until we hit an advance_to, so that we can get the tags right: an
  // unpaired insert gets a + in the left margin, an unpaired delete a -,
  // but if they are paired, they both get !.  Hence, we have both the
  // 'inserts' and 'deletes' queues of line numbers, and the 'from_file'
  // and 'to_file' queues of line strings.
  vector<size_t> inserts;
  vector<size_t> deletes;
  vector<string> from_file;
  vector<string> to_file;
  bool have_insertions;
  bool have_deletions;

  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  void flush_pending_mods();
  virtual ~cxtdiff_hunk_writer() {}
  cxtdiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost,
                      string const & encloser_pattern)
    : hunk_consumer(a, b, ctx, ost, encloser_pattern),
      have_insertions(false), have_deletions(false)
  {}
};

void cxtdiff_hunk_writer::insert_at(size_t b_pos)
{
  inserts.push_back(b_pos);
  have_insertions = true;
}

void cxtdiff_hunk_writer::delete_at(size_t a_pos)
{
  deletes.push_back(a_pos);
  have_deletions = true;
}

void cxtdiff_hunk_writer::flush_hunk(size_t pos)
{
  flush_pending_mods();

  if (have_deletions || have_insertions)
    {
      // insert trailing context
      size_t ctx_start = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < a.size()); ++i)
        {
          from_file.push_back(string("  ") + a[ctx_start + i]);
          a_len++;
        }

      ctx_start = b_begin + b_len;
      for (size_t i = 0; (i < ctx) && (ctx_start + i < b.size()); ++i)
        {
          to_file.push_back(string("  ") + b[ctx_start + i]);
          b_len++;
        }

      {
        string encloser;
        ptrdiff_t first_insert = b_len;
        ptrdiff_t first_delete = a_len;
        vector<string>::const_iterator i;

        if (have_deletions)
          for (i = from_file.begin(); i != from_file.end(); i++)
            if ((*i)[0] != ' ')
              {
                first_delete = i - from_file.begin();
                break;
              }
        if (have_insertions)
          for (i = to_file.begin(); i != to_file.end(); i++)
            if ((*i)[0] != ' ')
              {
                first_insert = i - to_file.begin();
                break;
              }

        find_encloser(a_begin + min(first_insert, first_delete),
                      encloser);

        ost << "***************" << encloser << '\n';
      }

      ost << "*** " << (a_begin + 1) << ',' << (a_begin + a_len) << " ****\n";
      if (have_deletions)
        copy(from_file.begin(), from_file.end(), ostream_iterator<string>(ost, "\n"));

      ost << "--- " << (b_begin + 1) << ',' << (b_begin + b_len) << " ----\n";
      if (have_insertions)
        copy(to_file.begin(), to_file.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk
  to_file.clear();
  from_file.clear();
  have_insertions = false;
  have_deletions = false;
  skew += b_len - a_len;
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}
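
// Illustrative output (added for exposition): the same
// [x, y, z] -> [x, q, z] change rendered by cxtdiff_hunk_writer:
//
//   ***************
//   *** 1,3 ****
//     x
//   ! y
//     z
//   --- 1,3 ----
//     x
//   ! q
//     z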

void cxtdiff_hunk_writer::flush_pending_mods()
{
  // nothing to flush?
  if (inserts.empty() && deletes.empty())
    return;

  string prefix;

  // if we have just insertions to flush, prefix them with "+"; if
  // just deletions, prefix with "-"; if both, prefix with "!"
  if (inserts.empty() && !deletes.empty())
    prefix = "-";
  else if (deletes.empty() && !inserts.empty())
    prefix = "+";
  else
    prefix = "!";

  for (vector<size_t>::const_iterator i = deletes.begin();
       i != deletes.end(); ++i)
    {
      from_file.push_back(prefix + string(" ") + a[*i]);
      a_len++;
    }
  for (vector<size_t>::const_iterator i = inserts.begin();
       i != inserts.end(); ++i)
    {
      to_file.push_back(prefix + string(" ") + b[*i]);
      b_len++;
    }

  // clear pending mods
  inserts.clear();
  deletes.clear();
}

void cxtdiff_hunk_writer::advance_to(size_t newpos)
{
  // We must first flush out pending mods because otherwise our calculation
  // of whether we need to generate a new hunk header will be way off.
  // It is correct (i.e. consistent with diff(1)) to reset the +/-/!
  // generation algorithm between sub-components of a single hunk.
  flush_pending_mods();

  if (a_begin + a_len + (2 * ctx) < newpos)
    {
      flush_hunk(newpos);

      // insert new leading context
      if (newpos - ctx < a.size())
        {
          for (size_t i = ctx; i > 0; --i)
            {
              // The original test was (newpos - i < 0), but since newpos
              // is size_t (unsigned), it will never go negative.  Testing
              // that newpos is smaller than i is the same test, really.
              if (newpos < i)
                continue;

              // note that context diffs prefix common text with two
              // spaces, whereas unified diffs use a single space
              from_file.push_back(string("  ") + a[newpos - i]);
              to_file.push_back(string("  ") + a[newpos - i]);
              a_begin--; a_len++;
              b_begin--; b_len++;
            }
        }
    }
  else
    // pad intermediate context
    while (a_begin + a_len < newpos)
      {
        from_file.push_back(string("  ") + a[a_begin + a_len]);
        to_file.push_back(string("  ") + a[a_begin + a_len]);
        a_len++;
        b_len++;
      }
}

void
make_diff(string const & filename1,
          string const & filename2,
          file_id const & id1,
          file_id const & id2,
          data const & data1,
          data const & data2,
          ostream & ost,
          diff_type type,
          string const & pattern)
{
  if (guess_binary(data1()) || guess_binary(data2()))
    {
      ost << "# " << filename2 << " is binary\n";
      return;
    }

  vector<string> lines1, lines2;
  split_into_lines(data1(), lines1);
  split_into_lines(data2(), lines2);

  vector<long, QA(long)> left_interned;
  vector<long, QA(long)> right_interned;
  vector<long, QA(long)> lcs;

  interner<long> in;

  left_interned.reserve(lines1.size());
  for (vector<string>::const_iterator i = lines1.begin();
       i != lines1.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(lines2.size());
  for (vector<string>::const_iterator i = lines2.begin();
       i != lines2.end(); ++i)
    right_interned.push_back(in.intern(*i));

  lcs.reserve(min(lines1.size(),lines2.size()));
  longest_common_subsequence(left_interned.begin(), left_interned.end(),
                             right_interned.begin(), right_interned.end(),
                             min(lines1.size(), lines2.size()),
                             back_inserter(lcs));

  // The existence of various hacky diff parsers in the world somewhat
  // constrains what output we can use.  Here are some notes on how various
  // tools interpret the header lines of a diff file:
  //
  // interdiff/filterdiff (patchutils):
  //   Attempt to parse a timestamp after each whitespace.  If they succeed,
  //   then they take the filename as everything up to the whitespace they
  //   succeeded at, and the timestamp as everything after.  If they fail,
  //   then they take the filename to be everything up to the first
  //   whitespace.  Have hardcoded that /dev/null and timestamps at the
  //   epoch (in any timezone) indicate a file that did not exist.
  //
  //   filterdiff filters on the first filename line.  interdiff matches on
  //   the first filename line.
  // PatchReader perl library (used by Bugzilla):
  //   Takes the filename to be everything up to the first tab; requires
  //   that there be a tab.  Determines the filename based on the first
  //   filename line.
  // diffstat:
  //   Can handle pretty much everything; tries to read up to the first tab
  //   to get the filename.  Knows that "/dev/null", "", and anything
  //   beginning "/tmp/" are meaningless.  Uses the second filename line.
  // patch:
  //   If there is a tab, considers everything up to that tab to be the
  //   filename.  If there is not a tab, considers everything up to the
  //   first whitespace to be the filename.
  //
  //   Contains comment: 'If the [file]name is "/dev/null", ignore the name
  //   and mark the file as being nonexistent.  The name "/dev/null" appears
  //   in patches regardless of how NULL_DEVICE is spelled.'  Also detects
  //   timestamps at the epoch as indicating that a file does not exist.
  //
  //   Uses the first filename line as the target, unless it is /dev/null or
  //   has an epoch timestamp in which case it uses the second.
  // trac:
  //   Anything up to the first whitespace, or end of line, is considered
  //   filename.  Does not care about timestamp.  Uses the shorter of the
  //   two filenames as the filename (!).
  //
  // Conclusions:
  // -- You must have a tab, both to prevent PatchReader blowing up, and
  //    to make it possible to have filenames with spaces in them.
  //    (Filenames with tabs in them are always impossible to properly
  //    express; FIXME what should be done if one occurs?)
  // -- What comes after that tab matters not at all, though it probably
  //    shouldn't look like a timestamp, or have any trailing part that
  //    looks like a timestamp, unless it really is a timestamp.  Simply
  //    having a trailing tab should work fine.
  // -- If you need to express that some file does not exist, you should
  //    use /dev/null as the path.  patch(1) goes so far as to claim that
  //    this is part of the diff format definition.
  // -- If you want your patches to actually _work_ with patch(1), then
  //    renames are basically hopeless (you can do them by hand _after_
  //    running patch), adds work so long as the first line says either
  //    the new file's name or "/dev/null", nothing else, and deletes work
  //    if the new file name is "/dev/null", nothing else.  (ATM we don't
  //    write out patches for deletes anyway.)
  switch (type)
    {
    case unified_diff:
      {
        ost << "--- " << filename1 << '\t' << id1 << '\n';
        ost << "+++ " << filename2 << '\t' << id2 << '\n';

        unidiff_hunk_writer hunks(lines1, lines2, 3, ost, pattern);
        walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
        break;
      }
    case context_diff:
      {
        ost << "*** " << filename1 << '\t' << id1 << '\n';
        ost << "--- " << filename2 << '\t' << id2 << '\n';

        cxtdiff_hunk_writer hunks(lines1, lines2, 3, ost, pattern);
        walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
        break;
      }
    default:
      {
        // should never reach this; the external_diff type is not
        // handled by this function.
        I(false);
      }
    }
}
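
// Illustrative header (added for exposition): per the notes above,
// make_diff() emits tab-separated header lines whose second field is a
// file id rather than the timestamp most tools put there, e.g. (with
// hypothetical, truncated ids):
//
//   --- foo.cc<TAB>a1b2...
//   +++ foo.cc<TAB>c3d4...
//
// anything after the tab is tolerated by the parsers catalogued above.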

#ifdef BUILD_UNIT_TESTS
#include "unit_tests.hh"
#include "transforms.hh"
#include "lexical_cast.hh"
#include "randomfile.hh"

using std::cerr;
using std::cout;
using std::stringstream;

using boost::lexical_cast;

static void dump_incorrect_merge(vector<string> const & expected,
                                 vector<string> const & got,
                                 string const & prefix)
{
  size_t mx = expected.size();
  if (mx < got.size())
    mx = got.size();
  for (size_t i = 0; i < mx; ++i)
    {
      cerr << "bad merge: " << i << " [" << prefix << "]\t";

      if (i < expected.size())
        cerr << '[' << expected[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      if (i < got.size())
        cerr << '[' << got[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      cerr << '\n';
    }
}

// high tech randomizing test
UNIT_TEST(diff_patch, randomizing_merge)
{
  randomizer rng;
  for (int i = 0; i < 30; ++i)
    {
      vector<string> anc, d1, d2, m1, m2, gm;

      file_randomizer::build_random_fork(anc, d1, d2, gm, (10 + 2 * i), rng);

      UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
      if (gm != m1)
        dump_incorrect_merge (gm, m1, "random_merge 1");
      UNIT_TEST_CHECK(gm == m1);

      UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
      if (gm != m2)
        dump_incorrect_merge (gm, m2, "random_merge 2");
      UNIT_TEST_CHECK(gm == m2);
    }
}


// old boring tests
UNIT_TEST(diff_patch, merge_prepend)
{
  UNIT_TEST_CHECKPOINT("prepend test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  for (int i = 0; i < 10; ++i)
    {
      anc.push_back(lexical_cast<string>(i));
      d1.push_back(lexical_cast<string>(i));
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_prepend 1");
  UNIT_TEST_CHECK(gm == m1);


  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_prepend 2");
  UNIT_TEST_CHECK(gm == m2);
}

UNIT_TEST(diff_patch, merge_append)
{
  UNIT_TEST_CHECKPOINT("append test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 0; i < 10; ++i)
    anc.push_back(lexical_cast<string>(i));

  d1 = anc;
  d2 = anc;
  gm = anc;

  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_append 1");
  UNIT_TEST_CHECK(gm == m1);

  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_append 2");
  UNIT_TEST_CHECK(gm == m2);


}

UNIT_TEST(diff_patch, merge_additions)
{
  UNIT_TEST_CHECKPOINT("additions test");
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc1("I like oatmeal\nI don't like spam\nI like orange juice\nI like toast");
  string confl("I like oatmeal\nI don't like tuna\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like orange juice\nI don't like tuna\nI like toast");
  string good_merge("I like oatmeal\nI don't like spam\nI like orange juice\nI don't like tuna\nI like toast");
  vector<string> anc, d1, cf, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc1, d1);
  split_into_lines(confl, cf);
  split_into_lines(desc2, d2);
  split_into_lines(good_merge, gm);

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_addition 1");
  UNIT_TEST_CHECK(gm == m1);

  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_addition 2");
  UNIT_TEST_CHECK(gm == m2);

  UNIT_TEST_CHECK(!merge3(anc, d1, cf, m1));
}

UNIT_TEST(diff_patch, merge_deletions)
{
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like toast");

  vector<string> anc, d1, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc2, d2);
  d1 = anc;
  gm = d2;

  UNIT_TEST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_deletion 1");
  UNIT_TEST_CHECK(gm == m1);

  UNIT_TEST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_deletion 2");
  UNIT_TEST_CHECK(gm == m2);
}

#endif // BUILD_UNIT_TESTS

// Local Variables:
// mode: C++
// fill-column: 76
// c-file-style: "gnu"
// indent-tabs-mode: nil
// End:
// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
