diff_patch.cc

// copyright (C) 2002, 2003 graydon hoare <graydon@pobox.com>
// all rights reserved.
// licensed to the public under the terms of the GNU GPL (>= 2)
// see the file COPYING for details

#include <algorithm>
#include <iterator>
#include <vector>
#include <string>
#include <iostream>

#include "diff_patch.hh"
#include "interner.hh"
#include "lcs.hh"
#include "manifest.hh"
#include "packet.hh"
#include "patch_set.hh"
#include "sanity.hh"
#include "transforms.hh"
#include "vocab.hh"

using namespace std;

bool guess_binary(string const & s)
{
  // these control characters do not occur in text files. note that the
  // set begins with a NUL byte, so we must pass its length (24) to
  // find_first_of explicitly: the plain const char * overload measures
  // the set with strlen and would stop at the NUL, searching for nothing.
  if (s.find_first_of("\x00\x01\x02\x03\x04\x05\x06\x0e\x0f"
                      "\x10\x11\x12\x13\x14\x15\x16\x17\x18"
                      "\x19\x1a\x1c\x1d\x1e\x1f", 0, 24) != string::npos)
    return true;
  return false;
}

//
// a 3-way merge works like this:
//
//            /----> right
//    ancestor
//            \----> left
//
// first you compute the edit list "EDITS(ancestor,left)".
//
// then you make an offset table "leftpos" which describes positions in
// "ancestor" as they map to "left"; that is, for 0 <= apos <
// ancestor.size(), we have
//
//   left[leftpos[apos]] == ancestor[apos]
//
// you do this by walking through the edit list and either jumping the
// current index ahead an extra position, on an insert, or remaining
// still, on a delete. on an insert *or* a delete, you push the current
// index back onto the leftpos array.
//
// next you compute the edit list "EDITS(ancestor,right)".
//
// you then go through this edit list applying the edits to left, rather
// than ancestor, and using the table leftpos to map the position of each
// edit to an appropriate spot in left. this means you walk a "curr_left"
// index through the edits, and for each edit e:
//
// - if e is a delete (and e.pos is a position in ancestor)
//   - increment curr_left without copying anything to "merged"
//
// - if e is an insert (and e.pos is a position in right)
//   - copy right[e.pos] to "merged"
//   - leave curr_left alone
//
// - when advancing to apos (and apos is a position in ancestor)
//   - copy left[curr_left] to merged while curr_left < leftpos[apos]
//
//
// the practical upshot is that you apply the delta from ancestor->right
// to the adjusted contexts in left, producing something vaguely like
// the concatenation of delta(ancestor,left) :: delta(ancestor,right).
//
// NB: this is, as far as I can tell, what diff3 does. I don't think I'm
// infringing on anyone's fancy patents here.
//

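// as a concrete example of the offset table:
//
//   ancestor = [ "a", "b", "c" ]
//   left     = [ "a", "x", "b", "c" ]    (one insert, at left position 1)
//
// yields leftpos = [ 0, 2, 3 ]: "a", "b" and "c" sit at left positions
// 0, 2 and 3. with a delete instead:
//
//   ancestor = [ "a", "b", "c" ]
//   left     = [ "a", "c" ]              (one delete, at ancestor position 1)
//
// yields leftpos = [ 0, 1, 1 ]: a deleted line repeats the current left
// position rather than advancing past it.
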
struct hunk_consumer
{
  virtual void flush_hunk(size_t pos) = 0;
  virtual void advance_to(size_t newpos) = 0;
  virtual void insert_at(size_t b_pos) = 0;
  virtual void delete_at(size_t a_pos) = 0;
  virtual ~hunk_consumer() {}
};

void walk_hunk_consumer(vector<long> const & lcs,
                        vector<long> const & lines1,
                        vector<long> const & lines2,
                        hunk_consumer & cons)
{

  size_t a = 0, b = 0;
  if (lcs.begin() == lcs.end())
    {
      // degenerate case: files have nothing in common
      cons.advance_to(0);
      while (a < lines1.size())
        cons.delete_at(a++);
      while (b < lines2.size())
        cons.insert_at(b++);
      cons.flush_hunk(a);
    }
  else
    {
      // normal case: files have something in common
      for (vector<long>::const_iterator i = lcs.begin();
           i != lcs.end(); ++i, ++a, ++b)
        {
          if (idx(lines1, a) == *i && idx(lines2, b) == *i)
            continue;

          cons.advance_to(a);
          while (idx(lines1,a) != *i)
            cons.delete_at(a++);
          while (idx(lines2,b) != *i)
            cons.insert_at(b++);
        }
      if (b < lines2.size())
        {
          cons.advance_to(a);
          while(b < lines2.size())
            cons.insert_at(b++);
        }
      if (a < lines1.size())
        {
          cons.advance_to(a);
          while(a < lines1.size())
            cons.delete_at(a++);
        }
      cons.flush_hunk(a);
    }
}
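
// for a concrete sense of the call sequence (values shown uninterned for
// clarity): with lines1 = [ "a", "b", "c" ], lines2 = [ "a", "x", "c" ]
// and lcs = [ "a", "c" ], the consumer sees
//
//   advance_to(1);   // "a" is common, skip to the divergence
//   delete_at(1);    // "b" occurs only in lines1
//   insert_at(1);    // "x" occurs only in lines2
//   flush_hunk(3);   // trailing common "c" closes out the hunk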


// helper class which calculates the offset table

struct hunk_offset_calculator : public hunk_consumer
{
  vector<size_t> & leftpos;
  set<size_t> & deletes;
  set<size_t> & inserts;
  size_t apos;
  size_t lpos;
  size_t final;
  hunk_offset_calculator(vector<size_t> & lp, size_t fin,
                         set<size_t> & dels, set<size_t> & inss);
  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  virtual ~hunk_offset_calculator();
};

hunk_offset_calculator::hunk_offset_calculator(vector<size_t> & off, size_t fin,
                                               set<size_t> & dels, set<size_t> & inss)
  : leftpos(off), deletes(dels), inserts(inss), apos(0), lpos(0), final(fin)
{}

hunk_offset_calculator::~hunk_offset_calculator()
{
  while(leftpos.size() < final)
    leftpos.push_back(leftpos.back());
}

void hunk_offset_calculator::flush_hunk(size_t ap)
{
  this->advance_to(ap);
}

void hunk_offset_calculator::advance_to(size_t ap)
{
  while(apos < ap)
    {
      // L(F("advance to %d: [%d,%d] -> [%d,%d] (sz=%d)\n") %
      //   ap % apos % lpos % apos+1 % lpos+1 % leftpos.size());
      apos++;
      leftpos.push_back(lpos++);
    }
}

void hunk_offset_calculator::insert_at(size_t lp)
{
  // L(F("insert at %d: [%d,%d] -> [%d,%d] (sz=%d)\n") %
  //   lp % apos % lpos % apos % lpos+1 % leftpos.size());
  inserts.insert(apos);
  I(lpos == lp);
  lpos++;
}

void hunk_offset_calculator::delete_at(size_t ap)
{
  // L(F("delete at %d: [%d,%d] -> [%d,%d] (sz=%d)\n") %
  //   ap % apos % lpos % apos+1 % lpos % leftpos.size());
  deletes.insert(apos);
  I(apos == ap);
  apos++;
  leftpos.push_back(lpos);
}

void calculate_hunk_offsets(vector<string> const & ancestor,
                            vector<string> const & left,
                            vector<size_t> & leftpos,
                            set<size_t> & deletes,
                            set<size_t> & inserts)
{

  vector<long> anc_interned;
  vector<long> left_interned;
  vector<long> lcs;

  interner<long> in;

  anc_interned.reserve(ancestor.size());
  for (vector<string>::const_iterator i = ancestor.begin();
       i != ancestor.end(); ++i)
    anc_interned.push_back(in.intern(*i));

  left_interned.reserve(left.size());
  for (vector<string>::const_iterator i = left.begin();
       i != left.end(); ++i)
    left_interned.push_back(in.intern(*i));

  lcs.reserve(std::min(left.size(),ancestor.size()));
  longest_common_subsequence(anc_interned.begin(), anc_interned.end(),
                             left_interned.begin(), left_interned.end(),
                             std::min(ancestor.size(), left.size()),
                             back_inserter(lcs));

  leftpos.clear();
  hunk_offset_calculator calc(leftpos, ancestor.size(), deletes, inserts);
  walk_hunk_consumer(lcs, anc_interned, left_interned, calc);
}

struct conflict {};

// utility types used by the extent-based merge below

typedef enum { preserved = 0, deleted = 1, changed = 2 } edit_t;
static char const * etab[3] =
  {
    "preserved",
    "deleted",
    "changed"
  };

struct extent
{
  extent(size_t p, size_t l, edit_t t)
    : pos(p), len(l), type(t)
  {}
  size_t pos;
  size_t len;
  edit_t type;
};

void calculate_extents(vector<long> const & a_b_edits,
                       vector<long> const & b,
                       vector<long> & prefix,
                       vector<extent> & extents,
                       vector<long> & suffix,
                       size_t const a_len,
                       interner<long> & intern)
{
  extents.reserve(a_len * 2);

  size_t a_pos = 0, b_pos = 0;

  for (vector<long>::const_iterator i = a_b_edits.begin();
       i != a_b_edits.end(); ++i)
    {
      // L(F("edit: %d") % *i);
      if (*i < 0)
        {
          // negative elements code the negation of the one-based index into A
          // of the element to be deleted
          size_t a_deleted = (-1 - *i);

          // fill positions out to the deletion point
          while (a_pos < a_deleted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(F(" -- delete at A-pos %d (B-pos = %d)\n") % a_deleted % b_pos);

          // skip the deleted line
          a_pos++;
          extents.push_back(extent(b_pos, 0, deleted));
        }
      else
        {
          // positive elements code the one-based index into B of the element to
          // be inserted
          size_t b_inserted = (*i - 1);

          // fill positions out to the insertion point
          while (b_pos < b_inserted)
            {
              a_pos++;
              extents.push_back(extent(b_pos++, 1, preserved));
            }

          // L(F(" -- insert at B-pos %d (A-pos = %d) : '%s'\n")
          //   % b_inserted % a_pos % intern.lookup(b.at(b_inserted)));

          // record that there was an insertion, but a_pos did not move.
          if ((b_pos == 0 && extents.empty())
              || (b_pos == prefix.size()))
            {
              prefix.push_back(b.at(b_pos));
            }
          else if (a_len == a_pos)
            {
              suffix.push_back(b.at(b_pos));
            }
          else
            {
              // make the insertion
              extents.back().type = changed;
              extents.back().len++;
            }
          b_pos++;
        }
    }

  while (extents.size() < a_len)
    extents.push_back(extent(b_pos++, 1, preserved));
}
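
// a concrete example (interned values written as their strings for
// clarity): for A = [ "a", "b", "c" ] and B = [ "a", "x", "c" ], an edit
// script of [ -2, +2 ] -- delete the 2nd element of A, then insert the
// 2nd element of B -- produces the extent map
//
//   [ (pos=0, len=1, preserved),
//     (pos=1, len=1, changed),
//     (pos=2, len=1, preserved) ]
//
// i.e. one extent per line of A, each naming the range of B it maps to.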

void normalize_extents(vector<extent> & a_b_map,
                       vector<long> const & a,
                       vector<long> const & b)
{
  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && (a_b_map.at(j-1).type == preserved)
                 && (a_b_map.at(j).type == changed)
                 && (b.at(a_b_map.at(j-1).pos) ==
                     b.at(a_b_map.at(j).pos + a_b_map.at(j).len - 1)))
            {
              // the idea here is that if preserved extent j-1 has the same
              // contents as the last line in changed extent j of length N,
              // then it's exactly the same to consider j-1 as changed, of
              // length N (starting 1 line earlier), and j as preserved, of
              // length 1.

              L(F("exchanging preserved extent [%d+%d] with changed extent [%d+%d]\n")
                % a_b_map.at(j-1).pos
                % a_b_map.at(j-1).len
                % a_b_map.at(j).pos
                % a_b_map.at(j).len);

              swap(a_b_map.at(j-1).len, a_b_map.at(j).len);
              swap(a_b_map.at(j-1).type, a_b_map.at(j).type);
              --j;
            }
        }
    }


  for (size_t i = 0; i < a_b_map.size(); ++i)
    {
      if (i > 0)
        {
          size_t j = i;
          while (j > 0
                 && a_b_map.at(j).type == changed
                 && a_b_map.at(j-1).type == changed
                 && a_b_map.at(j).len > 1
                 && a_b_map.at(j-1).pos + a_b_map.at(j-1).len == a_b_map.at(j).pos)
            {
              // step 1: move a chunk from this insert extent to its
              // predecessor
              size_t piece = a_b_map.at(j).len - 1;
              // L(F("moving change piece of len %d from pos %d to pos %d\n")
              //   % piece
              //   % a_b_map.at(j).pos
              //   % a_b_map.at(j-1).pos);
              a_b_map.at(j).len = 1;
              a_b_map.at(j).pos += piece;
              a_b_map.at(j-1).len += piece;

              // step 2: if this extent (now of length 1) has become a "changed"
              // extent identical to its previous state, switch it to a "preserved"
              // extent.
              if (b.at(a_b_map.at(j).pos) == a.at(j))
                {
                  // L(F("changing normalized 'changed' extent at %d to 'preserved'\n")
                  //   % a_b_map.at(j).pos);
                  a_b_map.at(j).type = preserved;
                }
              --j;
            }
        }
    }
}

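// roughly, the per-extent decision table implemented by merge_extents
// below (left extent vs. right extent) is:
//
//                   right preserved   right deleted   right changed
//   left preserved  copy line         drop line       copy right's lines
//   left deleted    drop line         drop line       conflict
//   left changed    copy left's       conflict        copy if equal length
//                   lines                             and content, else
//                                                     conflict
//
// mutual "preserved / preserved" pairs must likewise agree line-for-line,
// or the merge raises a conflict.
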
void merge_extents(vector<extent> const & a_b_map,
                   vector<extent> const & a_c_map,
                   vector<long> const & b,
                   vector<long> const & c,
                   interner<long> const & in,
                   vector<long> & merged)
{
  I(a_b_map.size() == a_c_map.size());

  vector<extent>::const_iterator i = a_b_map.begin();
  vector<extent>::const_iterator j = a_c_map.begin();
  merged.reserve(a_b_map.size() * 2);

  for (; i != a_b_map.end(); ++i, ++j)
    {

      // L(F("trying to merge: [%s %d %d] vs. [%s %d %d] \n")
      //   % etab[i->type] % i->pos % i->len
      //   % etab[j->type] % j->pos % j->len);

      // mutual, identical preserves / inserts / changes
      if (((i->type == changed && j->type == changed)
           || (i->type == preserved && j->type == preserved))
          && i->len == j->len)
        {
          for (size_t k = 0; k < i->len; ++k)
            {
              if (b.at(i->pos + k) != c.at(j->pos + k))
                {
                  L(F("conflicting edits: %s %d[%d] '%s' vs. %s %d[%d] '%s'\n")
                    % etab[i->type] % i->pos % k % in.lookup(b.at(i->pos + k))
                    % etab[j->type] % j->pos % k % in.lookup(c.at(j->pos + k)));
                  throw conflict();
                }
              merged.push_back(b.at(i->pos + k));
            }
        }

      // mutual or single-edge deletes
      else if ((i->type == deleted && j->type == deleted)
               || (i->type == deleted && j->type == preserved)
               || (i->type == preserved && j->type == deleted))
        {
          // do nothing
        }

      // single-edge insert / changes
      else if (i->type == changed && j->type == preserved)
        for (size_t k = 0; k < i->len; ++k)
          merged.push_back(b.at(i->pos + k));

      else if (i->type == preserved && j->type == changed)
        for (size_t k = 0; k < j->len; ++k)
          merged.push_back(c.at(j->pos + k));

      else
        {
          L(F("conflicting edits: [%s %d %d] vs. [%s %d %d]\n")
            % etab[i->type] % i->pos % i->len
            % etab[j->type] % j->pos % j->len);
          throw conflict();
        }

      // if (merged.empty())
      //   L(F(" --> EMPTY\n"));
      // else
      //   L(F(" --> [%d]: %s\n") % (merged.size() - 1) % in.lookup(merged.back()));
    }
}

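// merge_via_edit_scripts ties the above pieces together: intern all three
// files, compute the two edit scripts EDITS(ancestor,left) and
// EDITS(ancestor,right), convert each script into an extent map over the
// ancestor, normalize both maps, and walk the two maps in lockstep with
// merge_extents. inserts that fall off either end of the ancestor travel
// separately, as prefix / suffix lines.
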
void merge_via_edit_scripts(vector<string> const & ancestor,
                            vector<string> const & left,
                            vector<string> const & right,
                            vector<string> & merged)
{
  vector<long> anc_interned;
  vector<long> left_interned, right_interned;
  vector<long> left_edits, right_edits;
  vector<long> left_prefix, right_prefix;
  vector<long> left_suffix, right_suffix;
  vector<extent> left_extents, right_extents;
  vector<long> merged_interned;
  vector<long> lcs;
  interner<long> in;

  // for (int i = 0; i < std::min(std::min(left.size(), right.size()), ancestor.size()); ++i)
  //   {
  //     std::cerr << "[" << i << "] " << left[i] << " " << ancestor[i] << " " << right[i] << endl;
  //   }

  anc_interned.reserve(ancestor.size());
  for (vector<string>::const_iterator i = ancestor.begin();
       i != ancestor.end(); ++i)
    anc_interned.push_back(in.intern(*i));

  left_interned.reserve(left.size());
  for (vector<string>::const_iterator i = left.begin();
       i != left.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(right.size());
  for (vector<string>::const_iterator i = right.begin();
       i != right.end(); ++i)
    right_interned.push_back(in.intern(*i));

  L(F("calculating left edit script on %d -> %d lines\n")
    % anc_interned.size() % left_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              left_interned.begin(), left_interned.end(),
              std::min(ancestor.size(), left.size()),
              left_edits, back_inserter(lcs));

  L(F("calculating right edit script on %d -> %d lines\n")
    % anc_interned.size() % right_interned.size());

  edit_script(anc_interned.begin(), anc_interned.end(),
              right_interned.begin(), right_interned.end(),
              std::min(ancestor.size(), right.size()),
              right_edits, back_inserter(lcs));

  L(F("calculating left extents on %d edits\n") % left_edits.size());
  calculate_extents(left_edits, left_interned,
                    left_prefix, left_extents, left_suffix,
                    anc_interned.size(), in);

  L(F("calculating right extents on %d edits\n") % right_edits.size());
  calculate_extents(right_edits, right_interned,
                    right_prefix, right_extents, right_suffix,
                    anc_interned.size(), in);

  L(F("normalizing %d right extents\n") % right_extents.size());
  normalize_extents(right_extents, anc_interned, right_interned);

  L(F("normalizing %d left extents\n") % left_extents.size());
  normalize_extents(left_extents, anc_interned, left_interned);


  if ((!right_prefix.empty()) && (!left_prefix.empty()))
    {
      L(F("conflicting prefixes\n"));
      throw conflict();
    }

  if ((!right_suffix.empty()) && (!left_suffix.empty()))
    {
      L(F("conflicting suffixes\n"));
      throw conflict();
    }

  L(F("merging %d left, %d right extents\n")
    % left_extents.size() % right_extents.size());

  copy(left_prefix.begin(), left_prefix.end(), back_inserter(merged_interned));
  copy(right_prefix.begin(), right_prefix.end(), back_inserter(merged_interned));

  merge_extents(left_extents, right_extents,
                left_interned, right_interned,
                in, merged_interned);

  copy(left_suffix.begin(), left_suffix.end(), back_inserter(merged_interned));
  copy(right_suffix.begin(), right_suffix.end(), back_inserter(merged_interned));

  merged.reserve(merged_interned.size());
  for (vector<long>::const_iterator i = merged_interned.begin();
       i != merged_interned.end(); ++i)
    merged.push_back(in.lookup(*i));
}


bool merge3(vector<string> const & ancestor,
            vector<string> const & left,
            vector<string> const & right,
            vector<string> & merged)
{
  try
    {
      merge_via_edit_scripts(ancestor, left, right, merged);
    }
  catch(conflict & c)
    {
      L(F("conflict detected. no merge.\n"));
      return false;
    }
  return true;
}

/*

bool merge3_lcs(vector<string> const & ancestor,
                vector<string> const & left,
                vector<string> const & right,
                vector<string> & merged)
{
  try
    {
      vector<size_t> leftpos;
      set<size_t> deletes;
      set<size_t> inserts;

      L(F("calculating offsets from ancestor:[%d..%d) to left:[%d..%d)\n")
        % 0 % ancestor.size() % 0 % left.size());
      calculate_hunk_offsets(ancestor, left, leftpos, deletes, inserts);

      L(F("sanity-checking offset table (sz=%d, ancestor=%d)\n")
        % leftpos.size() % ancestor.size());
      I(leftpos.size() == ancestor.size());
      for(size_t i = 0; i < ancestor.size(); ++i)
        {
          if (idx(leftpos,i) > left.size())
            L(F("weird offset table: leftpos[%d] = %d (left.size() = %d)\n")
              % i % idx(leftpos,i) % left.size());
          I(idx(leftpos,i) <= left.size());
        }

      L(F("merging differences from ancestor:[%d..%d) to right:[%d..%d)\n")
        % 0 % ancestor.size() % 0 % right.size());
      merge_hunks_via_offsets(left, ancestor, right, leftpos,
                              deletes, inserts, merged);
    }
  catch(conflict & c)
    {
      L(F("conflict detected. no merge.\n"));
      return false;
    }
  return true;
}

*/

// this does a merge2 on manifests. it's not as likely to succeed as the
// merge3 on manifests, but you do what you can..

bool merge2(manifest_map const & left,
            manifest_map const & right,
            app_state & app,
            file_merge_provider & file_merger,
            manifest_map & merged)
{
  for (manifest_map::const_iterator i = left.begin();
       i != left.end(); ++i)
    {
      // we go left-to-right. we could go right-to-left too, but who
      // knows. merge2 is really weak. FIXME: replace this please.
      path_id_pair l_pip(i);
      manifest_map::const_iterator j = right.find(l_pip.path());
      if (j != right.end())
        {
          // right has this file, we can try to merge2 the file.
          path_id_pair r_pip(j);
          path_id_pair m_pip;
          L(F("merging existing versions of %s in both manifests\n")
            % l_pip.path());
          if (file_merger.try_to_merge_files(l_pip, r_pip, m_pip))
            {
              L(F("arrived at merged version %s\n") % m_pip.ident());
              merged.insert(m_pip.get_entry());
            }
          else
            {
              merged.clear();
              return false;
            }
        }
      else
        {
          // right hasn't seen this file at all.
          merged.insert(l_pip.get_entry());
        }
    }
  return true;
}


void check_no_intersect(set<file_path> const & a,
                        set<file_path> const & b,
                        string const & a_name,
                        string const & b_name)
{
  set<file_path> tmp;
  set_intersection(a.begin(), a.end(), b.begin(), b.end(),
                   inserter(tmp, tmp.begin()));
  if (tmp.empty())
    {
      L(F("file sets '%s' and '%s' do not intersect\n")
        % a_name % b_name);
    }
  else
    {
      L(F("conflict: file sets '%s' and '%s' intersect\n")
        % a_name % b_name);
      throw conflict();
    }
}

template <typename K, typename V>
void check_map_inclusion(map<K,V> const & a,
                         map<K,V> const & b,
                         string const & a_name,
                         string const & b_name)
{
  for (typename map<K, V>::const_iterator self = a.begin();
       self != a.end(); ++self)
    {
      typename map<K, V>::const_iterator other =
        b.find(self->first);

      if (other == b.end())
        {
          L(F("map '%s' has unique entry for %s -> %s\n")
            % a_name % self->first % self->second);
        }

      else if (other->second == self->second)
        {
          L(F("maps '%s' and '%s' agree on %s -> %s\n")
            % a_name % b_name % self->first % self->second);
        }

      else
        {
          L(F("conflict: maps %s and %s disagree: %s -> %s and %s\n")
            % a_name % b_name
            % self->first % self->second % other->second);
          throw conflict();
        }
    }
}

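// merge_deltas folds one edge's deltas into the merged edge. deltas seen
// on only one edge pass straight through; when both edges changed the
// same file (for example, left took a file from version A to B while
// right took it from A to C), the file merger is asked to produce a
// common descendant M, and the merged edge records A -> M.
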
void merge_deltas (map<file_path, patch_delta> const & self_map,
                   map<file_path, patch_delta> & other_map,
                   file_merge_provider & file_merger,
                   patch_set & merged_edge)
{
  for (map<file_path, patch_delta>::const_iterator delta = self_map.begin();
       delta != self_map.end(); ++delta)
    {

      map<file_path, patch_delta>::const_iterator other =
        other_map.find(delta->first);

      if (other == other_map.end())
        {
          merged_edge.f_deltas.insert(patch_delta(delta->second.id_old,
                                                  delta->second.id_new,
                                                  delta->first));
        }
      else
        {
          // concurrent deltas! into the file merger we go..
          I(other->second.id_old == delta->second.id_old);
          I(other->second.path == delta->second.path);
          L(F("attempting to merge deltas on %s : %s -> %s and %s\n")
            % delta->first
            % delta->second.id_old
            % delta->second.id_new
            % other->second.id_new);

          path_id_pair a_pip = path_id_pair(make_pair(delta->second.path,
                                                      delta->second.id_old));
          path_id_pair l_pip = path_id_pair(make_pair(delta->first,
                                                      delta->second.id_new));
          path_id_pair r_pip = path_id_pair(make_pair(delta->first,
                                                      other->second.id_new));
          path_id_pair m_pip;
          if (file_merger.try_to_merge_files(a_pip, l_pip, r_pip, m_pip))
            {
              L(F("concurrent deltas on %s merged to %s OK\n")
                % delta->first % m_pip.ident());
              I(m_pip.path() == delta->first);
              other_map.erase(delta->first);
              merged_edge.f_deltas.insert(patch_delta(delta->second.id_old,
                                                      m_pip.ident(),
                                                      m_pip.path()));
            }
          else
            {
              L(F("conflicting deltas on %s, merge failed\n")
                % delta->first);
              throw conflict();
            }
        }
    }
}

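// apply_directory_moves rewrites a path to track any directory renames:
// it walks up the directory tree of fp, and the first enclosing directory
// found in dir_moves is replaced with its rename target. for example,
// given a recorded move "a/b" -> "c", the path "a/b/x/y.c" is rewritten
// to "c/x/y.c".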
static void apply_directory_moves(map<file_path, file_path> const & dir_moves,
                                  file_path & fp)
{
  fs::path stem = mkpath(fp());
  fs::path rest;
  while (stem.has_branch_path())
    {
      rest = stem.leaf() / rest;
      stem = stem.branch_path();
      map<file_path, file_path>::const_iterator j =
        dir_moves.find(file_path(stem.string()));
      if (j != dir_moves.end())
        {
          file_path old = fp;
          fp = file_path((mkpath(j->second()) / rest).string());
          L(F("applying dir rename: %s -> %s\n") % old % fp);
          return;
        }
    }
}

static void rebuild_under_directory_moves(map<file_path, file_path> const & dir_moves,
                                          manifest_map const & in,
                                          manifest_map & out)
{
  for (manifest_map::const_iterator mm = in.begin();
       mm != in.end(); ++mm)
    {
      file_path fp = mm->first;
      apply_directory_moves(dir_moves, fp);
      I(out.find(fp) == out.end());
      out.insert(make_pair(fp, mm->second));
    }
}


static void infer_directory_moves(manifest_map const & ancestor,
                                  manifest_map const & child,
                                  map<file_path, file_path> const & moves,
                                  map<file_path, file_path> & dir_portion)
{
  for (map<file_path, file_path>::const_iterator mov = moves.begin();
       mov != moves.end(); ++mov)
    {
      fs::path src = mkpath(mov->first());
      fs::path dst = mkpath(mov->second());

      // we will call this a "directory move" if the branch path changed,
      // the new branch path didn't exist in the ancestor, and the old
      // branch path doesn't exist in the child

      if (src.empty()
          || dst.empty()
          || !src.has_branch_path()
          || !dst.has_branch_path())
        continue;

      if (src.branch_path().string() != dst.branch_path().string())
        {
          fs::path dp = dst.branch_path();
          fs::path sp = src.branch_path();

          if (dir_portion.find(file_path(sp.string())) != dir_portion.end())
            continue;

          bool clean_move = true;

          for (manifest_map::const_iterator mm = ancestor.begin();
               mm != ancestor.end(); ++mm)
            {
              fs::path mp = mkpath(mm->first());
              if (mp.branch_path().string() == dp.string())
                {
                  clean_move = false;
                  break;
                }
            }

          if (clean_move)
            for (manifest_map::const_iterator mm = child.begin();
                 mm != child.end(); ++mm)
              {
                fs::path mp = mkpath(mm->first());
                if (mp.branch_path().string() == sp.string())
                  {
                    clean_move = false;
                    break;
                  }
              }


          if (clean_move)
            {
              L(F("inferred pure directory rename %s -> %s\n")
                % sp.string() % dst.branch_path().string());
              dir_portion.insert
                (make_pair(file_path(sp.string()),
                           file_path(dst.branch_path().string())));
            }
          else
            {
              L(F("skipping uncertain directory rename %s -> %s\n")
                % sp.string() % dst.branch_path().string());
            }

        }
    }
}

// this is a 3-way merge algorithm on manifests.

bool merge3(manifest_map const & ancestor,
            manifest_map const & left,
            manifest_map const & right,
            app_state & app,
            file_merge_provider & file_merger,
            manifest_map & merged,
            rename_set & left_renames,
            rename_set & right_renames)
{
  // in phase #1 we make a bunch of indexes of the changes we saw on each
  // edge.

  patch_set left_edge, right_edge, merged_edge;
  manifests_to_patch_set(ancestor, left, app, left_edge);
  manifests_to_patch_set(ancestor, right, app, right_edge);

  // this is an unusual map of deltas, in that the "path" field of the
  // patch_delta is used to store the ancestral path of the delta, and the
  // key of this map is used to store the merged path of the delta (in
  // cases where the delta happened along with a move, these differ)
  map<file_path, patch_delta>
    left_delta_map, right_delta_map;

  map<file_path, file_path>
    left_move_map, right_move_map,
    left_dir_move_map, right_dir_move_map;

  map<file_path, file_id>
    left_add_map, right_add_map;

  set<file_path>
    left_adds, right_adds,
    left_move_srcs, right_move_srcs,
    left_move_dsts, right_move_dsts,
    left_deltas, right_deltas;


  for(set<patch_addition>::const_iterator a = left_edge.f_adds.begin();
      a != left_edge.f_adds.end(); ++a)
    {
      left_adds.insert(a->path);
      left_add_map.insert(make_pair(a->path, a->ident));
    }

  for(set<patch_addition>::const_iterator a = right_edge.f_adds.begin();
      a != right_edge.f_adds.end(); ++a)
    {
      right_adds.insert(a->path);
      right_add_map.insert(make_pair(a->path, a->ident));
    }


  for(set<patch_move>::const_iterator m = left_edge.f_moves.begin();
      m != left_edge.f_moves.end(); ++m)
    {
      left_move_srcs.insert(m->path_old);
      left_move_dsts.insert(m->path_new);
      left_move_map.insert(make_pair(m->path_old, m->path_new));
    }

  for(set<patch_move>::const_iterator m = right_edge.f_moves.begin();
      m != right_edge.f_moves.end(); ++m)
    {
      right_move_srcs.insert(m->path_old);
      right_move_dsts.insert(m->path_new);
      right_move_map.insert(make_pair(m->path_old, m->path_new));
    }

  for(set<patch_delta>::const_iterator d = left_edge.f_deltas.begin();
      d != left_edge.f_deltas.end(); ++d)
    {
      left_deltas.insert(d->path);
      left_delta_map.insert(make_pair(d->path, *d));
    }

  for(set<patch_delta>::const_iterator d = right_edge.f_deltas.begin();
      d != right_edge.f_deltas.end(); ++d)
    {
      right_deltas.insert(d->path);
      right_delta_map.insert(make_pair(d->path, *d));
    }


  // phase #2 is a sort of "pre-processing" phase, in which we handle
  // "interesting" move cases:
  //
  // - locating any moves which moved *directories* and modifying any adds
  //   in the opposing edge which have the source directory as input, to go
  //   to the target directory
  //
  // - locating any moves between non-equal ids. these are move+edit
  //   events, so we degrade them to a move + an edit (on the move target).
  //   note that moves *must* therefore be applied before edits in the rest
  //   of this function (specifically in phase #6).
  //
  // find any directory renames on either edge
  infer_directory_moves(ancestor, left, left_move_map, left_dir_move_map);
  infer_directory_moves(ancestor, right, right_move_map, right_dir_move_map);
  {
    manifest_map left_new, right_new;
    rebuild_under_directory_moves(left_dir_move_map, right, right_new);
    rebuild_under_directory_moves(right_dir_move_map, left, left_new);
    if (left != left_new || right != right_new)
      {
        L(F("restarting merge under propagated directory renames\n"));
        return merge3(ancestor, left_new, right_new, app, file_merger, merged,
                      left_renames, right_renames);
      }
  }

  // split edit part off of left move+edits
  for (map<file_path, file_path>::const_iterator mov = left_move_map.begin();
       mov != left_move_map.end(); ++mov)
    {
      manifest_map::const_iterator
        anc = ancestor.find(mov->first),
        lf = left.find(mov->second);

      if (anc != ancestor.end() && lf != left.end()
          && ! (anc->second == lf->second))
        {
          // it's possible this one has already been split by patch_set.cc
          map<file_path, patch_delta>::const_iterator left_patch;
          left_patch = left_delta_map.find(lf->first);
          if (left_patch != left_delta_map.end())
            {
              I(left_patch->second.id_new == lf->second);
            }
          else
            {
              left_delta_map.insert
                (make_pair
                 (lf->first, patch_delta(anc->second, lf->second, anc->first)));
            }
        }
    }

  // split edit part off of right move+edits
  for (map<file_path, file_path>::const_iterator mov = right_move_map.begin();
       mov != right_move_map.end(); ++mov)
    {
      manifest_map::const_iterator
        anc = ancestor.find(mov->first),
        rt = right.find(mov->second);

      if (anc != ancestor.end() && rt != right.end()
          && ! (anc->second == rt->second))
        {
          // it's possible this one has already been split by patch_set.cc
          map<file_path, patch_delta>::const_iterator right_patch;
          right_patch = right_delta_map.find(rt->first);
          if (right_patch != right_delta_map.end())
            {
              I(right_patch->second.id_new == rt->second);
            }
          else
            {
              right_delta_map.insert
                (make_pair
                 (rt->first, patch_delta(anc->second, rt->second, anc->first)));
            }
        }
    }

  // phase #3 detects conflicts. any conflicts here -- including those
  // which were a result of the actions taken in phase #2 -- result in an
  // early return.

  try
    {

      // no adding and deleting the same file
      check_no_intersect (left_adds, right_edge.f_dels,
                          "left adds", "right dels");
      check_no_intersect (left_edge.f_dels, right_adds,
                          "left dels", "right adds");


      // no fiddling with the source of a move
      check_no_intersect (left_move_srcs, right_adds,
                          "left move sources", "right adds");
      check_no_intersect (left_adds, right_move_srcs,
                          "left adds", "right move sources");

      check_no_intersect (left_move_srcs, right_edge.f_dels,
                          "left move sources", "right dels");
      check_no_intersect (left_edge.f_dels, right_move_srcs,
                          "left dels", "right move sources");

      check_no_intersect (left_move_srcs, right_deltas,
                          "left move sources", "right deltas");
      check_no_intersect (left_deltas, right_move_srcs,
                          "left deltas", "right move sources");


      // no fiddling with the destinations of a move
      check_no_intersect (left_move_dsts, right_adds,
                          "left move destinations", "right adds");
      check_no_intersect (left_adds, right_move_dsts,
                          "left adds", "right move destinations");

      check_no_intersect (left_move_dsts, right_edge.f_dels,
                          "left move destinations", "right dels");
      check_no_intersect (left_edge.f_dels, right_move_dsts,
                          "left dels", "right move destinations");

      check_no_intersect (left_move_dsts, right_deltas,
                          "left move destinations", "right deltas");
      check_no_intersect (left_deltas, right_move_dsts,
                          "left deltas", "right move destinations");


      // we're not going to do anything clever like chaining moves together
      check_no_intersect (left_move_srcs, right_move_dsts,
                          "left move sources", "right move destinations");
      check_no_intersect (left_move_dsts, right_move_srcs,
                          "left move destinations", "right move sources");

      // check specific add-map conflicts
      check_map_inclusion (left_add_map, right_add_map,
                           "left add map", "right add map");
      check_map_inclusion (right_add_map, left_add_map,
                           "right add map", "left add map");

      // check specific move-map conflicts
      check_map_inclusion (left_move_map, right_move_map,
                           "left move map", "right move map");
      check_map_inclusion (right_move_map, left_move_map,
                           "right move map", "left move map");

    }
  catch (conflict & c)
    {
      // FIXME: this catch should call out to a user-provided manifest
      // merge resolution hook, the same way the file merger does.
      merged.clear();
      return false;
    }

  // in phase #4 we union all the (now non-conflicting) deletes, adds, and
  // moves into a "merged" edge

  set_union(left_edge.f_adds.begin(), left_edge.f_adds.end(),
            right_edge.f_adds.begin(), right_edge.f_adds.end(),
            inserter(merged_edge.f_adds, merged_edge.f_adds.begin()));

  set_union(left_edge.f_dels.begin(), left_edge.f_dels.end(),
            right_edge.f_dels.begin(), right_edge.f_dels.end(),
            inserter(merged_edge.f_dels, merged_edge.f_dels.begin()));

  set_union(left_edge.f_moves.begin(), left_edge.f_moves.end(),
            right_edge.f_moves.begin(), right_edge.f_moves.end(),
            inserter(merged_edge.f_moves, merged_edge.f_moves.begin()));

  // (phase #4.5: copy the renames into the disjoint rename sets for
  // independent certification in our caller)

  left_renames.clear();
  for (set<patch_move>::const_iterator mv = left_edge.f_moves.begin();
       mv != left_edge.f_moves.end(); ++mv)
    left_renames.insert(make_pair(mv->path_old, mv->path_new));

  right_renames.clear();
  for (set<patch_move>::const_iterator mv = right_edge.f_moves.begin();
       mv != right_edge.f_moves.end(); ++mv)
    right_renames.insert(make_pair(mv->path_old, mv->path_new));

  // in phase #5 we run 3-way file merges on all the files which have
  // a delta on both edges, and union the results of the 3-way merges with
  // all the deltas which only happen on one edge, and dump all this into
  // the merged edge too.

  try
    {
      merge_deltas (left_delta_map, right_delta_map,
                    file_merger, merged_edge);

      merge_deltas (right_delta_map, left_delta_map,
                    file_merger, merged_edge);
    }
  catch (conflict & c)
    {
      merged.clear();
      return false;
    }

  // in phase #6 (finally) we copy ancestor into our result, and apply the
  // merged edge to it, mutating it into the merged manifest.

  copy(ancestor.begin(), ancestor.end(), inserter(merged, merged.begin()));

  for (set<file_path>::const_iterator del = merged_edge.f_dels.begin();
       del != merged_edge.f_dels.end(); ++del)
    {
      L(F("applying merged delete of file %s\n") % *del);
      I(merged.find(*del) != merged.end());
      merged.erase(*del);
    }

  for (set<patch_move>::const_iterator mov = merged_edge.f_moves.begin();
       mov != merged_edge.f_moves.end(); ++mov)
    {
      L(F("applying merged move of file %s -> %s\n")
        % mov->path_old % mov->path_new);
      I(merged.find(mov->path_new) == merged.end());
      file_id fid = merged[mov->path_old];
      merged.erase(mov->path_old);
      merged.insert(make_pair(mov->path_new, fid));
    }

  for (set<patch_addition>::const_iterator add = merged_edge.f_adds.begin();
       add != merged_edge.f_adds.end(); ++add)
    {
      L(F("applying merged addition of file %s: %s\n")
        % add->path % add->ident);
      I(merged.find(add->path) == merged.end());
      merged.insert(make_pair(add->path, add->ident));
    }

  for (set<patch_delta>::const_iterator delta = merged_edge.f_deltas.begin();
       delta != merged_edge.f_deltas.end(); ++delta)
    {
      if (merged_edge.f_dels.find(delta->path) != merged_edge.f_dels.end())
        {
          L(F("skipping merged delta on deleted file %s\n") % delta->path);
          continue;
        }
      L(F("applying merged delta to file %s: %s -> %s\n")
        % delta->path % delta->id_old % delta->id_new);
      I(merged.find(delta->path) != merged.end());
      I(merged[delta->path] == delta->id_old);
      merged[delta->path] = delta->id_new;
    }

  return true;
}


simple_merge_provider::simple_merge_provider(app_state & app)
  : app(app) {}

void simple_merge_provider::record_merge(file_id const & left_ident,
                                         file_id const & right_ident,
                                         file_id const & merged_ident,
                                         file_data const & left_data,
                                         file_data const & merged_data)
{
  L(F("recording successful merge of %s <-> %s into %s\n")
    % left_ident % right_ident % merged_ident);

  base64< gzip<delta> > merge_delta;
  transaction_guard guard(app.db);

  diff(left_data.inner(), merged_data.inner(), merge_delta);
  packet_db_writer dbw(app);
  dbw.consume_file_delta (left_ident, merged_ident, file_delta(merge_delta));
  cert_file_ancestor(left_ident, merged_ident, app, dbw);
  cert_file_ancestor(right_ident, merged_ident, app, dbw);
  guard.commit();
}

void simple_merge_provider::get_version(path_id_pair const & pip, file_data & dat)
{
  app.db.get_file_version(pip.ident(), dat);
}

bool simple_merge_provider::try_to_merge_files(path_id_pair const & ancestor,
                                               path_id_pair const & left,
                                               path_id_pair const & right,
                                               path_id_pair & merged)
{

  L(F("trying to merge %s <-> %s (ancestor: %s)\n")
    % left.ident() % right.ident() % ancestor.ident());

  if (left.ident() == right.ident() &&
      left.path() == right.path())
    {
      L(F("files are identical\n"));
      merged = left;
      return true;
    }

  // check for an existing merge, use it if available
  {
    vector< file<cert> > left_edges, right_edges;
    set< base64<cert_value> > left_children, right_children, common_children;
    app.db.get_file_certs(left.ident(), cert_name(ancestor_cert_name), left_edges);
    app.db.get_file_certs(right.ident(), cert_name(ancestor_cert_name), right_edges);
    for (vector< file<cert> >::const_iterator i = left_edges.begin();
         i != left_edges.end(); ++i)
      left_children.insert(i->inner().value);

    for (vector< file<cert> >::const_iterator i = right_edges.begin();
         i != right_edges.end(); ++i)
      right_children.insert(i->inner().value);

    set_intersection(left_children.begin(), left_children.end(),
                     right_children.begin(), right_children.end(),
                     inserter(common_children, common_children.begin()));

    L(F("existing merges: %s <-> %s, %d found\n")
      % left.ident() % right.ident() % common_children.size());

    if (common_children.size() == 1)
      {
        cert_value unpacked;
        decode_base64(*common_children.begin(), unpacked);
        file_id ident = file_id(unpacked());
        merged.ident(ident);
        merged.path(left.path());
        L(F("reusing existing merge\n"));
        return true;
      }
    L(F("no reusable merge\n"));
  }

  // no existing merges, we'll have to do it ourselves.
  file_data left_data, right_data, ancestor_data;
  data left_unpacked, ancestor_unpacked, right_unpacked, merged_unpacked;
  vector<string> left_lines, ancestor_lines, right_lines, merged_lines;

  this->get_version(left, left_data);
  this->get_version(ancestor, ancestor_data);
  this->get_version(right, right_data);

  unpack(left_data.inner(), left_unpacked);
  unpack(ancestor_data.inner(), ancestor_unpacked);
  unpack(right_data.inner(), right_unpacked);

  split_into_lines(left_unpacked(), left_lines);
  split_into_lines(ancestor_unpacked(), ancestor_lines);
  split_into_lines(right_unpacked(), right_lines);

  if (merge3(ancestor_lines,
             left_lines,
             right_lines,
             merged_lines))
    {
      hexenc<id> merged_id;
      base64< gzip<data> > packed_merge;
      string tmp;

      L(F("internal 3-way merge succeeded\n"));
      join_lines(merged_lines, tmp);
      calculate_ident(data(tmp), merged_id);
      file_id merged_fid(merged_id);
      pack(data(tmp), packed_merge);

      merged.path(left.path());
      merged.ident(merged_fid);
      record_merge(left.ident(), right.ident(), merged_fid,
                   left_data, packed_merge);

      return true;
    }
  else if (app.lua.hook_merge3(ancestor_unpacked, left_unpacked,
                               right_unpacked, merged_unpacked))
    {
      hexenc<id> merged_id;
      base64< gzip<data> > packed_merge;

      L(F("lua merge3 hook merged ok\n"));
      calculate_ident(merged_unpacked, merged_id);
      file_id merged_fid(merged_id);
      pack(merged_unpacked, packed_merge);

      merged.path(left.path());
      merged.ident(merged_fid);
      record_merge(left.ident(), right.ident(), merged_fid,
                   left_data, packed_merge);
      return true;
    }

  return false;
}

bool simple_merge_provider::try_to_merge_files(path_id_pair const & left,
                                               path_id_pair const & right,
                                               path_id_pair & merged)
{
  file_data left_data, right_data;
  data left_unpacked, right_unpacked, merged_unpacked;

  L(F("trying to merge %s <-> %s\n")
    % left.ident() % right.ident());

  if (left.ident() == right.ident() &&
      left.path() == right.path())
    {
      L(F("files are identical\n"));
      merged = left;
      return true;
    }

  this->get_version(left, left_data);
  this->get_version(right, right_data);

  unpack(left_data.inner(), left_unpacked);
  unpack(right_data.inner(), right_unpacked);

  if (app.lua.hook_merge2(left_unpacked, right_unpacked, merged_unpacked))
    {
      hexenc<id> merged_id;
      base64< gzip<data> > packed_merge;

      L(F("lua merge2 hook merged ok\n"));
      calculate_ident(merged_unpacked, merged_id);
      file_id merged_fid(merged_id);
      pack(merged_unpacked, packed_merge);

      merged.path(left.path());
      merged.ident(merged_fid);
      record_merge(left.ident(), right.ident(), merged_fid,
                   left_data, packed_merge);
      return true;
    }

  return false;
}


// during the "update" command, the only real differences from merging
// are that we take our right versions from the filesystem, not the db,
// and we only record the merges in a transient, in-memory table.

update_merge_provider::update_merge_provider(app_state & app)
  : simple_merge_provider(app) {}

void update_merge_provider::record_merge(file_id const & left_ident,
                                         file_id const & right_ident,
                                         file_id const & merged_ident,
                                         file_data const & left_data,
                                         file_data const & merged_data)
{
  L(F("temporarily recording merge of %s <-> %s into %s\n")
    % left_ident % right_ident % merged_ident);
  I(temporary_store.find(merged_ident) == temporary_store.end());
  temporary_store.insert(make_pair(merged_ident, merged_data));
}

void update_merge_provider::get_version(path_id_pair const & pip, file_data & dat)
{
  if (app.db.file_version_exists(pip.ident()))
    app.db.get_file_version(pip.ident(), dat);
  else
    {
      base64< gzip<data> > tmp;
      file_id fid;
      N(file_exists (pip.path()),
        F("file %s does not exist in working copy") % pip.path());
      read_localized_data(pip.path(), tmp, app.lua);
      calculate_ident(tmp, fid);
      N(fid == pip.ident(),
        F("file %s in working copy has id %s, wanted %s")
        % pip.path() % fid % pip.ident());
      dat = tmp;
    }
}


// the remaining part of this file just handles printing out unidiffs for
// the case where someone wants to *read* a diff rather than apply it.

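// unidiff_hunk_writer accumulates "+", "-" and " " (context) lines into a
// hunk buffer and, on flush, emits a standard unified-diff hunk header of
// the form
//
//   @@ -<a_begin+1>,<a_len> +<b_begin+1>,<b_len> @@
//
// (the ",<len>" parts are omitted when the length is 1), followed by the
// buffered lines. for instance, the append test below expects the header
// "@@ -9,3 +9,9 @@".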
struct unidiff_hunk_writer : public hunk_consumer
{
  vector<string> const & a;
  vector<string> const & b;
  size_t ctx;
  ostream & ost;
  size_t a_begin, b_begin, a_len, b_len;
  vector<string> hunk;
  unidiff_hunk_writer(vector<string> const & a,
                      vector<string> const & b,
                      size_t ctx,
                      ostream & ost);
  virtual void flush_hunk(size_t pos);
  virtual void advance_to(size_t newpos);
  virtual void insert_at(size_t b_pos);
  virtual void delete_at(size_t a_pos);
  virtual ~unidiff_hunk_writer() {}
};

unidiff_hunk_writer::unidiff_hunk_writer(vector<string> const & a,
                                         vector<string> const & b,
                                         size_t ctx,
                                         ostream & ost)
  : a(a), b(b), ctx(ctx), ost(ost),
    a_begin(0), b_begin(0),
    a_len(0), b_len(0)
{}

void unidiff_hunk_writer::insert_at(size_t b_pos)
{
  b_len++;
  hunk.push_back(string("+") + b[b_pos]);
}

void unidiff_hunk_writer::delete_at(size_t a_pos)
{
  a_len++;
  hunk.push_back(string("-") + a[a_pos]);
}

void unidiff_hunk_writer::flush_hunk(size_t pos)
{
  if (hunk.size() > ctx)
    {
      // insert trailing context
      size_t a_pos = a_begin + a_len;
      for (size_t i = 0; (i < ctx) && (a_pos + i < a.size()); ++i)
        {
          hunk.push_back(string(" ") + a[a_pos + i]);
          a_len++;
          b_len++;
        }
    }

  if (hunk.size() > 0)
    {

      // write hunk to stream
      ost << "@@ -" << a_begin+1;
      if (a_len > 1)
        ost << "," << a_len;

      ost << " +" << b_begin+1;
      if (b_len > 1)
        ost << "," << b_len;
      ost << " @@" << endl;

      copy(hunk.begin(), hunk.end(), ostream_iterator<string>(ost, "\n"));
    }

  // reset hunk; compute the signed skew between the two sides before
  // mixing the unsigned lengths
  hunk.clear();
  long skew = static_cast<long>(b_len) - static_cast<long>(a_len);
  a_begin = pos;
  b_begin = pos + skew;
  a_len = 0;
  b_len = 0;
}

void unidiff_hunk_writer::advance_to(size_t newpos)
{
  if (a_begin + a_len + (2 * ctx) < newpos)
    {
      flush_hunk(newpos);

      // insert new leading context; newpos and i are unsigned, so guard
      // against walking off the start of the file by comparing rather
      // than subtracting
      for (size_t i = ctx; i > 0; --i)
        {
          if (newpos < i)
            continue;
          hunk.push_back(string(" ") + a[newpos - i]);
          a_begin--; a_len++;
          b_begin--; b_len++;
        }
    }
  else
    {
      // pad intermediate context
      while(a_begin + a_len < newpos)
        {
          hunk.push_back(string(" ") + a[a_begin + a_len]);
          a_len++;
          b_len++;
        }
    }
}


void unidiff(string const & filename1,
             string const & filename2,
             vector<string> const & lines1,
             vector<string> const & lines2,
             ostream & ost)
{
  ost << "--- " << filename1 << endl;
  ost << "+++ " << filename2 << endl;

  vector<long> left_interned;
  vector<long> right_interned;
  vector<long> lcs;

  interner<long> in;

  left_interned.reserve(lines1.size());
  for (vector<string>::const_iterator i = lines1.begin();
       i != lines1.end(); ++i)
    left_interned.push_back(in.intern(*i));

  right_interned.reserve(lines2.size());
  for (vector<string>::const_iterator i = lines2.begin();
       i != lines2.end(); ++i)
    right_interned.push_back(in.intern(*i));

  lcs.reserve(std::min(lines1.size(),lines2.size()));
  longest_common_subsequence(left_interned.begin(), left_interned.end(),
                             right_interned.begin(), right_interned.end(),
                             std::min(lines1.size(), lines2.size()),
                             back_inserter(lcs));

  unidiff_hunk_writer hunks(lines1, lines2, 3, ost);
  walk_hunk_consumer(lcs, left_interned, right_interned, hunks);
}


#ifdef BUILD_UNIT_TESTS
#include "unit_tests.hh"
#include "transforms.hh"
#include <boost/lexical_cast.hpp>
#include "randomfile.hh"

using boost::lexical_cast;

static void dump_incorrect_merge(vector<string> const & expected,
                                 vector<string> const & got,
                                 string const & prefix)
{
  size_t mx = expected.size();
  if (mx < got.size())
    mx = got.size();
  for (size_t i = 0; i < mx; ++i)
    {
      cerr << "bad merge: " << i << " [" << prefix << "]\t";

      if (i < expected.size())
        cerr << "[" << expected[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      if (i < got.size())
        cerr << "[" << got[i] << "]\t";
      else
        cerr << "[--nil--]\t";

      cerr << endl;
    }
}

// regression blockers go here
static void unidiff_append_test()
{
  string src(string("#include \"hello.h\"\n")
             + "\n"
             + "void say_hello()\n"
             + "{\n"
             + "  printf(\"hello, world\\n\");\n"
             + "}\n"
             + "\n"
             + "int main()\n"
             + "{\n"
             + "  say_hello();\n"
             + "}\n");

  string dst(string("#include \"hello.h\"\n")
             + "\n"
             + "void say_hello()\n"
             + "{\n"
             + "  printf(\"hello, world\\n\");\n"
             + "}\n"
             + "\n"
             + "int main()\n"
             + "{\n"
             + "  say_hello();\n"
             + "}\n"
             + "\n"
             + "void say_goodbye()\n"
             + "{\n"
             + "  printf(\"goodbye\\n\");\n"
             + "}\n"
             + "\n");

  string ud(string("--- hello.c\n")
            + "+++ hello.c\n"
            + "@@ -9,3 +9,9 @@\n"
            + " {\n"
            + "   say_hello();\n"
            + " }\n"
            + "+\n"
            + "+void say_goodbye()\n"
            + "+{\n"
            + "+  printf(\"goodbye\\n\");\n"
            + "+}\n"
            + "+\n");

  vector<string> src_lines, dst_lines;
  split_into_lines(src, src_lines);
  split_into_lines(dst, dst_lines);
  stringstream sst;
  unidiff("hello.c", "hello.c", src_lines, dst_lines, sst);
  BOOST_CHECK(sst.str() == ud);
}


// high tech randomizing test

static void randomizing_merge_test()
{
  for (int i = 0; i < 30; ++i)
    {
      vector<string> anc, d1, d2, m1, m2, gm;

      file_randomizer::build_random_fork(anc, d1, d2, gm,
                                         i * 1023, (10 + 2 * i));

      BOOST_CHECK(merge3(anc, d1, d2, m1));
      if (gm != m1)
        dump_incorrect_merge (gm, m1, "random_merge 1");
      BOOST_CHECK(gm == m1);

      BOOST_CHECK(merge3(anc, d2, d1, m2));
      if (gm != m2)
        dump_incorrect_merge (gm, m2, "random_merge 2");
      BOOST_CHECK(gm == m2);
    }
}


// old boring tests

static void merge_prepend_test()
{
  BOOST_CHECKPOINT("prepend test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  for (int i = 0; i < 10; ++i)
    {
      anc.push_back(lexical_cast<string>(i));
      d1.push_back(lexical_cast<string>(i));
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_prepend 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_prepend 2");
  BOOST_CHECK(gm == m2);
}


static void merge_append_test()
{
  BOOST_CHECKPOINT("append test");
  vector<string> anc, d1, d2, m1, m2, gm;
  for (int i = 0; i < 10; ++i)
    anc.push_back(lexical_cast<string>(i));

  d1 = anc;
  d2 = anc;
  gm = anc;

  for (int i = 10; i < 20; ++i)
    {
      d2.push_back(lexical_cast<string>(i));
      gm.push_back(lexical_cast<string>(i));
    }

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_append 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_append 2");
  BOOST_CHECK(gm == m2);
}

static void merge_additions_test()
{
  BOOST_CHECKPOINT("additions test");
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc1("I like oatmeal\nI don't like spam\nI like orange juice\nI like toast");
  string confl("I like oatmeal\nI don't like tuna\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like orange juice\nI don't like tuna\nI like toast");
  string good_merge("I like oatmeal\nI don't like spam\nI like orange juice\nI don't like tuna\nI like toast");
  vector<string> anc, d1, cf, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc1, d1);
  split_into_lines(confl, cf);
  split_into_lines(desc2, d2);
  split_into_lines(good_merge, gm);

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_addition 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_addition 2");
  BOOST_CHECK(gm == m2);

  BOOST_CHECK(!merge3(anc, d1, cf, m1));
}

static void merge_deletions_test()
{
  string ancestor("I like oatmeal\nI like orange juice\nI like toast");
  string desc2("I like oatmeal\nI like toast");

  vector<string> anc, d1, d2, m1, m2, gm;

  split_into_lines(ancestor, anc);
  split_into_lines(desc2, d2);
  d1 = anc;
  gm = d2;

  BOOST_CHECK(merge3(anc, d1, d2, m1));
  if (gm != m1)
    dump_incorrect_merge (gm, m1, "merge_deletion 1");
  BOOST_CHECK(gm == m1);

  BOOST_CHECK(merge3(anc, d2, d1, m2));
  if (gm != m2)
    dump_incorrect_merge (gm, m2, "merge_deletion 2");
  BOOST_CHECK(gm == m2);
}


void add_diff_patch_tests(test_suite * suite)
{
  I(suite);
  suite->add(BOOST_TEST_CASE(&unidiff_append_test));
  suite->add(BOOST_TEST_CASE(&merge_prepend_test));
  suite->add(BOOST_TEST_CASE(&merge_append_test));
  suite->add(BOOST_TEST_CASE(&merge_additions_test));
  suite->add(BOOST_TEST_CASE(&merge_deletions_test));
  suite->add(BOOST_TEST_CASE(&randomizing_merge_test));
}


#endif // BUILD_UNIT_TESTS
