monotone

monotone Mtn Source Tree

Root/src/merge_3way.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2// 2008 Stephen Leake <stephen_leake@stephe-leake.org>
3//
4// This program is made available under the GNU GPL version 2.0 or
5// greater. See the accompanying file COPYING for details.
6//
7// This program is distributed WITHOUT ANY WARRANTY; without even the
8// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9// PURPOSE.
10
11#include "base.hh"
12#include "merge_content.hh"
13
14#include "interner.hh"
15#include "lcs.hh"
16#include "paths.hh"
17#include "vector.hh"
18
19using std::min;
20using std::vector;
21using std::string;
22using std::swap;
23
24//
25// a 3-way merge works like this:
26//
27// /----> right
28// ancestor
29// \----> left
30//
31// first you compute the edit list "EDITS(ancestor,left)".
32//
33// then you make an offset table "leftpos" which describes positions in
34// "ancestor" as they map to "left"; that is, for 0 < apos <
35// ancestor.size(), we have
36//
37// left[leftpos[apos]] == ancestor[apos]
38//
39// you do this by walking through the edit list and either jumping the
40// current index ahead an extra position, on an insert, or remaining still,
41// on a delete. on an insert *or* a delete, you push the current index back
42// onto the leftpos array.
43//
44// next you compute the edit list "EDITS(ancestor,right)".
45//
46// you then go through this edit list applying the edits to left, rather
47// than ancestor, and using the table leftpos to map the position of each
48// edit to an appropriate spot in left. this means you walk a "curr_left"
49// index through the edits, and for each edit e:
50//
51// - if e is a delete (and e.pos is a position in ancestor)
52// - increment curr_left without copying anything to "merged"
53//
54// - if e is an insert (and e.pos is a position in right)
55// - copy right[e.pos] to "merged"
56// - leave curr_left alone
57//
58// - when advancing to apos (and apos is a position in ancestor)
59// - copy left[curr_left] to merged while curr_left < leftpos[apos]
60//
61//
62// the practical upshot is that you apply the delta from ancestor->right
63// to the adjusted contexts in left, producing something vaguely like
64// the concatenation of delta(ancestor,left) :: delta(ancestor,right).
65//
66// NB: this is, as far as I can tell, what diff3 does. I don't think I'm
67// infringing on anyone's fancy patents here.
68//
69
70struct conflict {};
71
72typedef enum { preserved = 0, deleted = 1, changed = 2 } edit_t;
73static const char etab[3][10] =
74 {
75 "preserved",
76 "deleted",
77 "changed"
78 };
79
80struct extent
81{
82 extent(size_t p, size_t l, edit_t t)
83 : pos(p), len(l), type(t)
84 {}
85 size_t pos;
86 size_t len;
87 edit_t type;
88};
89
90void calculate_extents(vector<long, QA(long)> const & a_b_edits,
91 vector<long, QA(long)> const & b,
92 vector<long, QA(long)> & prefix,
93 vector<extent> & extents,
94 vector<long, QA(long)> & suffix,
95 size_t const a_len,
96 interner<long> & intern)
97{
98 extents.reserve(a_len * 2);
99
100 size_t a_pos = 0, b_pos = 0;
101
102 for (vector<long, QA(long)>::const_iterator i = a_b_edits.begin();
103 i != a_b_edits.end(); ++i)
104 {
105 // L(FL("edit: %d") % *i);
106 if (*i < 0)
107 {
108 // negative elements code the negation of the one-based index into A
109 // of the element to be deleted
110 size_t a_deleted = (-1 - *i);
111
112 // fill positions out to the deletion point
113 while (a_pos < a_deleted)
114 {
115 a_pos++;
116 extents.push_back(extent(b_pos++, 1, preserved));
117 }
118
119 // L(FL(" -- delete at A-pos %d (B-pos = %d)") % a_deleted % b_pos);
120
121 // skip the deleted line
122 a_pos++;
123 extents.push_back(extent(b_pos, 0, deleted));
124 }
125 else
126 {
127 // positive elements code the one-based index into B of the element to
128 // be inserted
129 size_t b_inserted = (*i - 1);
130
131 // fill positions out to the insertion point
132 while (b_pos < b_inserted)
133 {
134 a_pos++;
135 extents.push_back(extent(b_pos++, 1, preserved));
136 }
137
138 // L(FL(" -- insert at B-pos %d (A-pos = %d) : '%s'")
139 // % b_inserted % a_pos % intern.lookup(b.at(b_inserted)));
140
141 // record that there was an insertion, but a_pos did not move.
142 if ((b_pos == 0 && extents.empty())
143 || (b_pos == prefix.size()))
144 {
145 prefix.push_back(b.at(b_pos));
146 }
147 else if (a_len == a_pos)
148 {
149 suffix.push_back(b.at(b_pos));
150 }
151 else
152 {
153 // make the insertion
154 extents.back().type = changed;
155 extents.back().len++;
156 }
157 b_pos++;
158 }
159 }
160
161 while (extents.size() < a_len)
162 extents.push_back(extent(b_pos++, 1, preserved));
163}
164
165void normalize_extents(vector<extent> & a_b_map,
166 vector<long, QA(long)> const & a,
167 vector<long, QA(long)> const & b)
168{
169 for (size_t i = 0; i < a_b_map.size(); ++i)
170 {
171 if (i > 0)
172 {
173 size_t j = i;
174 while (j > 0
175 && (a_b_map.at(j-1).type == preserved)
176 && (a_b_map.at(j).type == changed)
177 && (a.at(j) == b.at(a_b_map.at(j).pos + a_b_map.at(j).len - 1)))
178 {
179 // This is implied by (a_b_map.at(j-1).type == preserved)
180 I(a.at(j-1) == b.at(a_b_map.at(j-1).pos));
181
182 // Coming into loop we have:
183 // i
184 // z --pres--> z 0
185 // o --pres--> o 1
186 // a --chng--> a 2 The important thing here is that 'a' in
187 // t the LHS matches with ...
188 // u
189 // v
190 // a ... the a on the RHS here. Hence we can
191 // q --pres--> q 3 'shift' the entire 'changed' block
192 // e --chng--> d 4 upwards, leaving a 'preserved' line
193 // g --pres--> g 5 'a'->'a'
194 //
195 // Want to end up with:
196 // i
197 // z --pres--> z 0
198 // o --chng--> o 1
199 // a
200 // t
201 // u
202 // v
203 // a --pres--> a 2
204 // q --pres--> q 3
205 // e --chng--> d 4
206 // g --pres--> g 5
207 //
208 // Now all the 'changed' extents are normalised to the
209 // earliest possible position.
210
211 L(FL("exchanging preserved extent [%d+%d] with changed extent [%d+%d]")
212 % a_b_map.at(j-1).pos
213 % a_b_map.at(j-1).len
214 % a_b_map.at(j).pos
215 % a_b_map.at(j).len);
216
217 swap(a_b_map.at(j-1).len, a_b_map.at(j).len);
218 swap(a_b_map.at(j-1).type, a_b_map.at(j).type);
219
220 // Adjust position of the later, preserved extent. It should
221 // better point to the second 'a' in the above example.
222 a_b_map.at(j).pos = a_b_map.at(j-1).pos + a_b_map.at(j-1).len;
223
224 --j;
225 }
226 }
227 }
228
229 for (size_t i = 0; i < a_b_map.size(); ++i)
230 {
231 if (i > 0)
232 {
233 size_t j = i;
234 while (j > 0
235 && a_b_map.at(j).type == changed
236 && a_b_map.at(j-1).type == changed
237 && a_b_map.at(j).len > 1
238 && a_b_map.at(j-1).pos + a_b_map.at(j-1).len == a_b_map.at(j).pos)
239 {
240 // step 1: move a chunk from this insert extent to its
241 // predecessor
242 size_t piece = a_b_map.at(j).len - 1;
243 // L(FL("moving change piece of len %d from pos %d to pos %d")
244 // % piece
245 // % a_b_map.at(j).pos
246 // % a_b_map.at(j-1).pos);
247 a_b_map.at(j).len = 1;
248 a_b_map.at(j).pos += piece;
249 a_b_map.at(j-1).len += piece;
250
251 // step 2: if this extent (now of length 1) has become a "changed"
252 // extent identical to its previous state, switch it to a "preserved"
253 // extent.
254 if (b.at(a_b_map.at(j).pos) == a.at(j))
255 {
256 // L(FL("changing normalized 'changed' extent at %d to 'preserved'")
257 // % a_b_map.at(j).pos);
258 a_b_map.at(j).type = preserved;
259 }
260 --j;
261 }
262 }
263 }
264}
265
266
267void merge_extents(vector<extent> const & a_b_map,
268 vector<extent> const & a_c_map,
269 vector<long, QA(long)> const & b,
270 vector<long, QA(long)> const & c,
271 interner<long> const & in,
272 vector<long, QA(long)> & merged)
273{
274 I(a_b_map.size() == a_c_map.size());
275
276 vector<extent>::const_iterator i = a_b_map.begin();
277 vector<extent>::const_iterator j = a_c_map.begin();
278 merged.reserve(a_b_map.size() * 2);
279
280 // for (; i != a_b_map.end(); ++i, ++j)
281 // {
282
283 // L(FL("trying to merge: [%s %d %d] vs. [%s %d %d]")
284 // % etab[i->type] % i->pos % i->len
285 // % etab[j->type] % j->pos % j->len);
286 // }
287
288 // i = a_b_map.begin();
289 // j = a_c_map.begin();
290
291 for (; i != a_b_map.end(); ++i, ++j)
292 {
293
294 // L(FL("trying to merge: [%s %d %d] vs. [%s %d %d]")
295 // % etab[i->type] % i->pos % i->len
296 // % etab[j->type] % j->pos % j->len);
297
298 // mutual, identical preserves / inserts / changes
299 if (((i->type == changed && j->type == changed)
300 || (i->type == preserved && j->type == preserved))
301 && i->len == j->len)
302 {
303 for (size_t k = 0; k < i->len; ++k)
304 {
305 if (b.at(i->pos + k) != c.at(j->pos + k))
306 {
307 L(FL("conflicting edits: %s %d[%d] '%s' vs. %s %d[%d] '%s'")
308 % etab[i->type] % i->pos % k % in.lookup(b.at(i->pos + k))
309 % etab[j->type] % j->pos % k % in.lookup(c.at(j->pos + k)));
310 throw conflict();
311 }
312 merged.push_back(b.at(i->pos + k));
313 }
314 }
315
316 // mutual or single-edge deletes
317 else if ((i->type == deleted && j->type == deleted)
318 || (i->type == deleted && j->type == preserved)
319 || (i->type == preserved && j->type == deleted))
320 {
321 // do nothing
322 }
323
324 // single-edge insert / changes
325 else if (i->type == changed && j->type == preserved)
326 for (size_t k = 0; k < i->len; ++k)
327 merged.push_back(b.at(i->pos + k));
328
329 else if (i->type == preserved && j->type == changed)
330 for (size_t k = 0; k < j->len; ++k)
331 merged.push_back(c.at(j->pos + k));
332
333 else
334 {
335 L(FL("conflicting edits: [%s %d %d] vs. [%s %d %d]")
336 % etab[i->type] % i->pos % i->len
337 % etab[j->type] % j->pos % j->len);
338 throw conflict();
339 }
340
341 // if (merged.empty())
342 // L(FL(" --> EMPTY"));
343 // else
344 // L(FL(" --> [%d]: %s") % (merged.size() - 1) % in.lookup(merged.back()));
345 }
346}
347
348
349void merge_via_edit_scripts(vector<string> const & ancestor,
350 vector<string> const & left,
351 vector<string> const & right,
352 vector<string> & merged)
353{
354 vector<long, QA(long)> anc_interned;
355 vector<long, QA(long)> left_interned, right_interned;
356 vector<long, QA(long)> left_edits, right_edits;
357 vector<long, QA(long)> left_prefix, right_prefix;
358 vector<long, QA(long)> left_suffix, right_suffix;
359 vector<extent> left_extents, right_extents;
360 vector<long, QA(long)> merged_interned;
361 interner<long> in;
362
363 // for (int i = 0; i < min(min(left.size(), right.size()), ancestor.size()); ++i)
364 // {
365 // cerr << '[' << i << "] " << left[i] << ' ' << ancestor[i] << ' ' << right[i] << '\n';
366 // }
367
368 anc_interned.reserve(ancestor.size());
369 for (vector<string>::const_iterator i = ancestor.begin();
370 i != ancestor.end(); ++i)
371 anc_interned.push_back(in.intern(*i));
372
373 left_interned.reserve(left.size());
374 for (vector<string>::const_iterator i = left.begin();
375 i != left.end(); ++i)
376 left_interned.push_back(in.intern(*i));
377
378 right_interned.reserve(right.size());
379 for (vector<string>::const_iterator i = right.begin();
380 i != right.end(); ++i)
381 right_interned.push_back(in.intern(*i));
382
383 L(FL("calculating left edit script on %d -> %d lines")
384 % anc_interned.size() % left_interned.size());
385
386 edit_script(anc_interned.begin(), anc_interned.end(),
387 left_interned.begin(), left_interned.end(),
388 left_edits);
389
390 L(FL("calculating right edit script on %d -> %d lines")
391 % anc_interned.size() % right_interned.size());
392
393 edit_script(anc_interned.begin(), anc_interned.end(),
394 right_interned.begin(), right_interned.end(),
395 right_edits);
396
397 L(FL("calculating left extents on %d edits") % left_edits.size());
398 calculate_extents(left_edits, left_interned,
399 left_prefix, left_extents, left_suffix,
400 anc_interned.size(), in);
401
402 L(FL("calculating right extents on %d edits") % right_edits.size());
403 calculate_extents(right_edits, right_interned,
404 right_prefix, right_extents, right_suffix,
405 anc_interned.size(), in);
406
407 L(FL("normalizing %d right extents") % right_extents.size());
408 normalize_extents(right_extents, anc_interned, right_interned);
409
410 L(FL("normalizing %d left extents") % left_extents.size());
411 normalize_extents(left_extents, anc_interned, left_interned);
412
413
414 if ((!right_prefix.empty()) && (!left_prefix.empty()))
415 {
416 L(FL("conflicting prefixes"));
417 throw conflict();
418 }
419
420 if ((!right_suffix.empty()) && (!left_suffix.empty()))
421 {
422 L(FL("conflicting suffixes"));
423 throw conflict();
424 }
425
426 L(FL("merging %d left, %d right extents")
427 % left_extents.size() % right_extents.size());
428
429 copy(left_prefix.begin(), left_prefix.end(), back_inserter(merged_interned));
430 copy(right_prefix.begin(), right_prefix.end(), back_inserter(merged_interned));
431
432 merge_extents(left_extents, right_extents,
433 left_interned, right_interned,
434 in, merged_interned);
435
436 copy(left_suffix.begin(), left_suffix.end(), back_inserter(merged_interned));
437 copy(right_suffix.begin(), right_suffix.end(), back_inserter(merged_interned));
438
439 merged.reserve(merged_interned.size());
440 for (vector<long, QA(long)>::const_iterator i = merged_interned.begin();
441 i != merged_interned.end(); ++i)
442 merged.push_back(in.lookup(*i));
443}
444
445
446bool merge3(vector<string> const & ancestor,
447 vector<string> const & left,
448 vector<string> const & right,
449 vector<string> & merged)
450{
451 try
452 {
453 merge_via_edit_scripts(ancestor, left, right, merged);
454 }
455 catch(conflict &)
456 {
457 L(FL("conflict detected. no merge."));
458 return false;
459 }
460 return true;
461}
462
463
464// Local Variables:
465// mode: C++
466// fill-column: 76
467// c-file-style: "gnu"
468// indent-tabs-mode: nil
469// End:
470// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status