monotone

monotone Mtn Source Tree

Root/src/git_export.cc

1// Copyright (C) 2009 Derek Scherger <derek@echologic.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "cert.hh"
12#include "database.hh"
13#include "dates.hh"
14#include "file_io.hh"
15#include "git_change.hh"
16#include "git_export.hh"
17#include "lua_hooks.hh"
18#include "outdated_indicator.hh"
19#include "project.hh"
20#include "revision.hh"
21#include "roster.hh"
22#include "simplestring_xform.hh"
23#include "transforms.hh"
24#include "ui.hh"
25
26#include <iostream>
27#include <sstream>
28
29using std::cout;
30using std::istringstream;
31using std::map;
32using std::ostringstream;
33using std::set;
34using std::string;
35using std::vector;
36
37namespace
38{
39 string quote_path(file_path const & path)
40 {
41 string raw = path.as_internal();
42 string quoted;
43 quoted.reserve(raw.size() + 8);
44
45 quoted += "\"";
46
47 for (string::const_iterator i = raw.begin(); i != raw.end(); ++i)
48 {
49 if (*i == '"')
50 quoted += "\\";
51 quoted += *i;
52 }
53
54 quoted += "\"";
55
56 return quoted;
57 }
58};
59
60void
61read_mappings(system_path const & path, map<string, string> & mappings)
62{
63 data names;
64 vector<string> lines;
65
66 read_data(path, names);
67 split_into_lines(names(), lines);
68
69 for (vector<string>::const_iterator i = lines.begin(); i != lines.end(); ++i)
70 {
71 string line = trim(*i);
72 size_t index = line.find('=');
73 if (index != string::npos || index < line.length()-1)
74 {
75 string key = trim(line.substr(0, index));
76 string value = trim(line.substr(index+1));
77 mappings[key] = value;
78 }
79 else if (!line.empty())
80 W(F("ignored invalid mapping '%s'") % line);
81 }
82}
83
84void
85validate_author_mappings(lua_hooks & lua,
86 map<string, string> const & authors)
87{
88 for (map<string, string>::const_iterator i = authors.begin();
89 i != authors.end(); ++i)
90 {
91 E(lua.hook_validate_git_author(i->second), origin::user,
92 F("invalid git author '%s' mapped from monotone author '%s'")
93 % i->second % i->first);
94 }
95}
96
97void
98import_marks(system_path const & marks_file,
99 map<revision_id, size_t> & marked_revs)
100{
101 size_t mark_id = 1;
102
103 data mark_data;
104 read_data(marks_file, mark_data);
105 istringstream marks(mark_data());
106 marks.peek();
107 while (!marks.eof())
108 {
109 char c;
110 size_t mark;
111 string tmp;
112
113 marks.get(c);
114 E(c == ':', origin::user, F("missing leading ':' in marks file"));
115 marks >> mark;
116
117 marks.get(c);
118 E(c == ' ', origin::user, F("missing space after mark"));
119 marks >> tmp;
120 E(tmp.size() == 40, origin::user, F("bad revision id in marks file"));
121 revision_id revid(decode_hexenc(tmp, origin::user), origin::user);
122
123 marks.get(c);
124 E(c == '\n', origin::user, F("incomplete line in marks file"));
125
126 marked_revs[revid] = mark;
127 if (mark > mark_id) mark_id = mark+1;
128 marks.peek();
129 }
130}
131
132
133void
134export_marks(system_path const & marks_file,
135 map<revision_id, size_t> const & marked_revs)
136{
137 ostringstream marks;
138 for (map<revision_id, size_t>::const_iterator
139 i = marked_revs.begin(); i != marked_revs.end(); ++i)
140 marks << ":" << i->second << " " << i->first << "\n";
141
142 data mark_data(marks.str(), origin::internal);
143 system_path tmp("."); // use the current directory for tmp
144 write_data(marks_file, mark_data, tmp);
145}
146
147void
148load_changes(database & db,
149 vector<revision_id> const & revisions,
150 map<revision_id, git_change> & change_map)
151{
152 // process revisions in reverse order and calculate the git changes for
153 // each revision. these are cached in a map for use in the export phase
154 // where revisions are processed in forward order. this trades off memory
155 // for speed, loading rosters in reverse order is ~5x faster than loading
156 // them in forward order and the memory required for git changes is
157 // generally quite small. the memory required here should be comparable to
158 // that for all of the revision texts in the database being exported.
159 //
160 // testing exports of a current monotone database with ~18MB of revision
161 // text in ~15K revisions and a current pidgin database with ~20MB of
162 // revision text in ~27K revisions indicate that this is a reasonable
163 // approach. the export process reaches around 203MB VSS and 126MB RSS
164 // for the monotone database and around 206MB VSS and 129MB RSS for the
165 // pidgin database.
166
167 ticker loaded(_("loading"), "r", 1);
168 loaded.set_total(revisions.size());
169
170 for (vector<revision_id>::const_reverse_iterator
171 r = revisions.rbegin(); r != revisions.rend(); ++r)
172 {
173 revision_t revision;
174 db.get_revision(*r, revision);
175
176 // we apparently only need/want the changes from the first parent.
177 // including the changes from the second parent seems to cause
178 // failures due to repeated renames. verification of git merge nodes
179 // against the monotone source seems to show that they are correct.
180 // presumably this is somehow because of the 'from' and 'merge'
181 // lines in exported commits below.
182
183 revision_id parent1;
184 edge_map::const_iterator edge = revision.edges.begin();
185 parent1 = edge_old_revision(edge);
186
187 roster_t old_roster, new_roster;
188 db.get_roster(parent1, old_roster);
189 db.get_roster(*r, new_roster);
190
191 git_change changes;
192 get_change(old_roster, new_roster, changes);
193 change_map[*r] = changes;
194
195 ++loaded;
196 }
197}
198
199void
200export_changes(database & db, lua_hooks & lua,
201 vector<revision_id> const & revisions,
202 map<revision_id, size_t> & marked_revs,
203 map<string, string> const & author_map,
204 map<string, string> const & branch_map,
205 map<revision_id, git_change> const & change_map,
206 bool log_revids, bool log_certs,
207 bool use_one_changelog)
208{
209 size_t revnum = 0;
210 size_t revmax = revisions.size();
211
212 size_t mark_id = 0;
213 for (map<revision_id, size_t>::const_iterator i = marked_revs.begin();
214 i != marked_revs.end(); ++i)
215 if (i->second > mark_id) mark_id = i->second;
216 mark_id++;
217
218 map<file_id, size_t> marked_files;
219
220 // process the revisions in forward order and write out the fast-export
221 // data stream.
222
223 ticker exported(_("exporting"), "r", 1);
224 exported.set_total(revisions.size());
225
226 // keep a map of valid authors to avoid redundant lua validation calls
227 map<string, string> valid_authors(author_map);
228
229 for (vector<revision_id>::const_iterator
230 r = revisions.begin(); r != revisions.end(); ++r)
231 {
232 revnum++;
233
234 typedef vector<cert> cert_vector;
235 typedef cert_vector::const_iterator cert_iterator;
236 typedef map<string, string>::const_iterator lookup_iterator;
237
238 cert_vector certs;
239 cert_vector authors;
240 cert_vector branches;
241 cert_vector changelogs;
242 cert_vector comments;
243 cert_vector dates;
244 cert_vector tags;
245
246 db.get_revision_certs(*r, certs);
247
248 for (cert_iterator i = certs.begin(); i != certs.end(); ++i)
249 {
250 if (i->name == author_cert_name)
251 authors.push_back(*i);
252 else if (i->name == branch_cert_name)
253 branches.push_back(*i);
254 else if (i->name == changelog_cert_name)
255 changelogs.push_back(*i);
256 else if (i->name == date_cert_name)
257 dates.push_back(*i);
258 else if (i->name == tag_cert_name)
259 tags.push_back(*i);
260 else if (i->name == comment_cert_name)
261 comments.push_back(*i);
262 }
263
264 // default to <unknown> committer and author if no author certs exist
265 // this may be mapped to a different value with the authors-file option
266 string author_name = "Unknown <unknown>"; // used as the git author
267 string author_key = "Unknown <unknown>"; // used as the git committer
268 date_t author_date = date_t::now();
269
270 cert_iterator author = authors.begin();
271
272 if (author != authors.end())
273 {
274 author_name = trim(author->value());
275 if (db.public_key_exists(author->key))
276 {
277 rsa_pub_key pub;
278 key_name name;
279 db.get_pubkey(author->key, name, pub);
280 author_key = trim(name());
281 }
282 }
283
284 // all monotone keys and authors that don't follow the "Name <email>"
285 // convention used by git must be mapped or they may cause the import
286 // to fail. the full list of these values is available from monotone
287 // using the 'db execute' command. the following queries will list all
288 // author keys and author cert values.
289 //
290 // all values from author certs:
291 //
292 // 'select distinct value from revision_certs where name = "author"'
293 //
294 // all keys that have signed author certs:
295 //
296 // 'select distinct public_keys.name
297 // from public_keys
298 // left join revision_certs on revision_certs.keypair_id = public_keys.id
299 // where revision_certs.name = "author"'
300
301 lookup_iterator key_lookup = valid_authors.find(author_key);
302
303 if (key_lookup != valid_authors.end())
304 {
305 author_key = key_lookup->second;
306 }
307 else
308 {
309 string unmapped_key;
310 lua.hook_unmapped_git_author(author_key, unmapped_key);
311 E(lua.hook_validate_git_author(unmapped_key), origin::user,
312 F("invalid git author '%s' from monotone author key '%s'")
313 % unmapped_key % author_key);
314 valid_authors.insert(make_pair(author_key, unmapped_key));
315 author_key = unmapped_key;
316 }
317
318 lookup_iterator name_lookup = valid_authors.find(author_name);
319
320 if (name_lookup != valid_authors.end())
321 {
322 author_name = name_lookup->second;
323 }
324 else
325 {
326 string unmapped_name;
327 lua.hook_unmapped_git_author(author_name, unmapped_name);
328 E(lua.hook_validate_git_author(unmapped_name), origin::user,
329 F("invalid git author '%s' from monotone author value '%s'")
330 % unmapped_name % author_name);
331 valid_authors.insert(make_pair(author_name, unmapped_name));
332 author_name = unmapped_name;
333 }
334
335 cert_iterator date = dates.begin();
336
337 if (date != dates.end())
338 author_date = date_t(date->value());
339
340 // default to unknown branch if no branch certs exist
341 // this may be mapped to a different value with the branches-file option
342 string branch_name = "unknown";
343
344 if (!branches.empty())
345 branch_name = branches.begin()->value();
346
347 branch_name = trim(branch_name);
348
349 lookup_iterator branch_lookup = branch_map.find(branch_name);
350
351 if (branch_lookup != branch_map.end())
352 branch_name = branch_lookup->second;
353
354 ostringstream message;
355 set<string> messages;
356
357 // process comment certs with changelog certs
358
359 if (!use_one_changelog)
360 changelogs.insert(changelogs.end(),
361 comments.begin(), comments.end());
362
363 for (cert_iterator changelog = changelogs.begin();
364 changelog != changelogs.end(); ++changelog)
365 {
366 string value = changelog->value();
367 if (messages.find(value) == messages.end())
368 {
369 messages.insert(value);
370 message << value;
371 if (value[value.size()-1] != '\n')
372 message << "\n";
373 if (use_one_changelog)
374 break;
375 }
376 }
377
378 revision_t revision;
379 db.get_revision(*r, revision);
380
381 edge_map::const_iterator edge = revision.edges.begin();
382
383 revision_id parent1, parent2;
384
385 if (revision.edges.size() == 1)
386 {
387 parent1 = edge_old_revision(edge);
388 }
389 else if (revision.edges.size() == 2)
390 {
391 parent1 = edge_old_revision(edge);
392 ++edge;
393 parent2 = edge_old_revision(edge);
394 }
395 else
396 I(false);
397
398 map<revision_id, git_change>::const_iterator f = change_map.find(*r);
399 I(f != change_map.end());
400 git_change const & change = f->second;
401
402 vector<git_rename> reordered_renames;
403 reorder_renames(change.renames, reordered_renames);
404
405 // emit file data blobs for modified and added files
406
407 for (add_iterator
408 i = change.additions.begin(); i != change.additions.end(); ++i)
409 {
410 if (marked_files.find(i->content) == marked_files.end())
411 {
412 // only mark and emit a blob the first time it is encountered
413 file_data data;
414 db.get_file_version(i->content, data);
415 marked_files[i->content] = mark_id++;
416 cout << "blob\n"
417 << "mark :" << marked_files[i->content] << "\n"
418 << "data " << data.inner()().size() << "\n"
419 << data.inner()() << "\n";
420 }
421 }
422
423 if (log_revids)
424 {
425 message << "\n";
426
427 if (!null_id(parent1))
428 message << "Monotone-Parent: " << parent1 << "\n";
429
430 if (!null_id(parent2))
431 message << "Monotone-Parent: " << parent2 << "\n";
432
433 message << "Monotone-Revision: " << *r << "\n";
434 }
435
436 if (log_certs)
437 {
438 message << "\n";
439 for ( ; author != authors.end(); ++author)
440 message << "Monotone-Author: " << author->value() << "\n";
441
442 for ( ; date != dates.end(); ++date)
443 message << "Monotone-Date: " << date->value() << "\n";
444
445 for (cert_iterator
446 branch = branches.begin() ; branch != branches.end(); ++branch)
447 message << "Monotone-Branch: " << branch->value() << "\n";
448
449 for (cert_iterator tag = tags.begin(); tag != tags.end(); ++tag)
450 message << "Monotone-Tag: " << tag->value() << "\n";
451 }
452
453 string data = message.str();
454
455 marked_revs[*r] = mark_id++;
456
457 cout << "commit refs/heads/" << branch_name << "\n"
458 << "mark :" << marked_revs[*r] << "\n"
459 << "author " << author_name << " "
460 << (author_date.as_millisecs_since_unix_epoch() / 1000) << " +0000\n"
461 << "committer " << author_key << " "
462 << (author_date.as_millisecs_since_unix_epoch() / 1000) << " +0000\n"
463 << "data " << data.size() << "\n" << data << "\n";
464
465 if (!null_id(parent1))
466 cout << "from :" << marked_revs[parent1] << "\n";
467
468 if (!null_id(parent2))
469 cout << "merge :" << marked_revs[parent2] << "\n";
470
471 for (delete_iterator
472 i = change.deletions.begin(); i != change.deletions.end(); ++i)
473 cout << "D " << quote_path(*i) << "\n";
474
475 for (rename_iterator
476 i = reordered_renames.begin(); i != reordered_renames.end(); ++i)
477 cout << "R "
478 << quote_path(i->first) << " "
479 << quote_path(i->second) << "\n";
480
481 for (add_iterator
482 i = change.additions.begin(); i != change.additions.end(); ++i)
483 cout << "M " << i->mode << " :"
484 << marked_files[i->content] << " "
485 << quote_path(i->path) << "\n";
486
487 // create additional branch refs
488 if (!branches.empty())
489 {
490 cert_iterator branch = branches.begin();
491 branch++;
492 for ( ; branch != branches.end(); ++branch)
493 {
494 branch_name = trim(branch->value());
495
496 lookup_iterator branch_lookup = branch_map.find(branch_name);
497
498 if (branch_lookup != branch_map.end())
499 branch_name = branch_lookup->second;
500
501 cout << "reset refs/heads/" << branch_name << "\n"
502 << "from :" << marked_revs[*r] << "\n";
503 }
504 }
505
506 // create tag refs
507 for (cert_iterator tag = tags.begin(); tag != tags.end(); ++tag)
508 cout << "reset refs/tags/" << tag->value() << "\n"
509 << "from :" << marked_revs[*r] << "\n";
510
511 // report progress to the export file which will be reported during import
512 cout << "progress revision " << *r
513 << " (" << revnum << "/" << revmax << ")\n"
514 << "#############################################################\n";
515
516 ++exported;
517 }
518}
519
520void
521export_rev_refs(vector<revision_id> const & revisions,
522 map<revision_id, size_t> & marked_revs)
523{
524 for (vector<revision_id>::const_iterator
525 i = revisions.begin(); i != revisions.end(); ++i)
526 cout << "reset refs/mtn/revs/" << *i << "\n"
527 << "from :" << marked_revs[*i] << "\n";
528}
529
530void
531export_root_refs(database & db,
532 map<revision_id, size_t> & marked_revs)
533{
534 set<revision_id> roots;
535 revision_id nullid;
536 db.get_revision_children(nullid, roots);
537 for (set<revision_id>::const_iterator
538 i = roots.begin(); i != roots.end(); ++i)
539 cout << "reset refs/mtn/roots/" << *i << "\n"
540 << "from :" << marked_revs[*i] << "\n";
541}
542
543void
544export_leaf_refs(database & db,
545 map<revision_id, size_t> & marked_revs)
546{
547 set<revision_id> leaves;
548 db.get_leaves(leaves);
549 for (set<revision_id>::const_iterator
550 i = leaves.begin(); i != leaves.end(); ++i)
551 cout << "reset refs/mtn/leaves/" << *i << "\n"
552 << "from :" << marked_revs[*i] << "\n";
553}
554
555// Local Variables:
556// mode: C++
557// fill-column: 76
558// c-file-style: "gnu"
559// indent-tabs-mode: nil
560// End:
561// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status