monotone

monotone Mtn Source Tree

Root/rcs_import.cc

1// copyright (C) 2002, 2003 graydon hoare <graydon@pobox.com>
2// all rights reserved.
3// licensed to the public under the terms of the GNU GPL (>= 2)
4// see the file COPYING for details
5
6#include <iostream>
7#include <string>
8#include <algorithm>
9#include <vector>
10#include <stack>
11#include <map>
12#include <sstream>
13#include <stdexcept>
14
15#include <unistd.h>
16
17#include <boost/shared_ptr.hpp>
18#include <boost/scoped_ptr.hpp>
19#include <boost/lexical_cast.hpp>
20#include <boost/tokenizer.hpp>
21
22#include <boost/filesystem/path.hpp>
23#include <boost/filesystem/operations.hpp>
24#include <boost/filesystem/convenience.hpp>
25
26#include "app_state.hh"
27#include "cert.hh"
28#include "constants.hh"
29#include "cycle_detector.hh"
30#include "database.hh"
31#include "file_io.hh"
32#include "interner.hh"
33#include "manifest.hh"
34#include "packet.hh"
35#include "rcs_file.hh"
36#include "sanity.hh"
37#include "transforms.hh"
38#include "ui.hh"
39
40using namespace std;
41using boost::shared_ptr;
42using boost::scoped_ptr;
43
44// cvs history recording stuff
45
46typedef unsigned long cvs_branchname;
47typedef unsigned long cvs_author;
48typedef unsigned long cvs_changelog;
49typedef unsigned long cvs_version;
50typedef unsigned long cvs_path;
51
52struct cvs_history;
53struct cvs_key
54{
55 cvs_key() {}
56 cvs_key(rcs_file const & r,
57 string const & version,
58 cvs_history & cvs);
59
60 inline bool similar_enough(cvs_key const & other) const
61 {
62 if (changelog != other.changelog)
63 return false;
64 if (author != other.author)
65 return false;
66 if (labs(time - other.time) > constants::cvs_window)
67 return false;
68 return true;
69 }
70
71 inline bool operator<(cvs_key const & other) const
72 {
73 // nb: this must sort as > to construct the edges in the right direction
74 return time > other.time ||
75
76 (time == other.time
77 && author > other.author) ||
78
79 (time == other.time
80 && author == other.author
81 && changelog > other.changelog) ||
82
83 (time == other.time
84 && author == other.author
85 && changelog == other.changelog
86 && branch > other.branch);
87 }
88
89 cvs_branchname branch;
90 cvs_changelog changelog;
91 cvs_author author;
92 time_t time;
93};
94
95struct cvs_file_edge
96{
97 cvs_file_edge (file_id const & pv,
98 file_path const & pp,
99 file_id const & cv,
100 file_path const & cp,
101 cvs_history & cvs);
102 cvs_version parent_version;
103 cvs_path parent_path;
104 cvs_version child_version;
105 cvs_path child_path;
106 inline bool operator<(cvs_file_edge const & other) const
107 {
108 return (parent_path < other.parent_path) ||
109
110 ((parent_path == other.parent_path) && (parent_version < other.parent_version)) ||
111
112 ((parent_path == other.parent_path) && (parent_version == other.parent_version) &&
113 (child_path < other.child_path)) ||
114
115 ((parent_path == other.parent_path) && (parent_version == other.parent_version) &&
116 (child_path == other.child_path) && (child_version < other.child_version));
117 }
118};
119
120struct cvs_state
121{
122 set<cvs_file_edge> in_edges;
123 map< cvs_key, shared_ptr<cvs_state> > substates;
124};
125
126struct cvs_history
127{
128
129 interner<unsigned long> branch_interner;
130 interner<unsigned long> author_interner;
131 interner<unsigned long> changelog_interner;
132 interner<unsigned long> file_version_interner;
133 interner<unsigned long> path_interner;
134 interner<unsigned long> manifest_version_interner;
135
136 cycle_detector<unsigned long> manifest_cycle_detector;
137
138 bool find_key_and_state(rcs_file const & r,
139 string const & version,
140 cvs_key & key,
141 shared_ptr<cvs_state> & state);
142
143 typedef stack< shared_ptr<cvs_state> > state_stack;
144
145 state_stack stk;
146 file_path curr_file;
147 manifest_map head_manifest;
148 string base_branch;
149
150 ticker n_versions;
151 ticker n_tree_branches;
152 ticker n_file_branches;
153
154 cvs_history();
155 void set_filename(string const & file,
156 file_id const & ident);
157 void push_branch(rcs_file const & r, string const & rcs_version_num);
158 void note_file_edge(rcs_file const & r,
159 string const & prev_rcs_version_num,
160 string const & next_rcs_version_num,
161 file_id const & prev_version,
162 file_id const & next_version);
163 void pop_branch();
164};
165
166
167// piece table stuff
168
169struct piece;
170struct piece_store
171{
172 vector< boost::shared_ptr<rcs_deltatext> > texts;
173 void index_deltatext(boost::shared_ptr<rcs_deltatext> const & dt,
174 vector<piece> & pieces);
175 void build_string(vector<piece> const & pieces,
176 string & out);
177 void reset() { texts.clear(); }
178};
179
180// FIXME: kludge, I was lazy and did not make this
181// a properly scoped variable.
182
183static piece_store global_pieces;
184
185
186struct piece
187{
188 piece(string::size_type p, string::size_type l, unsigned long id) :
189 pos(p), len(l), string_id(id) {}
190 string::size_type pos;
191 string::size_type len;
192 unsigned long string_id;
193 string operator*() const
194 {
195 return string(global_pieces.texts.at(string_id)->text.data() + pos, len);
196 }
197};
198
199
200void piece_store::build_string(vector<piece> const & pieces,
201 string & out)
202{
203 out.clear();
204 out.reserve(pieces.size() * 60);
205 for(vector<piece>::const_iterator i = pieces.begin();
206 i != pieces.end(); ++i)
207 out.append(texts.at(i->string_id)->text, i->pos, i->len);
208}
209
210void piece_store::index_deltatext(boost::shared_ptr<rcs_deltatext> const & dt,
211 vector<piece> & pieces)
212{
213 pieces.clear();
214 pieces.reserve(dt->text.size() / 30);
215 texts.push_back(dt);
216 unsigned long id = texts.size() - 1;
217 string::size_type begin = 0;
218 string::size_type end = dt->text.find('\n');
219 while(end != string::npos)
220 {
221 // nb: the piece includes the '\n'
222 pieces.push_back(piece(begin, (end - begin) + 1, id));
223 begin = end + 1;
224 end = dt->text.find('\n', begin);
225 }
226 if (begin != dt->text.size())
227 {
228 // the text didn't end with '\n', so neither does the piece
229 end = dt->text.size();
230 pieces.push_back(piece(begin, end - begin, id));
231 }
232}
233
234
235void
236process_one_hunk(vector< piece > const & source,
237 vector< piece > & dest,
238 vector< piece >::const_iterator & i,
239 int & cursor)
240{
241 string directive = **i;
242 assert(directive.size() > 1);
243 ++i;
244
245 char code;
246 int pos, len;
247 sscanf(directive.c_str(), " %c %d %d", &code, &pos, &len);
248
249 try
250 {
251 if (code == 'a')
252{
253 // 'ax y' means "copy from source to dest until cursor == x, then
254 // copy y lines from delta, leaving cursor where it is"
255 while (cursor < pos)
256 dest.push_back(source.at(cursor++));
257 I(cursor == pos);
258 while (len--)
259 dest.push_back(*i++);
260}
261 else if (code == 'd')
262{
263 // 'dx y' means "copy from source to dest until cursor == x-1,
264 // then increment cursor by y, ignoring those y lines"
265 while (cursor < (pos - 1))
266 dest.push_back(source.at(cursor++));
267 I(cursor == pos - 1);
268 cursor += len;
269}
270 else
271throw oops("unknown directive '" + directive + "'");
272 }
273 catch (std::out_of_range & oor)
274 {
275 throw oops("std::out_of_range while processing " + directive
276 + " with source.size() == "
277 + boost::lexical_cast<string>(source.size())
278 + " and cursor == "
279 + boost::lexical_cast<string>(cursor));
280 }
281}
282
283static void
284construct_version(vector< piece > const & source_lines,
285 string const & dest_version,
286 vector< piece > & dest_lines,
287 rcs_file const & r)
288{
289 dest_lines.clear();
290 dest_lines.reserve(source_lines.size());
291
292 I(r.deltas.find(dest_version) != r.deltas.end());
293 shared_ptr<rcs_delta> delta = r.deltas.find(dest_version)->second;
294
295 I(r.deltatexts.find(dest_version) != r.deltatexts.end());
296 shared_ptr<rcs_deltatext> deltatext = r.deltatexts.find(dest_version)->second;
297
298 vector<piece> deltalines;
299 global_pieces.index_deltatext(deltatext, deltalines);
300
301 int cursor = 0;
302 for (vector<piece>::const_iterator i = deltalines.begin();
303 i != deltalines.end(); )
304 process_one_hunk(source_lines, dest_lines, i, cursor);
305 while (cursor < static_cast<int>(source_lines.size()))
306 dest_lines.push_back(source_lines[cursor++]);
307}
308
309// FIXME: should these be someplace else? using 'friend' to reach into the
310// DB is stupid, but it's also stupid to put raw edge insert methods on the
311// DB itself. or is it? hmm.. encapsulation vs. usage guidance..
312void
313rcs_put_raw_file_edge(hexenc<id> const & old_id,
314 hexenc<id> const & new_id,
315 base64< gzip<delta> > const & del,
316 database & db)
317{
318 if (db.delta_exists(old_id, "file_deltas"))
319 {
320 // we already have a way to get to this old version,
321 // no need to insert another reconstruction path
322 L(F("existing path to %s found, skipping\n") % old_id);
323 }
324 else
325 {
326 I(db.exists(new_id, "files")
327|| db.delta_exists(new_id, "file_deltas"));
328 db.put_delta(old_id, new_id, del, "file_deltas");
329 }
330}
331
332void
333rcs_put_raw_manifest_edge(hexenc<id> const & old_id,
334 hexenc<id> const & new_id,
335 base64< gzip<delta> > const & del,
336 database & db)
337{
338 if (db.delta_exists(old_id, "manifest_deltas"))
339 {
340 // we already have a way to get to this old version,
341 // no need to insert another reconstruction path
342 L(F("existing path to %s found, skipping\n") % old_id);
343 }
344 else
345 {
346 I(db.exists(new_id, "manifests")
347|| db.delta_exists(new_id, "manifest_deltas"));
348 db.put_delta(old_id, new_id, del, "manifest_deltas");
349 }
350}
351
352
353static void
354insert_into_db(data const & curr_data,
355 hexenc<id> const & curr_id,
356 vector< piece > const & next_lines,
357 data & next_data,
358 hexenc<id> & next_id,
359 database & db)
360{
361 // inserting into the DB
362 // note: curr_lines is a "new" (base) version
363 // and next_lines is an "old" (derived) version.
364 // all storage edges go from new -> old.
365 {
366 string tmp;
367 global_pieces.build_string(next_lines, tmp);
368 next_data = tmp;
369 }
370 base64< gzip<delta> > del;
371 diff(curr_data, next_data, del);
372 calculate_ident(next_data, next_id);
373 rcs_put_raw_file_edge(next_id, curr_id, del, db);
374}
375
376
377static void
378process_branch(string const & begin_version,
379 vector< piece > const & begin_lines,
380 data const & begin_data,
381 hexenc<id> const & begin_id,
382 rcs_file const & r,
383 database & db,
384 cvs_history & cvs)
385{
386 string curr_version = begin_version;
387 scoped_ptr< vector< piece > > next_lines(new vector<piece>);
388 scoped_ptr< vector< piece > > curr_lines(new vector<piece>
389 (begin_lines.begin(),
390 begin_lines.end()));
391 data curr_data(begin_data), next_data;
392 hexenc<id> curr_id(begin_id), next_id;
393
394 while(! (r.deltas.find(curr_version) == r.deltas.end() ||
395 r.deltas.find(curr_version)->second->next.empty()))
396 {
397 L(F("version %s has %d lines\n") % curr_version % curr_lines->size());
398
399 // construct this edge on our own branch
400 string next_version = r.deltas.find(curr_version)->second->next;
401 L(F("following RCS edge %s -> %s\n") % curr_version % next_version);
402
403 construct_version(*curr_lines, next_version, *next_lines, r);
404 L(F("constructed RCS version %s, inserting into database\n") %
405next_version);
406
407 insert_into_db(curr_data, curr_id,
408 *next_lines, next_data, next_id, db);
409
410 cvs.note_file_edge (r, curr_version, next_version,
411 file_id(curr_id), file_id(next_id));
412
413 // recursively follow any branches rooted here
414 boost::shared_ptr<rcs_delta> curr_delta = r.deltas.find(curr_version)->second;
415 for(vector<string>::const_iterator i = curr_delta->branches.begin();
416 i != curr_delta->branches.end(); ++i)
417{
418 L(F("following RCS branch %s\n") % (*i));
419 vector< piece > branch_lines;
420 construct_version(*curr_lines, *i, branch_lines, r);
421
422 data branch_data;
423 hexenc<id> branch_id;
424 insert_into_db(curr_data, curr_id,
425 branch_lines, branch_data, branch_id, db);
426 cvs.push_branch (r, curr_version);
427
428 cvs.note_file_edge (r, curr_version, *i,
429 file_id(curr_id), file_id(branch_id));
430
431 process_branch(*i, branch_lines, branch_data, branch_id, r, db, cvs);
432 cvs.pop_branch();
433 L(F("finished RCS branch %s\n") % (*i));
434}
435
436 // advance
437 curr_data = next_data;
438 curr_id = next_id;
439 curr_version = next_version;
440 swap(next_lines, curr_lines);
441 next_lines->clear();
442 }
443}
444
445
446void
447import_rcs_file_with_cvs(string const & filename, database & db, cvs_history & cvs)
448{
449 rcs_file r;
450 L(F("parsing RCS file %s\n") % filename);
451 parse_rcs_file(filename, r);
452 L(F("parsed RCS file %s OK\n") % filename);
453
454 {
455 vector< piece > head_lines;
456 I(r.deltatexts.find(r.admin.head) != r.deltatexts.end());
457
458 hexenc<id> id;
459 base64< gzip<data> > packed;
460 data dat(r.deltatexts.find(r.admin.head)->second->text);
461 calculate_ident(dat, id);
462 file_id fid = id;
463
464 cvs.set_filename (filename, fid);
465
466 if (! db.file_version_exists (fid))
467 {
468pack(dat, packed);
469file_data fdat = packed;
470db.put_file(fid, fdat);
471 }
472
473 global_pieces.reset();
474 global_pieces.index_deltatext(r.deltatexts.find(r.admin.head)->second, head_lines);
475 process_branch(r.admin.head, head_lines, dat, id, r, db, cvs);
476 global_pieces.reset();
477 }
478
479 ui.set_tick_trailer("");
480}
481
482
483void
484import_rcs_file(fs::path const & filename, database & db)
485{
486 cvs_history cvs;
487
488 I(! fs::is_directory(filename));
489 I(! filename.empty());
490
491 fs::path leaf = mkpath(filename.leaf());
492 fs::path branch = mkpath(filename.branch_path().string());
493
494 I(! branch.empty());
495 I(! leaf.empty());
496 I( fs::is_directory(branch));
497 I( fs::exists(branch));
498
499 I(chdir(filename.branch_path().native_directory_string().c_str()) == 0);
500
501 I(fs::exists(leaf));
502
503 import_rcs_file_with_cvs(leaf.native_file_string(), db, cvs);
504}
505
506
507// CVS importing stuff follows
508
509/*
510
511 we define a "cvs key" as a triple of author, commit time and
512 changelog. the equality of keys is a bit blurry due to a window of time
513 in which "the same" commit may begin and end. the window is evaluated
514 during the multimap walk though; for insertion in the multimap a true >
515 is used. a key identifies a particular commit.
516
517 we reconstruct the history of a CVS archive by accumulating file edges
518 into archive nodes. each node is called a "cvs_state", but it is really a
519 collection of file *edges* leading into that archive state. we accumulate
520 file edges by walking up the trunk and down the branches of each RCS file.
521
522 once we've got all the edges accumulated into archive nodes, we walk the
523 tree of cvs_states, up through the trunk and down through the branches,
524 carrying a manifest_map with us during the walk. for each edge, we
525 construct either the parent or child state of the edge (depending on
526 which way we're walking) and then calculate and write out a manifest
527 delta for the difference between the previous and current manifest map. we
528 also write out manifest certs, though the direction of ancestry changes
529 depending on whether we're going up the trunk or down the branches.
530
531 */
532
533cvs_file_edge::cvs_file_edge (file_id const & pv, file_path const & pp,
534 file_id const & cv, file_path const & cp,
535 cvs_history & cvs) :
536 parent_version(cvs.file_version_interner.intern(pv.inner()())),
537 parent_path(cvs.path_interner.intern(pp())),
538 child_version(cvs.file_version_interner.intern(cv.inner()())),
539 child_path(cvs.path_interner.intern(cp()))
540{
541}
542
543
544string find_branch_for_version(multimap<string,string> const & symbols,
545 string const & version,
546 string const & base)
547{
548 typedef multimap<string,string>::const_iterator ity;
549 typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
550
551 L(F("looking up branch name for %s\n") % version);
552
553 boost::char_separator<char> sep(".");
554 tokenizer tokens(version, sep);
555 vector<string> components;
556 copy(tokens.begin(), tokens.end(), back_inserter(components));
557
558 if (components.size() < 4)
559 {
560 L(F("version %s has too few components, using branch %s\n")
561% version % base);
562 return base;
563 }
564
565 string branch_version;
566 components[components.size() - 1] = components[components.size() - 2];
567 components[components.size() - 2] = "0";
568 for (size_t i = 0; i < components.size(); ++i)
569 {
570 if (i != 0)
571branch_version += ".";
572 branch_version += components[i];
573 }
574
575 pair<ity,ity> range = symbols.equal_range(branch_version);
576 if (range.first == symbols.end())
577 {
578 L(F("no branch %s found, using base '%s'\n")
579% branch_version % base);
580 return base;
581 }
582 else
583 {
584 string res = base;
585 res += ".";
586 res += range.first->second;
587 int num_results = 0;
588 while (range.first != range.second)
589{ range.first++; num_results++; }
590
591 if (num_results > 1)
592W(F("multiple entries (%d) for branch %s found, using: '%s'\n")
593 % num_results % branch_version % res);
594 else
595L(F("unique entry for branch %s found: '%s'\n")
596 % branch_version % res);
597 return res;
598 }
599}
600
601cvs_key::cvs_key(rcs_file const & r, string const & version,
602 cvs_history & cvs)
603{
604 map<string, shared_ptr<rcs_delta> >::const_iterator delta =
605 r.deltas.find(version);
606 I(delta != r.deltas.end());
607
608 map<string, shared_ptr<rcs_deltatext> >::const_iterator deltatext =
609 r.deltatexts.find(version);
610 I(deltatext != r.deltatexts.end());
611
612 {
613 struct tm t;
614 char const * dp = delta->second->date.c_str();
615 if (strptime(dp, "%y.%m.%d.%H.%M.%S", &t) == NULL)
616 I(strptime(dp, "%Y.%m.%d.%H.%M.%S", &t) != NULL);
617 time=mktime(&t);
618 }
619
620 string branch_name = find_branch_for_version(r.admin.symbols,
621 version,
622 cvs.base_branch);
623 branch = cvs.branch_interner.intern(branch_name);
624 changelog = cvs.changelog_interner.intern(deltatext->second->log);
625 author = cvs.author_interner.intern(delta->second->author);
626}
627
628
629cvs_history::cvs_history() :
630 n_versions("versions"),
631 n_tree_branches("tree branches"),
632 n_file_branches("file branches")
633{
634 stk.push(shared_ptr<cvs_state>(new cvs_state()));
635}
636
637void cvs_history::set_filename(string const & file,
638 file_id const & ident)
639{
640 L(F("importing file '%s'\n") % file);
641 I(file.size() > 2);
642 I(file.substr(file.size() - 2) == string(",v"));
643 string ss = file;
644 ui.set_tick_trailer(ss);
645 ss.resize(ss.size() - 2);
646 curr_file = file_path(ss);
647}
648
649bool cvs_history::find_key_and_state(rcs_file const & r,
650 string const & version,
651 cvs_key & key,
652 shared_ptr<cvs_state> & state)
653{
654 I(stk.size() > 0);
655 map< cvs_key, shared_ptr<cvs_state> > & substates = stk.top()->substates;
656 cvs_key nk(r, version, *this);
657
658 // key+(window/2) is in the future, key-(window/2) is in the past. the
659 // past is considered "greater than" the future in this map, so we take:
660 //
661 // - new, the lower bound of key+(window/2) in the map
662 // - old, the upper bound of key-(window/2) in the map
663 //
664 // and search all the nodes inside this section, from new to old bound.
665
666 map< cvs_key, shared_ptr<cvs_state> >::const_iterator i_new, i_old, i;
667 cvs_key k_new(nk), k_old(nk);
668
669 if (static_cast<time_t>(k_new.time + constants::cvs_window / 2) > k_new.time)
670 k_new.time += constants::cvs_window / 2;
671
672 if (static_cast<time_t>(k_old.time - constants::cvs_window / 2) < k_old.time)
673 k_old.time -= constants::cvs_window / 2;
674
675 i_new = substates.lower_bound(k_new);
676 i_old = substates.upper_bound(k_old);
677
678 for (i = i_new; i != i_old; ++i)
679 {
680 if (i->first.similar_enough(nk))
681{
682 key = i->first;
683 state = i->second;
684 return true;
685}
686 }
687 key = nk;
688 state = shared_ptr<cvs_state>(new cvs_state());
689 substates.insert(make_pair(key, state));
690 return false;
691}
692
693void cvs_history::push_branch(rcs_file const & r,
694 string const & branchpoint_rcs_version_num)
695{
696 cvs_key k;
697 shared_ptr<cvs_state> s;
698 ++n_file_branches;
699
700 I(stk.size() > 0);
701 I(find_key_and_state (r, branchpoint_rcs_version_num, k, s));
702 if (s->substates.size() > 0)
703 {
704 L(F("pushing existing branch for %s : %s\n")
705% curr_file % branchpoint_rcs_version_num);
706 }
707 else
708 {
709 ++n_tree_branches;
710 L(F("pushing new branch for %s : %s\n")
711% curr_file % branchpoint_rcs_version_num);
712 }
713 stk.push(s);
714}
715
716void cvs_history::note_file_edge(rcs_file const & r,
717 string const & prev_rcs_version_num,
718 string const & next_rcs_version_num,
719 file_id const & prev_version,
720 file_id const & next_version)
721{
722
723 cvs_key k;
724 shared_ptr<cvs_state> s;
725
726 I(stk.size() > 0);
727 I(! curr_file().empty());
728
729 // we always aggregate in-edges in children, but we will also create
730 // parents as we encounter them.
731 if (stk.size() == 1)
732 {
733 // we are on the trunk, prev is child, next is parent.
734 L(F("noting trunk edge %s : %s -> %s\n") % curr_file
735% next_rcs_version_num
736% prev_rcs_version_num);
737 // find_key_and_state (r, next_rcs_version_num, k, s); // just to create it if necessary
738 find_key_and_state (r, prev_rcs_version_num, k, s);
739 s->in_edges.insert(cvs_file_edge(next_version, curr_file,
740 prev_version, curr_file,
741 *this));
742 }
743 else
744 {
745 // we are on a branch, prev is parent, next is child.
746 L(F("noting branch edge %s : %s -> %s\n") % curr_file
747% prev_rcs_version_num
748% next_rcs_version_num);
749 find_key_and_state (r, next_rcs_version_num, k, s);
750 s->in_edges.insert(cvs_file_edge(prev_version, curr_file,
751 next_version, curr_file,
752 *this));
753 }
754
755 // add this file and youngest version to the manifest if we've never seen it before
756 if (head_manifest.find(curr_file) == head_manifest.end())
757 head_manifest.insert(make_pair(curr_file, prev_version));
758
759 ++n_versions;
760}
761
762void cvs_history::pop_branch()
763{
764 I(stk.size() > 1);
765 I(stk.top()->substates.size() > 0);
766 stk.pop();
767}
768
769
770class cvs_tree_walker : public tree_walker
771{
772 cvs_history & cvs;
773 database & db;
774public:
775 cvs_tree_walker(cvs_history & c, database & d) :
776 cvs(c), db(d)
777 {
778 }
779 virtual void visit_file(file_path const & path)
780 {
781 string file = path();
782 if (file.substr(file.size() - 2) == string(",v"))
783 {
784import_rcs_file_with_cvs(file, db, cvs);
785 }
786 else
787 L(F("skipping non-RCS file %s\n") % file);
788 }
789 virtual ~cvs_tree_walker() {}
790};
791
792
793// this is for a branch edge, in which the ancestry is the opposite
794// direction as the storage delta. nb: the terms 'parent' and 'child' are
795// *always* ancestry terms; we refer to 'old' and 'new' for storage system
796// terms (where, perhaps confusingly, old versions are those constructed by
797// applying deltas from new versions, so in fact we are successively
798// creating older and older versions here. 'old' and 'new' do not refer to
799// the execution time of this program, but rather the storage system's view
800// of time.
801
802void store_branch_manifest_edge(manifest_map const & parent,
803manifest_map const & child,
804manifest_id const & parent_id,
805manifest_id const & child_id,
806app_state & app,
807cvs_history & cvs)
808{
809
810 unsigned long p, c;
811 p = cvs.manifest_version_interner.intern(parent_id.inner()());
812 c = cvs.manifest_version_interner.intern(child_id.inner()());
813
814 if (cvs.manifest_cycle_detector.edge_makes_cycle(p,c))
815 {
816 L(F("skipping cyclical branch edge %s -> %s\n")
817% parent_id % child_id);
818 }
819 else
820 {
821 L(F("storing branch manifest edge %s -> %s\n")
822% parent_id % child_id);
823
824 cvs.manifest_cycle_detector.put_edge(p,c);
825
826 // in this case, the ancestry-based 'child' is on a branch, so it is
827 // an 'old' version as far as the storage system is concerned; that
828 // is to say it is constructed by first building a 'new' version (the
829 // ancestry-based 'parent') and then following a delta to the
830 // child. remember that the storage system assumes that all deltas go
831 // from temporally new -> temporally old. so the delta should go from
832 // parent (new) -> child (old)
833
834 base64< gzip<delta> > del;
835 diff(parent, child, del);
836 rcs_put_raw_manifest_edge(child_id.inner(),
837parent_id.inner(),
838del, app.db);
839 packet_db_writer dbw(app);
840 cert_manifest_ancestor(parent_id, child_id, app, dbw);
841 }
842}
843
844// this is for a trunk edge, in which the ancestry is the
845// same direction as the storage delta
846void store_trunk_manifest_edge(manifest_map const & parent,
847 manifest_map const & child,
848 manifest_id const & parent_id,
849 manifest_id const & child_id,
850 app_state & app,
851 cvs_history & cvs)
852{
853
854 unsigned long p, c;
855 p = cvs.manifest_version_interner.intern(parent_id.inner()());
856 c = cvs.manifest_version_interner.intern(child_id.inner()());
857
858 if (cvs.manifest_cycle_detector.edge_makes_cycle(p,c))
859 {
860 L(F("skipping cyclical trunk edge %s -> %s\n")
861% parent_id % child_id);
862 }
863 else
864 {
865 L(F("storing trunk manifest edge %s -> %s\n")
866% parent_id % child_id);
867
868 cvs.manifest_cycle_detector.put_edge(p,c);
869
870 // in this case, the ancestry-based 'child' is on a trunk, so it is
871 // a 'new' version as far as the storage system is concerned; that
872 // is to say that the ancestry-based 'parent' is a temporally older
873 // tree version, which can be constructed from the 'newer' child. so
874 // the delta should run from child (new) -> parent (old).
875
876 base64< gzip<delta> > del;
877 diff(child, parent, del);
878 rcs_put_raw_manifest_edge(parent_id.inner(),
879child_id.inner(),
880del, app.db);
881 packet_db_writer dbw(app);
882 cert_manifest_ancestor(parent_id, child_id, app, dbw);
883 }
884}
885
886void store_auxiliary_certs(cvs_key const & key,
887 manifest_id const & id,
888 app_state & app,
889 cvs_history const & cvs)
890{
891 packet_db_writer dbw(app);
892 cert_manifest_in_branch(id, cert_value(cvs.branch_interner.lookup(key.branch)), app, dbw);
893 cert_manifest_author(id, cvs.author_interner.lookup(key.author), app, dbw);
894 cert_manifest_changelog(id, cvs.changelog_interner.lookup(key.changelog), app, dbw);
895 cert_manifest_date_time(id, key.time, app, dbw);
896}
897
898// we call this when we're going child -> parent, i.e. when we're walking
899// up the trunk.
900void build_parent_state(shared_ptr<cvs_state> state,
901manifest_map & state_map,
902cvs_history & cvs)
903{
904 for (set<cvs_file_edge>::const_iterator f = state->in_edges.begin();
905 f != state->in_edges.end(); ++f)
906 {
907 file_id fid(cvs.file_version_interner.lookup(f->parent_version));
908 file_path pth(cvs.path_interner.lookup(f->parent_path));
909 state_map[pth] = fid;
910 }
911 L(F("logical changeset from child -> parent has %d file deltas\n")
912 % state->in_edges.size());
913}
914
915// we call this when we're going parent -> child, i.e. when we're walking
916// down a branch.
917void build_child_state(shared_ptr<cvs_state> state,
918 manifest_map & state_map,
919 cvs_history & cvs)
920{
921 for (set<cvs_file_edge>::const_iterator f = state->in_edges.begin();
922 f != state->in_edges.end(); ++f)
923 {
924 file_id fid(cvs.file_version_interner.lookup(f->child_version));
925 file_path pth(cvs.path_interner.lookup(f->child_path));
926 state_map[pth] = fid;
927 }
928 L(F("logical changeset from parent -> child has %d file deltas\n")
929 % state->in_edges.size());
930}
931
932void import_substates(ticker & n_edges,
933 ticker & n_branches,
934 shared_ptr<cvs_state> state,
935 manifest_map parent_map,
936 cvs_history & cvs,
937 app_state & app)
938{
939 manifest_id parent_id;
940 calculate_ident(parent_map, parent_id);
941 manifest_map child_map = parent_map;
942
943 if (state->substates.size() > 0)
944 ++n_branches;
945
946 // these are all sub-branches, so we look through them temporally
947 // *backwards* from oldest to newest
948 for (map< cvs_key, shared_ptr<cvs_state> >::reverse_iterator i = state->substates.rbegin();
949 i != state->substates.rend(); ++i)
950 {
951 manifest_id child_id;
952 build_child_state(i->second, child_map, cvs);
953 calculate_ident(child_map, child_id);
954 store_branch_manifest_edge(parent_map, child_map, parent_id, child_id, app, cvs);
955 store_auxiliary_certs(i->first, child_id, app, cvs);
956 if (i->second->substates.size() > 0)
957import_substates(n_edges, n_branches, i->second, child_map, cvs, app);
958
959 // now apply the edge to the parent, too, making parent = child
960 build_child_state(i->second, parent_map, cvs);
961 parent_id = child_id;
962 ++n_edges;
963 }
964}
965
966
967void import_cvs_repo(fs::path const & cvsroot, app_state & app)
968{
969
970 {
971 // early short-circuit to avoid failure after lots of work
972 rsa_keypair_id key;
973 N(guess_default_key(key,app),
974 F("no unique private key for cert construction"));
975 N(app.db.private_key_exists(key),
976 F("no private key '%s' found in database") % key);
977 }
978
979 cvs_history cvs;
980 N(app.branch_name() != "", F("need base --branch argument for importing"));
981 cvs.base_branch = app.branch_name();
982
983 {
984 transaction_guard guard(app.db);
985 cvs_tree_walker walker(cvs, app.db);
986 N( fs::exists(cvsroot),
987 F("path %s does not exist") % cvsroot.string());
988 N( fs::is_directory(cvsroot),
989 F("path %s is not a directory") % cvsroot.string());
990 app.db.ensure_open();
991 N(chdir(cvsroot.native_directory_string().c_str()) == 0,
992 F("could not change directory to %s") % cvsroot.string());
993 walk_tree(walker);
994 guard.commit();
995 }
996
997 P(F("phase 1 (version import) complete\n"));
998
999 I(cvs.stk.size() == 1);
1000 shared_ptr<cvs_state> state = cvs.stk.top();
1001 manifest_map child_map = cvs.head_manifest;
1002 manifest_map parent_map = child_map;
1003 manifest_id child_id;
1004 calculate_ident (child_map, child_id);
1005
1006
1007 {
1008 ticker n_branches("branches"), n_edges("edges");
1009 transaction_guard guard(app.db);
1010
1011 // write the trunk head version
1012 if (!app.db.manifest_version_exists (child_id))
1013 {
1014manifest_data child_data;
1015write_manifest_map(child_map, child_data);
1016app.db.put_manifest(child_id, child_data);
1017 }
1018
1019 // these are all versions on the main trunk, so we look through them from
1020 // newest to oldest
1021 for (map< cvs_key, shared_ptr<cvs_state> >::const_iterator i = state->substates.begin();
1022 i != state->substates.end(); ++i)
1023 {
1024manifest_id parent_id;
1025build_parent_state(i->second, parent_map, cvs);
1026calculate_ident(parent_map, parent_id);
1027store_trunk_manifest_edge(parent_map, child_map, parent_id, child_id, app, cvs);
1028store_auxiliary_certs(i->first, child_id, app, cvs);
1029if (i->second->substates.size() > 0)
1030 import_substates(n_edges, n_branches, i->second, parent_map, cvs, app);
1031
1032// now apply the edge to the child, too, making child = parent
1033build_parent_state(i->second, child_map, cvs);
1034child_id = parent_id;
1035++n_edges;
1036 }
1037
1038 P(F("phase 2 (ancestry reconstruction) complete\n"));
1039
1040 guard.commit();
1041 }
1042}
1043

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status