monotone

monotone Mtn Source Tree

Root/rcs_import.cc

1// copyright (C) 2002, 2003, 2004 graydon hoare <graydon@pobox.com>
2// all rights reserved.
3// licensed to the public under the terms of the GNU GPL (>= 2)
4// see the file COPYING for details
5
6#include <algorithm>
7#include <iostream>
8#include <iterator>
9#include <map>
10#include <set>
11#include <sstream>
12#include <stack>
13#include <stdexcept>
14#include <string>
15#include <vector>
16
17#include <unistd.h>
18
19#include <boost/shared_ptr.hpp>
20#include <boost/scoped_ptr.hpp>
21#include <boost/lexical_cast.hpp>
22#include <boost/tokenizer.hpp>
23
24#include <boost/filesystem/path.hpp>
25#include <boost/filesystem/operations.hpp>
26#include <boost/filesystem/convenience.hpp>
27
28#include "app_state.hh"
29#include "cert.hh"
30#include "constants.hh"
31#include "cycle_detector.hh"
32#include "database.hh"
33#include "file_io.hh"
34#include "keys.hh"
35#include "interner.hh"
36#include "manifest.hh"
37#include "packet.hh"
38#include "rcs_file.hh"
39#include "sanity.hh"
40#include "transforms.hh"
41#include "ui.hh"
42
43using namespace std;
44using boost::shared_ptr;
45using boost::scoped_ptr;
46
47// cvs history recording stuff
48
49typedef unsigned long cvs_branchname;
50typedef unsigned long cvs_author;
51typedef unsigned long cvs_changelog;
52typedef unsigned long cvs_version;
53typedef unsigned long cvs_path;
54
55struct cvs_history;
56
57struct
58cvs_key
59{
60 cvs_key() {}
61 cvs_key(rcs_file const & r,
62 string const & version,
63 cvs_history & cvs);
64
65 inline bool similar_enough(cvs_key const & other) const
66 {
67 if (changelog != other.changelog)
68 return false;
69 if (author != other.author)
70 return false;
71 if (labs(time - other.time) > constants::cvs_window)
72 return false;
73 return true;
74 }
75
76 inline bool operator==(cvs_key const & other) const
77 {
78 return branch == other.branch &&
79 changelog == other.changelog &&
80 author == other.author &&
81 time == other.time;
82 }
83
84 inline bool operator<(cvs_key const & other) const
85 {
86 // nb: this must sort as > to construct the edges in the right direction
87 return time > other.time ||
88
89 (time == other.time
90 && author > other.author) ||
91
92 (time == other.time
93 && author == other.author
94 && changelog > other.changelog) ||
95
96 (time == other.time
97 && author == other.author
98 && changelog == other.changelog
99 && branch > other.branch);
100 }
101
102 cvs_branchname branch;
103 cvs_changelog changelog;
104 cvs_author author;
105 time_t time;
106};
107
108struct
109cvs_file_edge
110{
111 cvs_file_edge (file_id const & pv,
112 file_path const & pp,
113 bool pl,
114 file_id const & cv,
115 file_path const & cp,
116 bool cl,
117 cvs_history & cvs);
118 cvs_version parent_version;
119 cvs_path parent_path;
120 bool parent_live_p;
121 cvs_version child_version;
122 cvs_path child_path;
123 bool child_live_p;
124 inline bool operator<(cvs_file_edge const & other) const
125 {
126#if 0
127 return (parent_path < other.parent_path)
128 || ((parent_path == other.parent_path)
129 && ((parent_version < other.parent_version)
130 || ((parent_version == other.parent_version)
131 && ((parent_live_p < other.parent_live_p)
132 || ((parent_live_p == other.parent_live_p)
133 && ((child_path < other.child_path)
134 || ((child_path == other.child_path)
135 && ((child_version < other.child_version)
136 || ((child_version == other.child_version)
137 && (child_live_p < other.child_live_p) )))))))));
138#else
139 return (parent_path < other.parent_path)
140 || ((parent_path == other.parent_path)
141 && ((parent_version < other.parent_version)
142 || ((parent_version == other.parent_version)
143 && ((parent_live_p < other.parent_live_p)
144 || ((parent_live_p == other.parent_live_p)
145 && ((child_path < other.child_path)
146 || ((child_path == other.child_path)
147 && ((child_version < other.child_version)
148 || ((child_version == other.child_version)
149 && (child_live_p < other.child_live_p) )))))))));
150#endif
151 }
152};
153
154struct
155cvs_state
156{
157 set<cvs_file_edge> in_edges;
158 map< cvs_key, shared_ptr<cvs_state> > substates;
159};
160
161struct
162cvs_history
163{
164
165 interner<unsigned long> branch_interner;
166 interner<unsigned long> author_interner;
167 interner<unsigned long> changelog_interner;
168 interner<unsigned long> file_version_interner;
169 interner<unsigned long> path_interner;
170 interner<unsigned long> manifest_version_interner;
171
172 cycle_detector<unsigned long> manifest_cycle_detector;
173
174 bool find_key_and_state(rcs_file const & r,
175 string const & version,
176 cvs_key & key,
177 shared_ptr<cvs_state> & state);
178
179 typedef stack< shared_ptr<cvs_state> > state_stack;
180
181 map<unsigned long,
182 pair<cvs_key,
183 shared_ptr<cvs_state> > > branchpoints;
184
185 state_stack stk;
186 file_path curr_file;
187
188 string base_branch;
189
190 ticker n_versions;
191 ticker n_tree_branches;
192
193 cvs_history();
194 void set_filename(string const & file,
195 file_id const & ident);
196 void push_branch(rcs_file const & r,
197 string const & branchpoint_version,
198 string const & first_branch_version);
199 void note_file_edge(rcs_file const & r,
200 string const & prev_rcs_version_num,
201 string const & next_rcs_version_num,
202 file_id const & prev_version,
203 file_id const & next_version);
204 void find_branchpoint(rcs_file const & r,
205 string const & branchpoint_version,
206 string const & first_branch_version,
207 shared_ptr<cvs_state> & branchpoint);
208 void pop_branch();
209};
210
211
212// piece table stuff
213
214struct piece;
215
216struct
217piece_store
218{
219 vector< boost::shared_ptr<rcs_deltatext> > texts;
220 void index_deltatext(boost::shared_ptr<rcs_deltatext> const & dt,
221 vector<piece> & pieces);
222 void build_string(vector<piece> const & pieces,
223 string & out);
224 void reset() { texts.clear(); }
225};
226
227// FIXME: kludge, I was lazy and did not make this
228// a properly scoped variable.
229
230static piece_store global_pieces;
231
232
233struct
234piece
235{
236 piece(string::size_type p, string::size_type l, unsigned long id) :
237 pos(p), len(l), string_id(id) {}
238 string::size_type pos;
239 string::size_type len;
240 unsigned long string_id;
241 string operator*() const
242 {
243 return string(global_pieces.texts.at(string_id)->text.data() + pos, len);
244 }
245};
246
247
248void
249piece_store::build_string(vector<piece> const & pieces,
250 string & out)
251{
252 out.clear();
253 out.reserve(pieces.size() * 60);
254 for(vector<piece>::const_iterator i = pieces.begin();
255 i != pieces.end(); ++i)
256 out.append(texts.at(i->string_id)->text, i->pos, i->len);
257}
258
259void
260piece_store::index_deltatext(boost::shared_ptr<rcs_deltatext> const & dt,
261 vector<piece> & pieces)
262{
263 pieces.clear();
264 pieces.reserve(dt->text.size() / 30);
265 texts.push_back(dt);
266 unsigned long id = texts.size() - 1;
267 string::size_type begin = 0;
268 string::size_type end = dt->text.find('\n');
269 while(end != string::npos)
270 {
271 // nb: the piece includes the '\n'
272 pieces.push_back(piece(begin, (end - begin) + 1, id));
273 begin = end + 1;
274 end = dt->text.find('\n', begin);
275 }
276 if (begin != dt->text.size())
277 {
278 // the text didn't end with '\n', so neither does the piece
279 end = dt->text.size();
280 pieces.push_back(piece(begin, end - begin, id));
281 }
282}
283
284
285static void
286process_one_hunk(vector< piece > const & source,
287 vector< piece > & dest,
288 vector< piece >::const_iterator & i,
289 int & cursor)
290{
291 string directive = **i;
292 assert(directive.size() > 1);
293 ++i;
294
295 char code;
296 int pos, len;
297 sscanf(directive.c_str(), " %c %d %d", &code, &pos, &len);
298
299 try
300 {
301 if (code == 'a')
302 {
303 // 'ax y' means "copy from source to dest until cursor == x, then
304 // copy y lines from delta, leaving cursor where it is"
305 while (cursor < pos)
306 dest.push_back(source.at(cursor++));
307 I(cursor == pos);
308 while (len--)
309 dest.push_back(*i++);
310 }
311 else if (code == 'd')
312 {
313 // 'dx y' means "copy from source to dest until cursor == x-1,
314 // then increment cursor by y, ignoring those y lines"
315 while (cursor < (pos - 1))
316 dest.push_back(source.at(cursor++));
317 I(cursor == pos - 1);
318 cursor += len;
319 }
320 else
321 throw oops("unknown directive '" + directive + "'");
322 }
323 catch (std::out_of_range & oor)
324 {
325 throw oops("std::out_of_range while processing " + directive
326 + " with source.size() == "
327 + boost::lexical_cast<string>(source.size())
328 + " and cursor == "
329 + boost::lexical_cast<string>(cursor));
330 }
331}
332
333static void
334construct_version(vector< piece > const & source_lines,
335 string const & dest_version,
336 vector< piece > & dest_lines,
337 rcs_file const & r)
338{
339 dest_lines.clear();
340 dest_lines.reserve(source_lines.size());
341
342 I(r.deltas.find(dest_version) != r.deltas.end());
343 shared_ptr<rcs_delta> delta = r.deltas.find(dest_version)->second;
344
345 I(r.deltatexts.find(dest_version) != r.deltatexts.end());
346 shared_ptr<rcs_deltatext> deltatext = r.deltatexts.find(dest_version)->second;
347
348 vector<piece> deltalines;
349 global_pieces.index_deltatext(deltatext, deltalines);
350
351 int cursor = 0;
352 for (vector<piece>::const_iterator i = deltalines.begin();
353 i != deltalines.end(); )
354 process_one_hunk(source_lines, dest_lines, i, cursor);
355 while (cursor < static_cast<int>(source_lines.size()))
356 dest_lines.push_back(source_lines[cursor++]);
357}
358
359// FIXME: should these be someplace else? using 'friend' to reach into the
360// DB is stupid, but it's also stupid to put raw edge insert methods on the
361// DB itself. or is it? hmm.. encapsulation vs. usage guidance..
362void
363rcs_put_raw_file_edge(hexenc<id> const & old_id,
364 hexenc<id> const & new_id,
365 base64< gzip<delta> > const & del,
366 database & db)
367{
368 if (old_id == new_id)
369 {
370 L(F("skipping identity file edge\n"));
371 return;
372 }
373
374 if (db.file_version_exists(old_id))
375 {
376 // we already have a way to get to this old version,
377 // no need to insert another reconstruction path
378 L(F("existing path to %s found, skipping\n") % old_id);
379 }
380 else
381 {
382 I(db.exists(new_id, "files")
383 || db.delta_exists(new_id, "file_deltas"));
384 db.put_delta(old_id, new_id, del, "file_deltas");
385 }
386}
387
388void
389rcs_put_raw_manifest_edge(hexenc<id> const & old_id,
390 hexenc<id> const & new_id,
391 base64< gzip<delta> > const & del,
392 database & db)
393{
394 if (old_id == new_id)
395 {
396 L(F("skipping identity manifest edge\n"));
397 return;
398 }
399
400 if (db.manifest_version_exists(old_id))
401 {
402 // we already have a way to get to this old version,
403 // no need to insert another reconstruction path
404 L(F("existing path to %s found, skipping\n") % old_id);
405 }
406 else
407 {
408 db.put_delta(old_id, new_id, del, "manifest_deltas");
409 }
410}
411
412
413static void
414insert_into_db(data const & curr_data,
415 hexenc<id> const & curr_id,
416 vector< piece > const & next_lines,
417 data & next_data,
418 hexenc<id> & next_id,
419 database & db)
420{
421 // inserting into the DB
422 // note: curr_lines is a "new" (base) version
423 // and next_lines is an "old" (derived) version.
424 // all storage edges go from new -> old.
425 {
426 string tmp;
427 global_pieces.build_string(next_lines, tmp);
428 next_data = tmp;
429 }
430 base64< gzip<delta> > del;
431 diff(curr_data, next_data, del);
432 calculate_ident(next_data, next_id);
433 rcs_put_raw_file_edge(next_id, curr_id, del, db);
434}
435
436
437static void
438process_branch(string const & begin_version,
439 vector< piece > const & begin_lines,
440 data const & begin_data,
441 hexenc<id> const & begin_id,
442 rcs_file const & r,
443 database & db,
444 cvs_history & cvs)
445{
446 string curr_version = begin_version;
447 scoped_ptr< vector< piece > > next_lines(new vector<piece>);
448 scoped_ptr< vector< piece > > curr_lines(new vector<piece>
449 (begin_lines.begin(),
450 begin_lines.end()));
451 data curr_data(begin_data), next_data;
452 hexenc<id> curr_id(begin_id), next_id;
453
454 while(! (r.deltas.find(curr_version) == r.deltas.end()))
455 {
456 L(F("version %s has %d lines\n") % curr_version % curr_lines->size());
457
458 string next_version = r.deltas.find(curr_version)->second->next;
459 if (!next_version.empty())
460 { // construct this edge on our own branch
461 L(F("following RCS edge %s -> %s\n") % curr_version % next_version);
462
463 construct_version(*curr_lines, next_version, *next_lines, r);
464 L(F("constructed RCS version %s, inserting into database\n") %
465 next_version);
466
467 insert_into_db(curr_data, curr_id,
468 *next_lines, next_data, next_id, db);
469
470 cvs.note_file_edge (r, curr_version, next_version,
471 file_id(curr_id), file_id(next_id));
472 }
473 else
474 { L(F("revision %s has no successor\n") % curr_version);
475 if (curr_version=="1.1")
476 { // mark this file as newly present since this commit
477 // (and as not present before)
478
479 // perhaps this should get a member function of cvs_history ?
480 L(F("marking %s as not present in older manifests\n") % curr_version);
481 cvs_key k;
482 shared_ptr<cvs_state> s;
483 cvs.find_key_and_state(r, curr_version, k, s);
484 I(r.deltas.find(curr_version) != r.deltas.end());
485 bool live_p = r.deltas.find(curr_version)->second->state != "dead";
486 s->in_edges.insert(cvs_file_edge(curr_id, cvs.curr_file, false,
487 curr_id, cvs.curr_file, live_p,
488 cvs));
489 ++cvs.n_versions;
490 }
491 }
492
493 // recursively follow any branches rooted here
494 boost::shared_ptr<rcs_delta> curr_delta = r.deltas.find(curr_version)->second;
495 for(vector<string>::const_iterator i = curr_delta->branches.begin();
496 i != curr_delta->branches.end(); ++i)
497 {
498 L(F("following RCS branch %s\n") % (*i));
499 vector< piece > branch_lines;
500 construct_version(*curr_lines, *i, branch_lines, r);
501
502 data branch_data;
503 hexenc<id> branch_id;
504 insert_into_db(curr_data, curr_id,
505 branch_lines, branch_data, branch_id, db);
506 cvs.push_branch (r, curr_version, *i);
507
508 cvs.note_file_edge (r, curr_version, *i,
509 file_id(curr_id), file_id(branch_id));
510
511 process_branch(*i, branch_lines, branch_data, branch_id, r, db, cvs);
512 cvs.pop_branch();
513 L(F("finished RCS branch %s\n") % (*i));
514 }
515
516 if (!r.deltas.find(curr_version)->second->next.empty())
517 { // advance
518 curr_data = next_data;
519 curr_id = next_id;
520 curr_version = next_version;
521 swap(next_lines, curr_lines);
522 next_lines->clear();
523 }
524 else break;
525 }
526}
527
528
529static void
530import_rcs_file_with_cvs(string const & filename, database & db, cvs_history & cvs)
531{
532 rcs_file r;
533 L(F("parsing RCS file %s\n") % filename);
534 parse_rcs_file(filename, r);
535 L(F("parsed RCS file %s OK\n") % filename);
536
537 {
538 vector< piece > head_lines;
539 I(r.deltatexts.find(r.admin.head) != r.deltatexts.end());
540 I(r.deltas.find(r.admin.head) != r.deltas.end());
541
542 hexenc<id> id;
543 base64< gzip<data> > packed;
544 data dat(r.deltatexts.find(r.admin.head)->second->text);
545 calculate_ident(dat, id);
546 file_id fid = id;
547
548 cvs.set_filename (filename, fid);
549
550 if (! db.file_version_exists (fid))
551 {
552 pack(dat, packed);
553 file_data fdat = packed;
554 db.put_file(fid, fdat);
555 }
556
557 {
558 // create the head state in case it is a loner
559 cvs_key k;
560 shared_ptr<cvs_state> s;
561 L(F("noting head version %s : %s\n") % cvs.curr_file % r.admin.head);
562 cvs.find_key_and_state (r, r.admin.head, k, s);
563 }
564
565 global_pieces.reset();
566 global_pieces.index_deltatext(r.deltatexts.find(r.admin.head)->second, head_lines);
567 process_branch(r.admin.head, head_lines, dat, id, r, db, cvs);
568 global_pieces.reset();
569 }
570
571 ui.set_tick_trailer("");
572}
573
574
575void
576import_rcs_file(fs::path const & filename, database & db)
577{
578 cvs_history cvs;
579
580 I(! fs::is_directory(filename));
581 I(! filename.empty());
582
583 fs::path leaf = mkpath(filename.leaf());
584 fs::path branch = mkpath(filename.branch_path().string());
585
586 I(! branch.empty());
587 I(! leaf.empty());
588 I( fs::is_directory(branch));
589 I( fs::exists(branch));
590
591 I(chdir(filename.branch_path().native_directory_string().c_str()) == 0);
592
593 I(fs::exists(leaf));
594
595 import_rcs_file_with_cvs(leaf.native_file_string(), db, cvs);
596}
597
598
599// CVS importing stuff follows
600
601/*
602
603 we define a "cvs key" as a triple of author, commit time and
604 changelog. the equality of keys is a bit blurry due to a window of time
605 in which "the same" commit may begin and end. the window is evaluated
606 during the multimap walk though; for insertion in the multimap a true >
607 is used. a key identifies a particular commit.
608
609 we reconstruct the history of a CVS archive by accumulating file edges
610 into archive nodes. each node is called a "cvs_state", but it is really a
611 collection of file *edges* leading into that archive state. we accumulate
612 file edges by walking up the trunk and down the branches of each RCS file.
613
614 once we've got all the edges accumulated into archive nodes, we walk the
615 tree of cvs_states, up through the trunk and down through the branches,
616 carrying a manifest_map with us during the walk. for each edge, we
617 construct either the parent or child state of the edge (depending on
618 which way we're walking) and then calculate and write out a manifest
619 delta for the difference between the previous and current manifest map. we
620 also write out manifest certs, though the direction of ancestry changes
621 depending on whether we're going up the trunk or down the branches.
622
623 */
624
625cvs_file_edge::cvs_file_edge (file_id const & pv, file_path const & pp, bool pl,
626 file_id const & cv, file_path const & cp, bool cl,
627 cvs_history & cvs) :
628 parent_version(cvs.file_version_interner.intern(pv.inner()())),
629 parent_path(cvs.path_interner.intern(pp())),
630 parent_live_p(pl),
631 child_version(cvs.file_version_interner.intern(cv.inner()())),
632 child_path(cvs.path_interner.intern(cp())),
633 child_live_p(cl)
634{
635}
636
637
638static string
639find_branch_for_version(multimap<string,string> const & symbols,
640 string const & version,
641 string const & base)
642{
643 typedef multimap<string,string>::const_iterator ity;
644 typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
645
646 L(F("looking up branch name for %s\n") % version);
647
648 boost::char_separator<char> sep(".");
649 tokenizer tokens(version, sep);
650 vector<string> components;
651 copy(tokens.begin(), tokens.end(), back_inserter(components));
652
653 if (components.size() < 4)
654 {
655 L(F("version %s has too few components, using branch %s\n")
656 % version % base);
657 return base;
658 }
659
660 string branch_version;
661 components[components.size() - 1] = components[components.size() - 2];
662 components[components.size() - 2] = "0";
663 for (size_t i = 0; i < components.size(); ++i)
664 {
665 if (i != 0)
666 branch_version += ".";
667 branch_version += components[i];
668 }
669
670 pair<ity,ity> range = symbols.equal_range(branch_version);
671 if (range.first == symbols.end())
672 {
673 L(F("no branch %s found, using base '%s'\n")
674 % branch_version % base);
675 return base;
676 }
677 else
678 {
679 string res = base;
680 res += ".";
681 res += range.first->second;
682 int num_results = 0;
683 while (range.first != range.second)
684 { range.first++; num_results++; }
685
686 if (num_results > 1)
687 W(F("multiple entries (%d) for branch %s found, using: '%s'\n")
688 % num_results % branch_version % res);
689 else
690 L(F("unique entry for branch %s found: '%s'\n")
691 % branch_version % res);
692 return res;
693 }
694}
695
696cvs_key::cvs_key(rcs_file const & r, string const & version,
697 cvs_history & cvs)
698{
699 map<string, shared_ptr<rcs_delta> >::const_iterator delta =
700 r.deltas.find(version);
701 I(delta != r.deltas.end());
702
703 map<string, shared_ptr<rcs_deltatext> >::const_iterator deltatext =
704 r.deltatexts.find(version);
705 I(deltatext != r.deltatexts.end());
706
707 {
708 struct tm t;
709 // We need to initialize t to all zeros, because strptime has a habit of
710 // leaving bits of the data structure alone, letting garbage sneak into
711 // our output.
712 memset(&t, 0, sizeof(t));
713 char const * dp = delta->second->date.c_str();
714 L(F("Calculating time of %s\n") % dp);
715#ifdef WIN32
716 I(sscanf(dp, "%d.%d.%d.%d.%d.%d", &(t.tm_year), &(t.tm_mon),
717 &(t.tm_mday), &(t.tm_hour), &(t.tm_min), &(t.tm_sec))==6);
718 t.tm_mon--;
719 // Apparently some RCS files have 2 digit years, others four; tm always
720 // wants a 2 (or 3) digit year (years since 1900).
721 if (t.tm_year > 1900)
722 t.tm_year-=1900;
723#else
724 if (strptime(dp, "%y.%m.%d.%H.%M.%S", &t) == NULL)
725 I(strptime(dp, "%Y.%m.%d.%H.%M.%S", &t) != NULL);
726#endif
727 time=mktime(&t);
728 L(F("= %i\n") % time);
729 }
730
731 string branch_name = find_branch_for_version(r.admin.symbols,
732 version,
733 cvs.base_branch);
734 branch = cvs.branch_interner.intern(branch_name);
735 changelog = cvs.changelog_interner.intern(deltatext->second->log);
736 author = cvs.author_interner.intern(delta->second->author);
737}
738
739
740cvs_history::cvs_history() :
741 n_versions("versions", "v", 1),
742 n_tree_branches("branches", "b", 1)
743{
744 stk.push(shared_ptr<cvs_state>(new cvs_state()));
745}
746
747void
748cvs_history::set_filename(string const & file,
749 file_id const & ident)
750{
751 L(F("importing file '%s'\n") % file);
752 I(file.size() > 2);
753 I(file.substr(file.size() - 2) == string(",v"));
754 string ss = file;
755 ui.set_tick_trailer(ss);
756 ss.resize(ss.size() - 2);
757 // remove Attic/ if present
758 std::string::size_type last_slash=ss.rfind('/');
759 if (last_slash!=std::string::npos && last_slash>=5
760 && ss.substr(last_slash-5,6)=="Attic/")
761 ss.erase(last_slash-5,6);
762 curr_file = file_path(ss);
763}
764
765bool
766cvs_history::find_key_and_state(rcs_file const & r,
767 string const & version,
768 cvs_key & key,
769 shared_ptr<cvs_state> & state)
770{
771 I(stk.size() > 0);
772 map< cvs_key, shared_ptr<cvs_state> > & substates = stk.top()->substates;
773 cvs_key nk(r, version, *this);
774
775 // key+(window/2) is in the future, key-(window/2) is in the past. the
776 // past is considered "greater than" the future in this map, so we take:
777 //
778 // - new, the lower bound of key+(window/2) in the map
779 // - old, the upper bound of key-(window/2) in the map
780 //
781 // and search all the nodes inside this section, from new to old bound.
782
783 map< cvs_key, shared_ptr<cvs_state> >::const_iterator i_new, i_old, i;
784 cvs_key k_new(nk), k_old(nk);
785
786 if (static_cast<time_t>(k_new.time + constants::cvs_window / 2) > k_new.time)
787 k_new.time += constants::cvs_window / 2;
788
789 if (static_cast<time_t>(k_old.time - constants::cvs_window / 2) < k_old.time)
790 k_old.time -= constants::cvs_window / 2;
791
792 i_new = substates.lower_bound(k_new);
793 i_old = substates.upper_bound(k_old);
794
795 for (i = i_new; i != i_old; ++i)
796 {
797 if (i->first.similar_enough(nk))
798 {
799 key = i->first;
800 state = i->second;
801 return true;
802 }
803 }
804 key = nk;
805 state = shared_ptr<cvs_state>(new cvs_state());
806 substates.insert(make_pair(key, state));
807 return false;
808}
809
810void
811cvs_history::find_branchpoint(rcs_file const & r,
812 string const & branchpoint_version,
813 string const & first_branch_version,
814 shared_ptr<cvs_state> & branchpoint)
815{
816 cvs_key k;
817 I(find_key_and_state(r, branchpoint_version, k, branchpoint));
818
819 string branch_name = find_branch_for_version(r.admin.symbols,
820 first_branch_version,
821 base_branch);
822
823 unsigned long branch = branch_interner.intern(branch_name);
824
825 map<unsigned long,
826 pair<cvs_key, shared_ptr<cvs_state> > >::const_iterator i
827 = branchpoints.find(branch);
828
829 if (i == branchpoints.end())
830 {
831 ++n_tree_branches;
832 L(F("beginning branch %s at %s : %s\n")
833 % branch_name % curr_file % branchpoint_version);
834 branchpoints.insert(make_pair(branch,
835 make_pair(k, branchpoint)));
836 }
837 else
838 {
839 // take the earlier of the new key and the existing branchpoint
840 if (k.time < i->second.first.time)
841 {
842 L(F("moving branch %s back to %s : %s\n")
843 % branch_name % curr_file % branchpoint_version);
844 shared_ptr<cvs_state> old = i->second.second;
845 set<cvs_key> moved;
846 for (map< cvs_key, shared_ptr<cvs_state> >::const_iterator j =
847 old->substates.begin(); j != old->substates.end(); ++j)
848 {
849 if (j->first.branch == branch)
850 {
851 branchpoint->substates.insert(*j);
852 moved.insert(j->first);
853 }
854 }
855 for (set<cvs_key>::const_iterator j = moved.begin(); j != moved.end();
856 ++j)
857 {
858 old->substates.erase(*j);
859 }
860 branchpoints[branch] = make_pair(k, branchpoint);
861 }
862 else
863 {
864 L(F("using existing branchpoint for %s at %s : %s\n")
865 % branch_name % curr_file % branchpoint_version);
866 branchpoint = i->second.second;
867 }
868 }
869}
870
871void
872cvs_history::push_branch(rcs_file const & r,
873 string const & branchpoint_version,
874 string const & first_branch_version)
875{
876 shared_ptr<cvs_state> branchpoint;
877 I(stk.size() > 0);
878 find_branchpoint(r, branchpoint_version,
879 first_branch_version, branchpoint);
880 stk.push(branchpoint);
881}
882
883void
884cvs_history::note_file_edge(rcs_file const & r,
885 string const & prev_rcs_version_num,
886 string const & next_rcs_version_num,
887 file_id const & prev_version,
888 file_id const & next_version)
889{
890
891 cvs_key k;
892 shared_ptr<cvs_state> s;
893
894 I(stk.size() > 0);
895 I(! curr_file().empty());
896
897 // we can't use operator[] since it is non-const
898 std::map<std::string, boost::shared_ptr<rcs_delta> >::const_iterator
899 prev_delta = r.deltas.find(prev_rcs_version_num),
900 next_delta = r.deltas.find(next_rcs_version_num);
901 I(prev_delta!=r.deltas.end());
902 I(next_delta!=r.deltas.end());
903 bool prev_alive = prev_delta->second->state!="dead";
904 bool next_alive = next_delta->second->state!="dead";
905
906 L(F("note_file_edge %s %d -> %s %d\n") % prev_rcs_version_num % prev_alive
907 % next_rcs_version_num % next_alive);
908
909 // we always aggregate in-edges in children, but we will also create
910 // parents as we encounter them.
911 if (stk.size() == 1)
912 {
913 // we are on the trunk, prev is child, next is parent.
914 L(F("noting trunk edge %s : %s -> %s\n") % curr_file
915 % next_rcs_version_num
916 % prev_rcs_version_num);
917 find_key_and_state (r, next_rcs_version_num, k, s); // just to create it if necessary
918 find_key_and_state (r, prev_rcs_version_num, k, s);
919
920 s->in_edges.insert(cvs_file_edge(next_version, curr_file, next_alive,
921 prev_version, curr_file, prev_alive,
922 *this));
923 }
924 else
925 {
926 // we are on a branch, prev is parent, next is child.
927 L(F("noting branch edge %s : %s -> %s\n") % curr_file
928 % prev_rcs_version_num
929 % next_rcs_version_num);
930 find_key_and_state (r, next_rcs_version_num, k, s);
931 s->in_edges.insert(cvs_file_edge(prev_version, curr_file, prev_alive,
932 next_version, curr_file, next_alive,
933 *this));
934 }
935
936 ++n_versions;
937}
938
939void
940cvs_history::pop_branch()
941{
942 I(stk.size() > 1);
943 I(stk.top()->substates.size() > 0);
944 stk.pop();
945}
946
947
948class
949cvs_tree_walker
950 : public tree_walker
951{
952 cvs_history & cvs;
953 database & db;
954public:
955 cvs_tree_walker(cvs_history & c, database & d) :
956 cvs(c), db(d)
957 {
958 }
959 virtual void visit_file(file_path const & path)
960 {
961 string file = path();
962 if (file.substr(file.size() - 2) == string(",v"))
963 {
964 import_rcs_file_with_cvs(file, db, cvs);
965 }
966 else
967 L(F("skipping non-RCS file %s\n") % file);
968 }
969 virtual ~cvs_tree_walker() {}
970};
971
972
973static void
974store_manifest_edge(manifest_map const & parent,
975 manifest_map const & child,
976 manifest_id const & parent_mid,
977 manifest_id const & child_mid,
978 app_state & app,
979 cvs_history & cvs,
980 unsigned long depth,
981 bool head_manifest_p)
982{
983
984 L(F("storing manifest %s (base %s)\n") % parent_mid % child_mid);
985
986 if (depth == 0 && head_manifest_p)
987 {
988 L(F("storing trunk head %s\n") % child_mid);
989 // the trunk branch has one very important manifest: the head.
990 // this is the "newest" of all manifests within the import, and
991 // we store it in its entirety.
992 if (! app.db.manifest_version_exists(child_mid))
993 {
994 manifest_data mdat;
995 write_manifest_map(child, mdat);
996 app.db.put_manifest(child_mid, mdat);
997 }
998 }
999
1000 if (null_id(parent_mid))
1001 {
1002 L(F("skipping null manifest\n"));
1003 return;
1004 }
1005
1006 unsigned long p, c;
1007 p = cvs.manifest_version_interner.intern(parent_mid.inner()());
1008 c = cvs.manifest_version_interner.intern(child_mid.inner()());
1009 if (cvs.manifest_cycle_detector.edge_makes_cycle(p,c))
1010 {
1011 L(F("skipping cyclical trunk manifest delta %s -> %s\n")
1012 % parent_mid % child_mid);
1013 if (depth == 0)
1014 {
1015 // if this is on the trunk, we are potentially breaking the chain
1016 // one would use to get to p. we need to make sure p exists.
1017 if (!app.db.manifest_version_exists(parent_mid))
1018 {
1019 manifest_data mdat;
1020 write_manifest_map(parent, mdat);
1021 app.db.put_manifest(parent_mid, mdat);
1022 }
1023 }
1024 else
1025 {
1026 // if this is on the trunk, we are potentially breaking the chain
1027 // one would use to get to c. we need to make sure c exists.
1028 if (!app.db.manifest_version_exists(child_mid))
1029 {
1030 manifest_data mdat;
1031 write_manifest_map(child, mdat);
1032 app.db.put_manifest(child_mid, mdat);
1033 }
1034 }
1035 return;
1036 }
1037
1038 cvs.manifest_cycle_detector.put_edge(p,c);
1039 if (depth == 0)
1040 {
1041 L(F("storing trunk manifest delta %s -> %s\n")
1042 % child_mid % parent_mid);
1043
1044 // in this case, the ancestry-based 'child' is on a trunk, so it is
1045 // a 'new' version as far as the storage system is concerned; that
1046 // is to say that the ancestry-based 'parent' is a temporally older
1047 // tree version, which can be constructed from the 'newer' child. so
1048 // the delta should run from child (new) -> parent (old).
1049
1050 base64< gzip<delta> > del;
1051 diff(child, parent, del);
1052 rcs_put_raw_manifest_edge(parent_mid.inner(),
1053 child_mid.inner(),
1054 del, app.db);
1055 }
1056 else
1057 {
1058 L(F("storing branch manifest delta %s -> %s\n")
1059 % parent_mid % child_mid);
1060
1061 // in this case, the ancestry-based 'child' is on a branch, so it is
1062 // an 'old' version as far as the storage system is concerned; that
1063 // is to say it is constructed by first building a 'new' version (the
1064 // ancestry-based 'parent') and then following a delta to the
1065 // child. remember that the storage system assumes that all deltas go
1066 // from temporally new -> temporally old. so the delta should go from
1067 // parent (new) -> child (old)
1068
1069 base64< gzip<delta> > del;
1070 diff(parent, child, del);
1071 rcs_put_raw_manifest_edge(child_mid.inner(),
1072 parent_mid.inner(),
1073 del, app.db);
1074 }
1075}
1076
1077
1078static void
1079store_auxiliary_certs(cvs_key const & key,
1080 revision_id const & id,
1081 app_state & app,
1082 cvs_history const & cvs)
1083{
1084 packet_db_writer dbw(app);
1085 cert_revision_in_branch(id, cert_value(cvs.branch_interner.lookup(key.branch)), app, dbw);
1086 cert_revision_author(id, cvs.author_interner.lookup(key.author), app, dbw);
1087 cert_revision_changelog(id, cvs.changelog_interner.lookup(key.changelog), app, dbw);
1088 cert_revision_date_time(id, key.time, app, dbw);
1089}
1090
1091static void
1092build_change_set(shared_ptr<cvs_state> state,
1093 manifest_map const & state_map,
1094 cvs_history & cvs,
1095 change_set & cs)
1096{
1097 change_set empty;
1098 cs = empty;
1099
1100 for (set<cvs_file_edge>::const_iterator f = state->in_edges.begin();
1101 f != state->in_edges.end(); ++f)
1102 {
1103 file_id fid(cvs.file_version_interner.lookup(f->child_version));
1104 file_path pth(cvs.path_interner.lookup(f->child_path));
1105 if (!f->child_live_p)
1106 {
1107 L(F("deleting entry state '%s' on '%s'\n") % fid % pth);
1108 cs.delete_file(pth);
1109 }
1110 else
1111 {
1112 manifest_map::const_iterator i = state_map.find(pth);
1113 if (i == state_map.end())
1114 {
1115 L(F("adding entry state '%s' on '%s'\n") % fid % pth);
1116 cs.add_file(pth, fid);
1117 }
1118 else if (manifest_entry_id(i) == fid)
1119 {
1120 L(F("skipping preserved entry state '%s' on '%s'\n")
1121 % fid % pth);
1122 }
1123 else
1124 {
1125 L(F("applying state delta on '%s' : '%s' -> '%s'\n")
1126 % pth % manifest_entry_id(i) % fid);
1127 cs.apply_delta(pth, manifest_entry_id(i), fid);
1128 }
1129 }
1130 }
1131 L(F("logical changeset from parent -> child has %d file state changes\n")
1132 % state->in_edges.size());
1133}
1134
1135
1136static void
1137import_states_recursive(ticker & n_edges,
1138 ticker & n_branches,
1139 shared_ptr<cvs_state> state,
1140 cvs_branchname branch_filter,
1141 revision_id parent_rid,
1142 manifest_id parent_mid,
1143 manifest_map parent_map,
1144 cvs_history & cvs,
1145 app_state & app,
1146 vector< pair<cvs_key, revision_set> > & revisions,
1147 unsigned long depth);
1148
1149static void
1150import_states_by_branch(ticker & n_edges,
1151 ticker & n_branches,
1152 shared_ptr<cvs_state> state,
1153 revision_id const & parent_rid,
1154 manifest_id const & parent_mid,
1155 manifest_map const & parent_map,
1156 cvs_history & cvs,
1157 app_state & app,
1158 vector< pair<cvs_key, revision_set> > & revisions,
1159 unsigned long depth)
1160{
1161 set<cvs_branchname> branches;
1162
1163 // collect all the branches
1164 for (map< cvs_key, shared_ptr<cvs_state> >::reverse_iterator i = state->substates.rbegin();
1165 i != state->substates.rend(); ++i)
1166 branches.insert(i->first.branch);
1167
1168 // walk each sub-branch in order
1169 for (set<cvs_branchname>::const_iterator branch = branches.begin();
1170 branch != branches.end(); ++branch)
1171 {
1172 import_states_recursive(n_edges, n_branches, state, *branch,
1173 parent_rid, parent_mid, parent_map,
1174 cvs, app, revisions, depth);
1175 }
1176}
1177
1178static void
1179import_states_recursive(ticker & n_edges,
1180 ticker & n_branches,
1181 shared_ptr<cvs_state> state,
1182 cvs_branchname branch_filter,
1183 revision_id parent_rid,
1184 manifest_id parent_mid,
1185 manifest_map parent_map,
1186 cvs_history & cvs,
1187 app_state & app,
1188 vector< pair<cvs_key, revision_set> > & revisions,
1189 unsigned long depth)
1190{
1191 if (state->substates.size() > 0)
1192 ++n_branches;
1193
1194 manifest_id child_mid;
1195 revision_id child_rid;
1196 manifest_map child_map = parent_map;
1197
1198 string branchname = cvs.branch_interner.lookup(branch_filter);
1199 ui.set_tick_trailer("building branch " + branchname);
1200
1201 // these are all sub-branches, so we look through them temporally
1202 // *backwards* from oldest to newest
1203 map< cvs_key, shared_ptr<cvs_state> >::reverse_iterator newest_branch_state;
1204 for (map< cvs_key, shared_ptr<cvs_state> >::reverse_iterator i = state->substates.rbegin();
1205 i != state->substates.rend(); ++i)
1206 {
1207 if (i->first.branch != branch_filter)
1208 continue;
1209 newest_branch_state = i;
1210 }
1211
1212 for (map< cvs_key, shared_ptr<cvs_state> >::reverse_iterator i = state->substates.rbegin();
1213 i != state->substates.rend(); ++i)
1214 {
1215 if (i->first.branch != branch_filter)
1216 continue;
1217
1218 revision_set rev;
1219 boost::shared_ptr<change_set> cs(new change_set());
1220 build_change_set(i->second, parent_map, cvs, *cs);
1221
1222 apply_change_set(*cs, child_map);
1223 calculate_ident(child_map, child_mid);
1224
1225 rev.new_manifest = child_mid;
1226 rev.edges.insert(make_pair(parent_rid, make_pair(parent_mid, cs)));
1227 calculate_ident(rev, child_rid);
1228
1229 revisions.push_back(make_pair(i->first, rev));
1230
1231 store_manifest_edge(parent_map, child_map,
1232 parent_mid, child_mid,
1233 app, cvs, depth, i == newest_branch_state);
1234
1235 if (i->second->substates.size() > 0)
1236 import_states_by_branch(n_edges, n_branches, i->second,
1237 child_rid, child_mid, child_map,
1238 cvs, app, revisions, depth+1);
1239
1240 // now apply same change set to parent_map, making parent_map == child_map
1241 apply_change_set(*cs, parent_map);
1242 parent_mid = child_mid;
1243 parent_rid = child_rid;
1244 ++n_edges;
1245 }
1246}
1247
1248void
1249import_cvs_repo(fs::path const & cvsroot,
1250 app_state & app)
1251{
1252
1253 {
1254 // early short-circuit to avoid failure after lots of work
1255 rsa_keypair_id key;
1256 N(guess_default_key(key,app),
1257 F("no unique private key for cert construction"));
1258 require_password(key, app);
1259 }
1260
1261 cvs_history cvs;
1262 N(app.branch_name() != "", F("need base --branch argument for importing"));
1263 cvs.base_branch = app.branch_name();
1264
1265 {
1266 transaction_guard guard(app.db);
1267 cvs_tree_walker walker(cvs, app.db);
1268 N( fs::exists(cvsroot),
1269 F("path %s does not exist") % cvsroot.string());
1270 N( fs::is_directory(cvsroot),
1271 F("path %s is not a directory") % cvsroot.string());
1272 app.db.ensure_open();
1273 N(chdir(cvsroot.native_directory_string().c_str()) == 0,
1274 F("could not change directory to %s") % cvsroot.string());
1275 walk_tree(walker);
1276 guard.commit();
1277 }
1278
1279 P(F("phase 1 (version import) complete\n"));
1280
1281 I(cvs.stk.size() == 1);
1282 shared_ptr<cvs_state> state = cvs.stk.top();
1283
1284 vector< pair<cvs_key, revision_set> > revisions;
1285 {
1286 ticker n_branches("finished branches", "b", 1);
1287 ticker n_edges("finished edges", "e", 1);
1288 transaction_guard guard(app.db);
1289 manifest_map root_manifest;
1290 manifest_id root_mid;
1291 revision_id root_rid;
1292
1293 import_states_by_branch(n_edges, n_branches, state,
1294 root_rid, root_mid,
1295 root_manifest, cvs, app, revisions, 0);
1296 P(F("phase 2 (ancestry reconstruction) complete\n"));
1297 guard.commit();
1298 }
1299
1300 {
1301 ticker n_revisions("written revisions", "r", 1);
1302 ui.set_tick_trailer("");
1303 transaction_guard guard(app.db);
1304 for (vector< pair<cvs_key, revision_set> >::const_iterator
1305 i = revisions.begin(); i != revisions.end(); ++i)
1306 {
1307 revision_id rid;
1308 calculate_ident(i->second, rid);
1309 if (! app.db.revision_exists(rid))
1310 app.db.put_revision(rid, i->second);
1311 store_auxiliary_certs(i->first, rid, app, cvs);
1312 ++n_revisions;
1313 }
1314 P(F("phase 3 (writing revisions) complete\n"));
1315 guard.commit();
1316 }
1317}
1318

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status