monotone

monotone Mtn Source Tree

Root/rcs_import.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <algorithm>
12#include <iterator>
13#include <list>
14#include <map>
15#include <set>
16#include <sstream>
17#include <stack>
18#include <stdexcept>
19#include "vector.hh"
20
21#include <unistd.h>
22
23#include <boost/shared_ptr.hpp>
24#include <boost/scoped_ptr.hpp>
25#include "lexical_cast.hh"
26#include <boost/tokenizer.hpp>
27
28#include "app_state.hh"
29#include "cert.hh"
30#include "constants.hh"
31#include "cycle_detector.hh"
32#include "database.hh"
33#include "file_io.hh"
34#include "interner.hh"
35#include "keys.hh"
36#include "paths.hh"
37#include "platform-wrapped.hh"
38#include "project.hh"
39#include "rcs_file.hh"
40#include "revision.hh"
41#include "safe_map.hh"
42#include "sanity.hh"
43#include "transforms.hh"
44#include "ui.hh"
45
46using std::make_pair;
47using std::map;
48using std::multimap;
49using std::out_of_range;
50using std::pair;
51using std::search;
52using std::set;
53using std::sscanf;
54using std::stable_sort;
55using std::stack;
56using std::string;
57using std::vector;
58
59using boost::scoped_ptr;
60using boost::shared_ptr;
61using boost::lexical_cast;
62
63// cvs history recording stuff
64
65typedef unsigned long cvs_branchname;
66typedef unsigned long cvs_author;
67typedef unsigned long cvs_changelog;
68typedef unsigned long cvs_version;
69typedef unsigned long cvs_path;
70typedef unsigned long cvs_tag;
71
72struct cvs_history;
73
// One commit of one file, built from the delta/deltatext records of a
// single RCS version (see the constructor definition further down,
// which does the actual parsing and interning).
struct
cvs_commit
{
  cvs_commit(rcs_file const & r,
             string const & rcs_version,
             file_id const & ident,
             cvs_history & cvs);

  // true for CVS's placeholder "file foo was initially added on
  // branch bar" dead revision (detected by is_sbr()); such commits
  // are not real history and are skipped by process_branch().
  bool is_synthetic_branch_root;
  time_t time;             // commit time parsed from the RCS date field
  bool alive;              // false when the RCS state is "dead" (file removed)
  cvs_author author;       // interned author name
  cvs_changelog changelog; // interned log message
  cvs_version version;     // interned monotone file_id of this content
  cvs_path path;           // interned path of the file in the tree
  vector<cvs_tag> tags;    // interned tags attached to this RCS version

  // Order by time only; stable_sort then preserves the original
  // relative order of commits with equal timestamps.
  bool operator<(cvs_commit const & other) const
  {
    return time < other.time;
  }
};
96
97struct
98cvs_branch
99{
100 bool has_a_branchpoint;
101 bool has_a_commit;
102 time_t last_branchpoint;
103 time_t first_commit;
104
105 map<cvs_path, cvs_version> live_at_beginning;
106 vector<cvs_commit> lineage;
107
108 cvs_branch()
109 : has_a_branchpoint(false),
110 has_a_commit(false),
111 last_branchpoint(0),
112 first_commit(0)
113 {
114 }
115
116 void note_commit(time_t now)
117 {
118 if (!has_a_commit)
119 {
120 first_commit = now;
121 }
122 else
123 {
124 if (now < first_commit)
125 first_commit = now;
126 }
127 has_a_commit = true;
128 }
129
130 void note_branchpoint(time_t now)
131 {
132 has_a_branchpoint = true;
133 if (now > last_branchpoint)
134 last_branchpoint = now;
135 }
136
137 time_t beginning() const
138 {
139 I(has_a_branchpoint || has_a_commit);
140 if (has_a_commit)
141 {
142 I(first_commit != 0);
143 return first_commit;
144 }
145 else
146 {
147 I(last_branchpoint != 0);
148 return last_branchpoint;
149 }
150 }
151
152 void append_commit(cvs_commit const & c)
153 {
154 I(c.time != 0);
155 note_commit(c.time);
156 lineage.push_back(c);
157 }
158};
159
// Global state for one cvs import run: interners for every string
// domain, the branch tables built while scanning RCS files, and the
// stack of branches currently receiving commits.
struct
cvs_history
{

  interner<unsigned long> branch_interner;
  interner<unsigned long> author_interner;
  interner<unsigned long> changelog_interner;
  interner<unsigned long> file_version_interner;
  interner<unsigned long> path_interner;
  interner<unsigned long> tag_interner;

  // assume admin has foo:X.Y.0.N in it, then
  // this multimap contains entries of the form
  // X.Y -> foo
  multimap<string, string> branchpoints;

  // and this map contains entries of the form
  // X.Y.N.1 -> foo
  map<string, string> branch_first_entries;

  // branch name -> branch
  map<string, shared_ptr<cvs_branch> > branches;
  shared_ptr<cvs_branch> trunk;

  // stack of branches we're injecting states into
  stack< shared_ptr<cvs_branch> > stk;
  stack< cvs_branchname > bstk;

  // tag -> time, revision
  //
  // used to resolve the *last* revision which has a given tag
  // applied; this is the revision which wins the tag.
  map<unsigned long, pair<time_t, revision_id> > resolved_tags;

  // the RCS file currently being imported (set by set_filename),
  // in plain and interned form
  file_path curr_file;
  cvs_path curr_file_interned;

  // name prepended (with a '.') to every public branch name in
  // push_branch; presumably the import's base branch -- confirm at
  // the call site that sets it
  string base_branch;

  ticker n_versions;
  ticker n_tree_branches;

  cvs_history();
  void set_filename(string const & file,
                    file_id const & ident);

  void index_branchpoint_symbols(rcs_file const & r);

  void push_branch(string const & branch_name, bool private_branch);
  void pop_branch();
};
211
212
213static bool
214is_sbr(shared_ptr<rcs_delta> dl,
215 shared_ptr<rcs_deltatext> dt)
216{
217
218 // CVS abuses the RCS format a bit (ha!) when storing a file which
219 // was only added on a branch: on the root of the branch there'll be
220 // a commit with dead state, empty text, and a log message
221 // containing the string "file foo was initially added on branch
222 // bar". We recognize and ignore these cases, as they do not
223 // "really" represent commits to be clustered together.
224
225 if (dl->state != "dead")
226 return false;
227
228 if (!dt->text.empty())
229 return false;
230
231 string log_bit = "was initially added on branch";
232 string::const_iterator i = search(dt->log.begin(),
233 dt->log.end(),
234 log_bit.begin(),
235 log_bit.end());
236
237 return i != dt->log.end();
238}
239
240
// Build a cvs_commit from the RCS records of 'rcs_version': parse the
// date, detect synthetic branch roots, and intern author / changelog /
// path / content-id / tags through 'cvs'.
cvs_commit::cvs_commit(rcs_file const & r,
                       string const & rcs_version,
                       file_id const & ident,
                       cvs_history & cvs)
{
  // Both the delta (metadata) and deltatext (log + text) records for
  // this version must exist.
  map<string, shared_ptr<rcs_delta> >::const_iterator delta =
    r.deltas.find(rcs_version);
  I(delta != r.deltas.end());

  map<string, shared_ptr<rcs_deltatext> >::const_iterator deltatext =
    r.deltatexts.find(rcs_version);
  I(deltatext != r.deltatexts.end());

  // Parse the RCS date field ("YY.mm.dd.HH.MM.SS" or
  // "YYYY.mm.dd.HH.MM.SS") into a time_t.
  struct tm t;
  // We need to initialize t to all zeros, because strptime has a habit of
  // leaving bits of the data structure alone, letting garbage sneak into
  // our output.
  memset(&t, 0, sizeof(t));
  char const * dp = delta->second->date.c_str();
  L(FL("Calculating time of %s") % dp);
#ifdef HAVE_STRPTIME
  // Try the two-digit-year format first, then require the four-digit one.
  if (strptime(dp, "%y.%m.%d.%H.%M.%S", &t) == NULL)
    I(strptime(dp, "%Y.%m.%d.%H.%M.%S", &t) != NULL);
#else
  I(sscanf(dp, "%d.%d.%d.%d.%d.%d", &(t.tm_year), &(t.tm_mon),
           &(t.tm_mday), &(t.tm_hour), &(t.tm_min), &(t.tm_sec))==6);
  t.tm_mon--; // struct tm months are 0-based
  // Apparently some RCS files have 2 digit years, others four; tm always
  // wants a 2 (or 3) digit year (years since 1900).
  if (t.tm_year > 1900)
    t.tm_year-=1900;
#endif
  time = mktime(&t);
  L(FL("= %i") % time);

  is_synthetic_branch_root = is_sbr(delta->second,
                                    deltatext->second);

  // "dead" is CVS-speak for a removed file.
  alive = delta->second->state != "dead";
  if (is_synthetic_branch_root)
    changelog = cvs.changelog_interner.intern("synthetic branch root changelog");
  else
    changelog = cvs.changelog_interner.intern(deltatext->second->log);
  author = cvs.author_interner.intern(delta->second->author);
  path = cvs.curr_file_interned;
  version = cvs.file_version_interner.intern(ident.inner()());

  // Collect every symbol attached to exactly this RCS version; these
  // become the commit's tags.
  typedef multimap<string,string>::const_iterator ity;
  pair<ity,ity> range = r.admin.symbols.equal_range(rcs_version);
  for (ity i = range.first; i != range.second; ++i)
    {
      if (i->first == rcs_version)
        {
          L(FL("version %s -> tag %s") % rcs_version % i->second);
          tags.push_back(cvs.tag_interner.intern(i->second));
        }
    }

}
300
301
302// piece table stuff
303
304struct piece;
305
// Backing store for the piece table used during RCS delta
// reconstruction: 'texts' owns the raw deltatexts, and piece objects
// refer into them by index (see struct piece below).
struct
piece_store
{
  vector< shared_ptr<rcs_deltatext> > texts;
  // Store 'dt' and split its text into newline-delimited pieces.
  void index_deltatext(shared_ptr<rcs_deltatext> const & dt,
                       vector<piece> & pieces);
  // Concatenate the text referenced by 'pieces' into 'out'.
  void build_string(vector<piece> const & pieces,
                    string & out);
  // Drop all stored texts; invalidates every outstanding piece.
  void reset() { texts.clear(); }
};
316
// FIXME: kludge, I was lazy and did not make this
// a properly scoped variable.
//
// Shared store that every piece references; reset per RCS file in
// import_rcs_file_with_cvs.
static piece_store global_pieces;
321
322
323struct
324piece
325{
326 piece(string::size_type p, string::size_type l, unsigned long id) :
327 pos(p), len(l), string_id(id) {}
328 string::size_type pos;
329 string::size_type len;
330 unsigned long string_id;
331 string operator*() const
332 {
333 return string(global_pieces.texts.at(string_id)->text.data() + pos, len);
334 }
335};
336
337
338void
339piece_store::build_string(vector<piece> const & pieces,
340 string & out)
341{
342 out.clear();
343 out.reserve(pieces.size() * 60);
344 for(vector<piece>::const_iterator i = pieces.begin();
345 i != pieces.end(); ++i)
346 out.append(texts.at(i->string_id)->text, i->pos, i->len);
347}
348
349void
350piece_store::index_deltatext(shared_ptr<rcs_deltatext> const & dt,
351 vector<piece> & pieces)
352{
353 pieces.clear();
354 pieces.reserve(dt->text.size() / 30);
355 texts.push_back(dt);
356 unsigned long id = texts.size() - 1;
357 string::size_type begin = 0;
358 string::size_type end = dt->text.find('\n');
359 while(end != string::npos)
360 {
361 // nb: the piece includes the '\n'
362 pieces.push_back(piece(begin, (end - begin) + 1, id));
363 begin = end + 1;
364 end = dt->text.find('\n', begin);
365 }
366 if (begin != dt->text.size())
367 {
368 // the text didn't end with '\n', so neither does the piece
369 end = dt->text.size();
370 pieces.push_back(piece(begin, end - begin, id));
371 }
372}
373
374
// Apply one RCS diff directive ('a' append or 'd' delete).  'i'
// points at the directive line within the delta text and is advanced
// past it (and, for 'a', past the payload lines too).  'cursor' is
// the current position in 'source'; lines are copied from 'source'
// to 'dest' as the cursor advances.
static void
process_one_hunk(vector< piece > const & source,
                 vector< piece > & dest,
                 vector< piece >::const_iterator & i,
                 int & cursor)
{
  string directive = **i;
  assert(directive.size() > 1);
  ++i;

  try
    {
      char code;
      int pos, len;
      if (sscanf(directive.c_str(), " %c %d %d", &code, &pos, &len) != 3)
        throw oops("illformed directive '" + directive + "'");

      if (code == 'a')
        {
          // 'ax y' means "copy from source to dest until cursor == x, then
          // copy y lines from delta, leaving cursor where it is"
          while (cursor < pos)
            dest.push_back(source.at(cursor++));
          I(cursor == pos);
          while (len--)
            dest.push_back(*i++);
        }
      else if (code == 'd')
        {
          // 'dx y' means "copy from source to dest until cursor == x-1,
          // then increment cursor by y, ignoring those y lines"
          while (cursor < (pos - 1))
            dest.push_back(source.at(cursor++));
          I(cursor == pos - 1);
          cursor += len;
        }
      else
        throw oops("unknown directive '" + directive + "'");
    }
  catch (out_of_range &)
    {
      // .at() walked past the end of 'source': the delta does not
      // match the version we reconstructed.  Report where we were.
      throw oops("out_of_range while processing " + directive
                 + " with source.size() == "
                 + lexical_cast<string>(source.size())
                 + " and cursor == "
                 + lexical_cast<string>(cursor));
    }
}
423
424static void
425construct_version(vector< piece > const & source_lines,
426 string const & dest_version,
427 vector< piece > & dest_lines,
428 rcs_file const & r)
429{
430 dest_lines.clear();
431 dest_lines.reserve(source_lines.size());
432
433 I(r.deltas.find(dest_version) != r.deltas.end());
434 shared_ptr<rcs_delta> delta = r.deltas.find(dest_version)->second;
435
436 I(r.deltatexts.find(dest_version) != r.deltatexts.end());
437 shared_ptr<rcs_deltatext> deltatext = r.deltatexts.find(dest_version)->second;
438
439 vector<piece> deltalines;
440 global_pieces.index_deltatext(deltatext, deltalines);
441
442 int cursor = 0;
443 for (vector<piece>::const_iterator i = deltalines.begin();
444 i != deltalines.end(); )
445 process_one_hunk(source_lines, dest_lines, i, cursor);
446 while (cursor < static_cast<int>(source_lines.size()))
447 dest_lines.push_back(source_lines[cursor++]);
448}
449
450// FIXME: should these be someplace else? using 'friend' to reach into the
451// DB is stupid, but it's also stupid to put raw edge insert methods on the
452// DB itself. or is it? hmm.. encapsulation vs. usage guidance..
// Store a reverse reconstruction edge new_id -> old_id (delta 'del')
// in the database, unless the edge is an identity edge or the old
// version is already reachable by some other path.
void
rcs_put_raw_file_edge(hexenc<id> const & old_id,
                      hexenc<id> const & new_id,
                      delta const & del,
                      database & db)
{
  if (old_id == new_id)
    {
      L(FL("skipping identity file edge"));
      return;
    }

  if (db.file_version_exists(file_id(old_id)))
    {
      // we already have a way to get to this old version,
      // no need to insert another reconstruction path
      L(FL("existing path to %s found, skipping") % old_id);
    }
  else
    {
      // the new (base) side of the edge must itself be stored, either
      // in full or as a delta, or the old version would be unreachable
      I(db.file_or_manifest_base_exists(new_id, "files")
        || db.delta_exists(new_id(), "file_deltas"));
      db.put_file_delta(file_id(old_id), file_id(new_id), file_delta(del));
    }
}
478
479
// Materialize 'next_lines' into text, compute its identity, and store
// a reverse delta (next -> curr) in the database.  On return,
// 'next_data' and 'next_id' describe the newly stored version.
static void
insert_into_db(data const & curr_data,
               hexenc<id> const & curr_id,
               vector< piece > const & next_lines,
               data & next_data,
               hexenc<id> & next_id,
               database & db)
{
  // inserting into the DB
  // note: curr_lines is a "new" (base) version
  // and next_lines is an "old" (derived) version.
  // all storage edges go from new -> old.
  {
    string tmp;
    global_pieces.build_string(next_lines, tmp);
    next_data = data(tmp);
  }
  delta del;
  diff(curr_data, next_data, del);
  calculate_ident(next_data, next_id);
  rcs_put_raw_file_edge(next_id, curr_id, del, db);
}
502
503
504
505/*
506
507please read this exhaustingly long comment and understand it
508before mucking with the branch inference logic.
509
510we are processing a file version. a branch might begin here. if
511the current version is X.Y, then there is a branch B starting
512here iff there is a symbol in the admin section called X.Y.0.Z,
513where Z is the branch number (or if there is a private branch
514called X.Y.Z, which is either an import branch or some private
515RCS cruft).
516
517the version X.Y is then considered the branchpoint of B in the
518current file. this does *not* mean that the CVS key -- an
519abstraction representing whole-tree operations -- of X.Y is the
520branchpoint across the CVS archive we're processing.
521
522in fact, CVS does not record the occurrence of a branching
523action (tag -b). we have no idea who executed that command and
524when. what we know instead is the commit X.Y immediately
preceding the branch -- CVS considers this the branchpoint --
526in this file's reduced view of history. we also know the first
527commit X.Y.Z.1 inside the branch (which might not exist).
528
529our old strategy was to consider all branches nested in a
530hierarchy, which was a super-tree of all the branch trees in all
531the CVS files in a repository. this involved considering X.Y as
the parent version of branch X.Y.Z, and selecting "the"
533branchpoint connecting the two as the least CVS key X.Y.Z.1
534committed inside the branch B.
535
536this was a mistake, for two significant reasons.
537
538first, some files do not *have* any commit inside the branch B,
539only a branchpoint X.Y.0.Z. this branchpoint is actually the
540last commit *before* the user branched, and could be a very old
541commit, long before the branch was formed, so it is useless in
542determining the branch structure.
543
544second, some files do not have a branch B, or worse, have
545branched into B from an "ancestor" branch A, where a different
546file branches into B from a different ancestor branch C. in
547other words, while there *is* a tree structure within the X.Y.Z
548branches of each file, there is *no* shared tree structure
549between the branch names across a repository. in one file A can
550be an ancestor of B, in another file B can be an ancestor of A.
551
552thus, we give up on establishing a hierarchy between branches
553altogether. all branches exist in a flat namespace, and all are
direct descendants of the empty revision at the root of
555history. each branchpoint symbol mentioned in the
556administrative section of a file is considered the root of a new
557lineage.
558
559*/
560
561
// Walk one RCS branch starting at 'begin_version', appending each
// non-synthetic commit to the branch on top of cvs.stk, storing every
// reconstructed file version in the database, and recursing into any
// branches that sprout from each version.  'begin_lines'/'begin_data'/
// 'begin_id' describe the already-reconstructed text of the starting
// version.
static void
process_branch(string const & begin_version,
               vector< piece > const & begin_lines,
               data const & begin_data,
               hexenc<id> const & begin_id,
               rcs_file const & r,
               database & db,
               cvs_history & cvs)
{
  string curr_version = begin_version;
  // Two line buffers, swapped as we advance along the 'next' chain.
  scoped_ptr< vector< piece > > next_lines(new vector<piece>);
  scoped_ptr< vector< piece > > curr_lines(new vector<piece>
                                           (begin_lines.begin(),
                                            begin_lines.end()));
  data curr_data(begin_data), next_data;
  hexenc<id> curr_id(begin_id), next_id;

  while(! (r.deltas.find(curr_version) == r.deltas.end()))
    {
      L(FL("version %s has %d lines") % curr_version % curr_lines->size());

      // Record this commit unless it is CVS's synthetic "added on
      // branch" placeholder.
      cvs_commit curr_commit(r, curr_version, file_id(curr_id), cvs);
      if (!curr_commit.is_synthetic_branch_root)
        {
          cvs.stk.top()->append_commit(curr_commit);
          ++cvs.n_versions;
        }

      string next_version = r.deltas.find(curr_version)->second->next;

      if (! next_version.empty())
        {
          L(FL("following RCS edge %s -> %s") % curr_version % next_version);

          construct_version(*curr_lines, next_version, *next_lines, r);
          L(FL("constructed RCS version %s, inserting into database") %
            next_version);

          insert_into_db(curr_data, curr_id,
                         *next_lines, next_data, next_id, db);
        }

      // mark the beginning-of-branch time and state of this file if
      // we're at a branchpoint
      typedef multimap<string,string>::const_iterator ity;
      pair<ity,ity> range = cvs.branchpoints.equal_range(curr_version);
      if (range.first != cvs.branchpoints.end()
          && range.first->first == curr_version)
        {
          for (ity i = range.first; i != range.second; ++i)
            {
              cvs.push_branch(i->second, false);
              shared_ptr<cvs_branch> b = cvs.stk.top();
              // A live file at the branchpoint is part of the
              // branch's initial state.
              if (curr_commit.alive)
                b->live_at_beginning[cvs.curr_file_interned] = curr_commit.version;
              b->note_branchpoint(curr_commit.time);
              cvs.pop_branch();
            }
        }


      // recursively follow any branch commits coming from the branchpoint
      shared_ptr<rcs_delta> curr_delta = r.deltas.find(curr_version)->second;
      for(vector<string>::const_iterator i = curr_delta->branches.begin();
          i != curr_delta->branches.end(); ++i)
        {
          string branch;
          data branch_data;
          hexenc<id> branch_id;
          vector< piece > branch_lines;
          bool priv = false;
          // A branch first-entry version we never indexed a symbol
          // for is a private (unnamed) branch.
          map<string, string>::const_iterator be = cvs.branch_first_entries.find(*i);

          if (be != cvs.branch_first_entries.end())
            branch = be->second;
          else
            priv = true;

          L(FL("following RCS branch %s = '%s'") % (*i) % branch);

          construct_version(*curr_lines, *i, branch_lines, r);
          insert_into_db(curr_data, curr_id,
                         branch_lines, branch_data, branch_id, db);

          cvs.push_branch(branch, priv);
          process_branch(*i, branch_lines, branch_data, branch_id, r, db, cvs);
          cvs.pop_branch();

          L(FL("finished RCS branch %s = '%s'") % (*i) % branch);
        }

      if (!r.deltas.find(curr_version)->second->next.empty())
        {
          // advance
          curr_data = next_data;
          curr_id = next_id;
          curr_version = next_version;
          swap(next_lines, curr_lines);
          next_lines->clear();
        }
      else break;
    }
}
665
666
// Parse one RCS ",v" file and feed its whole version tree into 'cvs',
// storing every reconstructed file version in the database.
static void
import_rcs_file_with_cvs(string const & filename, database & db, cvs_history & cvs)
{
  rcs_file r;
  L(FL("parsing RCS file %s") % filename);
  parse_rcs_file(filename, r);
  L(FL("parsed RCS file %s OK") % filename);

  {
    vector< piece > head_lines;
    I(r.deltatexts.find(r.admin.head) != r.deltatexts.end());
    I(r.deltas.find(r.admin.head) != r.deltas.end());

    // The head revision is stored in full in the RCS file; compute
    // its identity and store it as a base version.
    hexenc<id> id;
    data dat(r.deltatexts.find(r.admin.head)->second->text);
    calculate_ident(dat, id);
    file_id fid(id);

    cvs.set_filename (filename, fid);
    cvs.index_branchpoint_symbols (r);
    db.put_file(fid, file_data(dat));

    {
      // create the head state in case it is a loner
      // cvs_key k;
      // shared_ptr<cvs_state> s;
      // L(FL("noting head version %s : %s") % cvs.curr_file % r.admin.head);
      // cvs.find_key_and_state (r, r.admin.head, k, s);
    }

    // Walk the trunk (and, recursively, every branch) starting from
    // the head, using a fresh piece table for this file.
    global_pieces.reset();
    global_pieces.index_deltatext(r.deltatexts.find(r.admin.head)->second, head_lines);
    process_branch(r.admin.head, head_lines, dat, id, r, db, cvs);
    global_pieces.reset();
  }

  ui.set_tick_trailer("");
}
705
706
// Debug entry point: parse a single RCS file and report success.
// Nothing is imported or stored.
void
test_parse_rcs_file(system_path const & filename, database & db)
{
  // NOTE(review): 'cvs' (and the 'db' parameter) appear unused here
  // beyond construction -- this function only exercises the parser.
  cvs_history cvs;

  I(! filename.empty());
  assert_path_is_file(filename);

  P(F("parsing RCS file %s") % filename);
  rcs_file r;
  parse_rcs_file(filename.as_external(), r);
  P(F("parsed RCS file %s OK") % filename);
}
720
721
722// CVS importing stuff follows
723
724
// Split an RCS version string on '.' into its components, e.g.
// "1.3.0.2" -> {"1","3","0","2"}.  Runs of dots yield no empty
// components and an empty input yields an empty vector -- the same
// semantics as the boost::char_separator tokenizer this replaces
// (default policy: dropped delimiters, empty tokens dropped), but
// with no third-party dependency.
static void
split_version(string const & v, vector<string> & vs)
{
  vs.clear();
  string::size_type i = 0;
  while (i < v.size())
    {
      if (v[i] == '.')
        {
          ++i; // skip delimiter (and thereby drop empty components)
          continue;
        }
      string::size_type j = v.find('.', i);
      if (j == string::npos)
        j = v.size();
      vs.push_back(v.substr(i, j - i));
      i = j;
    }
}
734
// Join version components back into a dotted RCS version string,
// e.g. {"1","3","2"} -> "1.3.2".  An empty vector yields "".
static void
join_version(vector<string> const & vs, string & v)
{
  v.clear();
  for (vector<string>::size_type idx = 0; idx < vs.size(); ++idx)
    {
      if (idx != 0)
        v += ".";
      v += vs[idx];
    }
}
747
// Set up the two progress tickers ("versions" and "branches");
// everything else is default-constructed.
cvs_history::cvs_history() :
  n_versions("versions", "v", 1),
  n_tree_branches("branches", "b", 1)
{
}
753
// Record which RCS file is being imported.  'file' must end in ",v";
// the suffix and any trailing "Attic/" path component are stripped to
// obtain the tree path, which is also interned for cheap comparison.
// NOTE(review): 'ident' is not used here -- confirm against callers.
void
cvs_history::set_filename(string const & file,
                          file_id const & ident)
{
  L(FL("importing file '%s'") % file);
  I(file.size() > 2);
  I(file.substr(file.size() - 2) == string(",v"));
  string ss = file;
  ui.set_tick_trailer(ss);
  ss.resize(ss.size() - 2);
  // remove Attic/ if present
  string::size_type last_slash=ss.rfind('/');
  if (last_slash!=string::npos && last_slash>=5
      && ss.substr(last_slash-5,6)=="Attic/")
    ss.erase(last_slash-5,6);
  curr_file = file_path_internal(ss);
  curr_file_interned = path_interner.intern(ss);
}
772
773void cvs_history::index_branchpoint_symbols(rcs_file const & r)
774{
775 branchpoints.clear();
776 branch_first_entries.clear();
777
778 for (multimap<string, string>::const_iterator i =
779 r.admin.symbols.begin(); i != r.admin.symbols.end(); ++i)
780 {
781 string const & num = i->first;
782 string const & sym = i->second;
783
784 vector<string> components;
785 split_version(num, components);
786
787 vector<string> first_entry_components;
788 vector<string> branchpoint_components;
789
790 if (components.size() > 2 &&
791 (components.size() % 2 == 1))
792 {
793 // this is a "vendor" branch
794 //
795 // such as "1.1.1", where "1.1" is the branchpoint and
796 // "1.1.1.1" will be the first commit on it.
797
798 first_entry_components = components;
799 first_entry_components.push_back("1");
800
801 branchpoint_components = components;
802 branchpoint_components.erase(branchpoint_components.end() - 1,
803 branchpoint_components.end());
804
805 }
806
807 else if (components.size() > 2 &&
808 (components.size() % 2 == 0) &&
809 components[components.size() - 2] == string("0"))
810 {
811 // this is a "normal" branch
812 //
813 // such as "1.3.0.2", where "1.3" is the branchpoint and
814 // "1.3.2.1"
815
816 first_entry_components = components;
817 first_entry_components[first_entry_components.size() - 2]
818 = first_entry_components[first_entry_components.size() - 1];
819 first_entry_components[first_entry_components.size() - 1]
820 = string("1");
821
822 branchpoint_components = components;
823 branchpoint_components.erase(branchpoint_components.end() - 2,
824 branchpoint_components.end());
825 }
826
827 string first_entry_version;
828 join_version(first_entry_components, first_entry_version);
829
830 L(FL("first version in branch %s would be %s")
831 % sym % first_entry_version);
832 branch_first_entries.insert(make_pair(first_entry_version, sym));
833
834 string branchpoint_version;
835 join_version(branchpoint_components, branchpoint_version);
836
837 L(FL("file branchpoint for %s at %s") % sym % branchpoint_version);
838 branchpoints.insert(make_pair(branchpoint_version, sym));
839 }
840}
841
842
843
844void
845cvs_history::push_branch(string const & branch_name, bool private_branch)
846{
847 shared_ptr<cvs_branch> branch;
848
849 string bname = base_branch + "." + branch_name;
850 I(stk.size() > 0);
851
852 if (private_branch)
853 {
854 branch = shared_ptr<cvs_branch>(new cvs_branch());
855 stk.push(branch);
856 bstk.push(branch_interner.intern(""));
857 return;
858 }
859 else
860 {
861 map<string, shared_ptr<cvs_branch> >::const_iterator b = branches.find(bname);
862 if (b == branches.end())
863 {
864 branch = shared_ptr<cvs_branch>(new cvs_branch());
865 branches.insert(make_pair(bname, branch));
866 ++n_tree_branches;
867 }
868 else
869 branch = b->second;
870
871 stk.push(branch);
872 bstk.push(branch_interner.intern(bname));
873 }
874}
875
// Pop the top branch from the injection stack.  The bottom element is
// the trunk and must never be popped.
void
cvs_history::pop_branch()
{
  I(stk.size() > 1);
  stk.pop();
  bstk.pop();
}
883
884
885class
886cvs_tree_walker
887 : public tree_walker
888{
889 cvs_history & cvs;
890 database & db;
891public:
892 cvs_tree_walker(cvs_history & c, database & d) :
893 cvs(c), db(d)
894 {
895 }
896 virtual void visit_file(file_path const & path)
897 {
898 string file = path.as_external();
899 if (file.substr(file.size() - 2) == string(",v"))
900 {
901 try
902 {
903 import_rcs_file_with_cvs(file, db, cvs);
904 }
905 catch (oops const & o)
906 {
907 W(F("error reading RCS file %s: %s") % file % o.what());
908 }
909 }
910 else
911 L(FL("skipping non-RCS file %s") % file);
912 }
913 virtual ~cvs_tree_walker() {}
914};
915
916
917
918
919//
920// our task here is to produce a sequence of revision descriptions
921// from the per-file commit records we have. we do this by rolling
922// forwards through the temporally sorted file-commit list
923// accumulating file-commits into revisions and flushing the
924// revisions when we feel they are "complete".
925//
926// revisions have to have a time associated with them. this time
927// will be the first time of any commit associated with the
928// revision. they have an author and a changelog, which is shared
929// by all the file-commits in the revision.
930//
931// there might be multiple revisions overlapping in time. this is
932// legal wrt. CVS. we keep a set, and search all members of the set
933// for the best match.
934//
935// consider this situation of overlapping revisions:
936//
937// +---------------+ +---------------+ +---------------+
938// | rev #1 @ 0011 | | rev #2 @ 0012 | | rev #3 @ 0013 |
939// |~~~~~~~~~~~~~~~| |~~~~~~~~~~~~~~~| |~~~~~~~~~~~~~~~|
940// | patch foo.txt | | patch bar.txt | | patch baz.txt |
941// +---------------+ +---------------+ +---------------+
942//
943// suppose you have this situation and you run across a "patch
944// bar.txt" commit at timestamp 0014. what do you do?
945//
946// - you know that rev #2 cannot accept this commit, simply because
947// two commits on the same file makes *two* revisions, not one.
948//
949// - perhaps rev #3 could accept it; after all, it could be that the
950// commit associated with rev #2 released its commit lock, and the
951// commit associated with rev #3 quickly updated and committed at
952// 0013, finishing off at 0014.
953//
// - can rev #1 accept it? no. because CVS calculated the version it
955// expected to see in bar.txt before calling up the server, when
956// committing rev #1. the version it expected to see was the version
// in bar.txt *before* time 0012; that is, before rev #2 had any effect
958// on bar.txt. when it contacted the server, the commit associated
959// with rev #1 would have aborted if it had seen any other number.
960// so rev #1 could not start before an edit to bar.txt and then
961// include its own edit to bar.txt.
962//
963// so we have only one case where bar.txt can be accepted. if the
964// commit is not accepted into a legal rev (outside the window,
965// wrong changelog/author) it starts a new revision.
966//
967// as we scan forwards, if we hit timestamps which lie beyond rev #n's
968// window, we flush rev #n.
969//
970// if there are multiple coincident and legal revs to direct a
971// commit to (all with the same author/changelog), we direct the
972// commit to the rev with the closest initial timestamp. that is,
973// the *latest* beginning time.
974
// A cluster of per-file commits being accumulated into one
// prospective monotone revision: one author and changelog, at most
// one entry per file path (see the long comment above for the
// clustering rules).
struct
cvs_cluster
{
  time_t first_time;  // time of the commit that opened the cluster
  cvs_author author;
  cvs_changelog changelog;
  set<cvs_tag> tags;  // union of the tags on member commits

  cvs_cluster(time_t t,
              cvs_author a,
              cvs_changelog c)
    : first_time(t),
      author(a),
      changelog(c)
  {}

  // One file's contribution to the cluster.
  struct entry
  {
    bool live;           // false when this commit deletes the file
    cvs_version version; // interned content id after this commit
    time_t time;
    entry(bool l, cvs_version v, time_t t)
      : live(l),
        version(v),
        time(t)
    {}
  };

  typedef map<cvs_path, entry> entry_map;
  entry_map entries;  // keyed by file path; at most one entry each
};
1006
1007
// Consumes finished clusters for one branch, turning each into a
// prepared monotone revision (roster edit plus cert metadata) held in
// 'preps' until store_revisions() writes them out.
struct
cluster_consumer
{
  cvs_history & cvs;
  app_state & app;
  string const & branchname;
  cvs_branch const & branch;
  set<file_path> created_dirs;           // directories already added to the roster
  map<cvs_path, cvs_version> live_files; // current version of each live path
  ticker & n_revisions;

  // One revision built from a cluster, with everything needed to
  // cert it later (time, author, changelog, tags).
  struct prepared_revision
  {
    prepared_revision(revision_id i,
                      shared_ptr<revision_t> r,
                      cvs_cluster const & c);
    revision_id rid;
    shared_ptr<revision_t> rev;
    time_t time;
    cvs_author author;
    cvs_changelog changelog;
    vector<cvs_tag> tags;
  };

  vector<prepared_revision> preps;

  // Rolling roster state: parent_rid/child_rid track the edge being
  // built as clusters are consumed in time order.
  roster_t ros;
  temp_node_id_source nis;
  editable_roster_base editable_ros;
  revision_id parent_rid, child_rid;

  cluster_consumer(cvs_history & cvs,
                   app_state & app,
                   string const & branchname,
                   cvs_branch const & branch,
                   ticker & n_revs);

  void consume_cluster(cvs_cluster const & c);
  void add_missing_parents(file_path const & sp, cset & cs);
  void build_cset(cvs_cluster const & c, cset & cs);
  void store_auxiliary_certs(prepared_revision const & p);
  void store_revisions();
};
1051
typedef shared_ptr<cvs_cluster>
cluster_ptr;

// Orders clusters by their opening time; used by cluster_set below so
// the oldest open cluster is always at the front.
struct
cluster_ptr_lt
{
  bool operator()(cluster_ptr const & a,
                  cluster_ptr const & b) const
  {
    return a->first_time < b->first_time;
  }
};

// The set of currently open clusters, kept sorted by opening time.
typedef set<cluster_ptr, cluster_ptr_lt>
cluster_set;
1067
// Convert one branch's per-file CVS commits into monotone revisions.
//
// CVS records an independent commit per file, so this function groups
// the time-sorted per-file commits into "clusters" — approximations of
// whole-tree changesets — using a sliding time window of
// constants::cvs_window seconds, then hands each finished cluster to
// the cluster_consumer, which turns it into a revision.
//
// Fix over previous version: removed the local 'commits_remaining',
// which was initialized and decremented but never read.
void
import_branch(cvs_history & cvs,
              app_state & app,
              string const & branchname,
              shared_ptr<cvs_branch> const & branch,
              ticker & n_revs)
{
  cluster_set clusters;
  cluster_consumer cons(cvs, app, branchname, *branch, n_revs);

  // step 1: sort the lineage
  stable_sort(branch->lineage.begin(), branch->lineage.end());

  for (vector<cvs_commit>::const_iterator i = branch->lineage.begin();
       i != branch->lineage.end(); ++i)
    {
      L(FL("examining next commit [t:%d] [p:%s] [a:%s] [c:%s]")
        % i->time
        % cvs.path_interner.lookup(i->path)
        % cvs.author_interner.lookup(i->author)
        % cvs.changelog_interner.lookup(i->changelog));

      // step 2: expire all clusters from the beginning of the set which
      // have passed the window size; since the set is ordered
      // oldest-first we can stop at the first still-live cluster
      while (!clusters.empty())
        {
          cluster_set::iterator j = clusters.begin();
          if ((*j)->first_time + constants::cvs_window < i->time)
            {
              L(FL("expiring cluster"));
              cons.consume_cluster(**j);
              clusters.erase(j);
            }
          else
            break;
        }

      // step 3: find the last still-live cluster to have touched this
      // file
      time_t time_of_last_cluster_touching_this_file = 0;

      unsigned clu = 0;
      for (cluster_set::const_iterator j = clusters.begin();
           j != clusters.end(); ++j)
        {
          L(FL("examining cluster %d to see if it touched %d")
            % clu++
            % i->path);

          cvs_cluster::entry_map::const_iterator k = (*j)->entries.find(i->path);
          if ((k != (*j)->entries.end())
              && (k->second.time > time_of_last_cluster_touching_this_file))
            {
              L(FL("found cluster touching %d: [t:%d] [a:%d] [c:%d]")
                % i->path
                % (*j)->first_time
                % (*j)->author
                % (*j)->changelog);
              time_of_last_cluster_touching_this_file = (*j)->first_time;
            }
        }
      L(FL("last modification time is %d")
        % time_of_last_cluster_touching_this_file);

      // step 4: find a cluster which starts on or after the
      // last_modify_time, which doesn't modify the file in question,
      // and which contains the same author and changelog as our
      // commit (the loop keeps the last match found)
      cluster_ptr target;
      for (cluster_set::const_iterator j = clusters.begin();
           j != clusters.end(); ++j)
        {
          if (((*j)->first_time >= time_of_last_cluster_touching_this_file)
              && ((*j)->author == i->author)
              && ((*j)->changelog == i->changelog)
              && ((*j)->entries.find(i->path) == (*j)->entries.end()))
            {
              L(FL("picked existing cluster [t:%d] [a:%d] [c:%d]")
                % (*j)->first_time
                % (*j)->author
                % (*j)->changelog);

              target = (*j);
            }
        }

      // if we're still not finding an active cluster,
      // this is probably the first commit in it. make
      // a new one.
      if (!target)
        {
          L(FL("building new cluster [t:%d] [a:%d] [c:%d]")
            % i->time
            % i->author
            % i->changelog);

          target = cluster_ptr(new cvs_cluster(i->time,
                                               i->author,
                                               i->changelog));
          clusters.insert(target);
        }

      I(target);
      target->entries.insert(make_pair(i->path,
                                       cvs_cluster::entry(i->alive,
                                                          i->version,
                                                          i->time)));
      for (vector<cvs_tag>::const_iterator j = i->tags.begin();
           j != i->tags.end(); ++j)
        {
          target->tags.insert(*j);
        }
    }


  // now we are done this lineage; flush all remaining clusters
  L(FL("finished branch commits, writing all pending clusters"));
  while (!clusters.empty())
    {
      cons.consume_cluster(**clusters.begin());
      clusters.erase(clusters.begin());
    }
  L(FL("finished writing pending clusters"));

  cons.store_revisions();
}
1198
// Top-level driver for CVS import: walk an RCS module tree rooted at
// 'cvsroot', accumulating per-branch histories in a cvs_history, then
// convert each side branch and finally the trunk into monotone
// revisions, and resolve tag certs last.
//
// Fix over previous version: the "repository root" error message had
// an unterminated quote around the suggested command.
void
import_cvs_repo(system_path const & cvsroot,
                app_state & app)
{
  // importing a whole repository root (rather than one module) is
  // almost certainly a user mistake, so refuse it up front
  N(!directory_exists(cvsroot / "CVSROOT"),
    F("%s appears to be a CVS repository root directory\n"
      "try importing a module instead, with 'cvs_import %s/<module_name>'")
    % cvsroot % cvsroot);

  {
    // early short-circuit to avoid failure after lots of work
    rsa_keypair_id key;
    get_user_key(key, app);
    require_password(key, app);
  }

  cvs_history cvs;
  N(app.opts.branchname() != "", F("need base --branch argument for importing"));
  cvs.base_branch = app.opts.branchname();

  // push the trunk
  cvs.trunk = shared_ptr<cvs_branch>(new cvs_branch());
  cvs.stk.push(cvs.trunk);
  cvs.bstk.push(cvs.branch_interner.intern(cvs.base_branch));

  // phase 1: walk the RCS files under cvsroot, filling in 'cvs'
  {
    transaction_guard guard(app.db);
    cvs_tree_walker walker(cvs, app.db);
    require_path_is_directory(cvsroot,
                              F("path %s does not exist") % cvsroot,
                              F("'%s' is not a directory") % cvsroot);
    app.db.ensure_open();
    change_current_working_dir(cvsroot);
    walk_tree(file_path(), walker);
    guard.commit();
  }

  // only the trunk should remain on the branch stack after the walk
  I(cvs.stk.size() == 1);

  ticker n_revs(_("revisions"), "r", 1);

  // phase 2: import each side branch, erasing it afterwards to keep
  // the peak memory footprint down
  while (cvs.branches.size() > 0)
    {
      transaction_guard guard(app.db);
      map<string, shared_ptr<cvs_branch> >::const_iterator i = cvs.branches.begin();
      string branchname = i->first;
      shared_ptr<cvs_branch> branch = i->second;
      L(FL("branch %s has %d entries") % branchname % branch->lineage.size());
      import_branch(cvs, app, branchname, branch, n_revs);

      // free up some memory
      cvs.branches.erase(branchname);
      guard.commit();
    }

  // phase 3: the trunk itself, under the base branch name
  {
    transaction_guard guard(app.db);
    L(FL("trunk has %d entries") % cvs.trunk->lineage.size());
    import_branch(cvs, app, cvs.base_branch, cvs.trunk, n_revs);
    guard.commit();
  }

  // now we have a "last" rev for each tag
  {
    ticker n_tags(_("tags"), "t", 1);
    transaction_guard guard(app.db);
    for (map<unsigned long, pair<time_t, revision_id> >::const_iterator i = cvs.resolved_tags.begin();
         i != cvs.resolved_tags.end(); ++i)
      {
        string tag = cvs.tag_interner.lookup(i->first);
        ui.set_tick_trailer("marking tag " + tag);
        app.get_project().put_tag(i->second.second, tag);
        ++n_tags;
      }
    guard.commit();
  }
}
1279
1280cluster_consumer::cluster_consumer(cvs_history & cvs,
1281 app_state & app,
1282 string const & branchname,
1283 cvs_branch const & branch,
1284 ticker & n_revs)
1285 : cvs(cvs),
1286 app(app),
1287 branchname(branchname),
1288 branch(branch),
1289 n_revisions(n_revs),
1290 editable_ros(ros, nis)
1291{
1292 if (!branch.live_at_beginning.empty())
1293 {
1294 cvs_author synthetic_author =
1295 cvs.author_interner.intern("cvs_import");
1296
1297 cvs_changelog synthetic_cl =
1298 cvs.changelog_interner.intern("beginning of branch "
1299 + branchname);
1300
1301 time_t synthetic_time = branch.beginning();
1302 cvs_cluster initial_cluster(synthetic_time,
1303 synthetic_author,
1304 synthetic_cl);
1305
1306 L(FL("initial cluster on branch %s has %d live entries") %
1307 branchname % branch.live_at_beginning.size());
1308
1309 for (map<cvs_path, cvs_version>::const_iterator i = branch.live_at_beginning.begin();
1310 i != branch.live_at_beginning.end(); ++i)
1311 {
1312 cvs_cluster::entry e(true, i->second, synthetic_time);
1313 L(FL("initial cluster contains %s at %s") %
1314 cvs.path_interner.lookup(i->first) %
1315 cvs.file_version_interner.lookup(i->second));
1316 initial_cluster.entries.insert(make_pair(i->first, e));
1317 }
1318 consume_cluster(initial_cluster);
1319 }
1320}
1321
// Snapshot everything needed to write a revision later: its id, the
// revision data itself, and the cluster's time/author/changelog/tags.
cluster_consumer::prepared_revision::prepared_revision(revision_id i,
                                                       shared_ptr<revision_t> r,
                                                       cvs_cluster const & c)
  : rid(i),
    rev(r),
    time(c.first_time),
    author(c.author),
    changelog(c.changelog)
{
  // carry over every tag attached to this cluster; they become tag
  // certs once the revision is stored
  tags.assign(c.tags.begin(), c.tags.end());
}
1337
1338
1339void
1340cluster_consumer::store_revisions()
1341{
1342 for (vector<prepared_revision>::const_iterator i = preps.begin();
1343 i != preps.end(); ++i)
1344 if (app.db.put_revision(i->rid, *(i->rev)))
1345 {
1346 store_auxiliary_certs(*i);
1347 ++n_revisions;
1348 }
1349}
1350
1351void
1352cluster_consumer::store_auxiliary_certs(prepared_revision const & p)
1353{
1354 for (vector<cvs_tag>::const_iterator i = p.tags.begin();
1355 i != p.tags.end(); ++i)
1356 {
1357 map<unsigned long, pair<time_t, revision_id> >::const_iterator j
1358 = cvs.resolved_tags.find(*i);
1359
1360 if (j != cvs.resolved_tags.end())
1361 {
1362 if (j->second.first < p.time)
1363 {
1364 // move the tag forwards
1365 cvs.resolved_tags.erase(*i);
1366 cvs.resolved_tags.insert(make_pair(*i, make_pair(p.time, p.rid)));
1367 }
1368 }
1369 else
1370 {
1371 cvs.resolved_tags.insert(make_pair(*i, make_pair(p.time, p.rid)));
1372 }
1373 }
1374
1375 app.get_project().put_standard_certs(p.rid,
1376 branch_name(branchname),
1377 utf8(cvs.changelog_interner.lookup(p.changelog)),
1378 date_t::from_unix_epoch(p.time),
1379 utf8(cvs.author_interner.lookup(p.author)));
1380}
1381
1382void
1383cluster_consumer::add_missing_parents(file_path const & path, cset & cs)
1384{
1385 if (created_dirs.find(path) != created_dirs.end())
1386 return;
1387
1388 if (!path.empty())
1389 add_missing_parents(path.dirname(), cs);
1390
1391 safe_insert(created_dirs, path);
1392 safe_insert(cs.dirs_added, path);
1393}
1394
// Translate one cluster into a cset against the tree state tracked in
// live_files: live entries become additions or content deltas, dead
// entries become deletions. live_files is updated as a side effect so
// successive clusters chain correctly.
void
cluster_consumer::build_cset(cvs_cluster const & c,
                             cset & cs)
{
  for (cvs_cluster::entry_map::const_iterator i = c.entries.begin();
       i != c.entries.end(); ++i)
    {
      file_path pth = file_path_internal(cvs.path_interner.lookup(i->first));

      file_id fid(cvs.file_version_interner.lookup(i->second.version));
      if (i->second.live)
        {
          map<cvs_path, cvs_version>::const_iterator e = live_files.find(i->first);
          if (e == live_files.end())
            {
              // file not previously live: this is an addition, and any
              // missing parent directories must be added first
              add_missing_parents(pth.dirname(), cs);
              L(FL("adding entry state '%s' on '%s'") % fid % pth);
              safe_insert(cs.files_added, make_pair(pth, fid));
              live_files[i->first] = i->second.version;
            }
          else if (e->second != i->second.version)
            {
              // live at a different version: record a content delta
              file_id old_fid(cvs.file_version_interner.lookup(e->second));
              L(FL("applying state delta on '%s' : '%s' -> '%s'")
                % pth % old_fid % fid);
              safe_insert(cs.deltas_applied,
                          make_pair(pth, make_pair(old_fid, fid)));
              live_files[i->first] = i->second.version;
            }
          // otherwise: already live at the same version, nothing to do
        }
      else
        {
          // entry is dead: delete the file, but only if we currently
          // consider it live (it may already have been removed)
          map<cvs_path, cvs_version>::const_iterator e = live_files.find(i->first);
          if (e != live_files.end())
            {
              L(FL("deleting entry state '%s' on '%s'") % fid % pth);
              safe_insert(cs.nodes_deleted, pth);
              live_files.erase(i->first);
            }
        }
    }
}
1437
// Turn one finished cluster into a prepared revision: build its cset,
// apply it to the in-memory roster to obtain the new manifest, compute
// the revision id, and queue the result for store_revisions().
void
cluster_consumer::consume_cluster(cvs_cluster const & c)
{
  // we should never have an empty cluster; it's *possible* to have
  // an empty changeset (say on a vendor import) but every cluster
  // should have been created by at least one file commit, even
  // if the commit made no changes. it's a logical inconsistency if
  // you have an empty cluster.
  I(!c.entries.empty());

  shared_ptr<revision_t> rev(new revision_t());
  shared_ptr<cset> cs(new cset());
  build_cset(c, *cs);

  // apply the changes to the roster and derive the child manifest id
  cs->apply_to(editable_ros);
  manifest_id child_mid;
  calculate_ident(ros, child_mid);
  rev->made_for = made_for_database;
  rev->new_manifest = child_mid;
  // single edge from the previous revision on this branch
  rev->edges.insert(make_pair(parent_rid, cs));
  calculate_ident(*rev, child_rid);

  preps.push_back(prepared_revision(child_rid, rev, c));

  // the revision just built becomes the parent of the next one
  parent_rid = child_rid;
}
1464
1465// Local Variables:
1466// mode: C++
1467// fill-column: 76
1468// c-file-style: "gnu"
1469// indent-tabs-mode: nil
1470// End:
1471// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status