monotone

monotone Mtn Source Tree

Root/src/database_check.cc

1// Copyright (C) 2010 Stephen Leake <stephen_leake@stephe-leake.org>
2// Copyright (C) 2005 Derek Scherger <derek@echologic.com>
3//
4// This program is made available under the GNU GPL version 2.0 or
5// greater. See the accompanying file COPYING for details.
6//
7// This program is distributed WITHOUT ANY WARRANTY; without even the
8// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9// PURPOSE.
10
11#include "base.hh"
12#include <map>
13#include <set>
14
15#include "constants.hh"
16#include "database.hh"
17#include "revision.hh"
18#include "ui.hh"
19#include "vocab.hh"
20#include "transforms.hh"
21#include "cert.hh"
22#include "project.hh"
23#include "rev_height.hh"
24#include "roster.hh"
25#include "outdated_indicator.hh"
26
27// the database has roughly the following structure
28//
29// certs
30// |
31// +---+---+
32// | |
33// keys revisions
34// |
35// rosters
36// |
37// files
38//
39
40// FIXME: add a test that for each revision, generates that rev's roster
41// from scratch, and compares it to the one stored in the db. (Do the
42// comparison using something like equal_up_to_renumbering, except should
43// say if (!temp_node(a) && !temp_node(b)) I(a == b).)
44
45using std::logic_error;
46using std::map;
47using std::multimap;
48using std::set;
49using std::string;
50using std::vector;
51
52struct checked_cert {
53 cert rcert;
54 bool found_key;
55 bool good_sig;
56
57 checked_cert(cert const & c): rcert(c), found_key(false), good_sig(false) {}
58};
59
60struct checked_key {
61 bool found; // found public keypair id in db
62 size_t sigs; // number of signatures by this key
63
64 rsa_pub_key pub;
65
66 checked_key(): found(false), sigs(0) {}
67};
68
69struct checked_file {
70 bool found; // found in db, retrieved and verified sha1 hash
71 bool size_ok; // recorded file size is correct
72 size_t roster_refs; // number of roster references to this file
73
74 checked_file(): found(false), size_ok(false), roster_refs(0) {}
75};
76
77struct checked_roster {
78 bool found; // found in db, retrieved and verified sha1 hash
79 size_t revision_refs; // number of revision references to this roster
80 size_t missing_files; // number of missing files referenced by this roster
81 size_t missing_mark_revs; // number of missing revisions referenced in node markings by this roster
82
83 manifest_id man_id; // manifest id of this roster's public part
84
85 checked_roster():
86 found(false), revision_refs(0),
87 missing_files(0), missing_mark_revs(0),
88 man_id() {}
89};
90
91// the number of times a revision is referenced (revision_refs)
92// should match the number of times it is listed as a parent in
93// the ancestry cache (ancestry_parent_refs)
94//
95// the number of parents a revision has should match the number
96// of times it is listed as a child in the ancestry cache
97// (ancestry_child_refs)
98
99struct checked_revision {
100 bool found; // found in db, retrieved and verified sha1 hash
101 size_t revision_refs; // number of references to this revision from other revisions
102 size_t ancestry_parent_refs; // number of references to this revision by ancestry parent
103 size_t ancestry_child_refs; // number of references to this revision by ancestry child
104 size_t marking_refs; // number of references to this revision by roster markings
105
106 bool found_roster; // the roster for this revision exists
107 bool manifest_mismatch; // manifest doesn't match the roster for this revision
108 bool incomplete_roster; // the roster for this revision is missing files
109 size_t missing_manifests; // number of missing manifests referenced by this revision
110 size_t missing_revisions; // number of missing revisions referenced by this revision
111
112 size_t cert_refs; // number of references to this revision by revision certs;
113
114 bool parseable; // read_revision does not throw
115 bool normalized; // write_revision( read_revision(dat) ) == dat
116
117 string history_error;
118
119 set<revision_id> parents;
120 vector<checked_cert> checked_certs;
121
122 checked_revision():
123 found(false),
124 revision_refs(0), ancestry_parent_refs(0), ancestry_child_refs(0),
125 marking_refs(0),
126 found_roster(false), manifest_mismatch(false), incomplete_roster(false),
127 missing_manifests(0), missing_revisions(0),
128 cert_refs(0), parseable(false), normalized(false) {}
129};
130
131struct checked_height {
132 bool found; // found in db
133 bool unique; // not identical to any height retrieved earlier
134 bool sensible; // greater than all parent heights
135 checked_height(): found(false), unique(false), sensible(true) {}
136};
137
138struct checked_branch {
139 bool used;
140 bool heads_ok;
141 bool cached;
142
143 checked_branch(): used(false), heads_ok(false), cached(false) {}
144};
145
146/*
147 * check integrity of the SQLite database
148 */
149static void
150check_db_integrity_check(database & db)
151{
152 L(FL("asking sqlite to check db integrity"));
153 E(db.check_integrity(), origin::database,
154 F("file structure is corrupted; cannot check further"));
155}
156
157static void
158check_files(database & db, map<file_id, checked_file> & checked_files)
159{
160 set<file_id> files;
161
162 db.get_file_ids(files);
163 L(FL("checking %d files") % files.size());
164
165 ticker ticks(_("files"), "f", files.size()/70+1);
166
167 for (set<file_id>::const_iterator i = files.begin();
168 i != files.end(); ++i)
169 {
170 L(FL("checking file %s") % *i);
171 file_data data;
172 db.get_file_version(*i, data);
173 checked_files[*i].found = true;
174
175 if (db.file_size_exists(*i))
176 {
177 file_size stored_size, calculated_size;
178 calculated_size = data.inner()().size();
179 db.get_file_size(*i, stored_size);
180 checked_files[*i].size_ok = stored_size == calculated_size;
181 }
182 else
183 {
184 L(FL("missing file size entry for %s") % *i);
185 checked_files[*i].size_ok = false;
186 }
187
188 ++ticks;
189 }
190
191 I(checked_files.size() == files.size());
192}
193
194// first phase of roster checking, checks manifest-related parts of the
195// roster, and general parsability/normalisation
196static void
197check_rosters_manifest(database & db,
198 map<revision_id, checked_roster> & checked_rosters,
199 map<revision_id, checked_revision> & checked_revisions,
200 set<manifest_id> & found_manifests,
201 map<file_id, checked_file> & checked_files)
202{
203 set<revision_id> rosters;
204
205 db.get_roster_ids(rosters);
206 L(FL("checking %d rosters, manifest pass") % rosters.size());
207
208 ticker ticks(_("rosters"), "r", rosters.size()/70+1);
209
210 for (set<revision_id>::const_iterator i = rosters.begin();
211 i != rosters.end(); ++i)
212 {
213 L(FL("checking roster %s") % *i);
214
215 roster_t ros;
216 marking_map mm;
217 try
218 {
219 db.get_roster(*i, ros, mm);
220 }
221 // When attempting to fetch a roster with no corresponding revision,
222 // we fail with E(), not I() (when it tries to look up the manifest_id
223 // to check). std::exception catches both informative_failure's and
224 // logic_error's.
225 catch (std::exception & e)
226 {
227 L(FL("error loading roster %s: %s")
228 % *i % e.what());
229 checked_rosters[*i].found = false;
230 continue;
231 }
232 checked_rosters[*i].found = true;
233
234 manifest_id man_id;
235 calculate_ident(ros, man_id);
236 checked_rosters[*i].man_id = man_id;
237 found_manifests.insert(man_id);
238
239 for (node_map::const_iterator n = ros.all_nodes().begin();
240 n != ros.all_nodes().end(); ++n)
241 {
242
243 if (is_file_t(n->second))
244 {
245 file_id fid = downcast_to_file_t(n->second)->content;
246 checked_files[fid].roster_refs++;
247 if (!checked_files[fid].found)
248 checked_rosters[*i].missing_files++;
249 }
250 }
251
252 ++ticks;
253 }
254 I(checked_rosters.size() == rosters.size());
255}
256
257// Second phase of roster checking. examine the marking of a roster, checking
258// that the referenced revisions exist.
259// This function assumes that check_revisions has been called!
260static void
261check_rosters_marking(database & db,
262 map<revision_id, checked_roster> & checked_rosters,
263 map<revision_id, checked_revision> & checked_revisions)
264{
265 L(FL("checking %d rosters, marking pass") % checked_rosters.size());
266
267 ticker ticks(_("markings"), "m", checked_rosters.size()/70+1);
268
269 for (map<revision_id, checked_roster>::const_iterator i
270 = checked_rosters.begin(); i != checked_rosters.end(); i++)
271 {
272 revision_id ros_id = i->first;
273 L(FL("checking roster %s") % i->first);
274 if (!i->second.found)
275 continue;
276
277 // skip marking check on unreferenced rosters -- they're left by
278 // kill_rev_locally, and not expected to have everything they
279 // reference existing
280 if (!i->second.revision_refs)
281 continue;
282
283 roster_t ros;
284 marking_map mm;
285 db.get_roster(ros_id, ros, mm);
286
287 for (node_map::const_iterator n = ros.all_nodes().begin();
288 n != ros.all_nodes().end(); ++n)
289 {
290 // lots of revisions that must exist
291 if (!mm.contains(n->first))
292 continue;
293 const_marking_t mark = mm.get_marking(n->first);
294 checked_revisions[mark->birth_revision].marking_refs++;
295 if (!checked_revisions[mark->birth_revision].found)
296 checked_rosters[ros_id].missing_mark_revs++;
297
298 for (set<revision_id>::const_iterator r = mark->parent_name.begin();
299 r != mark->parent_name.end(); r++)
300 {
301 checked_revisions[*r].marking_refs++;
302 if (!checked_revisions[*r].found)
303 checked_rosters[ros_id].missing_mark_revs++;
304 }
305
306 for (set<revision_id>::const_iterator r = mark->file_content.begin();
307 r != mark->file_content.end(); r++)
308 {
309 checked_revisions[*r].marking_refs++;
310 if (!checked_revisions[*r].found)
311 checked_rosters[ros_id].missing_mark_revs++;
312 }
313
314 for (map<attr_key,set<revision_id> >::const_iterator attr =
315 mark->attrs.begin(); attr != mark->attrs.end(); attr++)
316 for (set<revision_id>::const_iterator r = attr->second.begin();
317 r != attr->second.end(); r++)
318 {
319 checked_revisions[*r].marking_refs++;
320 if (!checked_revisions[*r].found)
321 checked_rosters[ros_id].missing_mark_revs++;
322 }
323 }
324 ++ticks;
325 }
326}
327
328static void
329check_revisions(database & db,
330 map<revision_id, checked_revision> & checked_revisions,
331 map<revision_id, checked_roster> & checked_rosters,
332 set<manifest_id> const & found_manifests,
333 size_t & missing_rosters)
334{
335 set<revision_id> revisions;
336
337 db.get_revision_ids(revisions);
338 L(FL("checking %d revisions") % revisions.size());
339
340 ticker ticks(_("revisions"), "r", revisions.size()/70+1);
341
342 for (set<revision_id>::const_iterator i = revisions.begin();
343 i != revisions.end(); ++i)
344 {
345 L(FL("checking revision %s") % *i);
346 revision_data data;
347 db.get_revision(*i, data);
348 checked_revisions[*i].found = true;
349
350 revision_t rev;
351 try
352 {
353 read_revision(data, rev);
354 }
355 catch (logic_error & e)
356 {
357 L(FL("error parsing revision %s: %s")
358 % *i % e.what());
359 checked_revisions[*i].parseable = false;
360 continue;
361 }
362 checked_revisions[*i].parseable = true;
363
364 // normalisation check
365 revision_id norm_ident;
366 revision_data norm_data;
367 write_revision(rev, norm_data);
368 calculate_ident(norm_data, norm_ident);
369 if (norm_ident == *i)
370 checked_revisions[*i].normalized = true;
371
372 // roster checks
373 if (db.roster_version_exists(*i))
374 {
375 checked_revisions[*i].found_roster = true;
376 I(checked_rosters[*i].found);
377 checked_rosters[*i].revision_refs++;
378 if (!(rev.new_manifest == checked_rosters[*i].man_id))
379 checked_revisions[*i].manifest_mismatch = true;
380 if (checked_rosters[*i].missing_files > 0)
381 checked_revisions[*i].incomplete_roster = true;
382 }
383 else
384 ++missing_rosters;
385
386 if (found_manifests.find(rev.new_manifest) == found_manifests.end())
387 checked_revisions[*i].missing_manifests++;
388
389 for (edge_map::const_iterator edge = rev.edges.begin();
390 edge != rev.edges.end(); ++edge)
391 {
392 // ignore [] -> [...] revisions
393
394 // delay checking parents until we've processed all revisions
395 if (!null_id(edge_old_revision(edge)))
396 {
397 checked_revisions[edge_old_revision(edge)].revision_refs++;
398 checked_revisions[*i].parents.insert(edge_old_revision(edge));
399 }
400
401 // also check that change_sets applied to old manifests == new
402 // manifests (which might be a merge)
403 }
404
405 ++ticks;
406 }
407
408 // now check for parent revision existence and problems
409
410 for (map<revision_id, checked_revision>::iterator
411 revision = checked_revisions.begin();
412 revision != checked_revisions.end(); ++revision)
413 {
414 for (set<revision_id>::const_iterator p = revision->second.parents.begin();
415 p != revision->second.parents.end(); ++p)
416 {
417 if (!checked_revisions[*p].found)
418 revision->second.missing_revisions++;
419 }
420 }
421
422 L(FL("checked %d revisions after starting with %d")
423 % checked_revisions.size()
424 % revisions.size());
425}
426
427static void
428check_ancestry(database & db,
429 map<revision_id, checked_revision> & checked_revisions)
430{
431 multimap<revision_id, revision_id> graph;
432
433 db.get_forward_ancestry(graph);
434 L(FL("checking %d ancestry edges") % graph.size());
435
436 ticker ticks(_("ancestry"), "a", graph.size()/70+1);
437
438 // checked revision has set of parents
439 // graph has revision and associated parents
440 // these two representations of the graph should agree!
441
442 set<revision_id> seen;
443 for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
444 i != graph.end(); ++i)
445 {
446 // ignore the [] -> [...] edges here too
447 if (!null_id(i->first))
448 {
449 checked_revisions[i->first].ancestry_parent_refs++;
450
451 if (!null_id(i->second))
452 checked_revisions[i->second].ancestry_child_refs++;
453 }
454
455 ++ticks;
456 }
457}
458
459static void
460check_keys(database & db,
461 map<key_id, checked_key> & checked_keys)
462{
463 vector<key_id> pubkeys;
464
465 db.get_key_ids(pubkeys);
466
467 L(FL("checking %d public keys") % pubkeys.size());
468
469 ticker ticks(_("keys"), "k", 1);
470
471 for (vector<key_id>::const_iterator i = pubkeys.begin();
472 i != pubkeys.end(); ++i)
473 {
474 db.get_key(*i, checked_keys[*i].pub);
475 checked_keys[*i].found = true;
476 ++ticks;
477 }
478
479}
480
481static void
482check_certs(database & db,
483 map<revision_id, checked_revision> & checked_revisions,
484 map<key_id, checked_key> & checked_keys,
485 size_t & total_certs)
486{
487 vector<cert> certs;
488 db.get_revision_certs(certs);
489
490 total_certs = certs.size();
491
492 L(FL("checking %d revision certs") % certs.size());
493
494 ticker ticks(_("certs"), "c", certs.size()/70+1);
495
496 for (vector<cert>::const_iterator i = certs.begin();
497 i != certs.end(); ++i)
498 {
499 checked_cert checked(*i);
500 checked.found_key = checked_keys[i->key].found;
501
502 if (checked.found_key)
503 {
504 string signed_text;
505 i->signable_text(signed_text);
506 checked.good_sig
507 = (db.check_signature(i->key, signed_text, i->sig) == cert_ok);
508 }
509
510 checked_keys[i->key].sigs++;
511 checked_revisions[i->ident].checked_certs.push_back(checked);
512
513 ++ticks;
514 }
515}
516
517// - check that every rev has a height
518// - check that no two revs have the same height
519static void
520check_heights(database & db,
521 map<revision_id, checked_height> & checked_heights)
522{
523 set<revision_id> heights;
524 db.get_revision_ids(heights);
525
526 // add revision [], it is the (imaginary) root of all revisions, and
527 // should have a height, too
528 {
529 revision_id null_id;
530 heights.insert(null_id);
531 }
532
533 L(FL("checking %d heights") % heights.size());
534
535 set<rev_height> seen;
536
537 ticker ticks(_("heights"), "h", heights.size()/70+1);
538
539 for (set<revision_id>::const_iterator i = heights.begin();
540 i != heights.end(); ++i)
541 {
542 L(FL("checking height for %s") % *i);
543
544 rev_height h;
545 try
546 {
547 db.get_rev_height(*i, h);
548 }
549 catch (std::exception & e)
550 {
551 L(FL("error loading height: %s") % e.what());
552 continue;
553 }
554 checked_heights[*i].found = true; // defaults to false
555
556 if (seen.find(h) != seen.end())
557 {
558 L(FL("error: height not unique: %s") % h());
559 continue;
560 }
561 checked_heights[*i].unique = true; // defaults to false
562 seen.insert(h);
563
564 ++ticks;
565 }
566}
567
568// check that every rev's height is a sensible height to assign, given its
569// parents
570static void
571check_heights_relation(database & db,
572 map<revision_id, checked_height> & checked_heights)
573{
574 set<revision_id> heights;
575
576 multimap<revision_id, revision_id> graph; // parent, child
577 db.get_forward_ancestry(graph);
578
579 L(FL("checking heights for %d edges") % graph.size());
580
581 ticker ticks(_("height relations"), "h", graph.size()/70+1);
582
583 typedef multimap<revision_id, revision_id>::const_iterator gi;
584 for (gi i = graph.begin(); i != graph.end(); ++i)
585 {
586 revision_id const & p_id = i->first;
587 revision_id const & c_id = i->second;
588
589 if (!checked_heights[p_id].found || !checked_heights[c_id].found)
590 {
591 if (global_sanity.debug_p())
592 L(FL("missing height(s), skipping edge %s -> %s")
593 % p_id
594 % c_id);
595 continue;
596 }
597
598 if (global_sanity.debug_p())
599 L(FL("checking heights for edges %s -> %s")
600 % p_id
601 % c_id);
602
603 rev_height parent, child;
604 db.get_rev_height(p_id, parent);
605 db.get_rev_height(c_id, child);
606
607 if (!(child > parent))
608 {
609 if (global_sanity.debug_p())
610 L(FL("error: height %s of child %s not greater than height %s of parent %s")
611 % child
612 % c_id
613 % parent
614 % p_id);
615 checked_heights[c_id].sensible = false; // defaults to true
616 continue;
617 }
618
619 ++ticks;
620 }
621}
622
623static void
624check_branch_leaves(database & db, map<string, checked_branch> & checked_branches)
625{
626 // We don't assume db.get_branches is right, because that uses
627 // branch_leaves, and we are checking to see if branch_leaves is ok.
628
629 vector<cert> all_branch_certs;
630 set<string> seen_branches;
631 vector<string> cached_branches;
632
633 db.get_branches (cached_branches);
634
635 L(FL("checking %d branches") % cached_branches.size());
636
637 db.get_revision_certs(branch_cert_name, all_branch_certs);
638
639 // we assume cached_branches is close enough for the ticker.
640 ticker ticks(_("branches"), "b", cached_branches.size());
641
642 for (vector<cert>::const_iterator i = all_branch_certs.begin(); i != all_branch_certs.end(); ++i)
643 {
644 string const name = i->value();
645
646 std::pair<set<string>::iterator, bool> inserted = seen_branches.insert(name);
647
648 if (inserted.second)
649 {
650 checked_branches[name].used = true;
651
652 checked_branches[name].cached =
653 find(cached_branches.begin(), cached_branches.end(), name) != cached_branches.end();
654
655 set<revision_id> cached_leaves;
656 set<revision_id> computed_leaves;
657
658 db.get_branch_leaves(i->value, cached_leaves);
659 try
660 {
661 db.compute_branch_leaves(i->value, computed_leaves);
662 }
663 catch (std::exception & e)
664 {
665 if (string(e.what()).find("height") != string::npos)
666 {
667 L(FL("error loading height when checking heads of '%s'") % i->value);
668 }
669 else
670 throw;
671 }
672
673 checked_branches[name].heads_ok = cached_leaves == computed_leaves;
674 ++ticks;
675 }
676 }
677
678 for (vector<string>::const_iterator i = cached_branches.begin(); i != cached_branches.end(); ++i)
679 {
680 string const name = *i;
681
682 if (seen_branches.find(name) == seen_branches.end())
683 {
684 checked_branches[name].used = false;
685 checked_branches[name].cached = true;
686 checked_branches[name].heads_ok = false;
687 }
688 }
689}
690
691static void
692report_files(map<file_id, checked_file> const & checked_files,
693 size_t & missing_files,
694 size_t & unreferenced_files,
695 size_t & missing_or_invalid_file_sizes)
696{
697 for (map<file_id, checked_file>::const_iterator
698 i = checked_files.begin(); i != checked_files.end(); ++i)
699 {
700 checked_file file = i->second;
701
702 if (!file.found)
703 {
704 missing_files++;
705 P(F("file %s missing (%d manifest references)")
706 % i->first % file.roster_refs);
707 }
708
709 if (file.roster_refs == 0)
710 {
711 unreferenced_files++;
712 P(F("file %s unreferenced") % i->first);
713 }
714
715 if (file.size_ok == false)
716 {
717 missing_or_invalid_file_sizes++;
718 P(F("file %s has a missing or invalid file size") % i->first);
719 }
720 }
721}
722
723static void
724report_rosters(map<revision_id, checked_roster> const & checked_rosters,
725 size_t & unreferenced_rosters,
726 size_t & incomplete_rosters)
727{
728 for (map<revision_id, checked_roster>::const_iterator
729 i = checked_rosters.begin(); i != checked_rosters.end(); ++i)
730 {
731 checked_roster roster = i->second;
732
733 if (roster.revision_refs == 0)
734 {
735 unreferenced_rosters++;
736 P(F("roster %s unreferenced")
737 % i->first);
738 }
739
740 if (roster.missing_files > 0)
741 {
742 incomplete_rosters++;
743 P(F("roster %s incomplete (%d missing files)")
744 % i->first % roster.missing_files);
745 }
746
747 if (roster.missing_mark_revs > 0)
748 {
749 incomplete_rosters++;
750 P(F("roster %s incomplete (%d missing revisions)")
751 % i->first % roster.missing_mark_revs);
752 }
753 }
754}
755
756static void
757report_revisions(map<revision_id, checked_revision> const & checked_revisions,
758 size_t & missing_revisions,
759 size_t & incomplete_revisions,
760 size_t & mismatched_parents,
761 size_t & mismatched_children,
762 size_t & manifest_mismatch,
763 size_t & bad_history,
764 size_t & non_parseable_revisions,
765 size_t & non_normalized_revisions)
766{
767 for (map<revision_id, checked_revision>::const_iterator
768 i = checked_revisions.begin(); i != checked_revisions.end(); ++i)
769 {
770 checked_revision revision = i->second;
771
772 if (!revision.found)
773 {
774 missing_revisions++;
775 P(F("revision %s missing (%d revision references; %d cert references; %d parent references; %d child references; %d roster references)")
776 % i->first
777 % revision.revision_refs
778 % revision.cert_refs
779 % revision.ancestry_parent_refs
780 % revision.ancestry_child_refs
781 % revision.marking_refs);
782 }
783
784 if (revision.missing_manifests > 0)
785 {
786 incomplete_revisions++;
787 P(F("revision %s incomplete (%d missing manifests)")
788 % i->first
789 % revision.missing_manifests);
790 }
791
792 if (revision.missing_revisions > 0)
793 {
794 incomplete_revisions++;
795 P(F("revision %s incomplete (%d missing revisions)")
796 % i->first
797 % revision.missing_revisions);
798 }
799
800 if (!revision.found_roster)
801 {
802 incomplete_revisions++;
803 P(F("revision %s incomplete (missing roster)")
804 % i->first);
805 }
806
807 if (revision.manifest_mismatch)
808 {
809 manifest_mismatch++;
810 P(F("revision %s mismatched roster and manifest")
811 % i->first);
812 }
813
814 if (revision.incomplete_roster)
815 {
816 incomplete_revisions++;
817 P(F("revision %s incomplete (incomplete roster)")
818 % i->first);
819 }
820
821 if (revision.ancestry_parent_refs != revision.revision_refs)
822 {
823 mismatched_parents++;
824 P(F("revision %s mismatched parents (%d ancestry parents; %d revision refs)")
825 % i->first
826 % revision.ancestry_parent_refs
827 % revision.revision_refs );
828 }
829
830 if (revision.ancestry_child_refs != revision.parents.size())
831 {
832 mismatched_children++;
833 P(F("revision %s mismatched children (%d ancestry children; %d parents)")
834 % i->first
835 % revision.ancestry_child_refs
836 % revision.parents.size() );
837 }
838
839 if (!revision.history_error.empty())
840 {
841 bad_history++;
842 string tmp = revision.history_error;
843 if (tmp[tmp.length() - 1] == '\n')
844 tmp.erase(tmp.length() - 1);
845 P(F("revision %s has bad history (%s)")
846 % i->first % tmp);
847 }
848
849 if (!revision.parseable)
850 {
851 non_parseable_revisions++;
852 P(F("revision %s is not parseable (perhaps with unnormalized paths?)")
853 % i->first);
854 }
855
856 if (revision.parseable && !revision.normalized)
857 {
858 non_normalized_revisions++;
859 P(F("revision %s is not in normalized form")
860 % i->first);
861 }
862 }
863}
864
865static void
866report_keys(map<key_id, checked_key> const & checked_keys,
867 size_t & missing_keys)
868{
869 for (map<key_id, checked_key>::const_iterator
870 i = checked_keys.begin(); i != checked_keys.end(); ++i)
871 {
872 checked_key key = i->second;
873
874 if (key.found)
875 {
876 L(FL("key %s signed %d certs")
877 % i->first
878 % key.sigs);
879 }
880 else
881 {
882 missing_keys++;
883 P(F("key %s missing (signed %d certs)")
884 % i->first
885 % key.sigs);
886 }
887 }
888}
889
890static void
891report_certs(map<revision_id, checked_revision> const & checked_revisions,
892 size_t & missing_certs,
893 size_t & mismatched_certs,
894 size_t & unchecked_sigs,
895 size_t & bad_sigs)
896{
897 set<cert_name> cnames;
898
899 cnames.insert(cert_name(author_cert_name));
900 cnames.insert(cert_name(branch_cert_name));
901 cnames.insert(cert_name(changelog_cert_name));
902 cnames.insert(cert_name(date_cert_name));
903
904 for (map<revision_id, checked_revision>::const_iterator
905 i = checked_revisions.begin(); i != checked_revisions.end(); ++i)
906 {
907 checked_revision revision = i->second;
908 map<cert_name, size_t> cert_counts;
909
910 for (vector<checked_cert>::const_iterator checked = revision.checked_certs.begin();
911 checked != revision.checked_certs.end(); ++checked)
912 {
913 if (!checked->found_key)
914 {
915 unchecked_sigs++;
916 P(F("revision %s unchecked signature in %s cert from missing key %s")
917 % i->first
918 % checked->rcert.name
919 % checked->rcert.key);
920 }
921 else if (!checked->good_sig)
922 {
923 bad_sigs++;
924 P(F("revision %s bad signature in %s cert from key %s")
925 % i->first
926 % checked->rcert.name
927 % checked->rcert.key);
928 }
929
930 cert_counts[checked->rcert.name]++;
931 }
932
933 for (set<cert_name>::const_iterator n = cnames.begin();
934 n != cnames.end(); ++n)
935 {
936 if (revision.found && cert_counts[*n] == 0)
937 {
938 missing_certs++;
939 P(F("revision %s missing %s cert")
940 % i->first % *n);
941 }
942 }
943
944 if (cert_counts[cert_name(author_cert_name)] != cert_counts[cert_name(changelog_cert_name)] ||
945 cert_counts[cert_name(author_cert_name)] != cert_counts[cert_name(date_cert_name)] ||
946 cert_counts[cert_name(date_cert_name)] != cert_counts[cert_name(changelog_cert_name)])
947 {
948 mismatched_certs++;
949 P(F("revision %s mismatched certs (%d authors %d dates %d changelogs)")
950 % i->first
951 % cert_counts[cert_name(author_cert_name)]
952 % cert_counts[cert_name(date_cert_name)]
953 % cert_counts[cert_name(changelog_cert_name)]);
954 }
955
956 }
957}
958
959static void
960report_heights(map<revision_id, checked_height> const & checked_heights,
961 size_t & missing_heights,
962 size_t & duplicate_heights,
963 size_t & incorrect_heights)
964{
965 for (map<revision_id, checked_height>::const_iterator
966 i = checked_heights.begin(); i != checked_heights.end(); ++i)
967 {
968 checked_height height = i->second;
969
970 if (!height.found)
971 {
972 missing_heights++;
973 P(F("height missing for revision %s")
974 % i->first);
975 continue;
976 }
977
978 if (!height.unique)
979 {
980 duplicate_heights++;
981 P(F("duplicate height for revision %s")
982 % i->first);
983 }
984
985 if (!height.sensible)
986 {
987 incorrect_heights++;
988 P(F("height of revision %s not greater than that of parent")
989 % i->first);
990 }
991 }
992}
993
994static void
995report_branches(map<string, checked_branch> const & checked_branches,
996 size_t & extra_branches,
997 size_t & bad_branches,
998 size_t & missing_branches)
999{
1000 for (map<string, checked_branch>::const_iterator i = checked_branches.begin(); i != checked_branches.end(); ++i)
1001 {
1002 if (!i->second.used)
1003 {
1004 extra_branches++;
1005 P(F("cached branch '%s' not used") % i->first);
1006 }
1007 else if (!i->second.cached)
1008 {
1009 missing_branches++;
1010 P(F("branch '%s' not cached") % i->first);
1011 }
1012 else if (!i->second.heads_ok)
1013 {
1014 bad_branches ++;
1015 P(F("branch '%s' wrong head count") % i->first);
1016 }
1017 }
1018}
1019
1020void
1021check_db(database & db)
1022{
1023 map<file_id, checked_file> checked_files;
1024 set<manifest_id> found_manifests;
1025 map<revision_id, checked_roster> checked_rosters;
1026 map<revision_id, checked_revision> checked_revisions;
1027 map<key_id, checked_key> checked_keys;
1028 map<revision_id, checked_height> checked_heights;
1029 map<string, checked_branch> checked_branches;
1030
1031 size_t missing_files = 0;
1032 size_t unreferenced_files = 0;
1033 size_t missing_or_invalid_file_sizes = 0;
1034
1035 size_t missing_rosters = 0;
1036 size_t unreferenced_rosters = 0;
1037 size_t incomplete_rosters = 0;
1038
1039 size_t missing_revisions = 0;
1040 size_t incomplete_revisions = 0;
1041 size_t mismatched_parents = 0;
1042 size_t mismatched_children = 0;
1043 size_t bad_history = 0;
1044 size_t non_parseable_revisions = 0;
1045 size_t non_normalized_revisions = 0;
1046
1047 size_t missing_keys = 0;
1048
1049 size_t total_certs = 0;
1050 size_t missing_certs = 0;
1051 size_t mismatched_certs = 0;
1052 size_t manifest_mismatch = 0;
1053 size_t unchecked_sigs = 0;
1054 size_t bad_sigs = 0;
1055
1056 size_t missing_heights = 0;
1057 size_t duplicate_heights = 0;
1058 size_t incorrect_heights = 0;
1059
1060 size_t extra_branches = 0;
1061 size_t bad_branches = 0;
1062 size_t missing_branches = 0;
1063
1064 transaction_guard guard(db, false);
1065
1066 check_db_integrity_check(db);
1067 check_files(db, checked_files);
1068 check_rosters_manifest(db, checked_rosters, checked_revisions,
1069 found_manifests, checked_files);
1070 check_revisions(db, checked_revisions, checked_rosters, found_manifests,
1071 missing_rosters);
1072 check_rosters_marking(db, checked_rosters, checked_revisions);
1073 check_ancestry(db, checked_revisions);
1074 check_keys(db, checked_keys);
1075 check_certs(db, checked_revisions, checked_keys, total_certs);
1076 check_heights(db, checked_heights);
1077 check_heights_relation(db, checked_heights);
1078 check_branch_leaves(db, checked_branches);
1079
1080 report_files(checked_files, missing_files, unreferenced_files,
1081 missing_or_invalid_file_sizes);
1082
1083 report_rosters(checked_rosters,
1084 unreferenced_rosters,
1085 incomplete_rosters);
1086
1087 report_revisions(checked_revisions,
1088 missing_revisions, incomplete_revisions,
1089 mismatched_parents, mismatched_children,
1090 manifest_mismatch,
1091 bad_history, non_parseable_revisions,
1092 non_normalized_revisions);
1093
1094 report_keys(checked_keys, missing_keys);
1095
1096 report_certs(checked_revisions,
1097 missing_certs, mismatched_certs,
1098 unchecked_sigs, bad_sigs);
1099
1100 report_heights(checked_heights,
1101 missing_heights, duplicate_heights, incorrect_heights);
1102
1103 report_branches(checked_branches, extra_branches, bad_branches, missing_branches);
1104
1105 // NOTE: any new sorts of problems need to have added:
1106 // -- a message here, that tells the user about them
1107 // -- entries in one _or both_ of the sums calculated at the end
1108 // -- an entry added to the manual, which describes in detail why the
1109 // error occurs and what it means to the user
1110
1111 if (missing_files > 0)
1112 W(F("%d missing files") % missing_files);
1113 if (unreferenced_files > 0)
1114 W(F("%d unreferenced files") % unreferenced_files);
1115 if (missing_or_invalid_file_sizes > 0)
1116 W(F("%d missing or invalid file sizes") % missing_or_invalid_file_sizes);
1117
1118 if (unreferenced_rosters > 0)
1119 W(F("%d unreferenced rosters") % unreferenced_rosters);
1120 if (incomplete_rosters > 0)
1121 W(F("%d incomplete rosters") % incomplete_rosters);
1122
1123 if (missing_revisions > 0)
1124 W(F("%d missing revisions") % missing_revisions);
1125 if (incomplete_revisions > 0)
1126 W(F("%d incomplete revisions") % incomplete_revisions);
1127 if (mismatched_parents > 0)
1128 W(F("%d mismatched parents") % mismatched_parents);
1129 if (mismatched_children > 0)
1130 W(F("%d mismatched children") % mismatched_children);
1131 if (bad_history > 0)
1132 W(F("%d revisions with bad history") % bad_history);
1133 if (non_parseable_revisions > 0)
1134 W(F("%d revisions not parseable (perhaps with invalid paths)")
1135 % non_parseable_revisions);
1136 if (non_normalized_revisions > 0)
1137 W(F("%d revisions not in normalized form") % non_normalized_revisions);
1138
1139
1140 if (missing_rosters > 0)
1141 W(F("%d missing rosters") % missing_rosters);
1142
1143
1144 if (missing_keys > 0)
1145 W(F("%d missing keys") % missing_keys);
1146
1147 if (missing_certs > 0)
1148 W(F("%d missing certs") % missing_certs);
1149 if (mismatched_certs > 0)
1150 W(F("%d mismatched certs") % mismatched_certs);
1151 if (unchecked_sigs > 0)
1152 W(F("%d unchecked signatures due to missing keys") % unchecked_sigs);
1153 if (bad_sigs > 0)
1154 W(F("%d bad signatures") % bad_sigs);
1155
1156 if (missing_heights > 0)
1157 W(F("%d missing heights") % missing_heights);
1158 if (duplicate_heights > 0)
1159 W(F("%d duplicate heights") % duplicate_heights);
1160 if (incorrect_heights > 0)
1161 W(F("%d incorrect heights") % incorrect_heights);
1162
1163 if (extra_branches > 0)
1164 W(F("%d branches cached but not used") % extra_branches);
1165 if (bad_branches > 0)
1166 W(F("%d branches with incorrect head count") % bad_branches);
1167 if (missing_branches > 0)
1168 W(F("%d branches missing from branch cache") % missing_branches);
1169
1170 size_t total = missing_files + unreferenced_files +
1171 missing_or_invalid_file_sizes +
1172 unreferenced_rosters + incomplete_rosters +
1173 missing_revisions + incomplete_revisions +
1174 non_parseable_revisions + non_normalized_revisions +
1175 mismatched_parents + mismatched_children +
1176 bad_history +
1177 missing_rosters +
1178 missing_certs + mismatched_certs +
1179 unchecked_sigs + bad_sigs +
1180 missing_keys +
1181 missing_heights + duplicate_heights + incorrect_heights +
1182 extra_branches + bad_branches + missing_branches;
1183
1184 // unreferenced files and rosters and mismatched certs are not actually
1185 // serious errors; odd, but nothing will break. Similarly, missing and
1186 // mismatched certs are not serious errors.
1187 size_t serious = missing_files + missing_or_invalid_file_sizes +
1188 incomplete_rosters + missing_rosters +
1189 missing_revisions + incomplete_revisions +
1190 non_parseable_revisions + non_normalized_revisions +
1191 mismatched_parents + mismatched_children + manifest_mismatch +
1192 bad_history +
1193 unchecked_sigs + bad_sigs +
1194 missing_keys +
1195 missing_heights + duplicate_heights + incorrect_heights+
1196 extra_branches + bad_branches + missing_branches;
1197
1198 P(F("check complete: %d files; %d rosters; %d revisions; %d keys; %d certs; %d heights; %d branches")
1199 % checked_files.size()
1200 % checked_rosters.size()
1201 % checked_revisions.size()
1202 % checked_keys.size()
1203 % total_certs
1204 % checked_heights.size()
1205 % checked_branches.size());
1206 P(F("total problems detected: %d (%d serious)") % total % serious);
1207 if (serious)
1208 {
1209 // should be origin::database, but that gives the "almost certainly a bug"
1210 // message, which we don't want.
1211 E(false, origin::no_fault, F("serious problems detected"));
1212 }
1213 else if (total)
1214 P(F("minor problems detected"));
1215 else
1216 P(F("database is good"));
1217}
1218
1219// Local Variables:
1220// mode: C++
1221// fill-column: 76
1222// c-file-style: "gnu"
1223// indent-tabs-mode: nil
1224// End:
1225// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status