monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13struct sqlite3;
14struct sqlite3_stmt;
15struct cert;
16int sqlite3_finalize(sqlite3_stmt *);
17
18#include "vector.hh"
19#include <set>
20#include <map>
21
22#include "numeric_vocab.hh"
23#include "vocab.hh"
24#include "paths.hh"
25#include "cleanup.hh"
26#include "roster.hh"
27#include "selectors.hh"
28#include "graph.hh"
29
30// FIXME: would be better not to include this everywhere
31#include "outdated_indicator.hh"
32#include "lru_writeback_cache.hh"
33
34// this file defines a public, typed interface to the database.
35// the database class encapsulates all knowledge about sqlite,
36// the schema, and all SQL statements used to access the schema.
37//
38// one thing which is rather important to note is that this file
39// deals with two sorts of version relationships. the versions
40// stored in the database are all *backwards* from those the program
41// sees. so for example if you have two versions of a file
42//
43// file.1, file.2
44//
45// where file.2 was a modification of file.1, then as far as the rest of
46// the application is concerned -- and the ancestry graph -- file.1 is the
47// "old" version and file.2 is the "new" version. note the use of terms
48// which describe time, and the sequence of edits a user makes to a
49// file. those are ancestry terms. when the application composes a
50// patchset, for example, it'll contain the diff delta(file.1, file.2)
51//
52// from the database's perspective, however, file.1 is the derived version,
53// and file.2 is the base version. the base version is stored in the
54// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
55// the "file_deltas" table, under the id of file.1, with the id of file.2
56// listed as its base. note the use of the terms which describe
57// reconstruction; those are storage-system terms.
58//
59// the interface *to* the database, and the ancestry version graphs, use
60// the old / new metaphor of ancestry, but within the database (including
61// the private helper methods, and the storage version graphs) the
62// base/derived storage metaphor is used. the only real way to tell which
63// is which is to look at the parameter names and code. I might try to
64// express this in the type system some day, but not presently.
65//
66// the key phrase to keep repeating when working on this code is:
67//
68// "base files are new, derived files are old"
69//
70// it makes the code confusing, I know. this is possibly the worst part of
71// the program. I don't know if there's any way to make it clearer.
72
73class transaction_guard;
74class app_state;
75struct revision_t;
76struct query;
77class rev_height;
78struct globish;
79
80class database
81{
82 //
83 // --== Opening the database and schema checking ==--
84 //
85private:
86 system_path filename;
87 app_state * __app;
88 struct sqlite3 * __sql;
89
90 enum open_mode { normal_mode = 0,
91 schema_bypass_mode,
92 format_bypass_mode };
93
94 void install_functions(app_state * app);
95 struct sqlite3 * sql(enum open_mode mode = normal_mode);
96
97 void check_filename();
98 void check_db_exists();
99 void check_db_nonexistent();
100 void open();
101 void close();
102 void check_format();
103
104public:
105 database(system_path const & file);
106 ~database();
107
108 void set_app(app_state * app);
109
110 void set_filename(system_path const & file);
111 system_path get_filename();
112 bool is_dbfile(any_path const & file);
113 void ensure_open();
114 void ensure_open_for_format_changes();
115private:
116 void ensure_open_for_maintenance();
117public:
118 void check_is_not_rosterified();
119 bool database_specified();
120
121 //
122 // --== Basic SQL interface and statement caching ==--
123 //
124private:
125 struct statement {
126 statement() : count(0), stmt(0, sqlite3_finalize) {}
127 int count;
128 cleanup_ptr<sqlite3_stmt*, int> stmt;
129 };
130 std::map<std::string, statement> statement_cache;
131
132 typedef std::vector< std::vector<std::string> > results;
133 void fetch(results & res,
134 int const want_cols, int const want_rows,
135 query const & q);
136 void execute(query const & q);
137
138 bool table_has_entry(std::string const & key, std::string const & column,
139 std::string const & table);
140
141 //
142 // --== Generic database metadata gathering ==--
143 //
144private:
145 std::string count(std::string const & table);
146 std::string space(std::string const & table,
147 std::string const & concatenated_columns,
148 u64 & total);
149 unsigned int page_size();
150 unsigned int cache_size();
151
152 //
153 // --== Transactions ==--
154 //
155private:
156 int transaction_level;
157 bool transaction_exclusive;
158 void begin_transaction(bool exclusive);
159 void commit_transaction();
160 void rollback_transaction();
161 friend class transaction_guard;
162
163 //
164 // --== Write-buffering -- tied into transaction ==--
165 // --== machinery and delta compression machinery ==--
166 //
167public:
168 typedef boost::shared_ptr<roster_t const> roster_t_cp;
169 typedef boost::shared_ptr<marking_map const> marking_map_cp;
170 typedef std::pair<roster_t_cp, marking_map_cp> cached_roster;
171private:
172 struct roster_size_estimator
173 {
174 unsigned long operator() (cached_roster const &);
175 };
176 struct roster_writeback_manager
177 {
178 database & db;
179 roster_writeback_manager(database & db) : db(db) {}
180 void writeout(revision_id const &, cached_roster const &);
181 };
182 LRUWritebackCache<revision_id, cached_roster,
183 roster_size_estimator, roster_writeback_manager>
184 roster_cache;
185
186 size_t size_delayed_file(file_id const & id, file_data const & dat);
187 bool have_delayed_file(file_id const & id);
188 void load_delayed_file(file_id const & id, file_data & dat);
189 void cancel_delayed_file(file_id const & id);
190 void drop_or_cancel_file(file_id const & id);
191 void schedule_delayed_file(file_id const & id, file_data const & dat);
192
193 std::map<file_id, file_data> delayed_files;
194 size_t delayed_writes_size;
195
196 void flush_delayed_writes();
197 void clear_delayed_writes();
198 void write_delayed_file(file_id const & new_id,
199 file_data const & dat);
200
201 void write_delayed_roster(revision_id const & new_id,
202 roster_t const & roster,
203 marking_map const & marking);
204
205 //
206 // --== Reading/writing delta-compressed objects ==--
207 //
208private:
209 // "do we have any entry for 'ident' that is a base version"
210 bool file_or_manifest_base_exists(hexenc<id> const & ident,
211 std::string const & table);
212 bool roster_base_stored(revision_id const & ident);
213 bool roster_base_available(revision_id const & ident);
214
215 // "do we have any entry for 'ident' that is a delta"
216 bool delta_exists(std::string const & ident,
217 std::string const & table);
218
219 bool delta_exists(std::string const & ident,
220 std::string const & base,
221 std::string const & table);
222
223 void get_file_or_manifest_base_unchecked(hexenc<id> const & new_id,
224 data & dat,
225 std::string const & table);
226 void get_file_or_manifest_delta_unchecked(hexenc<id> const & ident,
227 hexenc<id> const & base,
228 delta & del,
229 std::string const & table);
230 void get_roster_base(std::string const & ident,
231 roster_t & roster, marking_map & marking);
232 void get_roster_delta(std::string const & ident,
233 std::string const & base,
234 roster_delta & del);
235 friend struct file_and_manifest_reconstruction_graph;
236 friend struct roster_reconstruction_graph;
237 void get_version(hexenc<id> const & ident,
238 data & dat,
239 std::string const & data_table,
240 std::string const & delta_table);
241
242 void drop(std::string const & base,
243 std::string const & table);
244 void put_file_delta(file_id const & ident,
245 file_id const & base,
246 file_delta const & del);
247private:
248 void put_roster_delta(revision_id const & ident,
249 revision_id const & base,
250 roster_delta const & del);
251 void put_version(hexenc<id> const & old_id,
252 hexenc<id> const & new_id,
253 delta const & del,
254 std::string const & data_table,
255 std::string const & delta_table);
256
257 void put_roster(revision_id const & rev_id,
258 roster_t_cp const & roster,
259 marking_map_cp const & marking);
260
261public:
262
263 bool file_version_exists(file_id const & ident);
264 bool revision_exists(revision_id const & ident);
265 bool roster_link_exists_for_revision(revision_id const & ident);
266 bool roster_exists_for_revision(revision_id const & ident);
267
268
269 // get plain version if it exists, or reconstruct version
270 // from deltas (if they exist)
271 void get_file_version(file_id const & ident,
272 file_data & dat);
273
274 // put file w/o predecessor into db
275 void put_file(file_id const & new_id,
276 file_data const & dat);
277
278 // store new version and update old version to be a delta
279 void put_file_version(file_id const & old_id,
280 file_id const & new_id,
281 file_delta const & del);
282
283 void get_arbitrary_file_delta(file_id const & src_id,
284 file_id const & dst_id,
285 file_delta & del);
286
287 // get plain version if it exists, or reconstruct version
288 // from deltas (if they exist).
289 void get_manifest_version(manifest_id const & ident,
290 manifest_data & dat);
291
292 //
293 // --== The ancestry graph ==--
294 //
295public:
296 void get_revision_ancestry(rev_ancestry_map & graph);
297
298 void get_revision_parents(revision_id const & ident,
299 std::set<revision_id> & parents);
300
301 void get_revision_children(revision_id const & ident,
302 std::set<revision_id> & children);
303
304 void get_leaves(std::set<revision_id> & leaves);
305
306 void get_revision_manifest(revision_id const & cid,
307 manifest_id & mid);
308private:
309 // helper
310 void get_ids(std::string const & table, std::set< hexenc<id> > & ids);
311public:
312 void get_revision_ids(std::set<revision_id> & ids);
313 // this is exposed for 'db check':
314 void get_file_ids(std::set<file_id> & ids);
315
316 //
317 // --== Revision reading/writing ==--
318 //
319private:
320 void deltify_revision(revision_id const & rid);
321public:
322 void get_revision(revision_id const & ident,
323 revision_t & cs);
324
325 void get_revision(revision_id const & ident,
326 revision_data & dat);
327
328 bool put_revision(revision_id const & new_id,
329 revision_t const & rev);
330
331 bool put_revision(revision_id const & new_id,
332 revision_data const & dat);
333
334 //
335 // --== Rosters ==--
336 //
337private:
338 u64 next_id_from_table(std::string const & table);
339 void get_roster_version(revision_id const & ros_id, cached_roster & cr);
340public:
341 node_id next_node_id();
342
343 void get_roster(revision_id const & rid,
344 roster_t & roster);
345
346 void get_roster(revision_id const & rid,
347 roster_t & roster,
348 marking_map & marks);
349
350 void get_roster(revision_id const & rid,
351 cached_roster & cr);
352
353 // these are exposed for the use of database_check.cc
354 bool roster_version_exists(revision_id const & ident);
355 void get_roster_ids(std::set<revision_id> & ids);
356
357 // using roster deltas
358 void get_markings(revision_id const & id,
359 node_id const & nid,
360 marking_t & markings);
361
362 void get_file_content(revision_id const & id,
363 node_id const & nid,
364 file_id & content);
365private:
366 struct extractor;
367 struct file_content_extractor;
368 struct markings_extractor;
369 void extract_from_deltas(revision_id const & id, extractor & x);
370
371 //
372 // --== Keys ==--
373 //
374private:
375 void get_keys(std::string const & table, std::vector<rsa_keypair_id> & keys);
376public:
377 void get_key_ids(std::vector<rsa_keypair_id> & pubkeys);
378 void get_key_ids(globish const & pattern,
379 std::vector<rsa_keypair_id> & pubkeys);
380
381 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
382
383 bool public_key_exists(hexenc<id> const & hash);
384 bool public_key_exists(rsa_keypair_id const & ident);
385
386 void get_pubkey(hexenc<id> const & hash,
387 rsa_keypair_id & ident,
388 base64<rsa_pub_key> & pub_encoded);
389
390 void get_key(rsa_keypair_id const & ident,
391 base64<rsa_pub_key> & pub_encoded);
392
393 bool put_key(rsa_keypair_id const & ident,
394 base64<rsa_pub_key> const & pub_encoded);
395
396 void delete_public_key(rsa_keypair_id const & pub_id);
397
398 //
399 // --== Certs ==--
400 //
401 // note: this section is ridiculous. please do something about it.
402private:
403 bool cert_exists(cert const & t,
404 std::string const & table);
405 void put_cert(cert const & t, std::string const & table);
406 void results_to_certs(results const & res,
407 std::vector<cert> & certs);
408
409 void get_certs(std::vector< cert > & certs,
410 std::string const & table);
411
412 void get_certs(hexenc<id> const & ident,
413 std::vector< cert > & certs,
414 std::string const & table);
415
416 void get_certs(cert_name const & name,
417 std::vector< cert > & certs,
418 std::string const & table);
419
420 void get_certs(hexenc<id> const & ident,
421 cert_name const & name,
422 std::vector< cert > & certs,
423 std::string const & table);
424
425 void get_certs(hexenc<id> const & ident,
426 cert_name const & name,
427 base64<cert_value> const & val,
428 std::vector< cert > & certs,
429 std::string const & table);
430
431 void get_certs(cert_name const & name,
432 base64<cert_value> const & val,
433 std::vector<cert> & certs,
434 std::string const & table);
435
436 outdated_indicator_factory cert_stamper;
437public:
438
439 bool revision_cert_exists(revision<cert> const & cert);
440 bool revision_cert_exists(hexenc<id> const & hash);
441
442 bool put_revision_cert(revision<cert> const & cert);
443
444 // this variant has to be rather coarse and fast, for netsync's use
445 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<hexenc<id>,
446 std::pair<revision_id, rsa_keypair_id> > > & idx);
447
448 // Only used by database_check.cc
449 outdated_indicator get_revision_certs(std::vector< revision<cert> > & certs);
450
451 outdated_indicator get_revision_certs(cert_name const & name,
452 std::vector< revision<cert> > & certs);
453
454 outdated_indicator get_revision_certs(revision_id const & ident,
455 cert_name const & name,
456 std::vector< revision<cert> > & certs);
457
458 // Only used by get_branch_certs (project.cc)
459 outdated_indicator get_revision_certs(cert_name const & name,
460 base64<cert_value> const & val,
461 std::vector< revision<cert> > & certs);
462
463 // Only used by revision_is_in_branch (project.cc)
464 outdated_indicator get_revision_certs(revision_id const & ident,
465 cert_name const & name,
466 base64<cert_value> const & value,
467 std::vector< revision<cert> > & certs);
468
469 // Only used by get_branch_heads (project.cc)
470 outdated_indicator get_revisions_with_cert(cert_name const & name,
471 base64<cert_value> const & value,
472 std::set<revision_id> & revisions);
473
474 // Used through project.cc, and by
475 // anc_graph::add_node_for_oldstyle_revision (revision.cc)
476 outdated_indicator get_revision_certs(revision_id const & ident,
477 std::vector< revision<cert> > & certs);
478
479 // Used through get_revision_cert_hashes (project.cc)
480 outdated_indicator get_revision_certs(revision_id const & ident,
481 std::vector< hexenc<id> > & hashes);
482
483 void get_revision_cert(hexenc<id> const & hash,
484 revision<cert> & c);
485
486 void get_manifest_certs(manifest_id const & ident,
487 std::vector< manifest<cert> > & certs);
488
489 void get_manifest_certs(cert_name const & name,
490 std::vector< manifest<cert> > & certs);
491
492 //
493 // --== Epochs ==--
494 //
495public:
496 void get_epochs(std::map<branch_name, epoch_data> & epochs);
497
498 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
499
500 bool epoch_exists(epoch_id const & eid);
501
502 void set_epoch(branch_name const & branch, epoch_data const & epo);
503
504 void clear_epoch(branch_name const & branch);
505
506 //
507 // --== Database 'vars' ==--
508 //
509public:
510 void get_vars(std::map<var_key, var_value > & vars);
511
512 void get_var(var_key const & key, var_value & value);
513
514 bool var_exists(var_key const & key);
515
516 void set_var(var_key const & key, var_value const & value);
517
518 void clear_var(var_key const & key);
519
520 //
521 // --== Completion ==--
522 //
523public:
524 void complete(std::string const & partial,
525 std::set<revision_id> & completions);
526
527 void complete(std::string const & partial,
528 std::set<file_id> & completions);
529
530 void complete(std::string const & partial,
531 std::set< std::pair<key_id, utf8 > > & completions);
532
533 void complete(selectors::selector_type ty,
534 std::string const & partial,
535 std::vector<std::pair<selectors::selector_type,
536 std::string> > const & limit,
537 std::set<std::string> & completions);
538
539 //
540 // --== The 'db' family of top-level commands ==--
541 //
542public:
543 void initialize();
544 void debug(std::string const & sql, std::ostream & out);
545 void dump(std::ostream &);
546 void load(std::istream &);
547 void info(std::ostream &);
548 void version(std::ostream &);
549 void migrate();
550 void test_migration_step(std::string const &);
551 // for kill_rev_locally:
552 void delete_existing_rev_and_certs(revision_id const & rid);
553 // for kill_branch_certs_locally:
554 void delete_branch_named(cert_value const & branch);
555 // for kill_tag_locally:
556 void delete_tag_named(cert_value const & tag);
557
558 // misc
559private:
560 friend void rcs_put_raw_file_edge(hexenc<id> const & old_id,
561 hexenc<id> const & new_id,
562 delta const & del,
563 database & db);
564public:
565 // branches
566 outdated_indicator get_branches(std::vector<std::string> & names);
567 outdated_indicator get_branches(globish const & glob,
568 std::vector<std::string> & names);
569
570 bool check_integrity();
571
572 void get_uncommon_ancestors(revision_id const & a,
573 revision_id const & b,
574 std::set<revision_id> & a_uncommon_ancs,
575 std::set<revision_id> & b_uncommon_ancs);
576
577 // for changesetify, rosterify
578 void delete_existing_revs_and_certs();
579
580 void delete_existing_manifests();
581
582 // heights
583 void get_rev_height(revision_id const & id,
584 rev_height & height);
585
586 void put_rev_height(revision_id const & id,
587 rev_height const & height);
588
589 bool has_rev_height(rev_height const & height);
590 void delete_existing_heights();
591
592 void put_height_for_revision(revision_id const & new_id,
593 revision_t const & rev);
594
595 // for regenerate_rosters
596 void delete_existing_rosters();
597 void put_roster_for_revision(revision_id const & new_id,
598 revision_t const & rev);
599};
600
601// Parent maps are used in a number of places to keep track of all the
602// parent rosters of a given revision.
603typedef std::map<revision_id, database::cached_roster>
604parent_map;
605
606typedef parent_map::value_type
607parent_entry;
608
609inline revision_id const & parent_id(parent_entry const & p)
610{
611 return p.first;
612}
613
614inline revision_id const & parent_id(parent_map::const_iterator i)
615{
616 return i->first;
617}
618
619inline database::cached_roster const &
620parent_cached_roster(parent_entry const & p)
621{
622 return p.second;
623}
624
625inline database::cached_roster const &
626parent_cached_roster(parent_map::const_iterator i)
627{
628 return i->second;
629}
630
631inline roster_t const & parent_roster(parent_entry const & p)
632{
633 return *(p.second.first);
634}
635
636inline roster_t const & parent_roster(parent_map::const_iterator i)
637{
638 return *(i->second.first);
639}
640
641inline marking_map const & parent_marking(parent_entry const & p)
642{
643 return *(p.second.second);
644}
645
646inline marking_map const & parent_marking(parent_map::const_iterator i)
647{
648 return *(i->second.second);
649}
650
651// Transaction guards nest. Acquire one in any scope you'd like
652// transaction-protected, and it'll make sure the db aborts a transaction
653// if there's any exception before you call commit().
654//
655// By default, transaction_guard locks the database exclusively. If the
656// transaction is intended to be read-only, construct the guard with
657// exclusive=false. In this case, if a database update is attempted and
658// another process is accessing the database an exception will be thrown -
659// uglier and more confusing for the user - however no data inconsistency
660// should result.
661//
662// An exception is thrown if an exclusive transaction_guard is created
663// while a non-exclusive transaction_guard exists.
664//
665// Transaction guards also support splitting long transactions up into
666// checkpoints. Any time you feel the database is in an
667// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
668// given number of bytes. When the number of bytes and number of
669// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
670// is committed and reopened. Any time you feel the database has reached a
671// point where want to ensure a transaction commit, without destructing the
672// object, you can call do_checkpoint().
673//
674// This does *not* free you from having to call .commit() on the guard when
675// it "completes" its lifecycle. Here's a way to think of checkpointing: a
676// normal transaction guard is associated with a program-control
677// scope. Sometimes (notably in netsync) it is not convenient to create a
678// scope which exactly matches the size of work-unit you want to commit (a
679// bunch of packets, or a session-close, whichever comes first) so
680// checkpointing allows you to use a long-lived transaction guard and mark
681// off the moments where commits are desired, without destructing the
682// guard. The guard still performs an error-management task in case of an
683// exception, so you still have to clean it before destruction using
684// .commit().
685//
686// Checkpointing also does not override the transaction guard nesting: if
687// there's an enclosing transaction_guard, your checkpointing calls have no
688// affect.
689//
690// The purpose of checkpointing is to provide an alternative to "many short
691// transactions" on platforms (OSX in particular) where the overhead of
692// full commits at high frequency is too high. The solution for these
693// platforms is to run inside a longer-lived transaction (session-length),
694// and checkpoint at higher granularity (every megabyte or so).
695
696class transaction_guard
697{
698 bool committed;
699 database & db;
700 bool exclusive;
701 size_t const checkpoint_batch_size;
702 size_t const checkpoint_batch_bytes;
703 size_t checkpointed_calls;
704 size_t checkpointed_bytes;
705public:
706 transaction_guard(database & d, bool exclusive=true,
707 size_t checkpoint_batch_size=1000,
708 size_t checkpoint_batch_bytes=0xfffff);
709 ~transaction_guard();
710 void do_checkpoint();
711 void maybe_checkpoint(size_t nbytes);
712 void commit();
713};
714
715// Local Variables:
716// mode: C++
717// fill-column: 76
718// c-file-style: "gnu"
719// indent-tabs-mode: nil
720// End:
721// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
722
723#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status