monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13struct sqlite3;
14struct sqlite3_stmt;
15struct cert;
16int sqlite3_finalize(sqlite3_stmt *);
17
18#include <vector>
19#include <set>
20#include <map>
21#include <string>
22
23#include "numeric_vocab.hh"
24#include "vocab.hh"
25#include "paths.hh"
26#include "cleanup.hh"
27#include "roster.hh"
28#include "selectors.hh"
29
30// FIXME: would be better not to include this everywhere
31#include "outdated_indicator.hh"
32#include "lru_writeback_cache.hh"
33
34// this file defines a public, typed interface to the database.
35// the database class encapsulates all knowledge about sqlite,
36// the schema, and all SQL statements used to access the schema.
37//
38// one thing which is rather important to note is that this file
39// deals with two sorts of version relationships. the versions
40// stored in the database are all *backwards* from those the program
41// sees. so for example if you have two versions of a file
42//
43// file.1, file.2
44//
45// where file.2 was a modification of file.1, then as far as the rest of
46// the application is concerned -- and the ancestry graph -- file.1 is the
47// "old" version and file.2 is the "new" version. note the use of terms
48// which describe time, and the sequence of edits a user makes to a
49// file. those are ancestry terms. when the application composes a
50// patchset, for example, it'll contain the diff delta(file.1, file.2)
51//
52// from the database's perspective, however, file.1 is the derived version,
53// and file.2 is the base version. the base version is stored in the
54// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
55// the "file_deltas" table, under the id of file.1, with the id of file.2
56// listed as its base. note the use of the terms which describe
57// reconstruction; those are storage-system terms.
58//
59// the interface *to* the database, and the ancestry version graphs, use
60// the old / new metaphor of ancestry, but within the database (including
61// the private helper methods, and the storage version graphs) the
62// base/derived storage metaphor is used. the only real way to tell which
63// is which is to look at the parameter names and code. I might try to
64// express this in the type system some day, but not presently.
65//
66// the key phrase to keep repeating when working on this code is:
67//
68// "base files are new, derived files are old"
69//
70// it makes the code confusing, I know. this is possibly the worst part of
71// the program. I don't know if there's any way to make it clearer.
72
73class transaction_guard;
74class app_state;
75struct revision_t;
76struct query;
77class rev_height;
78
79class database
80{
81 //
82 // --== Opening the database and schema checking ==--
83 //
84private:
85 system_path filename;
86 app_state * __app;
87 struct sqlite3 * __sql;
88
89 enum open_mode { normal_mode = 0,
90 schema_bypass_mode,
91 format_bypass_mode };
92
93 void install_functions(app_state * app);
94 struct sqlite3 * sql(enum open_mode mode = normal_mode);
95
96 void check_filename();
97 void check_db_exists();
98 void check_db_nonexistent();
99 void open();
100 void close();
101 void check_format();
102
103public:
104 database(system_path const & file);
105 ~database();
106
107 void set_app(app_state * app);
108
109 void set_filename(system_path const & file);
110 system_path get_filename();
111 bool is_dbfile(any_path const & file);
112 void ensure_open();
113 void ensure_open_for_format_changes();
114private:
115 void ensure_open_for_maintenance();
116public:
117 void check_is_not_rosterified();
118 bool database_specified();
119
120 //
121 // --== Basic SQL interface and statement caching ==--
122 //
123private:
124 struct statement {
125 statement() : count(0), stmt(0, sqlite3_finalize) {}
126 int count;
127 cleanup_ptr<sqlite3_stmt*, int> stmt;
128 };
129 std::map<std::string, statement> statement_cache;
130
131 typedef std::vector< std::vector<std::string> > results;
132 void fetch(results & res,
133 int const want_cols, int const want_rows,
134 query const & q);
135 void execute(query const & q);
136
137 bool table_has_entry(std::string const & key, std::string const & column,
138 std::string const & table);
139
140 //
141 // --== Generic database metadata gathering ==--
142 //
143private:
144 std::string count(std::string const & table);
145 std::string space(std::string const & table,
146 std::string const & concatenated_columns,
147 u64 & total);
148 unsigned int page_size();
149 unsigned int cache_size();
150
151 //
152 // --== Transactions ==--
153 //
154private:
155 int transaction_level;
156 bool transaction_exclusive;
157 void begin_transaction(bool exclusive);
158 void commit_transaction();
159 void rollback_transaction();
160 friend class transaction_guard;
161
162 //
163 // --== Write-buffering -- tied into transaction ==--
164 // --== machinery and delta compression machinery ==--
165 //
166public:
167 typedef boost::shared_ptr<roster_t const> roster_t_cp;
168 typedef boost::shared_ptr<marking_map const> marking_map_cp;
169 typedef std::pair<roster_t_cp, marking_map_cp> cached_roster;
170private:
171 struct roster_size_estimator
172 {
173 unsigned long operator() (cached_roster const &);
174 };
175 struct roster_writeback_manager
176 {
177 database & db;
178 roster_writeback_manager(database & db) : db(db) {}
179 void writeout(revision_id const &, cached_roster const &);
180 };
181 LRUWritebackCache<revision_id, cached_roster,
182 roster_size_estimator, roster_writeback_manager>
183 roster_cache;
184
185 size_t size_delayed_file(file_id const & id, file_data const & dat);
186 bool have_delayed_file(file_id const & id);
187 void load_delayed_file(file_id const & id, file_data & dat);
188 void cancel_delayed_file(file_id const & id);
189 void schedule_delayed_file(file_id const & id, file_data const & dat);
190
191 std::map<file_id, file_data> delayed_files;
192 size_t delayed_writes_size;
193
194 void flush_delayed_writes();
195 void clear_delayed_writes();
196 void write_delayed_file(file_id const & new_id,
197 file_data const & dat);
198
199 void write_delayed_roster(revision_id const & new_id,
200 roster_t const & roster,
201 marking_map const & marking);
202
203 //
204 // --== Reading/writing delta-compressed objects ==--
205 //
206private:
207 // "do we have any entry for 'ident' that is a base version"
208 bool file_or_manifest_base_exists(hexenc<id> const & ident,
209 std::string const & table);
210 bool roster_base_stored(revision_id const & ident);
211 bool roster_base_available(revision_id const & ident);
212
213 // "do we have any entry for 'ident' that is a delta"
214 bool delta_exists(std::string const & ident,
215 std::string const & table);
216
217 void get_file_or_manifest_base_unchecked(hexenc<id> const & new_id,
218 data & dat,
219 std::string const & table);
220 void get_file_or_manifest_delta_unchecked(hexenc<id> const & ident,
221 hexenc<id> const & base,
222 delta & del,
223 std::string const & table);
224 void get_roster_base(std::string const & ident,
225 roster_t & roster, marking_map & marking);
226 void get_roster_delta(std::string const & ident,
227 std::string const & base,
228 roster_delta & del);
229 friend struct file_and_manifest_reconstruction_graph;
230 friend struct roster_reconstruction_graph;
231 void get_version(hexenc<id> const & ident,
232 data & dat,
233 std::string const & data_table,
234 std::string const & delta_table);
235
236 void drop(std::string const & base,
237 std::string const & table);
238 void put_file_delta(file_id const & ident,
239 file_id const & base,
240 file_delta const & del);
241private:
242 void put_roster_delta(revision_id const & ident,
243 revision_id const & base,
244 roster_delta const & del);
245 void put_version(hexenc<id> const & old_id,
246 hexenc<id> const & new_id,
247 delta const & del,
248 std::string const & data_table,
249 std::string const & delta_table);
250
251 void put_roster(revision_id const & rev_id,
252 roster_t_cp const & roster,
253 marking_map_cp const & marking);
254
255public:
256
257 bool file_version_exists(file_id const & ident);
258 bool revision_exists(revision_id const & ident);
259 bool roster_link_exists_for_revision(revision_id const & ident);
260 bool roster_exists_for_revision(revision_id const & ident);
261
262
263 // get plain version if it exists, or reconstruct version
264 // from deltas (if they exist)
265 void get_file_version(file_id const & ident,
266 file_data & dat);
267
268 // put file w/o predecessor into db
269 void put_file(file_id const & new_id,
270 file_data const & dat);
271
272 // store new version and update old version to be a delta
273 void put_file_version(file_id const & old_id,
274 file_id const & new_id,
275 file_delta const & del);
276
277 void get_arbitrary_file_delta(file_id const & src_id,
278 file_id const & dst_id,
279 file_delta & del);
280
281 // get plain version if it exists, or reconstruct version
282 // from deltas (if they exist).
283 void get_manifest_version(manifest_id const & ident,
284 manifest_data & dat);
285
286 //
287 // --== The ancestry graph ==--
288 //
289public:
290 void get_revision_ancestry(std::multimap<revision_id, revision_id> & graph);
291
292 void get_revision_parents(revision_id const & ident,
293 std::set<revision_id> & parents);
294
295 void get_revision_children(revision_id const & ident,
296 std::set<revision_id> & children);
297
298 void get_revision_manifest(revision_id const & cid,
299 manifest_id & mid);
300private:
301 // helper
302 void get_ids(std::string const & table, std::set< hexenc<id> > & ids);
303public:
304 void get_revision_ids(std::set<revision_id> & ids);
305 // this is exposed for 'db check':
306 void get_file_ids(std::set<file_id> & ids);
307
308 //
309 // --== Revision reading/writing ==--
310 //
311private:
312 void deltify_revision(revision_id const & rid);
313public:
314 void get_revision(revision_id const & ident,
315 revision_t & cs);
316
317 void get_revision(revision_id const & ident,
318 revision_data & dat);
319
320 void put_revision(revision_id const & new_id,
321 revision_t const & rev);
322
323 void put_revision(revision_id const & new_id,
324 revision_data const & dat);
325
326 //
327 // --== Rosters ==--
328 //
329private:
330 u64 next_id_from_table(std::string const & table);
331 void get_roster_version(revision_id const & ros_id, cached_roster & cr);
332public:
333 node_id next_node_id();
334
335 void get_roster(revision_id const & rid,
336 roster_t & roster);
337
338 void get_roster(revision_id const & rid,
339 roster_t & roster,
340 marking_map & marks);
341
342 void get_roster(revision_id const & rid,
343 cached_roster & cr);
344
345 // these are exposed for the use of database_check.cc
346 bool roster_version_exists(revision_id const & ident);
347 void get_roster_ids(std::set<revision_id> & ids);
348
349 // using roster deltas
350 void get_markings(revision_id const & id,
351 node_id const & nid,
352 marking_t & markings);
353
354 void get_file_content(revision_id const & id,
355 node_id const & nid,
356 file_id & content);
357private:
358 struct extractor;
359 struct file_content_extractor;
360 struct markings_extractor;
361 void extract_from_deltas(revision_id const & id, extractor & x);
362
363 //
364 // --== Keys ==--
365 //
366private:
367 void get_keys(std::string const & table, std::vector<rsa_keypair_id> & keys);
368public:
369 void get_key_ids(std::string const & pattern,
370 std::vector<rsa_keypair_id> & pubkeys);
371
372 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
373
374 bool public_key_exists(hexenc<id> const & hash);
375 bool public_key_exists(rsa_keypair_id const & ident);
376
377 void get_pubkey(hexenc<id> const & hash,
378 rsa_keypair_id & ident,
379 base64<rsa_pub_key> & pub_encoded);
380
381 void get_key(rsa_keypair_id const & ident,
382 base64<rsa_pub_key> & pub_encoded);
383
384 void put_key(rsa_keypair_id const & ident,
385 base64<rsa_pub_key> const & pub_encoded);
386
387 void delete_public_key(rsa_keypair_id const & pub_id);
388
389 //
390 // --== Certs ==--
391 //
392 // note: this section is ridiculous. please do something about it.
393private:
394 bool cert_exists(cert const & t,
395 std::string const & table);
396 void put_cert(cert const & t, std::string const & table);
397 void results_to_certs(results const & res,
398 std::vector<cert> & certs);
399
400 void get_certs(std::vector< cert > & certs,
401 std::string const & table);
402
403 void get_certs(hexenc<id> const & ident,
404 std::vector< cert > & certs,
405 std::string const & table);
406
407 void get_certs(cert_name const & name,
408 std::vector< cert > & certs,
409 std::string const & table);
410
411 void get_certs(hexenc<id> const & ident,
412 cert_name const & name,
413 std::vector< cert > & certs,
414 std::string const & table);
415
416 void get_certs(hexenc<id> const & ident,
417 cert_name const & name,
418 base64<cert_value> const & val,
419 std::vector< cert > & certs,
420 std::string const & table);
421
422 void get_certs(cert_name const & name,
423 base64<cert_value> const & val,
424 std::vector<cert> & certs,
425 std::string const & table);
426
427 outdated_indicator_factory cert_stamper;
428public:
429
430 bool revision_cert_exists(revision<cert> const & cert);
431 bool revision_cert_exists(hexenc<id> const & hash);
432
433 void put_revision_cert(revision<cert> const & cert);
434
435 // this variant has to be rather coarse and fast, for netsync's use
436 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<hexenc<id>,
437 std::pair<revision_id, rsa_keypair_id> > > & idx);
438
439 // Only used by database_check.cc
440 outdated_indicator get_revision_certs(std::vector< revision<cert> > & certs);
441
442 outdated_indicator get_revision_certs(cert_name const & name,
443 std::vector< revision<cert> > & certs);
444
445 outdated_indicator get_revision_certs(revision_id const & ident,
446 cert_name const & name,
447 std::vector< revision<cert> > & certs);
448
449 // Only used by get_branch_certs (project.cc)
450 outdated_indicator get_revision_certs(cert_name const & name,
451 base64<cert_value> const & val,
452 std::vector< revision<cert> > & certs);
453
454 // Only used by revision_is_in_branch (project.cc)
455 outdated_indicator get_revision_certs(revision_id const & ident,
456 cert_name const & name,
457 base64<cert_value> const & value,
458 std::vector< revision<cert> > & certs);
459
460 // Only used by get_branch_heads (project.cc)
461 outdated_indicator get_revisions_with_cert(cert_name const & name,
462 base64<cert_value> const & value,
463 std::set<revision_id> & revisions);
464
465 // Used through project.cc, and by
466 // anc_graph::add_node_for_oldstyle_revision (revision.cc)
467 outdated_indicator get_revision_certs(revision_id const & ident,
468 std::vector< revision<cert> > & certs);
469
470 // Used through get_revision_cert_hashes (project.cc)
471 outdated_indicator get_revision_certs(revision_id const & ident,
472 std::vector< hexenc<id> > & hashes);
473
474 void get_revision_cert(hexenc<id> const & hash,
475 revision<cert> & c);
476
477 void get_manifest_certs(manifest_id const & ident,
478 std::vector< manifest<cert> > & certs);
479
480 void get_manifest_certs(cert_name const & name,
481 std::vector< manifest<cert> > & certs);
482
483 //
484 // --== Epochs ==--
485 //
486public:
487 void get_epochs(std::map<branch_name, epoch_data> & epochs);
488
489 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
490
491 bool epoch_exists(epoch_id const & eid);
492
493 void set_epoch(branch_name const & branch, epoch_data const & epo);
494
495 void clear_epoch(branch_name const & branch);
496
497 //
498 // --== Database 'vars' ==--
499 //
500public:
501 void get_vars(std::map<var_key, var_value > & vars);
502
503 void get_var(var_key const & key, var_value & value);
504
505 bool var_exists(var_key const & key);
506
507 void set_var(var_key const & key, var_value const & value);
508
509 void clear_var(var_key const & key);
510
511 //
512 // --== Completion ==--
513 //
514public:
515 void complete(std::string const & partial,
516 std::set<revision_id> & completions);
517
518 void complete(std::string const & partial,
519 std::set<file_id> & completions);
520
521 void complete(std::string const & partial,
522 std::set< std::pair<key_id, utf8 > > & completions);
523
524 void complete(selectors::selector_type ty,
525 std::string const & partial,
526 std::vector<std::pair<selectors::selector_type,
527 std::string> > const & limit,
528 std::set<std::string> & completions);
529
530 //
531 // --== The 'db' family of top-level commands ==--
532 //
533public:
534 void initialize();
535 void debug(std::string const & sql, std::ostream & out);
536 void dump(std::ostream &);
537 void load(std::istream &);
538 void info(std::ostream &);
539 void version(std::ostream &);
540 void migrate();
541 void test_migration_step(std::string const &);
542 // for kill_rev_locally:
543 void delete_existing_rev_and_certs(revision_id const & rid);
544 // for kill_branch_certs_locally:
545 void delete_branch_named(cert_value const & branch);
546 // for kill_tag_locally:
547 void delete_tag_named(cert_value const & tag);
548
549 // misc
550private:
551 friend void rcs_put_raw_file_edge(hexenc<id> const & old_id,
552 hexenc<id> const & new_id,
553 delta const & del,
554 database & db);
555 typedef std::pair<rev_height, revision_id> height_rev_pair;
556 void do_step_ancestor(std::set<height_rev_pair> & this_frontier,
557 std::set<revision_id> & this_seen,
558 std::set<revision_id> const & other_seen,
559 std::set<revision_id> & this_uncommon_ancs);
560
561public:
562 // branches
563 outdated_indicator get_branches(std::vector<std::string> & names);
564 outdated_indicator get_branches(std::string const & glob,
565 std::vector<std::string> & names);
566
567 bool check_integrity();
568
569 void get_uncommon_ancestors(revision_id const & a,
570 revision_id const & b,
571 std::set<revision_id> & a_uncommon_ancs,
572 std::set<revision_id> & b_uncommon_ancs);
573
574 // for changesetify, rosterify
575 void delete_existing_revs_and_certs();
576
577 void delete_existing_manifests();
578
579 // heights
580 void get_rev_height(revision_id const & id,
581 rev_height & height);
582
583 void put_rev_height(revision_id const & id,
584 rev_height const & height);
585
586 bool has_rev_height(rev_height const & height);
587 void delete_existing_heights();
588
589 void put_height_for_revision(revision_id const & new_id,
590 revision_t const & rev);
591
592 // for regenerate_rosters
593 void delete_existing_rosters();
594 void put_roster_for_revision(revision_id const & new_id,
595 revision_t const & rev);
596};
597
598// Parent maps are used in a number of places to keep track of all the
599// parent rosters of a given revision.
600typedef std::map<revision_id, database::cached_roster>
601parent_map;
602
603typedef parent_map::value_type
604parent_entry;
605
606inline revision_id const & parent_id(parent_entry const & p)
607{
608 return p.first;
609}
610
611inline revision_id const & parent_id(parent_map::const_iterator i)
612{
613 return i->first;
614}
615
616inline database::cached_roster const &
617parent_cached_roster(parent_entry const & p)
618{
619 return p.second;
620}
621
622inline database::cached_roster const &
623parent_cached_roster(parent_map::const_iterator i)
624{
625 return i->second;
626}
627
628inline roster_t const & parent_roster(parent_entry const & p)
629{
630 return *(p.second.first);
631}
632
633inline roster_t const & parent_roster(parent_map::const_iterator i)
634{
635 return *(i->second.first);
636}
637
638inline marking_map const & parent_marking(parent_entry const & p)
639{
640 return *(p.second.second);
641}
642
643inline marking_map const & parent_marking(parent_map::const_iterator i)
644{
645 return *(i->second.second);
646}
647
648// Transaction guards nest. Acquire one in any scope you'd like
649// transaction-protected, and it'll make sure the db aborts a transaction
650// if there's any exception before you call commit().
651//
652// By default, transaction_guard locks the database exclusively. If the
653// transaction is intended to be read-only, construct the guard with
654// exclusive=false. In this case, if a database update is attempted and
655// another process is accessing the database an exception will be thrown -
656// uglier and more confusing for the user - however no data inconsistency
657// should result.
658//
659// An exception is thrown if an exclusive transaction_guard is created
660// while a non-exclusive transaction_guard exists.
661//
662// Transaction guards also support splitting long transactions up into
663// checkpoints. Any time you feel the database is in an
664// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
665// given number of bytes. When the number of bytes and number of
666// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
667// is committed and reopened. Any time you feel the database has reached a
668// point where want to ensure a transaction commit, without destructing the
669// object, you can call do_checkpoint().
670//
671// This does *not* free you from having to call .commit() on the guard when
672// it "completes" its lifecycle. Here's a way to think of checkpointing: a
673// normal transaction guard is associated with a program-control
674// scope. Sometimes (notably in netsync) it is not convenient to create a
675// scope which exactly matches the size of work-unit you want to commit (a
676// bunch of packets, or a session-close, whichever comes first) so
677// checkpointing allows you to use a long-lived transaction guard and mark
678// off the moments where commits are desired, without destructing the
679// guard. The guard still performs an error-management task in case of an
680// exception, so you still have to clean it before destruction using
681// .commit().
682//
683// Checkpointing also does not override the transaction guard nesting: if
684// there's an enclosing transaction_guard, your checkpointing calls have no
685// affect.
686//
687// The purpose of checkpointing is to provide an alternative to "many short
688// transactions" on platforms (OSX in particular) where the overhead of
689// full commits at high frequency is too high. The solution for these
690// platforms is to run inside a longer-lived transaction (session-length),
691// and checkpoint at higher granularity (every megabyte or so).
692
693class transaction_guard
694{
695 bool committed;
696 database & db;
697 bool exclusive;
698 size_t const checkpoint_batch_size;
699 size_t const checkpoint_batch_bytes;
700 size_t checkpointed_calls;
701 size_t checkpointed_bytes;
702public:
703 transaction_guard(database & d, bool exclusive=true,
704 size_t checkpoint_batch_size=1000,
705 size_t checkpoint_batch_bytes=0xfffff);
706 ~transaction_guard();
707 void do_checkpoint();
708 void maybe_checkpoint(size_t nbytes);
709 void commit();
710};
711
712// Local Variables:
713// mode: C++
714// fill-column: 76
715// c-file-style: "gnu"
716// indent-tabs-mode: nil
717// End:
718// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
719
720#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status