monotone

monotone Mtn Source Tree

Root/src/database.hh

1// Copyright (C) 2010 Stephen Leake <stephen_leake@stephe-leake.org>
2// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
3//
4// This program is made available under the GNU GPL version 2.0 or
5// greater. See the accompanying file COPYING for details.
6//
7// This program is distributed WITHOUT ANY WARRANTY; without even the
8// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9// PURPOSE.
10
11#ifndef __DATABASE_HH__
12#define __DATABASE_HH__
13
14#include "vector.hh"
15#include <set>
16#include <boost/function.hpp>
17#include <boost/shared_ptr.hpp>
18
19#include "rev_types.hh"
20#include "cert.hh"
21#include "options.hh"
22
// NOTE(review): a using-declaration at header scope makes the unqualified
// name 'vector' visible in every file that includes this header.  Members of
// class database further down are declared with unqualified
// vector<system_path>, so it cannot simply be dropped.
23using std::vector;
24
// Forward declarations.  Within this header these types are named at most
// in function declarations (by reference, or as a declared return type), so
// their full definitions are not required here.
25class app_state;
26class lua_hooks;
27struct date_t;
28struct globish;
29class key_store;
30class outdated_indicator;
31class rev_height;
32class lazy_rng;
33class project_t;
34
35class migration_status;
36
// Key type for the database 'vars' accessors (get_var, set_var, ...): a
// (domain, name) pair.
37typedef std::pair<var_domain, var_name> var_key;
// Result type of database::check_signature and database::check_cert.
// NOTE(review): the exact conditions producing each enumerator are decided
// by the implementation, which is not visible in this header.
38typedef enum {cert_ok, cert_bad, cert_unknown} cert_status;
39
40// this file defines a public, typed interface to the database.
41// the database class encapsulates all knowledge about sqlite,
42// the schema, and all SQL statements used to access the schema.
43//
44// one thing which is rather important to note is that this file
45// deals with two sorts of version relationships. the versions
46// stored in the database are all *backwards* from those the program
47// sees. so for example if you have two versions of a file
48//
49// file.1, file.2
50//
51// where file.2 was a modification of file.1, then as far as the rest of
52// the application is concerned -- and the ancestry graph -- file.1 is the
53// "old" version and file.2 is the "new" version. note the use of terms
54// which describe time, and the sequence of edits a user makes to a
55// file. those are ancestry terms. when the application composes a
56// patchset, for example, it'll contain the diff delta(file.1, file.2)
57//
58// from the database's perspective, however, file.1 is the derived version,
59// and file.2 is the base version. the base version is stored in the
60// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
61// the "file_deltas" table, under the id of file.1, with the id of file.2
62// listed as its base. note the use of the terms which describe
63// reconstruction; those are storage-system terms.
64//
65// the interface *to* the database, and the ancestry version graphs, use
66// the old / new metaphor of ancestry, but within the database (including
67// the private helper methods, and the storage version graphs) the
68// base/derived storage metaphor is used. the only real way to tell which
69// is which is to look at the parameter names and code. I might try to
70// express this in the type system some day, but not presently.
71//
72// the key phrase to keep repeating when working on this code is:
73//
74// "base files are new, derived files are old"
75//
76// it makes the code confusing, I know. this is possibly the worst part of
77// the program. I don't know if there's any way to make it clearer.
78
// The implementation class is only declared here; class database refers to
// it through a boost::shared_ptr.
79class database_impl;
80struct key_identity_info;
81
// Maps a system_path to a shared database_impl.  class database keeps one
// static instance of this type (its 'dbcache' member).
// NOTE(review): <map> is not included directly by this header; presumably it
// arrives through one of the project headers included above -- confirm.
82typedef std::map<system_path, boost::shared_ptr<database_impl> > database_cache;
83
// Public, typed front end to the monotone database.  Only declarations live
// here; the state is reached through 'imp' (a shared database_impl, which
// this header declares but does not define).  Member functions are grouped
// into the "--== ... ==--" sections below.
84class database
85{
86 //
87 // --== Opening the database and schema checking ==--
88 //
89public:
90 // database options
 // NOTE(review): the meaning of 'maybe_unspecified' is not visible in this
 // header -- presumably it tolerates no database having been specified;
 // confirm against the implementation.
91 typedef enum { none, maybe_unspecified } dboptions;
92
 // Two ways to construct: from an app_state, or from explicit options plus
 // lua hooks.  dbopts defaults to 'none' in both.
93 explicit database(app_state & app, dboptions dbopts = none);
94 database(options const & o, lua_hooks & l, dboptions dbopts = none);
95 ~database();
96
97 system_path get_filename();
98 bool is_dbfile(any_path const & file);
99 bool database_specified();
100 void check_is_not_rosterified();
101 void create_if_not_exists();
102
103 void ensure_open();
104 void ensure_open_for_format_changes();
105 void ensure_open_for_cache_reset();
106
107 // this is about resetting the database_impl cache
108 static void reset_cache();
109
110private:
111 void ensure_open_for_maintenance();
112 void init();
113
114 //
115 // --== Transactions ==--
116 //
117private:
 // No transaction members are declared in this class itself; the guard
 // class is granted access to the private members instead.
 // NOTE(review): presumably it drives transactions through 'imp' -- confirm.
118 friend class conditional_transaction_guard;
119
120 //
121 // --== Reading/writing delta-compressed objects ==--
122 //
123public:
124 bool file_version_exists(file_id const & ident);
125 bool file_size_exists(file_id const & ident);
126 bool revision_exists(revision_id const & ident);
127 bool roster_link_exists_for_revision(revision_id const & ident);
128 bool roster_exists_for_revision(revision_id const & ident);
129
130
131 // get plain version if it exists, or reconstruct version
132 // from deltas (if they exist)
133 void get_file_version(file_id const & ident,
134 file_data & dat);
135
136 // gets the (cached) size of the file if it exists
137 void get_file_size(file_id const & ident,
138 file_size & size);
139
140 // gets a map of all file sizes of this particular roster
141 void get_file_sizes(roster_t const & roster,
142 std::map<file_id, file_size> & file_sizes);
143
144 // put file w/o predecessor into db
145 void put_file(file_id const & new_id,
146 file_data const & dat);
147
148 // store new version and update old version to be a delta
149 void put_file_version(file_id const & old_id,
150 file_id const & new_id,
151 file_delta const & del);
152
153 void get_arbitrary_file_delta(file_id const & src_id,
154 file_id const & dst_id,
155 file_delta & del);
156
157 // get plain version if it exists, or reconstruct version
158 // from deltas (if they exist).
159 void get_manifest_version(manifest_id const & ident,
160 manifest_data & dat);
161
162private:
 // The 'table' argument of the next two helpers is passed as a string.
 // NOTE(review): presumably a database table name -- confirm.
163 bool file_or_manifest_base_exists(file_id const & ident,
164 std::string const & table);
165 bool delta_exists(id const & ident,
166 std::string const & table);
167 void put_file_delta(file_id const & ident,
168 file_id const & base,
169 file_delta const & del);
170
 // Free function granted access to the private members of this class; it
 // is only declared here, not defined.
171 friend void rcs_put_raw_file_edge(database & db,
172 file_id const & old_id,
173 file_id const & new_id,
174 delta const & del);
175
176
177 //
178 // --== The ancestry graph ==--
179 //
180public:
181 void get_forward_ancestry(rev_ancestry_map & graph);
182 void get_reverse_ancestry(rev_ancestry_map & graph);
183
184 void get_revision_parents(revision_id const & ident,
185 std::set<revision_id> & parents);
186
187 void get_revision_children(revision_id const & ident,
188 std::set<revision_id> & children);
189
190 void get_leaves(std::set<revision_id> & leaves);
191
192 void get_revision_manifest(revision_id const & cid,
193 manifest_id & mid);
194
195 void get_common_ancestors(std::set<revision_id> const & revs,
196 std::set<revision_id> & common_ancestors);
197
198 bool is_a_ancestor_of_b(revision_id const & ancestor,
199 revision_id const & child);
200
201 void get_revision_ids(std::set<revision_id> & ids);
202 // this is exposed for 'db check':
203 void get_file_ids(std::set<file_id> & ids);
204
205 //
206 // --== Revision reading/writing ==--
207 //
208public:
 // get_revision and put_revision each come in two forms: one taking a
 // revision_t and one taking a revision_data.
 // NOTE(review): what the bool returned by put_revision signifies is not
 // stated in this header -- confirm against the implementation.
209 void get_revision(revision_id const & ident,
210 revision_t & cs);
211
212 void get_revision(revision_id const & ident,
213 revision_data & dat);
214
215 bool put_revision(revision_id const & new_id,
216 revision_t const & rev);
217
218 bool put_revision(revision_id const & new_id,
219 revision_data const & dat);
220
221private:
222 void deltify_revision(revision_id const & rid);
223
224 //
225 // --== Rosters ==--
226 //
227public:
228 node_id next_node_id();
229
 // Three get_roster overloads, differing in what they fill in: the roster
 // alone, the roster plus its marking map, or a cached_roster.
230 void get_roster(revision_id const & rid,
231 roster_t & roster);
232
233 void get_roster(revision_id const & rid,
234 roster_t & roster,
235 marking_map & marks);
236
237 void get_roster(revision_id const & rid,
238 cached_roster & cr);
239
240 // these are exposed for the use of database_check.cc
241 bool roster_version_exists(revision_id const & ident);
242 void get_roster_ids(std::set<revision_id> & ids);
243
244 // using roster deltas
245 void get_markings(revision_id const & id,
246 node_id const & nid,
247 const_marking_t & markings);
248
249 void get_file_content(revision_id const & id,
250 node_id const & nid,
251 file_id & content);
252
253private:
254 void get_roster_version(revision_id const & ros_id,
255 cached_roster & cr);
256
257 void put_roster(revision_id const & rev_id,
258 revision_t const & rev,
259 roster_t_cp const & roster,
260 marking_map_cp const & marking);
261
262 //
263 // --== Keys ==--
264 //
265public:
266 void get_key_ids(std::vector<key_id> & pubkeys);
267
268 void get_public_keys(std::vector<key_name> & pubkeys);
269
 // Existence can be queried either by key_id or by key_name.
270 bool public_key_exists(key_id const & hash);
271 bool public_key_exists(key_name const & ident);
272
273 void get_pubkey(key_id const & hash,
274 key_name & ident,
275 rsa_pub_key & pub);
276
277 void get_key(key_id const & ident, rsa_pub_key & pub);
278 bool put_key(key_name const & ident, rsa_pub_key const & pub);
279
280 void delete_public_key(key_id const & pub_id);
281
282 // Crypto operations
283
284 void encrypt_rsa(key_id const & pub_id,
285 std::string const & plaintext,
286 rsa_oaep_sha_data & ciphertext);
287
 // Both checks report their outcome as a cert_status
 // (cert_ok / cert_bad / cert_unknown).
288 cert_status check_signature(key_id const & id,
289 std::string const & alleged_text,
290 rsa_sha1_signature const & signature);
291 cert_status check_cert(cert const & t);
292
293 //
294 // --== Certs ==--
295 //
296 // note: this section is ridiculous. please do something about it.
297public:
298 bool revision_cert_exists(cert const & cert);
299 bool revision_cert_exists(revision_id const & hash);
300
301 bool put_revision_cert(cert const & cert);
302 void record_as_branch_leaf(cert_value const & branch, revision_id const & rev);
303
304 // this variant has to be rather coarse and fast, for netsync's use
305 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<revision_id,
306 std::pair<revision_id, key_id> > > & idx);
307
 // The get_revision_certs overloads below differ in which of revision id,
 // cert name and cert value they take, and in the element type of the
 // output container.  Each returns an outdated_indicator.
308 // Only used by database_check.cc
309 outdated_indicator get_revision_certs(std::vector<cert> & certs);
310
311 outdated_indicator get_revision_certs(cert_name const & name,
312 std::vector<cert> & certs);
313
314 outdated_indicator get_revision_certs(revision_id const & ident,
315 cert_name const & name,
316 std::vector<cert> & certs);
317
318 // Only used by get_branch_certs (project.cc)
319 outdated_indicator get_revision_certs(cert_name const & name,
320 cert_value const & val,
321 std::vector<std::pair<id, cert> > & certs);
322
323 // Only used by revision_is_in_branch (project.cc)
324 outdated_indicator get_revision_certs(revision_id const & ident,
325 cert_name const & name,
326 cert_value const & value,
327 std::vector<cert> & certs);
328
329 // Only used by get_branch_heads (project.cc)
330 outdated_indicator get_revisions_with_cert(cert_name const & name,
331 cert_value const & value,
332 std::set<revision_id> & revisions);
333
334 // Used by get_branch_heads (project.cc)
335 // Will also be needed by daggy-refinement, if/when implemented
336 outdated_indicator get_branch_leaves(cert_value const & value,
337 std::set<revision_id> & revisions);
338
339 // used by check_db, regenerate_caches
340 void compute_branch_leaves(cert_value const & branch_name, std::set<revision_id> & revs);
341 void recalc_branch_leaves(cert_value const & branch_name);
342 void delete_existing_branch_leaves();
343
344 // Used through project.cc
345 outdated_indicator get_revision_certs(revision_id const & ident,
346 std::vector<cert> & certs);
347
348 // Used through get_revision_cert_hashes (project.cc)
349 outdated_indicator get_revision_certs(revision_id const & ident,
350 std::vector<id> & hashes);
351
352 void get_revision_cert(id const & hash, cert & c);
353
 // Callable type accepted by the second erase_bogus_certs overload: it
 // receives a set of key ids, an id, a cert name and a cert value, and
 // returns bool.
354 typedef boost::function<bool(std::set<key_id> const &,
355 id const &,
356 cert_name const &,
357 cert_value const &)> cert_trust_checker;
358 // this takes a project_t so it can translate key names for the trust hook
359 void erase_bogus_certs(project_t const & project, std::vector<cert> & certs);
360 // permit alternative trust functions
361 void erase_bogus_certs(std::vector<cert> & certs,
362 cert_trust_checker const & checker);
363
364 //
365 // --== Epochs ==--
366 //
367public:
368 void get_epochs(std::map<branch_name, epoch_data> & epochs);
369
370 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
371
372 bool epoch_exists(epoch_id const & eid);
373
374 void set_epoch(branch_name const & branch, epoch_data const & epo);
375
376 void clear_epoch(branch_name const & branch);
377
378 //
379 // --== Database 'vars' ==--
380 //
381public:
 // Vars are addressed by var_key, the (var_domain, var_name) pair typedef'd
 // near the top of this header.
382 void get_vars(std::map<var_key, var_value > & vars);
383
384 void get_var(var_key const & key, var_value & value);
385
386 bool var_exists(var_key const & key);
387
388 void set_var(var_key const & key, var_value const & value);
389
390 void clear_var(var_key const & key);
391
392 void register_workspace(system_path const & workspace);
393
394 void unregister_workspace(system_path const & workspace);
395
 // The unqualified 'vector' in the next two declarations relies on the
 // 'using std::vector;' declaration near the top of this header.
396 void get_registered_workspaces(vector<system_path> & workspaces);
397
398 void set_registered_workspaces(vector<system_path> const & workspaces);
399
400 //
401 // --== Completion ==--
402 //
403public:
404 void prefix_matching_constraint(std::string const & colname,
405 std::string const & prefix,
406 std::string & constraint);
407
 // complete() is overloaded on the element type of the output set:
 // revision ids, file ids, or (key_id, utf8) pairs.
408 void complete(std::string const & partial,
409 std::set<revision_id> & completions);
410
411 void complete(std::string const & partial,
412 std::set<file_id> & completions);
413
414 void complete(std::string const & partial,
415 std::set< std::pair<key_id, utf8 > > & completions);
416
417 //
418 // --== Revision selectors ==--
419 //
420public:
 // Every selector hands its result back through the non-const
 // 'completions' set of revision ids.
421 void select_parent(std::string const & partial,
422 std::set<revision_id> & completions);
423 void select_cert(std::string const & certname,
424 std::set<revision_id> & completions);
425 void select_cert(std::string const & certname, std::string const & certvalue,
426 std::set<revision_id> & completions);
427 void select_author_tag_or_branch(std::string const & partial,
428 std::set<revision_id> & completions);
429 void select_date(std::string const & date, std::string const & comparison,
430 std::set<revision_id> & completions);
431 void select_key(key_id const & id, std::set<revision_id> & completions);
432
433 //
434 // --== The 'db' family of top-level commands ==--
435 //
436public:
437 void initialize();
438 void debug(std::string const & sql, std::ostream & out);
439 void dump(std::ostream &);
440 void load(std::istream &);
441 void info(std::ostream &, bool analyze);
442 void version(std::ostream &);
443 void migrate(key_store &, migration_status &);
444 void test_migration_step(key_store &, std::string const &);
445 void fix_bad_certs(bool drop_not_fixable);
446 // for kill_rev_locally:
447 void delete_existing_rev_and_certs(revision_id const & rid);
448 // for kill_certs_locally:
449 void delete_certs_locally(revision_id const & rev,
450 cert_name const & name);
451 void delete_certs_locally(revision_id const & rev,
452 cert_name const & name,
453 cert_value const & value);
454
455public:
456 // branches
457 outdated_indicator get_branches(std::vector<std::string> & names);
458 outdated_indicator get_branches(globish const & glob,
459 std::vector<std::string> & names);
460
461 bool check_integrity();
462
463 void get_uncommon_ancestors(revision_id const & a,
464 revision_id const & b,
465 std::set<revision_id> & a_uncommon_ancs,
466 std::set<revision_id> & b_uncommon_ancs);
467
468 // for changesetify, rosterify
469 void delete_existing_revs_and_certs();
470 void delete_existing_manifests();
471
472 void get_manifest_certs(manifest_id const & id, std::vector<cert> & certs);
473 void get_manifest_certs(cert_name const & name, std::vector<cert> & certs);
474 void get_revision_certs_with_keynames(revision_id const & id,
475 std::vector<cert> & certs);
476
477 // heights
478 void get_rev_height(revision_id const & id,
479 rev_height & height);
480
481 void put_rev_height(revision_id const & id,
482 rev_height const & height);
483
484 bool has_rev_height(rev_height const & height);
485 void delete_existing_heights();
486
487 void put_height_for_revision(revision_id const & new_id,
488 revision_t const & rev);
489
490 // for regenerate_rosters
491 void delete_existing_rosters();
492 void put_roster_for_revision(revision_id const & new_id,
493 revision_t const & rev);
494
495 // for regenerate_rosters
496 void delete_existing_file_sizes();
497 void put_file_sizes_for_revision(revision_t const & rev);
498
499private:
 // One cache for the whole class (static): database_cache maps a
 // system_path to a shared database_impl.
 // NOTE(review): presumably this lets database objects opened on the same
 // path share one database_impl -- confirm against the implementation.
500 static database_cache dbcache;
501
 // Shared pointer to the implementation object (database_impl is only
 // declared, not defined, in this header).
502 boost::shared_ptr<database_impl> imp;
 // Held by value.
503 options opts;
 // Held by reference, so the lua_hooks object must outlive this database.
504 lua_hooks & lua;
505 dboptions dbopts;
506};
507
508// not a member function, defined in database_check.cc
509void check_db(database & db);
510
511// Transaction guards nest. Acquire one in any scope you'd like
512// transaction-protected, and it'll make sure the db aborts a transaction
513// if there's any exception before you call commit().
514//
515// By default, transaction_guard locks the database exclusively. If the
516// transaction is intended to be read-only, construct the guard with
517// exclusive=false. In this case, if a database update is attempted and
518// another process is accessing the database an exception will be thrown -
519// uglier and more confusing for the user - however no data inconsistency
520// should result.
521//
522// An exception is thrown if an exclusive transaction_guard is created
523// while a non-exclusive transaction_guard exists.
524//
525// Transaction guards also support splitting long transactions up into
526// checkpoints. Any time you feel the database is in an
527// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
528// given number of bytes. When the number of bytes and number of
529// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
530// is committed and reopened. Any time you feel the database has reached a
531// point where you want to ensure a transaction commit, without destructing the
532// object, you can call do_checkpoint().
533//
534// This does *not* free you from having to call .commit() on the guard when
535// it "completes" its lifecycle. Here's a way to think of checkpointing: a
536// normal transaction guard is associated with a program-control
537// scope. Sometimes (notably in netsync) it is not convenient to create a
538// scope which exactly matches the size of work-unit you want to commit (a
539// bunch of packets, or a session-close, whichever comes first) so
540// checkpointing allows you to use a long-lived transaction guard and mark
541// off the moments where commits are desired, without destructing the
542// guard. The guard still performs an error-management task in case of an
543// exception, so you still have to clean it before destruction using
544// .commit().
545//
546// Checkpointing also does not override the transaction guard nesting: if
547// there's an enclosing transaction_guard, your checkpointing calls have no
548// effect.
549//
550// The purpose of checkpointing is to provide an alternative to "many short
551// transactions" on platforms (OSX in particular) where the overhead of
552// full commits at high frequency is too high. The solution for these
553// platforms is to run inside a longer-lived transaction (session-length),
554// and checkpoint at higher granularity (every megabyte or so).
555//
556// A conditional transaction guard is just like a transaction guard,
557// except that it doesn't begin the transaction until you call acquire().
558// If you don't call acquire(), you must not call commit(), do_checkpoint(),
559// or maybe_checkpoint() either.
560//
561// Implementation note: Making transaction_guard inherit from
562// conditional_transaction_guard means we can reuse all the latter's methods
563// and just call acquire() in transaction_guard's constructor. If we did it
564// the other way around they would wind up being totally unrelated classes.
565
566class conditional_transaction_guard
567{
568 database & db;
569 size_t const checkpoint_batch_size;
570 size_t const checkpoint_batch_bytes;
571 size_t checkpointed_calls;
572 size_t checkpointed_bytes;
573 bool committed;
574 bool acquired;
575 bool const exclusive;
576public:
577 conditional_transaction_guard(database & db, bool exclusive=true,
578 size_t checkpoint_batch_size=1000,
579 size_t checkpoint_batch_bytes=0xfffff)
580 : db(db),
581 checkpoint_batch_size(checkpoint_batch_size),
582 checkpoint_batch_bytes(checkpoint_batch_bytes),
583 checkpointed_calls(0),
584 checkpointed_bytes(0),
585 committed(false), acquired(false), exclusive(exclusive)
586 {}
587
588 ~conditional_transaction_guard();
589 void acquire();
590 void commit();
591 void do_checkpoint();
592 void maybe_checkpoint(size_t nbytes);
593};
594
595class transaction_guard : public conditional_transaction_guard
596{
597public:
598 transaction_guard(database & d, bool exclusive=true,
599 size_t checkpoint_batch_size=1000,
600 size_t checkpoint_batch_bytes=0xfffff)
601 : conditional_transaction_guard(d, exclusive, checkpoint_batch_size,
602 checkpoint_batch_bytes)
603 {
604 acquire();
605 }
606};
607
608class database_path_helper
609{
610 lua_hooks & lua;
611public:
612 database_path_helper(lua_hooks & l) : lua(l) {}
613
614 void get_database_path(options const & opts, system_path & path,
615 database::dboptions dbopts = database::none);
616
617 void maybe_set_default_alias(options & opts);
618
619private:
620 void validate_and_clean_alias(std::string const & alias, path_component & pc);
621};
622
623#endif // __DATABASE_HH__
624
625// Local Variables:
626// mode: C++
627// fill-column: 76
628// c-file-style: "gnu"
629// indent-tabs-mode: nil
630// End:
631// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status