monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13#include "vector.hh"
14#include <set>
15#include <boost/shared_ptr.hpp>
16#include "botan/rng.h"
17
18#include "rev_types.hh"
19#include "cert.hh"
20
21class app_state;
22class lua_hooks;
23struct date_t;
24struct globish;
25class key_store;
26class outdated_indicator;
27class rev_height;
28
29typedef std::pair<var_domain, var_name> var_key;
30
31// this file defines a public, typed interface to the database.
32// the database class encapsulates all knowledge about sqlite,
33// the schema, and all SQL statements used to access the schema.
34//
35// one thing which is rather important to note is that this file
36// deals with two sorts of version relationships. the versions
37// stored in the database are all *backwards* from those the program
38// sees. so for example if you have two versions of a file
39//
40// file.1, file.2
41//
42// where file.2 was a modification of file.1, then as far as the rest of
43// the application is concerned -- and the ancestry graph -- file.1 is the
44// "old" version and file.2 is the "new" version. note the use of terms
45// which describe time, and the sequence of edits a user makes to a
46// file. those are ancestry terms. when the application composes a
47// patchset, for example, it'll contain the diff delta(file.1, file.2)
48//
49// from the database's perspective, however, file.1 is the derived version,
50// and file.2 is the base version. the base version is stored in the
51// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
52// the "file_deltas" table, under the id of file.1, with the id of file.2
53// listed as its base. note the use of the terms which describe
54// reconstruction; those are storage-system terms.
55//
56// the interface *to* the database, and the ancestry version graphs, use
57// the old / new metaphor of ancestry, but within the database (including
58// the private helper methods, and the storage version graphs) the
59// base/derived storage metaphor is used. the only real way to tell which
60// is which is to look at the parameter names and code. I might try to
61// express this in the type system some day, but not presently.
62//
63// the key phrase to keep repeating when working on this code is:
64//
65// "base files are new, derived files are old"
66//
67// it makes the code confusing, I know. this is possibly the worst part of
68// the program. I don't know if there's any way to make it clearer.
69
70class database_impl;
71
72class database
73{
74 //
75 // --== Opening the database and schema checking ==--
76 //
77public:
78 explicit database(app_state &);
79 ~database();
80
81 system_path get_filename();
82 bool is_dbfile(any_path const & file);
83 bool database_specified();
84 void check_is_not_rosterified();
85
86 void ensure_open();
87 void ensure_open_for_format_changes();
88private:
89 void ensure_open_for_maintenance();
90
91 //
92 // --== Transactions ==--
93 //
94private:
95 friend class conditional_transaction_guard;
96
97 //
98 // --== Reading/writing delta-compressed objects ==--
99 //
100public:
101 bool file_version_exists(file_id const & ident);
102 bool revision_exists(revision_id const & ident);
103 bool roster_link_exists_for_revision(revision_id const & ident);
104 bool roster_exists_for_revision(revision_id const & ident);
105
106
107 // get plain version if it exists, or reconstruct version
108 // from deltas (if they exist)
109 void get_file_version(file_id const & ident,
110 file_data & dat);
111
112 // put file w/o predecessor into db
113 void put_file(file_id const & new_id,
114 file_data const & dat);
115
116 // store new version and update old version to be a delta
117 void put_file_version(file_id const & old_id,
118 file_id const & new_id,
119 file_delta const & del);
120
121 void get_arbitrary_file_delta(file_id const & src_id,
122 file_id const & dst_id,
123 file_delta & del);
124
125 // get plain version if it exists, or reconstruct version
126 // from deltas (if they exist).
127 void get_manifest_version(manifest_id const & ident,
128 manifest_data & dat);
129
130private:
131 bool file_or_manifest_base_exists(file_id const & ident,
132 std::string const & table);
133 bool delta_exists(id const & ident,
134 std::string const & table);
135 void put_file_delta(file_id const & ident,
136 file_id const & base,
137 file_delta const & del);
138
139 friend void rcs_put_raw_file_edge(database & db,
140 file_id const & old_id,
141 file_id const & new_id,
142 delta const & del);
143
144
145 //
146 // --== The ancestry graph ==--
147 //
148public:
149 void get_revision_ancestry(rev_ancestry_map & graph);
150
151 void get_revision_parents(revision_id const & ident,
152 std::set<revision_id> & parents);
153
154 void get_revision_children(revision_id const & ident,
155 std::set<revision_id> & children);
156
157 void get_leaves(std::set<revision_id> & leaves);
158
159 void get_revision_manifest(revision_id const & cid,
160 manifest_id & mid);
161
162 void get_common_ancestors(std::set<revision_id> const & revs,
163 std::set<revision_id> & common_ancestors);
164
165 void get_revision_ids(std::set<revision_id> & ids);
166 // this is exposed for 'db check':
167 void get_file_ids(std::set<file_id> & ids);
168
169 //
170 // --== Revision reading/writing ==--
171 //
172public:
173 void get_revision(revision_id const & ident,
174 revision_t & cs);
175
176 void get_revision(revision_id const & ident,
177 revision_data & dat);
178
179 bool put_revision(revision_id const & new_id,
180 revision_t const & rev);
181
182 bool put_revision(revision_id const & new_id,
183 revision_data const & dat);
184
185private:
186 void deltify_revision(revision_id const & rid);
187
188 //
189 // --== Rosters ==--
190 //
191public:
192 node_id next_node_id();
193
194 void get_roster(revision_id const & rid,
195 roster_t & roster);
196
197 void get_roster(revision_id const & rid,
198 roster_t & roster,
199 marking_map & marks);
200
201 void get_roster(revision_id const & rid,
202 cached_roster & cr);
203
204 // these are exposed for the use of database_check.cc
205 bool roster_version_exists(revision_id const & ident);
206 void get_roster_ids(std::set<revision_id> & ids);
207
208 // using roster deltas
209 void get_markings(revision_id const & id,
210 node_id const & nid,
211 marking_t & markings);
212
213 void get_file_content(revision_id const & id,
214 node_id const & nid,
215 file_id & content);
216
217private:
218 void get_roster_version(revision_id const & ros_id,
219 cached_roster & cr);
220
221 void put_roster(revision_id const & rev_id,
222 roster_t_cp const & roster,
223 marking_map_cp const & marking);
224
225 //
226 // --== Keys ==--
227 //
228public:
229 void get_key_ids(std::vector<rsa_keypair_id> & pubkeys);
230 void get_key_ids(globish const & pattern,
231 std::vector<rsa_keypair_id> & pubkeys);
232
233 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
234
235 bool public_key_exists(id const & hash);
236 bool public_key_exists(rsa_keypair_id const & ident);
237
238 void get_pubkey(id const & hash,
239 rsa_keypair_id & ident,
240 rsa_pub_key & pub);
241
242 void get_key(rsa_keypair_id const & ident, rsa_pub_key & pub);
243 bool put_key(rsa_keypair_id const & ident, rsa_pub_key const & pub);
244
245 void delete_public_key(rsa_keypair_id const & pub_id);
246
247 // Crypto operations
248
249 void encrypt_rsa(rsa_keypair_id const & pub_id,
250 std::string const & plaintext,
251 rsa_oaep_sha_data & ciphertext);
252
253 cert_status check_signature(rsa_keypair_id const & id,
254 std::string const & alleged_text,
255 rsa_sha1_signature const & signature);
256
257 //
258 // --== Certs ==--
259 //
260 // note: this section is ridiculous. please do something about it.
261public:
262 bool revision_cert_exists(revision<cert> const & cert);
263 bool revision_cert_exists(revision_id const & hash);
264
265 bool put_revision_cert(revision<cert> const & cert);
266
267 // this variant has to be rather coarse and fast, for netsync's use
268 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<revision_id,
269 std::pair<revision_id, rsa_keypair_id> > > & idx);
270
271 // Only used by database_check.cc
272 outdated_indicator get_revision_certs(std::vector< revision<cert> > & certs);
273
274 outdated_indicator get_revision_certs(cert_name const & name,
275 std::vector< revision<cert> > & certs);
276
277 outdated_indicator get_revision_certs(revision_id const & ident,
278 cert_name const & name,
279 std::vector< revision<cert> > & certs);
280
281 // Only used by get_branch_certs (project.cc)
282 outdated_indicator get_revision_certs(cert_name const & name,
283 cert_value const & val,
284 std::vector< revision<cert> > & certs);
285
286 // Only used by revision_is_in_branch (project.cc)
287 outdated_indicator get_revision_certs(revision_id const & ident,
288 cert_name const & name,
289 cert_value const & value,
290 std::vector< revision<cert> > & certs);
291
292 // Only used by get_branch_heads (project.cc)
293 outdated_indicator get_revisions_with_cert(cert_name const & name,
294 cert_value const & value,
295 std::set<revision_id> & revisions);
296
297 // Used through project.cc, and by
298 // anc_graph::add_node_for_oldstyle_revision (revision.cc)
299 outdated_indicator get_revision_certs(revision_id const & ident,
300 std::vector< revision<cert> > & certs);
301
302 // Used through get_revision_cert_hashes (project.cc)
303 outdated_indicator get_revision_certs(revision_id const & ident,
304 std::vector<id> & hashes);
305
306 void get_revision_cert(id const & hash,
307 revision<cert> & c);
308
309 void get_manifest_certs(manifest_id const & ident,
310 std::vector< manifest<cert> > & certs);
311
312 void get_manifest_certs(cert_name const & name,
313 std::vector< manifest<cert> > & certs);
314
315 //
316 // --== Epochs ==--
317 //
318public:
319 void get_epochs(std::map<branch_name, epoch_data> & epochs);
320
321 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
322
323 bool epoch_exists(epoch_id const & eid);
324
325 void set_epoch(branch_name const & branch, epoch_data const & epo);
326
327 void clear_epoch(branch_name const & branch);
328
329 //
330 // --== Database 'vars' ==--
331 //
332public:
333 void get_vars(std::map<var_key, var_value > & vars);
334
335 void get_var(var_key const & key, var_value & value);
336
337 bool var_exists(var_key const & key);
338
339 void set_var(var_key const & key, var_value const & value);
340
341 void clear_var(var_key const & key);
342
343 //
344 // --== Completion ==--
345 //
346public:
347 void prefix_matching_constraint(std::string const & colname,
348 std::string const & prefix,
349 std::string & constraint);
350
351 void complete(std::string const & partial,
352 std::set<revision_id> & completions);
353
354 void complete(std::string const & partial,
355 std::set<file_id> & completions);
356
357 void complete(std::string const & partial,
358 std::set< std::pair<key_id, utf8 > > & completions);
359
360 //
361 // --== Revision selectors ==--
362 //
363public:
364 void select_parent(std::string const & partial,
365 std::set<revision_id> & completions);
366 void select_cert(std::string const & certname,
367 std::set<revision_id> & completions);
368 void select_cert(std::string const & certname, std::string const & certvalue,
369 std::set<revision_id> & completions);
370 void select_author_tag_or_branch(std::string const & partial,
371 std::set<revision_id> & completions);
372 void select_date(std::string const & date, std::string const & comparison,
373 std::set<revision_id> & completions);
374
375 //
376 // --== The 'db' family of top-level commands ==--
377 //
378public:
379 void initialize();
380 void debug(std::string const & sql, std::ostream & out);
381 void dump(std::ostream &);
382 void load(std::istream &);
383 void info(std::ostream &);
384 void version(std::ostream &);
385 void migrate(key_store &);
386 void test_migration_step(key_store &, std::string const &);
387 // for kill_rev_locally:
388 void delete_existing_rev_and_certs(revision_id const & rid);
389 // for kill_branch_certs_locally:
390 void delete_branch_named(cert_value const & branch);
391 // for kill_tag_locally:
392 void delete_tag_named(cert_value const & tag);
393
394public:
395 // branches
396 outdated_indicator get_branches(std::vector<std::string> & names);
397 outdated_indicator get_branches(globish const & glob,
398 std::vector<std::string> & names);
399
400 bool check_integrity();
401
402 void get_uncommon_ancestors(revision_id const & a,
403 revision_id const & b,
404 std::set<revision_id> & a_uncommon_ancs,
405 std::set<revision_id> & b_uncommon_ancs);
406
407 // for changesetify, rosterify
408 void delete_existing_revs_and_certs();
409
410 void delete_existing_manifests();
411
412 // heights
413 void get_rev_height(revision_id const & id,
414 rev_height & height);
415
416 void put_rev_height(revision_id const & id,
417 rev_height const & height);
418
419 bool has_rev_height(rev_height const & height);
420 void delete_existing_heights();
421
422 void put_height_for_revision(revision_id const & new_id,
423 revision_t const & rev);
424
425 // for regenerate_rosters
426 void delete_existing_rosters();
427 void put_roster_for_revision(revision_id const & new_id,
428 revision_t const & rev);
429
430 // We make these lua hooks available via the database context;
431 // see comments above their definition for rationale and plans.
432 bool hook_get_manifest_cert_trust(std::set<rsa_keypair_id> const & signers,
433 manifest_id const & id, cert_name const & name, cert_value const & val);
434 bool hook_get_revision_cert_trust(std::set<rsa_keypair_id> const & signers,
435 revision_id const & id, cert_name const & name, cert_value const & val);
436
437private:
438 boost::shared_ptr<database_impl> imp;
439 lua_hooks & lua;
440 boost::shared_ptr<Botan::RandomNumberGenerator> rng;
441};
442
443// not a member function, defined in database_check.cc
444void check_db(database & db);
445
446// Parent maps are used in a number of places to keep track of all the
447// parent rosters of a given revision.
448
449inline revision_id const & parent_id(parent_entry const & p)
450{
451 return p.first;
452}
453
454inline revision_id const & parent_id(parent_map::const_iterator i)
455{
456 return i->first;
457}
458
459inline cached_roster const &
460parent_cached_roster(parent_entry const & p)
461{
462 return p.second;
463}
464
465inline cached_roster const &
466parent_cached_roster(parent_map::const_iterator i)
467{
468 return i->second;
469}
470
471inline roster_t const & parent_roster(parent_entry const & p)
472{
473 return *(p.second.first);
474}
475
476inline roster_t const & parent_roster(parent_map::const_iterator i)
477{
478 return *(i->second.first);
479}
480
481inline marking_map const & parent_marking(parent_entry const & p)
482{
483 return *(p.second.second);
484}
485
486inline marking_map const & parent_marking(parent_map::const_iterator i)
487{
488 return *(i->second.second);
489}
490
491// Transaction guards nest. Acquire one in any scope you'd like
492// transaction-protected, and it'll make sure the db aborts a transaction
493// if there's any exception before you call commit().
494//
495// By default, transaction_guard locks the database exclusively. If the
496// transaction is intended to be read-only, construct the guard with
497// exclusive=false. In this case, if a database update is attempted and
498// another process is accessing the database an exception will be thrown -
499// uglier and more confusing for the user - however no data inconsistency
500// should result.
501//
502// An exception is thrown if an exclusive transaction_guard is created
503// while a non-exclusive transaction_guard exists.
504//
505// Transaction guards also support splitting long transactions up into
506// checkpoints. Any time you feel the database is in an
507// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
508// given number of bytes. When the number of bytes and number of
509// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
510// is committed and reopened. Any time you feel the database has reached a
511// point where you want to ensure a transaction commit, without destructing the
512// object, you can call do_checkpoint().
513//
514// This does *not* free you from having to call .commit() on the guard when
515// it "completes" its lifecycle. Here's a way to think of checkpointing: a
516// normal transaction guard is associated with a program-control
517// scope. Sometimes (notably in netsync) it is not convenient to create a
518// scope which exactly matches the size of work-unit you want to commit (a
519// bunch of packets, or a session-close, whichever comes first) so
520// checkpointing allows you to use a long-lived transaction guard and mark
521// off the moments where commits are desired, without destructing the
522// guard. The guard still performs an error-management task in case of an
523// exception, so you still have to clean it before destruction using
524// .commit().
525//
526// Checkpointing also does not override the transaction guard nesting: if
527// there's an enclosing transaction_guard, your checkpointing calls have no
528// effect.
529//
530// The purpose of checkpointing is to provide an alternative to "many short
531// transactions" on platforms (OSX in particular) where the overhead of
532// full commits at high frequency is too high. The solution for these
533// platforms is to run inside a longer-lived transaction (session-length),
534// and checkpoint at higher granularity (every megabyte or so).
535//
536// A conditional transaction guard is just like a transaction guard,
537// except that it doesn't begin the transaction until you call acquire().
538// If you don't call acquire(), you must not call commit(), do_checkpoint(),
539// or maybe_checkpoint() either.
540//
541// Implementation note: Making transaction_guard inherit from
542// conditional_transaction_guard means we can reuse all the latter's methods
543// and just call acquire() in transaction_guard's constructor. If we did it
544// the other way around they would wind up being totally unrelated classes.
545
546class conditional_transaction_guard
547{
548 database & db;
549 size_t const checkpoint_batch_size;
550 size_t const checkpoint_batch_bytes;
551 size_t checkpointed_calls;
552 size_t checkpointed_bytes;
553 bool committed;
554 bool acquired;
555 bool const exclusive;
556public:
557 conditional_transaction_guard(database & db, bool exclusive=true,
558 size_t checkpoint_batch_size=1000,
559 size_t checkpoint_batch_bytes=0xfffff)
560 : db(db),
561 checkpoint_batch_size(checkpoint_batch_size),
562 checkpoint_batch_bytes(checkpoint_batch_bytes),
563 checkpointed_calls(0),
564 checkpointed_bytes(0),
565 committed(false), acquired(false), exclusive(exclusive)
566 {}
567
568 ~conditional_transaction_guard();
569 void acquire();
570 void commit();
571 void do_checkpoint();
572 void maybe_checkpoint(size_t nbytes);
573};
574
575class transaction_guard : public conditional_transaction_guard
576{
577public:
578 transaction_guard(database & d, bool exclusive=true,
579 size_t checkpoint_batch_size=1000,
580 size_t checkpoint_batch_bytes=0xfffff)
581 : conditional_transaction_guard(d, exclusive, checkpoint_batch_size,
582 checkpoint_batch_bytes)
583 {
584 acquire();
585 }
586};
587
588// Local Variables:
589// mode: C++
590// fill-column: 76
591// c-file-style: "gnu"
592// indent-tabs-mode: nil
593// End:
594// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
595
596#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status