monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13#include "vector.hh"
14#include <set>
15#include <boost/scoped_ptr.hpp>
16#include "rev_types.hh"
17#include "cert.hh"
18
// Forward declarations. Where these types are used in this header they
// appear only in declarations (reference parameters, a reference member,
// or by-value return types of undefined functions), none of which needs
// the complete type.
19class app_state;
20class lua_hooks;
21struct date_t;
22struct globish;
23class key_store;
24class outdated_indicator;
25class rev_height;
26
// Key identifying a database 'var': a (domain, name) pair. It is the key
// type taken by the get_var / var_exists / set_var / clear_var members of
// class database and the map key used by get_vars.
27typedef std::pair<var_domain, var_name> var_key;
28
29// this file defines a public, typed interface to the database.
30// the database class encapsulates all knowledge about sqlite,
31// the schema, and all SQL statements used to access the schema.
32//
33// one thing which is rather important to note is that this file
34// deals with two sorts of version relationships. the versions
35// stored in the database are all *backwards* from those the program
36// sees. so for example if you have two versions of a file
37//
38// file.1, file.2
39//
40// where file.2 was a modification of file.1, then as far as the rest of
41// the application is concerned -- and the ancestry graph -- file.1 is the
42// "old" version and file.2 is the "new" version. note the use of terms
43// which describe time, and the sequence of edits a user makes to a
44// file. those are ancestry terms. when the application composes a
45// patchset, for example, it'll contain the diff delta(file.1, file.2)
46//
47// from the database's perspective, however, file.1 is the derived version,
48// and file.2 is the base version. the base version is stored in the
49// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
50// the "file_deltas" table, under the id of file.1, with the id of file.2
51// listed as its base. note the use of the terms which describe
52// reconstruction; those are storage-system terms.
53//
54// the interface *to* the database, and the ancestry version graphs, use
55// the old / new metaphor of ancestry, but within the database (including
56// the private helper methods, and the storage version graphs) the
57// base/derived storage metaphor is used. the only real way to tell which
58// is which is to look at the parameter names and code. I might try to
59// express this in the type system some day, but not presently.
60//
61// the key phrase to keep repeating when working on this code is:
62//
63// "base files are new, derived files are old"
64//
65// it makes the code confusing, I know. this is possibly the worst part of
66// the program. I don't know if there's any way to make it clearer.
67
// Opaque implementation type: only this forward declaration is visible in
// the header. class database holds it through a boost::scoped_ptr (the
// `imp` member at the end of the class).
68class database_impl;
69
// Public, typed interface to the database (see the file-level comment
// above for the old/new versus base/derived terminology). Every member
// below is a declaration only; no bodies are defined in this header. The
// members are grouped by the "--== ... ==--" section banners.
70class database
71{
72 //
73 // --== Opening the database and schema checking ==--
74 //
75public:
76 explicit database(app_state &);
77 ~database();
78
79 system_path get_filename();
80 bool is_dbfile(any_path const & file);
81 bool database_specified();
82 void check_is_not_rosterified();
83
84 void ensure_open();
85 void ensure_open_for_format_changes();
86private:
  // Unlike the two ensure_open variants above, this one is private.
87 void ensure_open_for_maintenance();
88
89 //
90 // --== Transactions ==--
91 //
92private:
  // No transaction members are declared in this section; the guard class
  // (declared later in this header) is instead granted access to the
  // private parts of database through this friend declaration.
93 friend class conditional_transaction_guard;
94
95 //
96 // --== Reading/writing delta-compressed objects ==--
97 //
98public:
99 bool file_version_exists(file_id const & ident);
100 bool revision_exists(revision_id const & ident);
101 bool roster_link_exists_for_revision(revision_id const & ident);
102 bool roster_exists_for_revision(revision_id const & ident);
103
104
105 // get plain version if it exists, or reconstruct version
106 // from deltas (if they exist)
107 void get_file_version(file_id const & ident,
108 file_data & dat);
109
110 // put file w/o predecessor into db
111 void put_file(file_id const & new_id,
112 file_data const & dat);
113
114 // store new version and update old version to be a delta
115 void put_file_version(file_id const & old_id,
116 file_id const & new_id,
117 file_delta const & del);
118
119 void get_arbitrary_file_delta(file_id const & src_id,
120 file_id const & dst_id,
121 file_delta & del);
122
123 // get plain version if it exists, or reconstruct version
124 // from deltas (if they exist).
125 void get_manifest_version(manifest_id const & ident,
126 manifest_data & dat);
127
128private:
  // These helpers take the table name as a string argument.
129 bool file_or_manifest_base_exists(file_id const & ident,
130 std::string const & table);
131 bool delta_exists(id const & ident,
132 std::string const & table);
  // Parameter names here follow the storage metaphor (ident / base), in
  // contrast to the old_id / new_id names of the public put_file_version.
133 void put_file_delta(file_id const & ident,
134 file_id const & base,
135 file_delta const & del);
136
  // Free function granted access to the private members of this class.
137 friend void rcs_put_raw_file_edge(database & db,
138 file_id const & old_id,
139 file_id const & new_id,
140 delta const & del);
141
142
143 //
144 // --== The ancestry graph ==--
145 //
146public:
147 void get_revision_ancestry(rev_ancestry_map & graph);
148
149 void get_revision_parents(revision_id const & ident,
150 std::set<revision_id> & parents);
151
152 void get_revision_children(revision_id const & ident,
153 std::set<revision_id> & children);
154
155 void get_leaves(std::set<revision_id> & leaves);
156
157 void get_revision_manifest(revision_id const & cid,
158 manifest_id & mid);
159 void get_revision_ids(std::set<revision_id> & ids);
160 // this is exposed for 'db check':
161 void get_file_ids(std::set<file_id> & ids);
162
163 //
164 // --== Revision reading/writing ==--
165 //
166public:
  // get_revision and put_revision are each overloaded on the
  // representation of the revision: revision_t or revision_data.
  // NOTE(review): the meaning of put_revision's bool result is not
  // visible in this header -- confirm against the definition.
167 void get_revision(revision_id const & ident,
168 revision_t & cs);
169
170 void get_revision(revision_id const & ident,
171 revision_data & dat);
172
173 bool put_revision(revision_id const & new_id,
174 revision_t const & rev);
175
176 bool put_revision(revision_id const & new_id,
177 revision_data const & dat);
178
179private:
180 void deltify_revision(revision_id const & rid);
181
182 //
183 // --== Rosters ==--
184 //
185public:
186 node_id next_node_id();
187
  // Three output forms for the roster of a revision: the roster alone,
  // the roster together with its marking map, or a cached_roster.
188 void get_roster(revision_id const & rid,
189 roster_t & roster);
190
191 void get_roster(revision_id const & rid,
192 roster_t & roster,
193 marking_map & marks);
194
195 void get_roster(revision_id const & rid,
196 cached_roster & cr);
197
198 // these are exposed for the use of database_check.cc
199 bool roster_version_exists(revision_id const & ident);
200 void get_roster_ids(std::set<revision_id> & ids);
201
202 // using roster deltas
203 void get_markings(revision_id const & id,
204 node_id const & nid,
205 marking_t & markings);
206
207 void get_file_content(revision_id const & id,
208 node_id const & nid,
209 file_id & content);
210
211private:
212 void get_roster_version(revision_id const & ros_id,
213 cached_roster & cr);
214
215 void put_roster(revision_id const & rev_id,
216 roster_t_cp const & roster,
217 marking_map_cp const & marking);
218
219 //
220 // --== Keys ==--
221 //
222public:
223 void get_key_ids(std::vector<rsa_keypair_id> & pubkeys);
224 void get_key_ids(globish const & pattern,
225 std::vector<rsa_keypair_id> & pubkeys);
226
227 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
228
  // Existence checks are overloaded on how the key is named: by hash
  // (id) or by rsa_keypair_id.
229 bool public_key_exists(id const & hash);
230 bool public_key_exists(rsa_keypair_id const & ident);
231
232 void get_pubkey(id const & hash,
233 rsa_keypair_id & ident,
234 rsa_pub_key & pub);
235
236 void get_key(rsa_keypair_id const & ident, rsa_pub_key & pub);
237 bool put_key(rsa_keypair_id const & ident, rsa_pub_key const & pub);
238
239 void delete_public_key(rsa_keypair_id const & pub_id);
240
241 // Crypto operations
242
243 void encrypt_rsa(rsa_keypair_id const & pub_id,
244 std::string const & plaintext,
245 rsa_oaep_sha_data & ciphertext);
246
247 cert_status check_signature(rsa_keypair_id const & id,
248 std::string const & alleged_text,
249 rsa_sha1_signature const & signature);
250
251 //
252 // --== Certs ==--
253 //
254 // note: this section is ridiculous. please do something about it.
  // Most getters in this section return an outdated_indicator by value
  // and deliver their results through the trailing reference parameter.
255public:
256 bool revision_cert_exists(revision<cert> const & cert);
257 bool revision_cert_exists(revision_id const & hash);
258
259 bool put_revision_cert(revision<cert> const & cert);
260
261 // this variant has to be rather coarse and fast, for netsync's use
262 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<revision_id,
263 std::pair<revision_id, rsa_keypair_id> > > & idx);
264
265 // Only used by database_check.cc
266 outdated_indicator get_revision_certs(std::vector< revision<cert> > & certs);
267
268 outdated_indicator get_revision_certs(cert_name const & name,
269 std::vector< revision<cert> > & certs);
270
271 outdated_indicator get_revision_certs(revision_id const & ident,
272 cert_name const & name,
273 std::vector< revision<cert> > & certs);
274
275 // Only used by get_branch_certs (project.cc)
276 outdated_indicator get_revision_certs(cert_name const & name,
277 cert_value const & val,
278 std::vector< revision<cert> > & certs);
279
280 // Only used by revision_is_in_branch (project.cc)
281 outdated_indicator get_revision_certs(revision_id const & ident,
282 cert_name const & name,
283 cert_value const & value,
284 std::vector< revision<cert> > & certs);
285
286 // Only used by get_branch_heads (project.cc)
287 outdated_indicator get_revisions_with_cert(cert_name const & name,
288 cert_value const & value,
289 std::set<revision_id> & revisions);
290
291 // Used through project.cc, and by
292 // anc_graph::add_node_for_oldstyle_revision (revision.cc)
293 outdated_indicator get_revision_certs(revision_id const & ident,
294 std::vector< revision<cert> > & certs);
295
296 // Used through get_revision_cert_hashes (project.cc)
297 outdated_indicator get_revision_certs(revision_id const & ident,
298 std::vector<id> & hashes);
299
300 void get_revision_cert(id const & hash,
301 revision<cert> & c);
302
303 void get_manifest_certs(manifest_id const & ident,
304 std::vector< manifest<cert> > & certs);
305
306 void get_manifest_certs(cert_name const & name,
307 std::vector< manifest<cert> > & certs);
308
309 //
310 // --== Epochs ==--
311 //
312public:
313 void get_epochs(std::map<branch_name, epoch_data> & epochs);
314
315 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
316
317 bool epoch_exists(epoch_id const & eid);
318
319 void set_epoch(branch_name const & branch, epoch_data const & epo);
320
321 void clear_epoch(branch_name const & branch);
322
323 //
324 // --== Database 'vars' ==--
325 //
326public:
  // All members here are keyed by var_key, the (var_domain, var_name)
  // pair typedef declared near the top of this header.
327 void get_vars(std::map<var_key, var_value > & vars);
328
329 void get_var(var_key const & key, var_value & value);
330
331 bool var_exists(var_key const & key);
332
333 void set_var(var_key const & key, var_value const & value);
334
335 void clear_var(var_key const & key);
336
337 //
338 // --== Completion ==--
339 //
340public:
341 void prefix_matching_constraint(std::string const & colname,
342 std::string const & prefix,
343 std::string & constraint);
344
  // complete() is overloaded on the type of the completion set:
  // revision ids, file ids, or (key_id, utf8) pairs.
345 void complete(std::string const & partial,
346 std::set<revision_id> & completions);
347
348 void complete(std::string const & partial,
349 std::set<file_id> & completions);
350
351 void complete(std::string const & partial,
352 std::set< std::pair<key_id, utf8 > > & completions);
353
354 //
355 // --== Revision selectors ==--
356 //
357public:
358 void select_parent(std::string const & partial,
359 std::set<revision_id> & completions);
360 void select_cert(std::string const & certname,
361 std::set<revision_id> & completions);
362 void select_cert(std::string const & certname, std::string const & certvalue,
363 std::set<revision_id> & completions);
364 void select_author_tag_or_branch(std::string const & partial,
365 std::set<revision_id> & completions);
366 void select_date(std::string const & date, std::string const & comparison,
367 std::set<revision_id> & completions);
368
369 //
370 // --== The 'db' family of top-level commands ==--
371 //
372public:
373 void initialize();
374 void debug(std::string const & sql, std::ostream & out);
375 void dump(std::ostream &);
376 void load(std::istream &);
377 void info(std::ostream &);
378 void version(std::ostream &);
379 void migrate(key_store &);
380 void test_migration_step(key_store &, std::string const &);
381 // for kill_rev_locally:
382 void delete_existing_rev_and_certs(revision_id const & rid);
383 // for kill_branch_certs_locally:
384 void delete_branch_named(cert_value const & branch);
385 // for kill_tag_locally:
386 void delete_tag_named(cert_value const & tag);
387
388public:
389 // branches
390 outdated_indicator get_branches(std::vector<std::string> & names);
391 outdated_indicator get_branches(globish const & glob,
392 std::vector<std::string> & names);
393
394 bool check_integrity();
395
396 void get_uncommon_ancestors(revision_id const & a,
397 revision_id const & b,
398 std::set<revision_id> & a_uncommon_ancs,
399 std::set<revision_id> & b_uncommon_ancs);
400
401 // for changesetify, rosterify
402 void delete_existing_revs_and_certs();
403
404 void delete_existing_manifests();
405
406 // heights
407 void get_rev_height(revision_id const & id,
408 rev_height & height);
409
410 void put_rev_height(revision_id const & id,
411 rev_height const & height);
412
413 bool has_rev_height(rev_height const & height);
414 void delete_existing_heights();
415
416 void put_height_for_revision(revision_id const & new_id,
417 revision_t const & rev);
418
419 // for regenerate_rosters
420 void delete_existing_rosters();
421 void put_roster_for_revision(revision_id const & new_id,
422 revision_t const & rev);
423
424 // We make these lua hooks available via the database context;
425 // see comments above their definition for rationale and plans.
426 bool hook_get_manifest_cert_trust(std::set<rsa_keypair_id> const & signers,
427 manifest_id const & id, cert_name const & name, cert_value const & val);
428 bool hook_get_revision_cert_trust(std::set<rsa_keypair_id> const & signers,
429 revision_id const & id, cert_name const & name, cert_value const & val);
430
431private:
  // Owning pointer to the opaque implementation object (database_impl is
  // only forward-declared in this header).
432 boost::scoped_ptr<database_impl> imp;
  // Reference member, so it must be bound in the constructor.
  // NOTE(review): presumably what the hook_get_*_cert_trust members
  // forward to -- confirm against the definitions.
433 lua_hooks & lua;
434};
435
436// not a member function, defined in database_check.cc
// Only the declaration lives in this header; it takes the database by
// non-const reference and returns nothing.
437void check_db(database & db);
438
439// Parent maps are used in a number of places to keep track of all the
440// parent rosters of a given revision.
441
442inline revision_id const & parent_id(parent_entry const & p)
443{
444 return p.first;
445}
446
447inline revision_id const & parent_id(parent_map::const_iterator i)
448{
449 return i->first;
450}
451
452inline cached_roster const &
453parent_cached_roster(parent_entry const & p)
454{
455 return p.second;
456}
457
458inline cached_roster const &
459parent_cached_roster(parent_map::const_iterator i)
460{
461 return i->second;
462}
463
464inline roster_t const & parent_roster(parent_entry const & p)
465{
466 return *(p.second.first);
467}
468
469inline roster_t const & parent_roster(parent_map::const_iterator i)
470{
471 return *(i->second.first);
472}
473
474inline marking_map const & parent_marking(parent_entry const & p)
475{
476 return *(p.second.second);
477}
478
479inline marking_map const & parent_marking(parent_map::const_iterator i)
480{
481 return *(i->second.second);
482}
483
484// Transaction guards nest. Acquire one in any scope you'd like
485// transaction-protected, and it'll make sure the db aborts a transaction
486// if there's any exception before you call commit().
487//
488// By default, transaction_guard locks the database exclusively. If the
489// transaction is intended to be read-only, construct the guard with
490// exclusive=false. In this case, if a database update is attempted and
491// another process is accessing the database an exception will be thrown -
492// uglier and more confusing for the user - however no data inconsistency
493// should result.
494//
495// An exception is thrown if an exclusive transaction_guard is created
496// while a non-exclusive transaction_guard exists.
497//
498// Transaction guards also support splitting long transactions up into
499// checkpoints. Any time you feel the database is in an
500// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
501// given number of bytes. When the number of bytes and number of
502// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
503// is committed and reopened. Any time you feel the database has reached a
504// point where you want to ensure a transaction commit, without destructing the
505// object, you can call do_checkpoint().
506//
507// This does *not* free you from having to call .commit() on the guard when
508// it "completes" its lifecycle. Here's a way to think of checkpointing: a
509// normal transaction guard is associated with a program-control
510// scope. Sometimes (notably in netsync) it is not convenient to create a
511// scope which exactly matches the size of work-unit you want to commit (a
512// bunch of packets, or a session-close, whichever comes first) so
513// checkpointing allows you to use a long-lived transaction guard and mark
514// off the moments where commits are desired, without destructing the
515// guard. The guard still performs an error-management task in case of an
516// exception, so you still have to clean it before destruction using
517// .commit().
518//
519// Checkpointing also does not override the transaction guard nesting: if
520// there's an enclosing transaction_guard, your checkpointing calls have no
521// effect.
522//
523// The purpose of checkpointing is to provide an alternative to "many short
524// transactions" on platforms (OSX in particular) where the overhead of
525// full commits at high frequency is too high. The solution for these
526// platforms is to run inside a longer-lived transaction (session-length),
527// and checkpoint at higher granularity (every megabyte or so).
528//
529// A conditional transaction guard is just like a transaction guard,
530// except that it doesn't begin the transaction until you call acquire().
531// If you don't call acquire(), you must not call commit(), do_checkpoint(),
532// or maybe_checkpoint() either.
533//
534// Implementation note: Making transaction_guard inherit from
535// conditional_transaction_guard means we can reuse all the latter's methods
536// and just call acquire() in transaction_guard's constructor. If we did it
537// the other way around they would wind up being totally unrelated classes.
538
539class conditional_transaction_guard
540{
541 database & db;
542 size_t const checkpoint_batch_size;
543 size_t const checkpoint_batch_bytes;
544 size_t checkpointed_calls;
545 size_t checkpointed_bytes;
546 bool committed;
547 bool acquired;
548 bool const exclusive;
549public:
550 conditional_transaction_guard(database & db, bool exclusive=true,
551 size_t checkpoint_batch_size=1000,
552 size_t checkpoint_batch_bytes=0xfffff)
553 : db(db),
554 checkpoint_batch_size(checkpoint_batch_size),
555 checkpoint_batch_bytes(checkpoint_batch_bytes),
556 checkpointed_calls(0),
557 checkpointed_bytes(0),
558 committed(false), acquired(false), exclusive(exclusive)
559 {}
560
561 ~conditional_transaction_guard();
562 void acquire();
563 void commit();
564 void do_checkpoint();
565 void maybe_checkpoint(size_t nbytes);
566};
567
568class transaction_guard : public conditional_transaction_guard
569{
570public:
571 transaction_guard(database & d, bool exclusive=true,
572 size_t checkpoint_batch_size=1000,
573 size_t checkpoint_batch_bytes=0xfffff)
574 : conditional_transaction_guard(d, exclusive, checkpoint_batch_size,
575 checkpoint_batch_bytes)
576 {
577 acquire();
578 }
579};
580
581// Local Variables:
582// mode: C++
583// fill-column: 76
584// c-file-style: "gnu"
585// indent-tabs-mode: nil
586// End:
587// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
588
589#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status