monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
// Opaque handle types belonging to the sqlite library.  Only pointers to
// them are used in this header, so forward declarations suffice
// (presumably to avoid pulling sqlite3.h into every includer).
struct sqlite3;
struct sqlite3_stmt;

// sqlite3_finalize is a C function; sqlite3.h declares it inside an
// extern "C" block.  This redeclaration has to carry the same language
// linkage: without it, a translation unit that sees this header *before*
// sqlite3.h is handed two declarations with conflicting linkage.  It is
// named here because database::statement passes it to cleanup_ptr.
extern "C" int sqlite3_finalize(sqlite3_stmt *);

// Project type, defined outside this header; used below only by reference
// and as a template argument.
struct cert;
17
18#include <stdarg.h>
19
20#include <vector>
21#include <set>
22#include <map>
23#include <string>
24
25#include "cset.hh"
26#include "numeric_vocab.hh"
27#include "paths.hh"
28#include "cleanup.hh"
29#include "roster.hh"
30#include "selectors.hh"
31#include "vocab.hh"
32
33// this file defines a public, typed interface to the database.
34// the database class encapsulates all knowledge about sqlite,
35// the schema, and all SQL statements used to access the schema.
36//
37// one thing which is rather important to note is that this file
38// deals with two sorts of version relationships. the versions
39// stored in the database are all *backwards* from those the program
40// sees. so for example if you have two versions of a file
41//
42// file.1, file.2
43//
44// where file.2 was a modification of file.1, then as far as the rest of
45// the application is concerned -- and the ancestry graph -- file.1 is the
46// "old" version and file.2 is the "new" version. note the use of terms
47// which describe time, and the sequence of edits a user makes to a
48// file. those are ancestry terms. when the application composes a
49// patchset, for example, it'll contain the diff delta(file.1, file.2)
50//
51// from the database's perspective, however, file.1 is the derived version,
52// and file.2 is the base version. the base version is stored in the
53// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
54// the "file_deltas" table, under the id of file.1, with the id of file.2
55// listed as its base. note the use of the terms which describe
56// reconstruction; those are storage-system terms.
57//
58// the interface *to* the database, and the ancestry version graphs, use
59// the old / new metaphor of ancestry, but within the database (including
60// the private helper methods, and the storage version graphs) the
61// base/derived storage metaphor is used. the only real way to tell which
62// is which is to look at the parameter names and code. I might try to
63// express this in the type system some day, but not presently.
64//
65// the key phrase to keep repeating when working on this code is:
66//
67// "base files are new, derived files are old"
68//
69// it makes the code confusing, I know. this is possibly the worst part of
70// the program. I don't know if there's any way to make it clearer.
71
// Types this header mentions only through a pointer, a reference or a
// friend declaration, so a bare declaration is sufficient here.
class app_state;
class transaction_guard;
struct query;
struct revision_t;

// Not referred to anywhere else in this header.
struct posting;
77
78class database
79{
80 system_path filename;
81 std::string const schema;
82 void check_schema();
83 void check_format();
84
85 struct statement {
86 statement() : count(0), stmt(0, sqlite3_finalize) {}
87 int count;
88 cleanup_ptr<sqlite3_stmt*, int> stmt;
89 };
90
91 std::map<std::string, statement> statement_cache;
92 std::map<std::pair<std::string, hexenc<id> >, data> pending_writes;
93 unsigned long pending_writes_size;
94
95 bool have_pending_write(std::string const & tab, hexenc<id> const & id);
96 void load_pending_write(std::string const & tab, hexenc<id> const & id, data & dat);
97 void cancel_pending_write(std::string const & tab, hexenc<id> const & id);
98 void schedule_write(std::string const & tab, hexenc<id> const & id, data const & dat);
99
100 app_state * __app;
101 struct sqlite3 * __sql;
102 struct sqlite3 * sql(bool init = false, bool migrating_format = false);
103 int transaction_level;
104 bool transaction_exclusive;
105
106 void install_functions(app_state * app);
107
108 typedef std::vector< std::vector<std::string> > results;
109
110 void execute(query const & q);
111
112 void fetch(results & res,
113 int const want_cols,
114 int const want_rows,
115 query const & q);
116
117 bool exists(hexenc<id> const & ident,
118 std::string const & table);
119 bool delta_exists(hexenc<id> const & ident,
120 std::string const & table);
121
122 unsigned long count(std::string const & table);
123 unsigned long space_usage(std::string const & table,
124 std::string const & concatenated_columns);
125 unsigned int page_size();
126 unsigned int cache_size();
127
128 void get_ids(std::string const & table, std::set< hexenc<id> > & ids);
129
130 void get(hexenc<id> const & new_id,
131 data & dat,
132 std::string const & table);
133 void get_delta(hexenc<id> const & ident,
134 hexenc<id> const & base,
135 delta & del,
136 std::string const & table);
137 void get_version(hexenc<id> const & ident,
138 data & dat,
139 std::string const & data_table,
140 std::string const & delta_table);
141
142 void put(hexenc<id> const & new_id,
143 data const & dat,
144 std::string const & table);
145 void drop(hexenc<id> const & base,
146 std::string const & table);
147 void put_delta(hexenc<id> const & ident,
148 hexenc<id> const & base,
149 delta const & del,
150 std::string const & table);
151 void put_version(hexenc<id> const & old_id,
152 hexenc<id> const & new_id,
153 delta const & del,
154 std::string const & data_table,
155 std::string const & delta_table);
156 void remove_version(hexenc<id> const & target_id,
157 std::string const & data_table,
158 std::string const & delta_table);
159
160 void get_keys(std::string const & table, std::vector<rsa_keypair_id> & keys);
161
162 bool cert_exists(cert const & t,
163 std::string const & table);
164 void put_cert(cert const & t, std::string const & table);
165 void results_to_certs(results const & res,
166 std::vector<cert> & certs);
167
168 void get_certs(std::vector< cert > & certs,
169 std::string const & table);
170
171 void get_certs(hexenc<id> const & ident,
172 std::vector< cert > & certs,
173 std::string const & table);
174
175 void get_certs(cert_name const & name,
176 std::vector< cert > & certs,
177 std::string const & table);
178
179 void get_certs(hexenc<id> const & ident,
180 cert_name const & name,
181 std::vector< cert > & certs,
182 std::string const & table);
183
184 void get_certs(hexenc<id> const & ident,
185 cert_name const & name,
186 base64<cert_value> const & val,
187 std::vector< cert > & certs,
188 std::string const & table);
189
190 void get_certs(cert_name const & name,
191 base64<cert_value> const & val,
192 std::vector<cert> & certs,
193 std::string const & table);
194
195 void begin_transaction(bool exclusive);
196 void commit_transaction();
197 void rollback_transaction();
198 friend class transaction_guard;
199 friend void rcs_put_raw_file_edge(hexenc<id> const & old_id,
200 hexenc<id> const & new_id,
201 delta const & del,
202 database & db);
203
204 void put_roster(revision_id const & rev_id,
205 roster_t & roster,
206 marking_map & marks);
207
208 void check_filename();
209 void check_db_exists();
210 void open();
211 void close();
212
213public:
214
215 database(system_path const & file);
216
217 void set_filename(system_path const & file);
218 bool is_dbfile(any_path const & file);
219 void initialize();
220 void debug(std::string const & sql, std::ostream & out);
221 void dump(std::ostream &);
222 void load(std::istream &);
223 void info(std::ostream &);
224 void version(std::ostream &);
225 void migrate();
226 void ensure_open();
227 void ensure_open_for_format_changes();
228 void check_is_not_rosterified();
229 bool database_specified();
230
231 bool file_version_exists(file_id const & ident);
232 bool roster_version_exists(roster_id const & ident);
233 bool revision_exists(revision_id const & ident);
234 bool roster_link_exists_for_revision(revision_id const & ident);
235 bool roster_exists_for_revision(revision_id const & ident);
236
237 void get_roster_links(std::map<revision_id, roster_id> & links);
238 void get_file_ids(std::set<file_id> & ids);
239 void get_revision_ids(std::set<revision_id> & ids);
240 void get_roster_ids(std::set<roster_id> & ids) ;
241
242
243 bool check_integrity();
244
245 void set_app(app_state * app);
246
247 // get plain version if it exists, or reconstruct version
248 // from deltas (if they exist)
249 void get_file_version(file_id const & ident,
250 file_data & dat);
251
252 // put file w/o predecessor into db
253 void put_file(file_id const & new_id,
254 file_data const & dat);
255
256 // store new version and update old version to be a delta
257 void put_file_version(file_id const & old_id,
258 file_id const & new_id,
259 file_delta const & del);
260
261 void get_arbitrary_file_delta(file_id const & src_id,
262 file_id const & dst_id,
263 file_delta & del);
264
265 // get plain version if it exists, or reconstruct version
266 // from deltas (if they exist).
267 void get_manifest_version(manifest_id const & ident,
268 manifest_data & dat);
269
270 void get_revision_ancestry(std::multimap<revision_id, revision_id> & graph);
271
272 void get_revision_parents(revision_id const & ident,
273 std::set<revision_id> & parents);
274
275 void get_revision_children(revision_id const & ident,
276 std::set<revision_id> & children);
277
278 void get_revision_manifest(revision_id const & cid,
279 manifest_id & mid);
280
281 void deltify_revision(revision_id const & rid);
282
283 void get_revision(revision_id const & ident,
284 revision_t & cs);
285
286 void get_revision(revision_id const & ident,
287 revision_data & dat);
288
289 void put_revision(revision_id const & new_id,
290 revision_t const & rev);
291
292 void put_revision(revision_id const & new_id,
293 revision_data const & dat);
294
295 void delete_existing_revs_and_certs();
296
297 void delete_existing_manifests();
298
299 void delete_existing_rev_and_certs(revision_id const & rid);
300
301 void delete_branch_named(cert_value const & branch);
302
303 void delete_tag_named(cert_value const & tag);
304
305 // crypto key / cert operations
306
307 void get_key_ids(std::string const & pattern,
308 std::vector<rsa_keypair_id> & pubkeys);
309
310 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
311
312 bool public_key_exists(hexenc<id> const & hash);
313 bool public_key_exists(rsa_keypair_id const & ident);
314
315
316 void get_pubkey(hexenc<id> const & hash,
317 rsa_keypair_id & ident,
318 base64<rsa_pub_key> & pub_encoded);
319
320 void get_key(rsa_keypair_id const & ident,
321 base64<rsa_pub_key> & pub_encoded);
322
323 void put_key(rsa_keypair_id const & ident,
324 base64<rsa_pub_key> const & pub_encoded);
325
326 void delete_public_key(rsa_keypair_id const & pub_id);
327
328 // note: this section is ridiculous. please do something about it.
329
330 bool revision_cert_exists(revision<cert> const & cert);
331 bool revision_cert_exists(hexenc<id> const & hash);
332
333 void put_revision_cert(revision<cert> const & cert);
334
335 // this variant has to be rather coarse and fast, for netsync's use
336 void get_revision_cert_nobranch_index(std::vector< std::pair<hexenc<id>,
337 std::pair<revision_id, rsa_keypair_id> > > & idx);
338
339 void get_revision_certs(std::vector< revision<cert> > & certs);
340
341 void get_revision_certs(cert_name const & name,
342 std::vector< revision<cert> > & certs);
343
344 void get_revision_certs(revision_id const & ident,
345 cert_name const & name,
346 std::vector< revision<cert> > & certs);
347
348 void get_revision_certs(cert_name const & name,
349 base64<cert_value> const & val,
350 std::vector< revision<cert> > & certs);
351
352 void get_revision_certs(revision_id const & ident,
353 cert_name const & name,
354 base64<cert_value> const & value,
355 std::vector< revision<cert> > & certs);
356
357 void get_revision_certs(revision_id const & ident,
358 std::vector< revision<cert> > & certs);
359
360 void get_revision_certs(revision_id const & ident,
361 std::vector< hexenc<id> > & hashes);
362
363 void get_revision_cert(hexenc<id> const & hash,
364 revision<cert> & c);
365
366 void get_manifest_certs(manifest_id const & ident,
367 std::vector< manifest<cert> > & certs);
368
369 void get_manifest_certs(cert_name const & name,
370 std::vector< manifest<cert> > & certs);
371
372 // epochs
373
374 void get_epochs(std::map<cert_value, epoch_data> & epochs);
375
376 void get_epoch(epoch_id const & eid, cert_value & branch, epoch_data & epo);
377
378 bool epoch_exists(epoch_id const & eid);
379
380 void set_epoch(cert_value const & branch, epoch_data const & epo);
381
382 void clear_epoch(cert_value const & branch);
383
384 // vars
385
386 void get_vars(std::map<var_key, var_value > & vars);
387
388 void get_var(var_key const & key, var_value & value);
389
390 bool var_exists(var_key const & key);
391
392 void set_var(var_key const & key, var_value const & value);
393
394 void clear_var(var_key const & key);
395
396 // branches
397 void get_branches(std::vector<std::string> & names);
398
399 // roster and node_id stuff
400 void get_roster_id_for_revision(revision_id const & rev_id,
401 roster_id & ros_id);
402
403 void get_roster(revision_id const & rid,
404 roster_t & roster);
405
406 void get_roster(revision_id const & rid,
407 roster_t & roster,
408 marking_map & marks);
409
410 void get_roster_version(roster_id const & ros_id,
411 roster_data & dat);
412
413 void get_uncommon_ancestors(revision_id const & a,
414 revision_id const & b,
415 std::set<revision_id> & a_uncommon_ancs,
416 std::set<revision_id> & b_uncommon_ancs);
417
418 node_id next_node_id();
419
420 // completion stuff
421
422 void complete(std::string const & partial,
423 std::set<revision_id> & completions);
424
425 void complete(std::string const & partial,
426 std::set<file_id> & completions);
427
428 void complete(std::string const & partial,
429 std::set< std::pair<key_id, utf8 > > & completions);
430
431 void complete(selectors::selector_type ty,
432 std::string const & partial,
433 std::vector<std::pair<selectors::selector_type,
434 std::string> > const & limit,
435 std::set<std::string> & completions);
436
437 ~database();
438
439 void set_vcache_max_size();
440};
441
442// Transaction guards nest. Acquire one in any scope you'd like
443// transaction-protected, and it'll make sure the db aborts a transaction
444// if there's any exception before you call commit().
445//
446// By default, transaction_guard locks the database exclusively. If the
447// transaction is intended to be read-only, construct the guard with
448// exclusive=false. In this case, if a database update is attempted and
449// another process is accessing the database an exception will be thrown -
450// uglier and more confusing for the user - however no data inconsistency
451// should result.
452//
453// An exception is thrown if an exclusive transaction_guard is created
454// while a non-exclusive transaction_guard exists.
455//
456// Transaction guards also support splitting long transactions up into
457// checkpoints. Any time you feel the database is in an
458// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
459// given number of bytes. When the number of bytes and number of
460// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
461// is committed and reopened. Any time you feel the database has reached a
462// point where you want to ensure a transaction commit, without destructing the
463// object, you can call do_checkpoint().
464//
465// This does *not* free you from having to call .commit() on the guard when
466// it "completes" its lifecycle. Here's a way to think of checkpointing: a
467// normal transaction guard is associated with a program-control
468// scope. Sometimes (notably in netsync) it is not convenient to create a
469// scope which exactly matches the size of work-unit you want to commit (a
470// bunch of packets, or a session-close, whichever comes first) so
471// checkpointing allows you to use a long-lived transaction guard and mark
472// off the moments where commits are desired, without destructing the
473// guard. The guard still performs an error-management task in case of an
474// exception, so you still have to clean it before destruction using
475// .commit().
476//
477// Checkpointing also does not override the transaction guard nesting: if
478// there's an enclosing transaction_guard, your checkpointing calls have no
479// effect.
480//
481// The purpose of checkpointing is to provide an alternative to "many short
482// transactions" on platforms (OSX in particular) where the overhead of
483// full commits at high frequency is too high. The solution for these
484// platforms is to run inside a longer-lived transaction (session-length),
485// and checkpoint at higher granularity (every megabyte or so).
486
// Scope guard for a (possibly nested) database transaction.  The usage
// contract -- nesting, exclusive versus non-exclusive mode, checkpointing,
// and the obligation to call commit() before the guard dies -- is laid out
// in the long comment that precedes this class.
//
// A guard stands for one transaction on its database, so it must never be
// duplicated: a copy would run the destructor's commit-or-abort handling a
// second time for the same transaction.  Copying is therefore disabled
// below.
class transaction_guard
{
  // Whether commit() has been called (presumably what the destructor
  // consults to decide between committing and aborting -- TODO confirm).
  bool committed;

  // The database this guard acts on.  Spelled with an elaborated type
  // specifier so this declaration does not depend on what precedes it.
  class database & db;

  // Mode requested at construction; see the constructor's default.
  bool exclusive;

  // Checkpoint thresholds, fixed for the lifetime of the guard.
  size_t const checkpoint_batch_size;
  size_t const checkpoint_batch_bytes;

  // Running totals (presumably accumulated by maybe_checkpoint() --
  // TODO confirm in the implementation).
  size_t checkpointed_calls;
  size_t checkpointed_bytes;

  // Not copyable: declared private and intentionally left undefined (the
  // pre-C++11 idiom, in keeping with the rest of this header).
  transaction_guard(transaction_guard const &);
  transaction_guard & operator=(transaction_guard const &);

public:
  // d                       database to guard
  // exclusive               defaults to true; pass false for transactions
  //                         intended to be read-only
  // checkpoint_batch_size   defaults to 1000
  // checkpoint_batch_bytes  defaults to 0xfffff (one byte short of 1 MiB)
  transaction_guard(database & d,
                    bool exclusive = true,
                    size_t checkpoint_batch_size = 1000,
                    size_t checkpoint_batch_bytes = 0xfffff);

  ~transaction_guard();

  // Request a commit point now, without destroying the guard.
  void do_checkpoint();

  // Report nbytes of work; a checkpoint happens once the guard's
  // thresholds are exceeded.
  void maybe_checkpoint(size_t nbytes);

  // Must still be called when the guard completes its life cycle, even if
  // checkpoints were taken along the way.
  void commit();
};
505
506
// Free function defined outside this header.  Going by its name it closes
// every database that is currently open -- confirm against the definition.
void close_all_databases();
509
510
511// Local Variables:
512// mode: C++
513// fill-column: 76
514// c-file-style: "gnu"
515// indent-tabs-mode: nil
516// End:
517// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
518
519#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status