monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13struct sqlite3;
14struct sqlite3_stmt;
15struct cert;
16int sqlite3_finalize(sqlite3_stmt *);
17
18#include <vector>
19#include <set>
20#include <map>
21
22#include "numeric_vocab.hh"
23#include "vocab.hh"
24#include "paths.hh"
25#include "cleanup.hh"
26#include "roster.hh"
27#include "selectors.hh"
28#include "graph.hh"
29
30// FIXME: would be better not to include this everywhere
31#include "outdated_indicator.hh"
32#include "lru_writeback_cache.hh"
33
34// this file defines a public, typed interface to the database.
35// the database class encapsulates all knowledge about sqlite,
36// the schema, and all SQL statements used to access the schema.
37//
38// one thing which is rather important to note is that this file
39// deals with two sorts of version relationships. the versions
40// stored in the database are all *backwards* from those the program
41// sees. so for example if you have two versions of a file
42//
43// file.1, file.2
44//
45// where file.2 was a modification of file.1, then as far as the rest of
46// the application is concerned -- and the ancestry graph -- file.1 is the
47// "old" version and file.2 is the "new" version. note the use of terms
48// which describe time, and the sequence of edits a user makes to a
49// file. those are ancestry terms. when the application composes a
50// patchset, for example, it'll contain the diff delta(file.1, file.2)
51//
52// from the database's perspective, however, file.1 is the derived version,
53// and file.2 is the base version. the base version is stored in the
54// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
55// the "file_deltas" table, under the id of file.1, with the id of file.2
56// listed as its base. note the use of the terms which describe
57// reconstruction; those are storage-system terms.
58//
59// the interface *to* the database, and the ancestry version graphs, use
60// the old / new metaphor of ancestry, but within the database (including
61// the private helper methods, and the storage version graphs) the
62// base/derived storage metaphor is used. the only real way to tell which
63// is which is to look at the parameter names and code. I might try to
64// express this in the type system some day, but not presently.
65//
66// the key phrase to keep repeating when working on this code is:
67//
68// "base files are new, derived files are old"
69//
70// it makes the code confusing, I know. this is possibly the worst part of
71// the program. I don't know if there's any way to make it clearer.
72
73class transaction_guard;
74class app_state;
75struct revision_t;
76struct query;
77class rev_height;
78
79class database
80{
81 //
82 // --== Opening the database and schema checking ==--
83 //
84private:
85 system_path filename;
86 app_state * __app;
87 struct sqlite3 * __sql;
88
89 enum open_mode { normal_mode = 0,
90 schema_bypass_mode,
91 format_bypass_mode };
92
93 void install_functions(app_state * app);
94 struct sqlite3 * sql(enum open_mode mode = normal_mode);
95
96 void check_filename();
97 void check_db_exists();
98 void check_db_nonexistent();
99 void open();
100 void close();
101 void check_format();
102
103public:
104 database(system_path const & file);
105 ~database();
106
107 void set_app(app_state * app);
108
109 void set_filename(system_path const & file);
110 system_path get_filename();
111 bool is_dbfile(any_path const & file);
112 void ensure_open();
113 void ensure_open_for_format_changes();
114private:
115 void ensure_open_for_maintenance();
116public:
117 void check_is_not_rosterified();
118 bool database_specified();
119
120 //
121 // --== Basic SQL interface and statement caching ==--
122 //
123private:
124 struct statement {
125 statement() : count(0), stmt(0, sqlite3_finalize) {}
126 int count;
127 cleanup_ptr<sqlite3_stmt*, int> stmt;
128 };
129 std::map<std::string, statement> statement_cache;
130
131 typedef std::vector< std::vector<std::string> > results;
132 void fetch(results & res,
133 int const want_cols, int const want_rows,
134 query const & q);
135 void execute(query const & q);
136
137 bool table_has_entry(std::string const & key, std::string const & column,
138 std::string const & table);
139
140 //
141 // --== Generic database metadata gathering ==--
142 //
143private:
144 std::string count(std::string const & table);
145 std::string space(std::string const & table,
146 std::string const & concatenated_columns,
147 u64 & total);
148 unsigned int page_size();
149 unsigned int cache_size();
150
151 //
152 // --== Transactions ==--
153 //
154private:
155 int transaction_level;
156 bool transaction_exclusive;
157 void begin_transaction(bool exclusive);
158 void commit_transaction();
159 void rollback_transaction();
160 friend class transaction_guard;
161
162 //
163 // --== Write-buffering -- tied into transaction ==--
164 // --== machinery and delta compression machinery ==--
165 //
166public:
167 typedef boost::shared_ptr<roster_t const> roster_t_cp;
168 typedef boost::shared_ptr<marking_map const> marking_map_cp;
169 typedef std::pair<roster_t_cp, marking_map_cp> cached_roster;
170private:
171 struct roster_size_estimator
172 {
173 unsigned long operator() (cached_roster const &);
174 };
175 struct roster_writeback_manager
176 {
177 database & db;
178 roster_writeback_manager(database & db) : db(db) {}
179 void writeout(revision_id const &, cached_roster const &);
180 };
181 LRUWritebackCache<revision_id, cached_roster,
182 roster_size_estimator, roster_writeback_manager>
183 roster_cache;
184
185 size_t size_delayed_file(file_id const & id, file_data const & dat);
186 bool have_delayed_file(file_id const & id);
187 void load_delayed_file(file_id const & id, file_data & dat);
188 void cancel_delayed_file(file_id const & id);
189 void drop_or_cancel_file(file_id const & id);
190 void schedule_delayed_file(file_id const & id, file_data const & dat);
191
192 std::map<file_id, file_data> delayed_files;
193 size_t delayed_writes_size;
194
195 void flush_delayed_writes();
196 void clear_delayed_writes();
197 void write_delayed_file(file_id const & new_id,
198 file_data const & dat);
199
200 void write_delayed_roster(revision_id const & new_id,
201 roster_t const & roster,
202 marking_map const & marking);
203
204 //
205 // --== Reading/writing delta-compressed objects ==--
206 //
207private:
208 // "do we have any entry for 'ident' that is a base version"
209 bool file_or_manifest_base_exists(hexenc<id> const & ident,
210 std::string const & table);
211 bool roster_base_stored(revision_id const & ident);
212 bool roster_base_available(revision_id const & ident);
213
214 // "do we have any entry for 'ident' that is a delta"
215 bool delta_exists(std::string const & ident,
216 std::string const & table);
217
218 bool delta_exists(std::string const & ident,
219 std::string const & base,
220 std::string const & table);
221
222 void get_file_or_manifest_base_unchecked(hexenc<id> const & new_id,
223 data & dat,
224 std::string const & table);
225 void get_file_or_manifest_delta_unchecked(hexenc<id> const & ident,
226 hexenc<id> const & base,
227 delta & del,
228 std::string const & table);
229 void get_roster_base(std::string const & ident,
230 roster_t & roster, marking_map & marking);
231 void get_roster_delta(std::string const & ident,
232 std::string const & base,
233 roster_delta & del);
234 friend struct file_and_manifest_reconstruction_graph;
235 friend struct roster_reconstruction_graph;
236 void get_version(hexenc<id> const & ident,
237 data & dat,
238 std::string const & data_table,
239 std::string const & delta_table);
240
241 void drop(std::string const & base,
242 std::string const & table);
243 void put_file_delta(file_id const & ident,
244 file_id const & base,
245 file_delta const & del);
246private:
247 void put_roster_delta(revision_id const & ident,
248 revision_id const & base,
249 roster_delta const & del);
250 void put_version(hexenc<id> const & old_id,
251 hexenc<id> const & new_id,
252 delta const & del,
253 std::string const & data_table,
254 std::string const & delta_table);
255
256 void put_roster(revision_id const & rev_id,
257 roster_t_cp const & roster,
258 marking_map_cp const & marking);
259
260public:
261
262 bool file_version_exists(file_id const & ident);
263 bool revision_exists(revision_id const & ident);
264 bool roster_link_exists_for_revision(revision_id const & ident);
265 bool roster_exists_for_revision(revision_id const & ident);
266
267
268 // get plain version if it exists, or reconstruct version
269 // from deltas (if they exist)
270 void get_file_version(file_id const & ident,
271 file_data & dat);
272
273 // put file w/o predecessor into db
274 void put_file(file_id const & new_id,
275 file_data const & dat);
276
277 // store new version and update old version to be a delta
278 void put_file_version(file_id const & old_id,
279 file_id const & new_id,
280 file_delta const & del);
281
282 void get_arbitrary_file_delta(file_id const & src_id,
283 file_id const & dst_id,
284 file_delta & del);
285
286 // get plain version if it exists, or reconstruct version
287 // from deltas (if they exist).
288 void get_manifest_version(manifest_id const & ident,
289 manifest_data & dat);
290
291 //
292 // --== The ancestry graph ==--
293 //
294public:
295 void get_revision_ancestry(rev_ancestry_map & graph);
296
297 void get_revision_parents(revision_id const & ident,
298 std::set<revision_id> & parents);
299
300 void get_revision_children(revision_id const & ident,
301 std::set<revision_id> & children);
302
303 void get_revision_manifest(revision_id const & cid,
304 manifest_id & mid);
305private:
306 // helper
307 void get_ids(std::string const & table, std::set< hexenc<id> > & ids);
308public:
309 void get_revision_ids(std::set<revision_id> & ids);
310 // this is exposed for 'db check':
311 void get_file_ids(std::set<file_id> & ids);
312
313 //
314 // --== Revision reading/writing ==--
315 //
316private:
317 void deltify_revision(revision_id const & rid);
318public:
319 void get_revision(revision_id const & ident,
320 revision_t & cs);
321
322 void get_revision(revision_id const & ident,
323 revision_data & dat);
324
325 bool put_revision(revision_id const & new_id,
326 revision_t const & rev);
327
328 bool put_revision(revision_id const & new_id,
329 revision_data const & dat);
330
331 //
332 // --== Rosters ==--
333 //
334private:
335 u64 next_id_from_table(std::string const & table);
336 void get_roster_version(revision_id const & ros_id, cached_roster & cr);
337public:
338 node_id next_node_id();
339
340 void get_roster(revision_id const & rid,
341 roster_t & roster);
342
343 void get_roster(revision_id const & rid,
344 roster_t & roster,
345 marking_map & marks);
346
347 void get_roster(revision_id const & rid,
348 cached_roster & cr);
349
350 // these are exposed for the use of database_check.cc
351 bool roster_version_exists(revision_id const & ident);
352 void get_roster_ids(std::set<revision_id> & ids);
353
354 // using roster deltas
355 void get_markings(revision_id const & id,
356 node_id const & nid,
357 marking_t & markings);
358
359 void get_file_content(revision_id const & id,
360 node_id const & nid,
361 file_id & content);
362private:
363 struct extractor;
364 struct file_content_extractor;
365 struct markings_extractor;
366 void extract_from_deltas(revision_id const & id, extractor & x);
367
368 //
369 // --== Keys ==--
370 //
371private:
372 void get_keys(std::string const & table, std::vector<rsa_keypair_id> & keys);
373public:
374 void get_key_ids(std::string const & pattern,
375 std::vector<rsa_keypair_id> & pubkeys);
376
377 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
378
379 bool public_key_exists(hexenc<id> const & hash);
380 bool public_key_exists(rsa_keypair_id const & ident);
381
382 void get_pubkey(hexenc<id> const & hash,
383 rsa_keypair_id & ident,
384 base64<rsa_pub_key> & pub_encoded);
385
386 void get_key(rsa_keypair_id const & ident,
387 base64<rsa_pub_key> & pub_encoded);
388
389 bool put_key(rsa_keypair_id const & ident,
390 base64<rsa_pub_key> const & pub_encoded);
391
392 void delete_public_key(rsa_keypair_id const & pub_id);
393
394 //
395 // --== Certs ==--
396 //
397 // note: this section is ridiculous. please do something about it.
398private:
399 bool cert_exists(cert const & t,
400 std::string const & table);
401 void put_cert(cert const & t, std::string const & table);
402 void results_to_certs(results const & res,
403 std::vector<cert> & certs);
404
405 void get_certs(std::vector< cert > & certs,
406 std::string const & table);
407
408 void get_certs(hexenc<id> const & ident,
409 std::vector< cert > & certs,
410 std::string const & table);
411
412 void get_certs(cert_name const & name,
413 std::vector< cert > & certs,
414 std::string const & table);
415
416 void get_certs(hexenc<id> const & ident,
417 cert_name const & name,
418 std::vector< cert > & certs,
419 std::string const & table);
420
421 void get_certs(hexenc<id> const & ident,
422 cert_name const & name,
423 base64<cert_value> const & val,
424 std::vector< cert > & certs,
425 std::string const & table);
426
427 void get_certs(cert_name const & name,
428 base64<cert_value> const & val,
429 std::vector<cert> & certs,
430 std::string const & table);
431
432 outdated_indicator_factory cert_stamper;
433public:
434
435 bool revision_cert_exists(revision<cert> const & cert);
436 bool revision_cert_exists(hexenc<id> const & hash);
437
438 bool put_revision_cert(revision<cert> const & cert);
439
440 // this variant has to be rather coarse and fast, for netsync's use
441 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<hexenc<id>,
442 std::pair<revision_id, rsa_keypair_id> > > & idx);
443
444 // Only used by database_check.cc
445 outdated_indicator get_revision_certs(std::vector< revision<cert> > & certs);
446
447 outdated_indicator get_revision_certs(cert_name const & name,
448 std::vector< revision<cert> > & certs);
449
450 outdated_indicator get_revision_certs(revision_id const & ident,
451 cert_name const & name,
452 std::vector< revision<cert> > & certs);
453
454 // Only used by get_branch_certs (project.cc)
455 outdated_indicator get_revision_certs(cert_name const & name,
456 base64<cert_value> const & val,
457 std::vector< revision<cert> > & certs);
458
459 // Only used by revision_is_in_branch (project.cc)
460 outdated_indicator get_revision_certs(revision_id const & ident,
461 cert_name const & name,
462 base64<cert_value> const & value,
463 std::vector< revision<cert> > & certs);
464
465 // Only used by get_branch_heads (project.cc)
466 outdated_indicator get_revisions_with_cert(cert_name const & name,
467 base64<cert_value> const & value,
468 std::set<revision_id> & revisions);
469
470 // Used through project.cc, and by
471 // anc_graph::add_node_for_oldstyle_revision (revision.cc)
472 outdated_indicator get_revision_certs(revision_id const & ident,
473 std::vector< revision<cert> > & certs);
474
475 // Used through get_revision_cert_hashes (project.cc)
476 outdated_indicator get_revision_certs(revision_id const & ident,
477 std::vector< hexenc<id> > & hashes);
478
479 void get_revision_cert(hexenc<id> const & hash,
480 revision<cert> & c);
481
482 void get_manifest_certs(manifest_id const & ident,
483 std::vector< manifest<cert> > & certs);
484
485 void get_manifest_certs(cert_name const & name,
486 std::vector< manifest<cert> > & certs);
487
488 //
489 // --== Epochs ==--
490 //
491public:
492 void get_epochs(std::map<branch_name, epoch_data> & epochs);
493
494 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
495
496 bool epoch_exists(epoch_id const & eid);
497
498 void set_epoch(branch_name const & branch, epoch_data const & epo);
499
500 void clear_epoch(branch_name const & branch);
501
502 //
503 // --== Database 'vars' ==--
504 //
505public:
506 void get_vars(std::map<var_key, var_value > & vars);
507
508 void get_var(var_key const & key, var_value & value);
509
510 bool var_exists(var_key const & key);
511
512 void set_var(var_key const & key, var_value const & value);
513
514 void clear_var(var_key const & key);
515
516 //
517 // --== Completion ==--
518 //
519public:
520 void complete(std::string const & partial,
521 std::set<revision_id> & completions);
522
523 void complete(std::string const & partial,
524 std::set<file_id> & completions);
525
526 void complete(std::string const & partial,
527 std::set< std::pair<key_id, utf8 > > & completions);
528
529 void complete(selectors::selector_type ty,
530 std::string const & partial,
531 std::vector<std::pair<selectors::selector_type,
532 std::string> > const & limit,
533 std::set<std::string> & completions);
534
535 //
536 // --== The 'db' family of top-level commands ==--
537 //
538public:
539 void initialize();
540 void debug(std::string const & sql, std::ostream & out);
541 void dump(std::ostream &);
542 void load(std::istream &);
543 void info(std::ostream &);
544 void version(std::ostream &);
545 void migrate();
546 void test_migration_step(std::string const &);
547 // for kill_rev_locally:
548 void delete_existing_rev_and_certs(revision_id const & rid);
549 // for kill_branch_certs_locally:
550 void delete_branch_named(cert_value const & branch);
551 // for kill_tag_locally:
552 void delete_tag_named(cert_value const & tag);
553
554 // misc
555private:
556 friend void rcs_put_raw_file_edge(hexenc<id> const & old_id,
557 hexenc<id> const & new_id,
558 delta const & del,
559 database & db);
560public:
561 // branches
562 outdated_indicator get_branches(std::vector<std::string> & names);
563 outdated_indicator get_branches(std::string const & glob,
564 std::vector<std::string> & names);
565
566 bool check_integrity();
567
568 void get_uncommon_ancestors(revision_id const & a,
569 revision_id const & b,
570 std::set<revision_id> & a_uncommon_ancs,
571 std::set<revision_id> & b_uncommon_ancs);
572
573 // for changesetify, rosterify
574 void delete_existing_revs_and_certs();
575
576 void delete_existing_manifests();
577
578 // heights
579 void get_rev_height(revision_id const & id,
580 rev_height & height);
581
582 void put_rev_height(revision_id const & id,
583 rev_height const & height);
584
585 bool has_rev_height(rev_height const & height);
586 void delete_existing_heights();
587
588 void put_height_for_revision(revision_id const & new_id,
589 revision_t const & rev);
590
591 // for regenerate_rosters
592 void delete_existing_rosters();
593 void put_roster_for_revision(revision_id const & new_id,
594 revision_t const & rev);
595};
596
597// Parent maps are used in a number of places to keep track of all the
598// parent rosters of a given revision.
599typedef std::map<revision_id, database::cached_roster>
600parent_map;
601
602typedef parent_map::value_type
603parent_entry;
604
605inline revision_id const & parent_id(parent_entry const & p)
606{
607 return p.first;
608}
609
610inline revision_id const & parent_id(parent_map::const_iterator i)
611{
612 return i->first;
613}
614
615inline database::cached_roster const &
616parent_cached_roster(parent_entry const & p)
617{
618 return p.second;
619}
620
621inline database::cached_roster const &
622parent_cached_roster(parent_map::const_iterator i)
623{
624 return i->second;
625}
626
627inline roster_t const & parent_roster(parent_entry const & p)
628{
629 return *(p.second.first);
630}
631
632inline roster_t const & parent_roster(parent_map::const_iterator i)
633{
634 return *(i->second.first);
635}
636
637inline marking_map const & parent_marking(parent_entry const & p)
638{
639 return *(p.second.second);
640}
641
642inline marking_map const & parent_marking(parent_map::const_iterator i)
643{
644 return *(i->second.second);
645}
646
647// Transaction guards nest. Acquire one in any scope you'd like
648// transaction-protected, and it'll make sure the db aborts a transaction
649// if there's any exception before you call commit().
650//
651// By default, transaction_guard locks the database exclusively. If the
652// transaction is intended to be read-only, construct the guard with
653// exclusive=false. In this case, if a database update is attempted and
654// another process is accessing the database an exception will be thrown -
655// uglier and more confusing for the user - however no data inconsistency
656// should result.
657//
658// An exception is thrown if an exclusive transaction_guard is created
659// while a non-exclusive transaction_guard exists.
660//
661// Transaction guards also support splitting long transactions up into
662// checkpoints. Any time you feel the database is in an
663// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
664// given number of bytes. When the number of bytes and number of
665// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
666// is committed and reopened. Any time you feel the database has reached a
667// point where want to ensure a transaction commit, without destructing the
668// object, you can call do_checkpoint().
669//
670// This does *not* free you from having to call .commit() on the guard when
671// it "completes" its lifecycle. Here's a way to think of checkpointing: a
672// normal transaction guard is associated with a program-control
673// scope. Sometimes (notably in netsync) it is not convenient to create a
674// scope which exactly matches the size of work-unit you want to commit (a
675// bunch of packets, or a session-close, whichever comes first) so
676// checkpointing allows you to use a long-lived transaction guard and mark
677// off the moments where commits are desired, without destructing the
678// guard. The guard still performs an error-management task in case of an
679// exception, so you still have to clean it before destruction using
680// .commit().
681//
682// Checkpointing also does not override the transaction guard nesting: if
683// there's an enclosing transaction_guard, your checkpointing calls have no
684// affect.
685//
686// The purpose of checkpointing is to provide an alternative to "many short
687// transactions" on platforms (OSX in particular) where the overhead of
688// full commits at high frequency is too high. The solution for these
689// platforms is to run inside a longer-lived transaction (session-length),
690// and checkpoint at higher granularity (every megabyte or so).
691
692class transaction_guard
693{
694 bool committed;
695 database & db;
696 bool exclusive;
697 size_t const checkpoint_batch_size;
698 size_t const checkpoint_batch_bytes;
699 size_t checkpointed_calls;
700 size_t checkpointed_bytes;
701public:
702 transaction_guard(database & d, bool exclusive=true,
703 size_t checkpoint_batch_size=1000,
704 size_t checkpoint_batch_bytes=0xfffff);
705 ~transaction_guard();
706 void do_checkpoint();
707 void maybe_checkpoint(size_t nbytes);
708 void commit();
709};
710
711// Local Variables:
712// mode: C++
713// fill-column: 76
714// c-file-style: "gnu"
715// indent-tabs-mode: nil
716// End:
717// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
718
719#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status