monotone

monotone Mtn Source Tree

Root/database.hh

1#ifndef __DATABASE_HH__
2#define __DATABASE_HH__
3
4// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13struct sqlite3;
14struct sqlite3_stmt;
15struct cert;
16int sqlite3_finalize(sqlite3_stmt *);
17
18#include "vector.hh"
19#include <set>
20#include <map>
21
22#include "numeric_vocab.hh"
23#include "vocab.hh"
24#include "paths.hh"
25#include "cleanup.hh"
26#include "roster.hh"
27#include "selectors.hh"
28#include "graph.hh"
29
30// FIXME: would be better not to include this everywhere
31#include "outdated_indicator.hh"
32#include "lru_writeback_cache.hh"
33
34// this file defines a public, typed interface to the database.
35// the database class encapsulates all knowledge about sqlite,
36// the schema, and all SQL statements used to access the schema.
37//
38// one thing which is rather important to note is that this file
39// deals with two sorts of version relationships. the versions
40// stored in the database are all *backwards* from those the program
41// sees. so for example if you have two versions of a file
42//
43// file.1, file.2
44//
45// where file.2 was a modification of file.1, then as far as the rest of
46// the application is concerned -- and the ancestry graph -- file.1 is the
47// "old" version and file.2 is the "new" version. note the use of terms
48// which describe time, and the sequence of edits a user makes to a
49// file. those are ancestry terms. when the application composes a
50// patchset, for example, it'll contain the diff delta(file.1, file.2)
51//
52// from the database's perspective, however, file.1 is the derived version,
53// and file.2 is the base version. the base version is stored in the
54// "files" table, and the *reverse* diff delta(file.2, file.1) is stored in
55// the "file_deltas" table, under the id of file.1, with the id of file.2
56// listed as its base. note the use of the terms which describe
57// reconstruction; those are storage-system terms.
58//
59// the interface *to* the database, and the ancestry version graphs, use
60// the old / new metaphor of ancestry, but within the database (including
61// the private helper methods, and the storage version graphs) the
62// base/derived storage metaphor is used. the only real way to tell which
63// is which is to look at the parameter names and code. I might try to
64// express this in the type system some day, but not presently.
65//
66// the key phrase to keep repeating when working on this code is:
67//
68// "base files are new, derived files are old"
69//
70// it makes the code confusing, I know. this is possibly the worst part of
71// the program. I don't know if there's any way to make it clearer.
72
73class transaction_guard;
74class app_state;
75struct revision_t;
76struct query;
77class rev_height;
78struct globish;
79
80class database
81{
82 //
83 // --== Opening the database and schema checking ==--
84 //
85private:
86 system_path filename;
87 app_state * __app;
88 struct sqlite3 * __sql;
89
90 enum open_mode { normal_mode = 0,
91 schema_bypass_mode,
92 format_bypass_mode };
93
94 void install_functions(app_state * app);
95 struct sqlite3 * sql(enum open_mode mode = normal_mode);
96
97 void check_filename();
98 void check_db_exists();
99 void check_db_nonexistent();
100 void open();
101 void close();
102 void check_format();
103
104public:
105 database(system_path const & file);
106 ~database();
107
108 void set_app(app_state * app);
109
110 void set_filename(system_path const & file);
111 system_path get_filename();
112 bool is_dbfile(any_path const & file);
113 void ensure_open();
114 void ensure_open_for_format_changes();
115private:
116 void ensure_open_for_maintenance();
117public:
118 void check_is_not_rosterified();
119 bool database_specified();
120
121 //
122 // --== Basic SQL interface and statement caching ==--
123 //
124private:
125 struct statement {
126 statement() : count(0), stmt(0, sqlite3_finalize) {}
127 int count;
128 cleanup_ptr<sqlite3_stmt*, int> stmt;
129 };
130 std::map<std::string, statement> statement_cache;
131
132 typedef std::vector< std::vector<std::string> > results;
133 void fetch(results & res,
134 int const want_cols, int const want_rows,
135 query const & q);
136 void execute(query const & q);
137
138 bool table_has_entry(std::string const & key, std::string const & column,
139 std::string const & table);
140
141 //
142 // --== Generic database metadata gathering ==--
143 //
144private:
145 std::string count(std::string const & table);
146 std::string space(std::string const & table,
147 std::string const & concatenated_columns,
148 u64 & total);
149 unsigned int page_size();
150 unsigned int cache_size();
151
152 //
153 // --== Transactions ==--
154 //
155private:
156 int transaction_level;
157 bool transaction_exclusive;
158 void begin_transaction(bool exclusive);
159 void commit_transaction();
160 void rollback_transaction();
161 friend class transaction_guard;
162
163 //
164 // --== Write-buffering -- tied into transaction ==--
165 // --== machinery and delta compression machinery ==--
166 //
167public:
168 typedef boost::shared_ptr<roster_t const> roster_t_cp;
169 typedef boost::shared_ptr<marking_map const> marking_map_cp;
170 typedef std::pair<roster_t_cp, marking_map_cp> cached_roster;
171private:
172 struct roster_size_estimator
173 {
174 unsigned long operator() (cached_roster const &);
175 };
176 struct roster_writeback_manager
177 {
178 database & db;
179 roster_writeback_manager(database & db) : db(db) {}
180 void writeout(revision_id const &, cached_roster const &);
181 };
182 LRUWritebackCache<revision_id, cached_roster,
183 roster_size_estimator, roster_writeback_manager>
184 roster_cache;
185
186 size_t size_delayed_file(file_id const & id, file_data const & dat);
187 bool have_delayed_file(file_id const & id);
188 void load_delayed_file(file_id const & id, file_data & dat);
189 void cancel_delayed_file(file_id const & id);
190 void drop_or_cancel_file(file_id const & id);
191 void schedule_delayed_file(file_id const & id, file_data const & dat);
192
193 std::map<file_id, file_data> delayed_files;
194 size_t delayed_writes_size;
195
196 void flush_delayed_writes();
197 void clear_delayed_writes();
198 void write_delayed_file(file_id const & new_id,
199 file_data const & dat);
200
201 void write_delayed_roster(revision_id const & new_id,
202 roster_t const & roster,
203 marking_map const & marking);
204
205 //
206 // --== Reading/writing delta-compressed objects ==--
207 //
208private:
209 // "do we have any entry for 'ident' that is a base version"
210 bool file_or_manifest_base_exists(hexenc<id> const & ident,
211 std::string const & table);
212 bool roster_base_stored(revision_id const & ident);
213 bool roster_base_available(revision_id const & ident);
214
215 // "do we have any entry for 'ident' that is a delta"
216 bool delta_exists(std::string const & ident,
217 std::string const & table);
218
219 bool delta_exists(std::string const & ident,
220 std::string const & base,
221 std::string const & table);
222
223 void get_file_or_manifest_base_unchecked(hexenc<id> const & new_id,
224 data & dat,
225 std::string const & table);
226 void get_file_or_manifest_delta_unchecked(hexenc<id> const & ident,
227 hexenc<id> const & base,
228 delta & del,
229 std::string const & table);
230 void get_roster_base(std::string const & ident,
231 roster_t & roster, marking_map & marking);
232 void get_roster_delta(std::string const & ident,
233 std::string const & base,
234 roster_delta & del);
235 friend struct file_and_manifest_reconstruction_graph;
236 friend struct roster_reconstruction_graph;
237 void get_version(hexenc<id> const & ident,
238 data & dat,
239 std::string const & data_table,
240 std::string const & delta_table);
241
242 void drop(std::string const & base,
243 std::string const & table);
244 void put_file_delta(file_id const & ident,
245 file_id const & base,
246 file_delta const & del);
247private:
248 void put_roster_delta(revision_id const & ident,
249 revision_id const & base,
250 roster_delta const & del);
251 void put_version(hexenc<id> const & old_id,
252 hexenc<id> const & new_id,
253 delta const & del,
254 std::string const & data_table,
255 std::string const & delta_table);
256
257 void put_roster(revision_id const & rev_id,
258 roster_t_cp const & roster,
259 marking_map_cp const & marking);
260
261public:
262
263 bool file_version_exists(file_id const & ident);
264 bool revision_exists(revision_id const & ident);
265 bool roster_link_exists_for_revision(revision_id const & ident);
266 bool roster_exists_for_revision(revision_id const & ident);
267
268
269 // get plain version if it exists, or reconstruct version
270 // from deltas (if they exist)
271 void get_file_version(file_id const & ident,
272 file_data & dat);
273
274 // put file w/o predecessor into db
275 void put_file(file_id const & new_id,
276 file_data const & dat);
277
278 // store new version and update old version to be a delta
279 void put_file_version(file_id const & old_id,
280 file_id const & new_id,
281 file_delta const & del);
282
283 void get_arbitrary_file_delta(file_id const & src_id,
284 file_id const & dst_id,
285 file_delta & del);
286
287 // get plain version if it exists, or reconstruct version
288 // from deltas (if they exist).
289 void get_manifest_version(manifest_id const & ident,
290 manifest_data & dat);
291
292 //
293 // --== The ancestry graph ==--
294 //
295public:
296 void get_revision_ancestry(rev_ancestry_map & graph);
297
298 void get_revision_parents(revision_id const & ident,
299 std::set<revision_id> & parents);
300
301 void get_revision_children(revision_id const & ident,
302 std::set<revision_id> & children);
303
304 void get_revision_manifest(revision_id const & cid,
305 manifest_id & mid);
306private:
307 // helper
308 void get_ids(std::string const & table, std::set< hexenc<id> > & ids);
309public:
310 void get_revision_ids(std::set<revision_id> & ids);
311 // this is exposed for 'db check':
312 void get_file_ids(std::set<file_id> & ids);
313
314 //
315 // --== Revision reading/writing ==--
316 //
317private:
318 void deltify_revision(revision_id const & rid);
319public:
320 void get_revision(revision_id const & ident,
321 revision_t & cs);
322
323 void get_revision(revision_id const & ident,
324 revision_data & dat);
325
326 bool put_revision(revision_id const & new_id,
327 revision_t const & rev);
328
329 bool put_revision(revision_id const & new_id,
330 revision_data const & dat);
331
332 //
333 // --== Rosters ==--
334 //
335private:
336 u64 next_id_from_table(std::string const & table);
337 void get_roster_version(revision_id const & ros_id, cached_roster & cr);
338public:
339 node_id next_node_id();
340
341 void get_roster(revision_id const & rid,
342 roster_t & roster);
343
344 void get_roster(revision_id const & rid,
345 roster_t & roster,
346 marking_map & marks);
347
348 void get_roster(revision_id const & rid,
349 cached_roster & cr);
350
351 // these are exposed for the use of database_check.cc
352 bool roster_version_exists(revision_id const & ident);
353 void get_roster_ids(std::set<revision_id> & ids);
354
355 // using roster deltas
356 void get_markings(revision_id const & id,
357 node_id const & nid,
358 marking_t & markings);
359
360 void get_file_content(revision_id const & id,
361 node_id const & nid,
362 file_id & content);
363private:
364 struct extractor;
365 struct file_content_extractor;
366 struct markings_extractor;
367 void extract_from_deltas(revision_id const & id, extractor & x);
368
369 //
370 // --== Keys ==--
371 //
372private:
373 void get_keys(std::string const & table, std::vector<rsa_keypair_id> & keys);
374public:
375 void get_key_ids(std::vector<rsa_keypair_id> & pubkeys);
376 void get_key_ids(globish const & pattern,
377 std::vector<rsa_keypair_id> & pubkeys);
378
379 void get_public_keys(std::vector<rsa_keypair_id> & pubkeys);
380
381 bool public_key_exists(hexenc<id> const & hash);
382 bool public_key_exists(rsa_keypair_id const & ident);
383
384 void get_pubkey(hexenc<id> const & hash,
385 rsa_keypair_id & ident,
386 base64<rsa_pub_key> & pub_encoded);
387
388 void get_key(rsa_keypair_id const & ident,
389 base64<rsa_pub_key> & pub_encoded);
390
391 bool put_key(rsa_keypair_id const & ident,
392 base64<rsa_pub_key> const & pub_encoded);
393
394 void delete_public_key(rsa_keypair_id const & pub_id);
395
396 //
397 // --== Certs ==--
398 //
399 // note: this section is ridiculous. please do something about it.
400private:
401 bool cert_exists(cert const & t,
402 std::string const & table);
403 void put_cert(cert const & t, std::string const & table);
404 void results_to_certs(results const & res,
405 std::vector<cert> & certs);
406
407 void get_certs(std::vector< cert > & certs,
408 std::string const & table);
409
410 void get_certs(hexenc<id> const & ident,
411 std::vector< cert > & certs,
412 std::string const & table);
413
414 void get_certs(cert_name const & name,
415 std::vector< cert > & certs,
416 std::string const & table);
417
418 void get_certs(hexenc<id> const & ident,
419 cert_name const & name,
420 std::vector< cert > & certs,
421 std::string const & table);
422
423 void get_certs(hexenc<id> const & ident,
424 cert_name const & name,
425 base64<cert_value> const & val,
426 std::vector< cert > & certs,
427 std::string const & table);
428
429 void get_certs(cert_name const & name,
430 base64<cert_value> const & val,
431 std::vector<cert> & certs,
432 std::string const & table);
433
434 outdated_indicator_factory cert_stamper;
435public:
436
437 bool revision_cert_exists(revision<cert> const & cert);
438 bool revision_cert_exists(hexenc<id> const & hash);
439
440 bool put_revision_cert(revision<cert> const & cert);
441
442 // this variant has to be rather coarse and fast, for netsync's use
443 outdated_indicator get_revision_cert_nobranch_index(std::vector< std::pair<hexenc<id>,
444 std::pair<revision_id, rsa_keypair_id> > > & idx);
445
446 // Only used by database_check.cc
447 outdated_indicator get_revision_certs(std::vector< revision<cert> > & certs);
448
449 outdated_indicator get_revision_certs(cert_name const & name,
450 std::vector< revision<cert> > & certs);
451
452 outdated_indicator get_revision_certs(revision_id const & ident,
453 cert_name const & name,
454 std::vector< revision<cert> > & certs);
455
456 // Only used by get_branch_certs (project.cc)
457 outdated_indicator get_revision_certs(cert_name const & name,
458 base64<cert_value> const & val,
459 std::vector< revision<cert> > & certs);
460
461 // Only used by revision_is_in_branch (project.cc)
462 outdated_indicator get_revision_certs(revision_id const & ident,
463 cert_name const & name,
464 base64<cert_value> const & value,
465 std::vector< revision<cert> > & certs);
466
467 // Only used by get_branch_heads (project.cc)
468 outdated_indicator get_revisions_with_cert(cert_name const & name,
469 base64<cert_value> const & value,
470 std::set<revision_id> & revisions);
471
472 // Used through project.cc, and by
473 // anc_graph::add_node_for_oldstyle_revision (revision.cc)
474 outdated_indicator get_revision_certs(revision_id const & ident,
475 std::vector< revision<cert> > & certs);
476
477 // Used through get_revision_cert_hashes (project.cc)
478 outdated_indicator get_revision_certs(revision_id const & ident,
479 std::vector< hexenc<id> > & hashes);
480
481 void get_revision_cert(hexenc<id> const & hash,
482 revision<cert> & c);
483
484 void get_manifest_certs(manifest_id const & ident,
485 std::vector< manifest<cert> > & certs);
486
487 void get_manifest_certs(cert_name const & name,
488 std::vector< manifest<cert> > & certs);
489
490 //
491 // --== Epochs ==--
492 //
493public:
494 void get_epochs(std::map<branch_name, epoch_data> & epochs);
495
496 void get_epoch(epoch_id const & eid, branch_name & branch, epoch_data & epo);
497
498 bool epoch_exists(epoch_id const & eid);
499
500 void set_epoch(branch_name const & branch, epoch_data const & epo);
501
502 void clear_epoch(branch_name const & branch);
503
504 //
505 // --== Database 'vars' ==--
506 //
507public:
508 void get_vars(std::map<var_key, var_value > & vars);
509
510 void get_var(var_key const & key, var_value & value);
511
512 bool var_exists(var_key const & key);
513
514 void set_var(var_key const & key, var_value const & value);
515
516 void clear_var(var_key const & key);
517
518 //
519 // --== Completion ==--
520 //
521public:
522 void complete(std::string const & partial,
523 std::set<revision_id> & completions);
524
525 void complete(std::string const & partial,
526 std::set<file_id> & completions);
527
528 void complete(std::string const & partial,
529 std::set< std::pair<key_id, utf8 > > & completions);
530
531 void complete(selectors::selector_type ty,
532 std::string const & partial,
533 std::vector<std::pair<selectors::selector_type,
534 std::string> > const & limit,
535 std::set<std::string> & completions);
536
537 //
538 // --== The 'db' family of top-level commands ==--
539 //
540public:
541 void initialize();
542 void debug(std::string const & sql, std::ostream & out);
543 void dump(std::ostream &);
544 void load(std::istream &);
545 void info(std::ostream &);
546 void version(std::ostream &);
547 void migrate();
548 void test_migration_step(std::string const &);
549 // for kill_rev_locally:
550 void delete_existing_rev_and_certs(revision_id const & rid);
551 // for kill_branch_certs_locally:
552 void delete_branch_named(cert_value const & branch);
553 // for kill_tag_locally:
554 void delete_tag_named(cert_value const & tag);
555
556 // misc
557private:
558 friend void rcs_put_raw_file_edge(hexenc<id> const & old_id,
559 hexenc<id> const & new_id,
560 delta const & del,
561 database & db);
562public:
563 // branches
564 outdated_indicator get_branches(std::vector<std::string> & names);
565 outdated_indicator get_branches(globish const & glob,
566 std::vector<std::string> & names);
567
568 bool check_integrity();
569
570 void get_uncommon_ancestors(revision_id const & a,
571 revision_id const & b,
572 std::set<revision_id> & a_uncommon_ancs,
573 std::set<revision_id> & b_uncommon_ancs);
574
575 // for changesetify, rosterify
576 void delete_existing_revs_and_certs();
577
578 void delete_existing_manifests();
579
580 // heights
581 void get_rev_height(revision_id const & id,
582 rev_height & height);
583
584 void put_rev_height(revision_id const & id,
585 rev_height const & height);
586
587 bool has_rev_height(rev_height const & height);
588 void delete_existing_heights();
589
590 void put_height_for_revision(revision_id const & new_id,
591 revision_t const & rev);
592
593 // for regenerate_rosters
594 void delete_existing_rosters();
595 void put_roster_for_revision(revision_id const & new_id,
596 revision_t const & rev);
597};
598
599// Parent maps are used in a number of places to keep track of all the
600// parent rosters of a given revision.
601typedef std::map<revision_id, database::cached_roster>
602parent_map;
603
604typedef parent_map::value_type
605parent_entry;
606
607inline revision_id const & parent_id(parent_entry const & p)
608{
609 return p.first;
610}
611
612inline revision_id const & parent_id(parent_map::const_iterator i)
613{
614 return i->first;
615}
616
617inline database::cached_roster const &
618parent_cached_roster(parent_entry const & p)
619{
620 return p.second;
621}
622
623inline database::cached_roster const &
624parent_cached_roster(parent_map::const_iterator i)
625{
626 return i->second;
627}
628
629inline roster_t const & parent_roster(parent_entry const & p)
630{
631 return *(p.second.first);
632}
633
634inline roster_t const & parent_roster(parent_map::const_iterator i)
635{
636 return *(i->second.first);
637}
638
639inline marking_map const & parent_marking(parent_entry const & p)
640{
641 return *(p.second.second);
642}
643
644inline marking_map const & parent_marking(parent_map::const_iterator i)
645{
646 return *(i->second.second);
647}
648
649// Transaction guards nest. Acquire one in any scope you'd like
650// transaction-protected, and it'll make sure the db aborts a transaction
651// if there's any exception before you call commit().
652//
653// By default, transaction_guard locks the database exclusively. If the
654// transaction is intended to be read-only, construct the guard with
655// exclusive=false. In this case, if a database update is attempted and
656// another process is accessing the database an exception will be thrown -
657// uglier and more confusing for the user - however no data inconsistency
658// should result.
659//
660// An exception is thrown if an exclusive transaction_guard is created
661// while a non-exclusive transaction_guard exists.
662//
663// Transaction guards also support splitting long transactions up into
664// checkpoints. Any time you feel the database is in an
665// acceptably-consistent state, you can call maybe_checkpoint(nn) with a
666// given number of bytes. When the number of bytes and number of
667// maybe_checkpoint() calls exceeds the guard's parameters, the transaction
668// is committed and reopened. Any time you feel the database has reached a
669// point where want to ensure a transaction commit, without destructing the
670// object, you can call do_checkpoint().
671//
672// This does *not* free you from having to call .commit() on the guard when
673// it "completes" its lifecycle. Here's a way to think of checkpointing: a
674// normal transaction guard is associated with a program-control
675// scope. Sometimes (notably in netsync) it is not convenient to create a
676// scope which exactly matches the size of work-unit you want to commit (a
677// bunch of packets, or a session-close, whichever comes first) so
678// checkpointing allows you to use a long-lived transaction guard and mark
679// off the moments where commits are desired, without destructing the
680// guard. The guard still performs an error-management task in case of an
681// exception, so you still have to clean it before destruction using
682// .commit().
683//
684// Checkpointing also does not override the transaction guard nesting: if
685// there's an enclosing transaction_guard, your checkpointing calls have no
686// affect.
687//
688// The purpose of checkpointing is to provide an alternative to "many short
689// transactions" on platforms (OSX in particular) where the overhead of
690// full commits at high frequency is too high. The solution for these
691// platforms is to run inside a longer-lived transaction (session-length),
692// and checkpoint at higher granularity (every megabyte or so).
693
694class transaction_guard
695{
696 bool committed;
697 database & db;
698 bool exclusive;
699 size_t const checkpoint_batch_size;
700 size_t const checkpoint_batch_bytes;
701 size_t checkpointed_calls;
702 size_t checkpointed_bytes;
703public:
704 transaction_guard(database & d, bool exclusive=true,
705 size_t checkpoint_batch_size=1000,
706 size_t checkpoint_batch_bytes=0xfffff);
707 ~transaction_guard();
708 void do_checkpoint();
709 void maybe_checkpoint(size_t nbytes);
710 void commit();
711};
712
713// Local Variables:
714// mode: C++
715// fill-column: 76
716// c-file-style: "gnu"
717// indent-tabs-mode: nil
718// End:
719// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
720
721#endif // __DATABASE_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status