1 | #ifndef __MERKLE_TREE_HH__␊ |
2 | #define __MERKLE_TREE_HH__␊ |
3 | ␊ |
4 | // Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>␊ |
5 | //␊ |
6 | // This program is made available under the GNU GPL version 2.0 or␊ |
7 | // greater. See the accompanying file COPYING for details.␊ |
8 | //␊ |
9 | // This program is distributed WITHOUT ANY WARRANTY; without even the␊ |
10 | // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR␊ |
11 | // PURPOSE.␊ |
12 | ␊ |
13 | #include <map>␊ |
14 | #include <set>␊ |
15 | ␊ |
16 | #include <boost/shared_ptr.hpp>␊ |
17 | #include <boost/dynamic_bitset.hpp>␊ |
18 | ␊ |
19 | #include "vector.hh"␊ |
20 | #include "vocab.hh"␊ |
21 | #include "transforms.hh"␊ |
22 | #include "hash_map.hh"␊ |
23 | ␊ |
24 | // This file contains data structures and functions for managing merkle␊ |
25 | // trees. A merkle tree is, conceptually, a general recursive construction␊ |
26 | // whereby a range of K data elements is divided up into buckets. Each␊ |
27 | // bucket is then hashed, and the hash values of the buckets at level N of␊ |
28 | // the tree are used as data elements held in buckets at level N-1. At␊ |
29 | // level 0 there is only one bucket.␊ |
30 | //␊ |
31 | // The result is a tree in which each node has J "slots", each of which␊ |
32 | // summarizes (as a hashcode) the entire subtree beneath it. this makes a␊ |
33 | // pair of merkle trees amenable to setwise operations such as union or␊ |
34 | // difference while only inspecting D*log_base_J(K) nodes where D is the␊ |
35 | // number of differences between trees.␊ |
36 | //␊ |
37 | // We build merkle trees over a few collections of objects in our database␊ |
38 | // and use these to synchronize with remote hosts. See netsync.{cc,hh} and␊ |
39 | // refiner.{cc,hh} for more details.␊ |
40 | ␊ |
41 | typedef enum␊ |
42 | {␊ |
43 | file_item = 2,␊ |
44 | key_item = 3,␊ |
45 | revision_item = 4,␊ |
46 | cert_item = 5,␊ |
47 | epoch_item = 6␊ |
48 | }␊ |
49 | netcmd_item_type;␊ |
50 | ␊ |
51 | void netcmd_item_type_to_string(netcmd_item_type t, std::string & typestr);␊ |
52 | ␊ |
53 | typedef enum␊ |
54 | {␊ |
55 | empty_state,␊ |
56 | leaf_state,␊ |
57 | subtree_state␊ |
58 | }␊ |
59 | slot_state;␊ |
60 | ␊ |
61 | struct merkle_node␊ |
62 | {␊ |
63 | size_t level;␊ |
64 | boost::dynamic_bitset<unsigned char> pref;␊ |
65 | size_t total_num_leaves;␊ |
66 | boost::dynamic_bitset<unsigned char> bitmap;␊ |
67 | std::vector<id> slots;␊ |
68 | netcmd_item_type type;␊ |
69 | ␊ |
70 | merkle_node();␊ |
71 | bool operator==(merkle_node const & other) const;␊ |
72 | void check_invariants() const;␊ |
73 | ␊ |
74 | void get_raw_prefix(prefix & pref) const;␊ |
75 | void get_hex_prefix(hexenc<prefix> & hpref) const;␊ |
76 | ␊ |
77 | void get_raw_slot(size_t slot, id & val) const;␊ |
78 | void get_hex_slot(size_t slot, hexenc<id> & val) const;␊ |
79 | ␊ |
80 | void set_raw_slot(size_t slot, id const & val);␊ |
81 | void set_hex_slot(size_t slot, hexenc<id> const & val);␊ |
82 | ␊ |
83 | void extended_prefix(size_t slot, boost::dynamic_bitset<unsigned char> & extended) const;␊ |
84 | void extended_raw_prefix(size_t slot, prefix & extended) const;␊ |
85 | void extended_hex_prefix(size_t slot, hexenc<prefix> & extended) const;␊ |
86 | ␊ |
87 | slot_state get_slot_state(size_t n) const;␊ |
88 | void set_slot_state(size_t n, slot_state st);␊ |
89 | };␊ |
90 | ␊ |
91 | typedef boost::shared_ptr<merkle_node> merkle_ptr;␊ |
92 | ␊ |
93 | typedef std::pair<prefix,size_t> merkle_node_id;␊ |
94 | namespace hashmap {␊ |
95 | template<>␊ |
96 | struct hash<merkle_node_id>␊ |
97 | {␊ |
98 | hash<std::string> sh;␊ |
99 | size_t operator()(merkle_node_id const & m) const␊ |
100 | {␊ |
101 | return sh(m.first()) + m.second;␊ |
102 | }␊ |
103 | };␊ |
104 | }␊ |
105 | typedef hashmap::hash_map<merkle_node_id, merkle_ptr> merkle_table;␊ |
106 | ␊ |
107 | ␊ |
108 | size_t prefix_length_in_bits(size_t level);␊ |
109 | size_t prefix_length_in_bytes(size_t level);␊ |
110 | void write_node(merkle_node const & in, std::string & outbuf);␊ |
111 | void read_node(std::string const & inbuf, size_t & pos, merkle_node & out);␊ |
112 | ␊ |
113 | std::string raw_sha1(std::string const & in);␊ |
114 | ␊ |
115 | ␊ |
116 | bool␊ |
117 | locate_item(merkle_table & table,␊ |
118 | id const & val,␊ |
119 | size_t & slotnum,␊ |
120 | merkle_ptr & mp);␊ |
121 | ␊ |
122 | ␊ |
123 | void␊ |
124 | pick_slot_and_prefix_for_value(id const & val,␊ |
125 | size_t level,␊ |
126 | size_t & slotnum,␊ |
127 | boost::dynamic_bitset<unsigned char> & pref);␊ |
128 | ␊ |
129 | // Collect the items inside a subtree.␊ |
130 | ␊ |
131 | void␊ |
132 | collect_items_in_subtree(merkle_table & tab,␊ |
133 | prefix const & pref,␊ |
134 | size_t level,␊ |
135 | std::set<id> & items);␊ |
136 | ␊ |
137 | // Insert an item into a tree.␊ |
138 | ␊ |
139 | void␊ |
140 | insert_into_merkle_tree(merkle_table & tab,␊ |
141 | netcmd_item_type type,␊ |
142 | id const & leaf,␊ |
143 | size_t level);␊ |
144 | ␊ |
145 | inline void␊ |
146 | insert_into_merkle_tree(merkle_table & tab,␊ |
147 | netcmd_item_type type,␊ |
148 | hexenc<id> const & hex_leaf,␊ |
149 | size_t level)␊ |
150 | {␊ |
151 | id leaf;␊ |
152 | decode_hexenc(hex_leaf, leaf);␊ |
153 | insert_into_merkle_tree(tab, type, leaf, level);␊ |
154 | }␊ |
155 | ␊ |
156 | // Recalculate the hashes in the given tree. Must be called after␊ |
157 | // insert_into_merkle_tree, and before using tree (but you can batch up␊ |
158 | // multiple calls to insert_into_merkle_tree and then only call this once).␊ |
159 | ␊ |
160 | id␊ |
161 | recalculate_merkle_codes(merkle_table & tab,␊ |
162 | prefix const & pref,␊ |
163 | size_t level);␊ |
164 | ␊ |
165 | // Local Variables:␊ |
166 | // mode: C++␊ |
167 | // fill-column: 76␊ |
168 | // c-file-style: "gnu"␊ |
169 | // indent-tabs-mode: nil␊ |
170 | // End:␊ |
171 | // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:␊ |
172 | ␊ |
173 | #endif // __MERKLE_TREE_HH__␊ |