1 | #ifndef __MERKLE_TREE_HH__␊ |
2 | #define __MERKLE_TREE_HH__␊ |
3 | ␊ |
4 | // Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>␊ |
5 | //␊ |
6 | // This program is made available under the GNU GPL version 2.0 or␊ |
7 | // greater. See the accompanying file COPYING for details.␊ |
8 | //␊ |
9 | // This program is distributed WITHOUT ANY WARRANTY; without even the␊ |
10 | // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR␊ |
11 | // PURPOSE.␊ |
12 | ␊ |
13 | #include <map>␊ |
14 | #include <set>␊ |
15 | ␊ |
16 | #include <boost/shared_ptr.hpp>␊ |
17 | #include <boost/dynamic_bitset.hpp>␊ |
18 | ␊ |
19 | #include "vector.hh"␊ |
20 | #include "vocab.hh"␊ |
21 | #include "transforms.hh"␊ |
22 | #include "hash_map.hh"␊ |
23 | ␊ |
24 | // This file contains data structures and functions for managing merkle␊ |
25 | // trees. A merkle tree is, conceptually, a general recursive construction␊ |
26 | // whereby a range of K data elements is divided up into buckets. Each␊ |
27 | // bucket is then hashed, and the hash values of the buckets at level N of␊ |
28 | // the tree are used as data elements held in buckets at level N-1. At␊ |
29 | // level 0 there is only one bucket.␊ |
30 | //␊ |
31 | // The result is a tree in which each node has J "slots", each of which␊ |
32 | // summarizes (as a hashcode) the entire subtree beneath it. this makes a␊ |
33 | // pair of merkle trees amenable to setwise operations such as union or␊ |
34 | // difference while only inspecting D*log_base_J(K) nodes where D is the␊ |
35 | // number of differences between trees.␊ |
36 | //␊ |
37 | // We build merkle trees over a few collections of objects in our database␊ |
38 | // and use these to synchronize with remote hosts. See netsync.{cc,hh} and␊ |
39 | // refiner.{cc,hh} for more details.␊ |
40 | ␊ |
41 | typedef enum␊ |
42 | {␊ |
43 | file_item = 2,␊ |
44 | key_item = 3,␊ |
45 | revision_item = 4,␊ |
46 | cert_item = 5,␊ |
47 | epoch_item = 6␊ |
48 | }␊ |
49 | netcmd_item_type;␊ |
50 | ␊ |
51 | void netcmd_item_type_to_string(netcmd_item_type t, std::string & typestr);␊ |
52 | ␊ |
53 | typedef enum␊ |
54 | {␊ |
55 | empty_state,␊ |
56 | leaf_state,␊ |
57 | subtree_state␊ |
58 | }␊ |
59 | slot_state;␊ |
60 | ␊ |
61 | struct merkle_node␊ |
62 | {␊ |
63 | size_t level;␊ |
64 | boost::dynamic_bitset<unsigned char> pref;␊ |
65 | size_t total_num_leaves;␊ |
66 | boost::dynamic_bitset<unsigned char> bitmap;␊ |
67 | std::vector<id> slots;␊ |
68 | netcmd_item_type type;␊ |
69 | ␊ |
70 | merkle_node();␊ |
71 | bool operator==(merkle_node const & other) const;␊ |
72 | void check_invariants() const;␊ |
73 | ␊ |
74 | void get_raw_prefix(prefix & pref) const;␊ |
75 | void get_hex_prefix(hexenc<prefix> & hpref) const;␊ |
76 | ␊ |
77 | void get_raw_slot(size_t slot, id & val) const;␊ |
78 | void set_raw_slot(size_t slot, id const & val);␊ |
79 | ␊ |
80 | void extended_prefix(size_t slot, boost::dynamic_bitset<unsigned char> & extended) const;␊ |
81 | void extended_raw_prefix(size_t slot, prefix & extended) const;␊ |
82 | void extended_hex_prefix(size_t slot, hexenc<prefix> & extended) const;␊ |
83 | ␊ |
84 | slot_state get_slot_state(size_t n) const;␊ |
85 | void set_slot_state(size_t n, slot_state st);␊ |
86 | };␊ |
87 | ␊ |
88 | typedef boost::shared_ptr<merkle_node> merkle_ptr;␊ |
89 | ␊ |
90 | typedef std::pair<prefix,size_t> merkle_node_id;␊ |
91 | namespace hashmap {␊ |
92 | template<>␊ |
93 | struct hash<merkle_node_id>␊ |
94 | {␊ |
95 | hash<std::string> sh;␊ |
96 | size_t operator()(merkle_node_id const & m) const␊ |
97 | {␊ |
98 | return sh(m.first()) + m.second;␊ |
99 | }␊ |
100 | };␊ |
101 | }␊ |
102 | typedef hashmap::hash_map<merkle_node_id, merkle_ptr> merkle_table;␊ |
103 | ␊ |
104 | ␊ |
105 | size_t prefix_length_in_bits(size_t level);␊ |
106 | size_t prefix_length_in_bytes(size_t level);␊ |
107 | void write_node(merkle_node const & in, std::string & outbuf);␊ |
108 | void read_node(std::string const & inbuf, size_t & pos, merkle_node & out);␊ |
109 | ␊ |
110 | std::string raw_sha1(std::string const & in);␊ |
111 | ␊ |
112 | ␊ |
113 | bool␊ |
114 | locate_item(merkle_table & table,␊ |
115 | id const & val,␊ |
116 | size_t & slotnum,␊ |
117 | merkle_ptr & mp);␊ |
118 | ␊ |
119 | ␊ |
120 | void␊ |
121 | pick_slot_and_prefix_for_value(id const & val,␊ |
122 | size_t level,␊ |
123 | size_t & slotnum,␊ |
124 | boost::dynamic_bitset<unsigned char> & pref);␊ |
125 | ␊ |
126 | // Collect the items inside a subtree.␊ |
127 | ␊ |
128 | void␊ |
129 | collect_items_in_subtree(merkle_table & tab,␊ |
130 | prefix const & pref,␊ |
131 | size_t level,␊ |
132 | std::set<id> & items);␊ |
133 | ␊ |
134 | // Insert an item into a tree.␊ |
135 | ␊ |
136 | void␊ |
137 | insert_into_merkle_tree(merkle_table & tab,␊ |
138 | netcmd_item_type type,␊ |
139 | id const & leaf,␊ |
140 | size_t level);␊ |
141 | ␊ |
142 | inline void␊ |
143 | insert_into_merkle_tree(merkle_table & tab,␊ |
144 | netcmd_item_type type,␊ |
145 | hexenc<id> const & hex_leaf,␊ |
146 | size_t level)␊ |
147 | {␊ |
148 | id leaf;␊ |
149 | decode_hexenc(hex_leaf, leaf);␊ |
150 | insert_into_merkle_tree(tab, type, leaf, level);␊ |
151 | }␊ |
152 | ␊ |
153 | // Recalculate the hashes in the given tree. Must be called after␊ |
154 | // insert_into_merkle_tree, and before using tree (but you can batch up␊ |
155 | // multiple calls to insert_into_merkle_tree and then only call this once).␊ |
156 | ␊ |
157 | id␊ |
158 | recalculate_merkle_codes(merkle_table & tab,␊ |
159 | prefix const & pref,␊ |
160 | size_t level);␊ |
161 | ␊ |
162 | // Local Variables:␊ |
163 | // mode: C++␊ |
164 | // fill-column: 76␊ |
165 | // c-file-style: "gnu"␊ |
166 | // indent-tabs-mode: nil␊ |
167 | // End:␊ |
168 | // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:␊ |
169 | ␊ |
170 | #endif // __MERKLE_TREE_HH__␊ |