CatapultServer  v0.5.0.1 (Elephant)
PatriciaTree.h
Go to the documentation of this file.
1 
21 #pragma once
22 #include "TreeNode.h"
23 
24 namespace catapult { namespace tree {
25 
27  template<typename TEncoder, typename TDataSource>
28  class PatriciaTree {
29  public:
30  using KeyType = typename TEncoder::KeyType;
31  using ValueType = typename TEncoder::ValueType;
32 
33  public:
35  explicit PatriciaTree(TDataSource& dataSource) : m_dataSource(dataSource)
36  {}
37 
38  public:
40  Hash256 root() const {
41  return m_rootNode.hash();
42  }
43 
44  // region set
45 
46  public:
48  void set(const KeyType& key, const ValueType& value) {
49  auto keyPath = TreeNodePath(TEncoder::EncodeKey(key));
50  auto encodedValue = TEncoder::EncodeValue(value);
51  m_rootNode = set(m_rootNode, { keyPath, encodedValue});
52  }
53 
54  private:
57  const Hash256& Value;
58  };
59 
60  private:
61  TreeNode set(const TreeNode& node, const PathValuePairRef& newPair) {
62  // if the node is empty, just create a new leaf
63  if (node.empty())
64  return TreeNode(createLeaf(newPair));
65 
66  if (node.isLeaf()) {
67  const auto& leafNode = node.asLeafNode();
68 
69  // if leaf node already points to desired location, just change value
70  if (leafNode.path() == newPair.Path)
71  return TreeNode(createLeaf(newPair));
72 
73  // if the path is different, the leaf node needs to be split into a branch
74  return TreeNode(branchLeafNode(leafNode, newPair));
75  }
76 
77  // the current node is a branch node that needs updating
78  return TreeNode(updateBranchLink(BranchTreeNode(node.asBranchNode()), newPair));
79  }
80 
81  LeafTreeNode createLeaf(const PathValuePairRef& pair) {
82  return LeafTreeNode(pair.Path, pair.Value);
83  }
84 
85  // this function is called when a branch needs to replace a leaf node
86  BranchTreeNode branchLeafNode(const LeafTreeNode& leafNode, const PathValuePairRef& newPair) {
87  const auto& leafPath = leafNode.path();
88  auto differenceIndex = FindFirstDifferenceIndex(leafPath, newPair.Path);
89  auto sharedPath = leafPath.subpath(0, differenceIndex);
90 
91  // create and save the two component nodes
92  auto node1 = LeafTreeNode(leafPath.subpath(differenceIndex + 1), leafNode.value());
93  auto node2 = LeafTreeNode(newPair.Path.subpath(differenceIndex + 1), newPair.Value);
94 
95  // create the branch node with two links
96  auto branchNode = BranchTreeNode(sharedPath);
97  setLink(branchNode, node1, leafPath.nibbleAt(differenceIndex));
98  setLink(branchNode, node2, newPair.Path.nibbleAt(differenceIndex));
99  return branchNode;
100  }
101 
102  // this function is called when processing a branch node
103  BranchTreeNode updateBranchLink(BranchTreeNode&& branchNode, const PathValuePairRef& newPair) {
104  const auto& branchPath = branchNode.path();
105  auto differenceIndex = FindFirstDifferenceIndex(branchPath, newPair.Path);
106 
107  // if the path of the existing branch node is not empty and completely shared with the new node,
108  // attach the new node to the end of the branch
109  if (!branchPath.empty() && differenceIndex == branchPath.size()) {
110  auto newPairPathContinuation = newPair.Path.subpath(differenceIndex);
111  return insertNewPairIntoBranch(branchNode, TreeNodePath(), 0, { newPairPathContinuation, newPair.Value });
112  }
113 
114  return insertNewPairIntoBranch(branchNode, branchPath, differenceIndex, newPair);
115  }
116 
118  BranchTreeNode& branchNode,
119  const TreeNodePath& branchPath,
120  size_t differenceIndex,
121  const PathValuePairRef& newPair) {
122  auto isBranchPathConsumed = branchPath.empty();
123 
124  // if the branch path is completely consumed, find the connecting node in the database
125  // if it is not completely consumed, a branch is being split
126  auto pNextNode = isBranchPathConsumed ? getLinkedNode(branchNode, newPair.Path.nibbleAt(0)) : nullptr;
127  if (!pNextNode)
128  pNextNode = std::make_unique<TreeNode>();
129 
130  // attach the new node to the existing node (if there is no existing node, it will be set as a leaf)
131  auto updatedNextNode = set(*pNextNode, { newPair.Path.subpath(differenceIndex + 1), newPair.Value });
132 
133  // if the new node is a link, set it directly in the existing branch node
134  if (isBranchPathConsumed) {
135  setLink(branchNode, updatedNextNode, newPair.Path.nibbleAt(0));
136  return branchNode;
137  }
138 
139  // otherwise, create a new branch node at the shared path
140  auto newBranchNode = BranchTreeNode(branchPath.subpath(0, differenceIndex));
141  setLink(newBranchNode, updatedNextNode, newPair.Path.nibbleAt(differenceIndex));
142 
143  // truncate the path of the original branch node so that it is connected to the new branch node
144  auto branchLinkIndex = branchPath.nibbleAt(differenceIndex);
145  branchNode.setPath(branchPath.subpath(differenceIndex + 1));
146 
147  // link the new and old branch nodes
148  setLink(newBranchNode, branchNode, branchLinkIndex);
149  return newBranchNode;
150  }
151 
152  // endregion
153 
154  // region unset
155 
156  public:
158  bool unset(const KeyType& key) {
159  auto keyPath = TreeNodePath(TEncoder::EncodeKey(key));
160  auto canMerge = true;
161  return unset(m_rootNode, keyPath, m_rootNode, canMerge);
162  }
163 
164  private:
165  bool unset(const TreeNode& node, const TreeNodePath& keyPath, TreeNode& updatedNode, bool& canMerge) {
166  // if the node is empty, there is nothing to do
167  if (node.empty())
168  return false;
169 
170  auto differenceIndex = FindFirstDifferenceIndex(node.path(), keyPath);
171  if (differenceIndex == keyPath.size()) {
172  // matching node was found, so clear it
173  updatedNode = TreeNode();
174  return true;
175  }
176 
177  // if the node is not a branch, no node in the tree can completely match `keyPath`
178  if (!node.isBranch())
179  return false;
180 
181  // look up the branch connecting with `keyPath`, if it is not found (or not found recursively), no node in the tree can match
182  const auto& branchNode = node.asBranchNode();
183  auto nodeLinkIndex = keyPath.nibbleAt(differenceIndex);
184  auto pNextNode = getLinkedNode(branchNode, nodeLinkIndex);
185 
186  TreeNode updatedNextNode;
187  if (!pNextNode || !unset(*pNextNode, keyPath.subpath(differenceIndex + 1), updatedNextNode, canMerge))
188  return false;
189 
190  // when removing a node, at most a single branch node can be collapsed
191  // subsequently, just update branch links in parent branches
192  updatedNode = canMerge
193  ? unsetBranchLink(BranchTreeNode(branchNode), nodeLinkIndex)
194  : updateBranchLink(BranchTreeNode(branchNode), nodeLinkIndex, updatedNextNode);
195 
196  canMerge = false;
197  return true;
198  }
199 
200  TreeNode unsetBranchLink(BranchTreeNode&& branchNode, size_t linkIndex) {
201  // unset the link
202  branchNode.clearLink(linkIndex);
203  if (1 != branchNode.numLinks())
204  return TreeNode(branchNode);
205 
206  // merge the branch if it only has a single link (if the tree state is valid, the referenced node must exist)
207  auto lastLinkIndex = branchNode.highestLinkIndex();
208  auto referencedNode = getLinkedNode(branchNode, lastLinkIndex)->copy();
209 
210  auto mergedPath = TreeNodePath::Join(branchNode.path(), lastLinkIndex, referencedNode.path());
211  referencedNode.setPath(mergedPath);
212  return std::move(referencedNode);
213  }
214 
215  TreeNode updateBranchLink(BranchTreeNode&& branchNode, size_t linkIndex, const TreeNode& linkedNode) {
216  setLink(branchNode, linkedNode, linkIndex);
217  return TreeNode(branchNode);
218  }
219 
220  // endregion
221 
222  // region lookup
223 
224  public:
226  std::pair<Hash256, bool> lookup(const KeyType& key, std::vector<TreeNode>& nodePath) const {
227  auto keyPath = TreeNodePath(TEncoder::EncodeKey(key));
228  return lookup(m_rootNode, keyPath, nodePath);
229  }
230 
231  private:
232  std::pair<Hash256, bool> lookup(const TreeNode& node, const TreeNodePath& keyPath, std::vector<TreeNode>& nodePath) const {
233  // if the node is empty, there is nothing to do
234  if (node.empty())
235  return LookupNotFoundResult();
236 
237  nodePath.push_back(node.copy());
238  auto differenceIndex = FindFirstDifferenceIndex(node.path(), keyPath);
239  if (!node.isBranch()) // if the node is a leaf, it must fully match `keyPath` to be in the tree
240  return differenceIndex == keyPath.size() ? std::make_pair(node.asLeafNode().value(), true) : LookupNotFoundResult();
241 
242  // look up the branch connecting with `keyPath`, if it is not found (or not found recursively), no node in the tree can match
243  const auto& branchNode = node.asBranchNode();
244  auto nodeLinkIndex = keyPath.nibbleAt(differenceIndex);
245  auto pNextNode = getLinkedNode(branchNode, nodeLinkIndex);
246  if (!pNextNode)
247  return LookupNotFoundResult();
248 
249  return lookup(*pNextNode, keyPath.subpath(differenceIndex + 1), nodePath);
250  }
251 
252  static std::pair<Hash256, bool> LookupNotFoundResult() {
253  return std::make_pair(Hash256(), false);
254  }
255 
256  // endregion
257 
258  // region tryLoad + setRoot + clear
259 
260  public:
262  bool tryLoad(const Hash256& rootHash) {
263  auto pRootNode = m_dataSource.get(rootHash);
264  if (pRootNode)
265  m_rootNode = pRootNode->copy();
266 
267  return !!pRootNode;
268  }
269 
271  void setRoot(const TreeNode& rootNode) {
272  m_rootNode = rootNode.copy();
273  }
274 
276  void clear() {
277  m_rootNode = TreeNode();
278  }
279 
280  // endregion
281 
282  // region saveAll
283 
284  public:
286  void saveAll() {
287  if (!m_rootNode.empty())
289  }
290 
291  private:
292  void save(const TreeNode& node) {
293  if (node.isLeaf())
294  m_dataSource.set(node.asLeafNode());
295  else
296  m_dataSource.set(node.asBranchNode());
297  }
298 
299  void saveAll(const TreeNode& node) {
300  if (!node.isBranch()) {
301  save(node);
302  return;
303  }
304 
305  // if the node is a branch, save and prune all its links
306  auto branchNode = BranchTreeNode(node.asBranchNode());
307  for (auto i = 0u; i < BranchTreeNode::Max_Links; ++i) {
308  auto pLinkedNode = branchNode.linkedNode(i);
309  if (!pLinkedNode)
310  continue;
311 
312  saveAll(*pLinkedNode);
313  }
314 
315  branchNode.compactLinks();
316  m_dataSource.set(branchNode);
317  }
318 
319  // endregion
320 
321  private:
322  // region links
323 
324  std::unique_ptr<const TreeNode> getLinkedNode(const BranchTreeNode& branchNode, size_t index) const {
325  // copy from memory, if available; otherwise, copy from data source
326  auto pLinkedNode = branchNode.linkedNode(index);
327  return pLinkedNode ? std::move(pLinkedNode) : m_dataSource.get(branchNode.link(index));
328  }
329 
330  void setLink(BranchTreeNode& branchNode, const TreeNode& node, size_t index) {
331  branchNode.setLink(node, index);
332  }
333 
334  template<typename TNode>
335  void setLink(BranchTreeNode& branchNode, const TNode& node, size_t index) {
336  branchNode.setLink(TreeNode(node), index);
337  }
338 
339  // endregion
340 
341  private:
342  TDataSource& m_dataSource;
344  };
345 }}
346 
catapult::tree::PatriciaTree::PathValuePairRef::Value
const Hash256 & Value
Definition: PatriciaTree.h:57
catapult::tree::PatriciaTree::root
Hash256 root() const
Gets the root hash that uniquely identifies this tree.
Definition: PatriciaTree.h:40
catapult::tree::TreeNode::copy
TreeNode copy() const
Creates a copy of this node.
Definition: TreeNode.cpp:234
catapult::tree::PatriciaTree::save
void save(const TreeNode &node)
Definition: PatriciaTree.h:292
catapult::tree::PatriciaTree::m_dataSource
TDataSource & m_dataSource
Definition: PatriciaTree.h:342
catapult::tree::BranchTreeNode::path
const TreeNodePath & path() const
Gets the node path.
Definition: TreeNode.cpp:90
catapult::tree::PatriciaTree::PatriciaTree
PatriciaTree(TDataSource &dataSource)
Creates a tree around a dataSource.
Definition: PatriciaTree.h:35
catapult::tree::FindFirstDifferenceIndex
size_t FindFirstDifferenceIndex(const TreeNodePath &lhs, const TreeNodePath &rhs)
Compares two paths (lhs and rhs) and returns the index of the first non-equal nibble.
Definition: TreeNodePath.cpp:142
catapult::tree::PatriciaTree::getLinkedNode
std::unique_ptr< const TreeNode > getLinkedNode(const BranchTreeNode &branchNode, size_t index) const
Definition: PatriciaTree.h:324
catapult::tree::TreeNodePath::empty
bool empty() const
Returns true if this path is empty.
Definition: TreeNodePath.cpp:54
catapult::tree::PatriciaTree
Represents a compact patricia tree.
Definition: PatriciaTree.h:28
catapult::tree::PatriciaTree::saveAll
void saveAll()
Saves all tree nodes to the underlying data source.
Definition: PatriciaTree.h:286
catapult::Hash256
utils::ByteArray< Hash256_Size, Hash256_tag > Hash256
Definition: src/catapult/types.h:47
catapult::tree::TreeNodePath::subpath
TreeNodePath subpath(size_t offset) const
Creates a subpath starting at nibble offset.
Definition: TreeNodePath.cpp:78
catapult::tree::PatriciaTree::setLink
void setLink(BranchTreeNode &branchNode, const TNode &node, size_t index)
Definition: PatriciaTree.h:335
catapult::tree::TreeNodePath::nibbleAt
uint8_t nibbleAt(size_t index) const
Gets the nibble at index.
Definition: TreeNodePath.cpp:62
catapult::tree::PatriciaTree< TEncoder, catapult::tree::ReadThroughMemoryDataSource< TDataSource > >::KeyType
typename TEncoder::KeyType KeyType
Definition: PatriciaTree.h:30
catapult::tree::PatriciaTree::setLink
void setLink(BranchTreeNode &branchNode, const TreeNode &node, size_t index)
Definition: PatriciaTree.h:330
catapult::tree::PatriciaTree::saveAll
void saveAll(const TreeNode &node)
Definition: PatriciaTree.h:299
catapult::tree::TreeNode::hash
const Hash256 & hash() const
Gets the hash representation of this node.
Definition: TreeNode.cpp:202
catapult::tree::TreeNode::empty
bool empty() const
Returns true if this node represents an empty node.
Definition: TreeNode.cpp:181
catapult::tree::PatriciaTree< TEncoder, catapult::tree::ReadThroughMemoryDataSource< TDataSource > >::ValueType
typename TEncoder::ValueType ValueType
Definition: PatriciaTree.h:31
catapult::tree::PatriciaTree::unset
bool unset(const TreeNode &node, const TreeNodePath &keyPath, TreeNode &updatedNode, bool &canMerge)
Definition: PatriciaTree.h:165
catapult::tree::PatriciaTree::unsetBranchLink
TreeNode unsetBranchLink(BranchTreeNode &&branchNode, size_t linkIndex)
Definition: PatriciaTree.h:200
TreeNode.h
catapult::tree::LeafTreeNode::value
const Hash256 & value() const
Gets the node value.
Definition: TreeNode.cpp:72
catapult::tree::LeafTreeNode::path
const TreeNodePath & path() const
Gets the node path.
Definition: TreeNode.cpp:68
catapult::tree::PatriciaTree::tryLoad
bool tryLoad(const Hash256 &rootHash)
Loads the node with hash rootHash and sets it as the root node.
Definition: PatriciaTree.h:262
catapult::tree::TreeNode::isLeaf
bool isLeaf() const
Returns true if this node represents a leaf node.
Definition: TreeNode.cpp:189
catapult::tree::BranchTreeNode::Max_Links
static constexpr size_t Max_Links
Maximum number of branch links.
Definition: TreeNode.h:63
catapult::tree::PatriciaTree::createLeaf
LeafTreeNode createLeaf(const PathValuePairRef &pair)
Definition: PatriciaTree.h:81
catapult::tree::PatriciaTree::unset
bool unset(const KeyType &key)
Removes the value associated with key from the tree.
Definition: PatriciaTree.h:158
catapult::tree::BranchTreeNode::link
const Hash256 & link(size_t index) const
Gets the branch link at index.
Definition: TreeNode.cpp:102
catapult::tree::TreeNode::asBranchNode
const BranchTreeNode & asBranchNode() const
Gets a branch node interface to this node.
Definition: TreeNode.cpp:227
catapult::tree::PatriciaTree::LookupNotFoundResult
static std::pair< Hash256, bool > LookupNotFoundResult()
Definition: PatriciaTree.h:252
catapult::tree::PatriciaTree::PathValuePairRef
Definition: PatriciaTree.h:55
catapult::tree::PatriciaTree::updateBranchLink
TreeNode updateBranchLink(BranchTreeNode &&branchNode, size_t linkIndex, const TreeNode &linkedNode)
Definition: PatriciaTree.h:215
catapult::tree::BranchTreeNode::setPath
void setPath(const TreeNodePath &path)
Sets the branch node path.
Definition: TreeNode.cpp:130
catapult::tree::PatriciaTree::lookup
std::pair< Hash256, bool > lookup(const KeyType &key, std::vector< TreeNode > &nodePath) const
Tries to find the value associated with key in the tree and stores proof of existence or not in nodeP...
Definition: PatriciaTree.h:226
catapult::tree::PatriciaTree::branchLeafNode
BranchTreeNode branchLeafNode(const LeafTreeNode &leafNode, const PathValuePairRef &newPair)
Definition: PatriciaTree.h:86
catapult::tree::PatriciaTree::set
TreeNode set(const TreeNode &node, const PathValuePairRef &newPair)
Definition: PatriciaTree.h:61
catapult::tree::PatriciaTree::set
void set(const KeyType &key, const ValueType &value)
Sets key to value in the tree.
Definition: PatriciaTree.h:48
catapult::tree::LeafTreeNode
Represents a leaf tree node.
Definition: TreeNode.h:34
catapult::tree::PatriciaTree::lookup
std::pair< Hash256, bool > lookup(const TreeNode &node, const TreeNodePath &keyPath, std::vector< TreeNode > &nodePath) const
Definition: PatriciaTree.h:232
catapult::tree::PatriciaTree::PathValuePairRef::Path
const TreeNodePath & Path
Definition: PatriciaTree.h:56
catapult::tree::TreeNode::path
const TreeNodePath & path() const
Gets the node path.
Definition: TreeNode.cpp:193
catapult::tree::PatriciaTree::setRoot
void setRoot(const TreeNode &rootNode)
Sets the root to rootNode.
Definition: PatriciaTree.h:271
catapult::tree::PatriciaTree::clear
void clear()
Clears the tree.
Definition: PatriciaTree.h:276
catapult::tree::PatriciaTree::insertNewPairIntoBranch
BranchTreeNode insertNewPairIntoBranch(BranchTreeNode &branchNode, const TreeNodePath &branchPath, size_t differenceIndex, const PathValuePairRef &newPair)
Definition: PatriciaTree.h:117
catapult::tree::BranchTreeNode::setLink
void setLink(const Hash256 &link, size_t index)
Sets the branch link at index.
Definition: TreeNode.cpp:135
catapult::tree::TreeNode::asLeafNode
const LeafTreeNode & asLeafNode() const
Gets a leaf node interface to this node.
Definition: TreeNode.cpp:220
catapult
Definition: AddressExtractionExtension.cpp:28
catapult::tree::TreeNodePath::size
size_t size() const
Gets the number of nibbles in this path.
Definition: TreeNodePath.cpp:58
catapult::tree::TreeNodePath::Join
static TreeNodePath Join(const TreeNodePath &lhs, const TreeNodePath &rhs)
Joins lhs and rhs into a new path.
Definition: TreeNodePath.cpp:116
catapult::tree::TreeNode
Represents a tree node.
Definition: TreeNode.h:124
catapult::utils::ByteArray< Hash256_Size, Hash256_tag >
catapult::tree::TreeNode::isBranch
bool isBranch() const
Returns true if this node represents a branch node.
Definition: TreeNode.cpp:185
catapult::tree::PatriciaTree::updateBranchLink
BranchTreeNode updateBranchLink(BranchTreeNode &&branchNode, const PathValuePairRef &newPair)
Definition: PatriciaTree.h:103
catapult::tree::TreeNodePath
Represents a path in a tree.
Definition: TreeNodePath.h:31
catapult::tree::PatriciaTree::m_rootNode
TreeNode m_rootNode
Definition: PatriciaTree.h:343
catapult::tree::BranchTreeNode::linkedNode
std::unique_ptr< const TreeNode > linkedNode(size_t index) const
Gets a copy of the linked node at index or nullptr if no linked node is present.
Definition: TreeNode.cpp:107
catapult::tree::BranchTreeNode
Represents a branch tree node.
Definition: TreeNode.h:60