Source code for arborist.merkle
"""Merkle tree with non-commutative HashCombine.
Python port of ~/git/proxy.unturf.com/pkg/verified/merkle.go conventions:
- Domain separation via single-byte prefixes (leaf=0x00, node=0x03).
- HashCombine is non-commutative; sibling order matters always.
- Odd layers self-duplicate the trailing element (NOT zero-pad).
- Proof carries explicit IsLeft flag per sibling (NOT lexical sort).
- Empty tree root is ZeroHash (32 zero bytes).
"""
from __future__ import annotations
import hashlib
from dataclasses import dataclass, field
from typing import Iterable
LEAF_PREFIX = b"\x00"
NODE_PREFIX = b"\x03"
ZERO_HASH = b"\x00" * 32
HASH_LEN = 32
def _sha256(*parts: bytes) -> bytes:
h = hashlib.sha256()
for p in parts:
h.update(p)
return h.digest()
[docs]
def hash_leaf(content: bytes) -> bytes:
"""Hash a leaf with domain prefix 0x00."""
return _sha256(LEAF_PREFIX, content)
[docs]
def hash_combine(left: bytes, right: bytes) -> bytes:
"""Non-commutative interior combine with domain prefix 0x03."""
if len(left) != HASH_LEN or len(right) != HASH_LEN:
raise ValueError("hash inputs must be 32 bytes")
return _sha256(NODE_PREFIX, left, right)
[docs]
@dataclass(frozen=True)
class ProofNode:
"""One sibling step in a Merkle inclusion proof.
is_left=True means the sibling sits to the LEFT of the running hash,
so verification order is: HashCombine(sibling, current).
"""
hash: bytes
is_left: bool
[docs]
@dataclass(frozen=True)
class MerkleProof:
"""Merkle inclusion proof: leaf + index + sibling path to root.
Proves that a leaf at leaf_index exists in a tree with the given root.
verify_proof() recomputes the root by combining the leaf with siblings.
"""
leaf: bytes
leaf_index: int
siblings: tuple[ProofNode, ...]
root: bytes
[docs]
@dataclass
class MerkleTree:
"""Layered tree. layers[0] = leaves, layers[-1] = [root]."""
layers: list[list[bytes]] = field(default_factory=list)
@property
def root(self) -> bytes:
"""Root hash of the tree (topmost layer). Empty tree → ZERO_HASH."""
if not self.layers or not self.layers[-1]:
return ZERO_HASH
return self.layers[-1][0]
@property
def leaves(self) -> list[bytes]:
"""Leaf hashes in input order (bottom layer of tree)."""
return self.layers[0] if self.layers else []
[docs]
@classmethod
def build(cls, leaves: Iterable[bytes]) -> MerkleTree:
"""Build tree from leaf hashes. Applies odd-element self-duplication."""
leaves = list(leaves)
if not leaves:
return cls(layers=[[]])
layers: list[list[bytes]] = [list(leaves)]
current = list(leaves)
while len(current) > 1:
nxt: list[bytes] = []
i = 0
while i < len(current):
left = current[i]
right = current[i + 1] if i + 1 < len(current) else current[i]
nxt.append(hash_combine(left, right))
i += 2
layers.append(nxt)
current = nxt
return cls(layers=layers)
[docs]
def proof(self, leaf_index: int) -> MerkleProof:
"""Generate inclusion proof for leaf_index. Raises IndexError if out of range."""
if not self.layers or not self.layers[0]:
raise IndexError("empty tree has no proofs")
if leaf_index < 0 or leaf_index >= len(self.layers[0]):
raise IndexError(f"leaf_index {leaf_index} out of range")
siblings: list[ProofNode] = []
idx = leaf_index
# Walk up every layer except the root layer.
for layer in self.layers[:-1]:
if idx % 2 == 0:
sibling_idx = idx + 1
is_left = False # sibling is to our right
else:
sibling_idx = idx - 1
is_left = True # sibling is to our left
if sibling_idx >= len(layer):
# odd-element rule: self-duplicate
sibling_idx = idx
siblings.append(ProofNode(hash=layer[sibling_idx], is_left=is_left))
idx //= 2
return MerkleProof(
leaf=self.layers[0][leaf_index],
leaf_index=leaf_index,
siblings=tuple(siblings),
root=self.root,
)
[docs]
def verify_proof(proof: MerkleProof) -> bool:
"""Recompute root from leaf + sibling path. Returns True iff matches."""
current = proof.leaf
for node in proof.siblings:
if node.is_left:
current = hash_combine(node.hash, current)
else:
current = hash_combine(current, node.hash)
return current == proof.root
[docs]
def proof_to_dict(proof: MerkleProof) -> dict:
"""JSON-serializable form for storage in providence_cache.merkle_proof."""
return {
"leaf": proof.leaf.hex(),
"leaf_index": proof.leaf_index,
"siblings": [
{"hash": s.hash.hex(), "is_left": s.is_left} for s in proof.siblings
],
"root": proof.root.hex(),
}
[docs]
def proof_from_dict(d: dict) -> MerkleProof:
"""Deserialize a proof from JSON dict (inverse of proof_to_dict)."""
return MerkleProof(
leaf=bytes.fromhex(d["leaf"]),
leaf_index=int(d["leaf_index"]),
siblings=tuple(
ProofNode(hash=bytes.fromhex(s["hash"]), is_left=bool(s["is_left"]))
for s in d["siblings"]
),
root=bytes.fromhex(d["root"]),
)