Source code for arborist.merkle

"""Merkle tree with non-commutative HashCombine.

Python port of ~/git/proxy.unturf.com/pkg/verified/merkle.go conventions:

- Domain separation via single-byte prefixes (leaf=0x00, node=0x03).
- HashCombine is non-commutative; sibling order matters always.
- Odd layers self-duplicate the trailing element (NOT zero-pad).
- Proof carries explicit IsLeft flag per sibling (NOT lexical sort).
- Empty tree root is ZeroHash (32 zero bytes).
"""

from __future__ import annotations

import hashlib
from dataclasses import dataclass, field
from typing import Iterable

LEAF_PREFIX = b"\x00"
NODE_PREFIX = b"\x03"
ZERO_HASH = b"\x00" * 32
HASH_LEN = 32


def _sha256(*parts: bytes) -> bytes:
    h = hashlib.sha256()
    for p in parts:
        h.update(p)
    return h.digest()


[docs] def hash_leaf(content: bytes) -> bytes: """Hash a leaf with domain prefix 0x00.""" return _sha256(LEAF_PREFIX, content)
[docs] def hash_combine(left: bytes, right: bytes) -> bytes: """Non-commutative interior combine with domain prefix 0x03.""" if len(left) != HASH_LEN or len(right) != HASH_LEN: raise ValueError("hash inputs must be 32 bytes") return _sha256(NODE_PREFIX, left, right)
[docs] @dataclass(frozen=True) class ProofNode: """One sibling step in a Merkle inclusion proof. is_left=True means the sibling sits to the LEFT of the running hash, so verification order is: HashCombine(sibling, current). """ hash: bytes is_left: bool
[docs] @dataclass(frozen=True) class MerkleProof: """Merkle inclusion proof: leaf + index + sibling path to root. Proves that a leaf at leaf_index exists in a tree with the given root. verify_proof() recomputes the root by combining the leaf with siblings. """ leaf: bytes leaf_index: int siblings: tuple[ProofNode, ...] root: bytes
[docs] @dataclass class MerkleTree: """Layered tree. layers[0] = leaves, layers[-1] = [root].""" layers: list[list[bytes]] = field(default_factory=list) @property def root(self) -> bytes: """Root hash of the tree (topmost layer). Empty tree → ZERO_HASH.""" if not self.layers or not self.layers[-1]: return ZERO_HASH return self.layers[-1][0] @property def leaves(self) -> list[bytes]: """Leaf hashes in input order (bottom layer of tree).""" return self.layers[0] if self.layers else []
[docs] @classmethod def build(cls, leaves: Iterable[bytes]) -> MerkleTree: """Build tree from leaf hashes. Applies odd-element self-duplication.""" leaves = list(leaves) if not leaves: return cls(layers=[[]]) layers: list[list[bytes]] = [list(leaves)] current = list(leaves) while len(current) > 1: nxt: list[bytes] = [] i = 0 while i < len(current): left = current[i] right = current[i + 1] if i + 1 < len(current) else current[i] nxt.append(hash_combine(left, right)) i += 2 layers.append(nxt) current = nxt return cls(layers=layers)
[docs] def proof(self, leaf_index: int) -> MerkleProof: """Generate inclusion proof for leaf_index. Raises IndexError if out of range.""" if not self.layers or not self.layers[0]: raise IndexError("empty tree has no proofs") if leaf_index < 0 or leaf_index >= len(self.layers[0]): raise IndexError(f"leaf_index {leaf_index} out of range") siblings: list[ProofNode] = [] idx = leaf_index # Walk up every layer except the root layer. for layer in self.layers[:-1]: if idx % 2 == 0: sibling_idx = idx + 1 is_left = False # sibling is to our right else: sibling_idx = idx - 1 is_left = True # sibling is to our left if sibling_idx >= len(layer): # odd-element rule: self-duplicate sibling_idx = idx siblings.append(ProofNode(hash=layer[sibling_idx], is_left=is_left)) idx //= 2 return MerkleProof( leaf=self.layers[0][leaf_index], leaf_index=leaf_index, siblings=tuple(siblings), root=self.root, )
[docs] def verify_proof(proof: MerkleProof) -> bool: """Recompute root from leaf + sibling path. Returns True iff matches.""" current = proof.leaf for node in proof.siblings: if node.is_left: current = hash_combine(node.hash, current) else: current = hash_combine(current, node.hash) return current == proof.root
[docs] def proof_to_dict(proof: MerkleProof) -> dict: """JSON-serializable form for storage in providence_cache.merkle_proof.""" return { "leaf": proof.leaf.hex(), "leaf_index": proof.leaf_index, "siblings": [ {"hash": s.hash.hex(), "is_left": s.is_left} for s in proof.siblings ], "root": proof.root.hex(), }
[docs] def proof_from_dict(d: dict) -> MerkleProof: """Deserialize a proof from JSON dict (inverse of proof_to_dict).""" return MerkleProof( leaf=bytes.fromhex(d["leaf"]), leaf_index=int(d["leaf_index"]), siblings=tuple( ProofNode(hash=bytes.fromhex(s["hash"]), is_left=bool(s["is_left"])) for s in d["siblings"] ), root=bytes.fromhex(d["root"]), )