# Source code for arborist.distill.base

"""Distiller ABC.

A Distiller compresses a surface Document into a core Document. The runner
generates Merkle proofs for every contributing source chunk so the resulting
derivation row cryptographically binds the core back to its source.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass

from arborist.document import Document


@dataclass
class DistillationResult:
    """What a Distiller returns for one source.

    Attributes:
        core: The core Document the distiller produced.
        contributing_chunk_indices: 0-based indices into source_chunks that
            fed the core's content. The runner Merkle-proves each one against
            the source's document_root and stores those proofs in
            derivations.proof_blob.
    """

    core: Document
    contributing_chunk_indices: list[int]
class Distiller(ABC):
    """Pure function: surface Document + its chunks -> core DistillationResult.

    Distillers must be deterministic — same source bytes produce the same core
    bytes. Bumping a distiller's algorithm requires bumping its `name`.
    """

    # Annotation only (no default value is assigned here); per the class
    # contract above, this is the identifier that must change whenever the
    # distiller's algorithm changes.
    name: str

    @abstractmethod
    def distill(
        self, source: Document, source_chunks: list[str]
    ) -> DistillationResult:
        """Distill one surface document into a core document.

        Args:
            source: The surface Document to compress.
            source_chunks: The chunks of ``source``. Indices reported in the
                result's ``contributing_chunk_indices`` are 0-based positions
                in this list.

        Returns:
            A DistillationResult carrying the core Document and the indices
            of the source chunks that contributed to it.
        """
        ...