# Source code for arborist.distill.base
"""Distiller ABC.
A Distiller compresses a surface Document into a core Document. The runner
generates Merkle proofs for every contributing source chunk so the resulting
derivation row cryptographically binds the core back to its source.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from arborist.document import Document
[docs]
@dataclass
class DistillationResult:
    """Outcome of distilling a single source document.

    Bundles the distilled core together with the positions of the source
    chunks that contributed to it. Per the module's contract, the runner
    Merkle-proves every listed chunk against the source's document_root and
    stores those proofs in derivations.proof_blob.
    """

    # The distilled (core) document produced from the source.
    core: Document
    # 0-based indices into source_chunks that fed the core's content.
    contributing_chunk_indices: list[int]
[docs]
class Distiller(ABC):
    """Interface for turning a surface Document into a core Document.

    Implementations are expected to behave as pure functions of the source
    document and its chunks, and to be deterministic: identical source bytes
    must always yield identical core bytes. Whenever a distiller's algorithm
    changes, its `name` has to change with it.
    """

    # Identifier for this distiller; must be bumped when the algorithm changes.
    name: str

    @abstractmethod
    def distill(
        self,
        source: Document,
        source_chunks: list[str],
    ) -> DistillationResult:
        """Compress `source` into a core document.

        Args:
            source: The surface document to distill.
            source_chunks: The chunks of `source`; indices reported in the
                result's `contributing_chunk_indices` refer to this list.

        Returns:
            A DistillationResult holding the core document and the 0-based
            indices of the source chunks that contributed to it.
        """