# Source code for arborist.source

"""Source ABC.

Adding a new corpus to arborist = one new Source subclass. The Source contract
is intentionally minimal: yield Document objects, one at a time.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Iterator

from arborist.document import Document


class Source(ABC):
    """A corpus that yields documents into the ingest pipeline.

    This is an abstract base class: a concrete source subclasses it and
    implements :meth:`iter_documents`. It cannot be instantiated while
    that method remains abstract.
    """

    #: source_type tag stored on every Document this source produces.
    source_type: str

    @abstractmethod
    def iter_documents(self) -> Iterator[Document]:
        """Yield Document objects. Must be deterministic & idempotent.

        Returns:
            An iterator producing this source's ``Document`` objects one
            at a time.
        """
        ...