# Source code for arborist.source
"""Source ABC.
Adding a new corpus to arborist = one new Source subclass. The Source contract
is intentionally minimal: yield Document objects, one at a time.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Iterator
from arborist.document import Document
class Source(ABC):
    """A corpus that yields documents into the ingest pipeline.

    This is an abstract base class: it cannot be instantiated until a
    subclass overrides :meth:`iter_documents`.
    """

    #: source_type tag stored on every Document this source produces.
    # Declared as an annotation only; no value is assigned on the base class
    # (presumably each concrete subclass sets it -- confirm against callers).
    source_type: str

    @abstractmethod
    def iter_documents(self) -> Iterator[Document]:
        """Yield Document objects, one at a time.

        Implementations must be deterministic & idempotent.

        Returns:
            An iterator over the Document objects of this source.
        """
        ...