Source code for arborist.search.base

"""Search backend ABC + Hit dataclass with explicit audit mode.

Every search hit carries an `audit_mode` so callers never overclaim. Arborist
adapts the Merkle-AGI v7 audit-mode trichotomy to the RAG layer:
- STRICT     — Merkle-verified evidence: every claim cited verbatim against
               the source-content tree.
- HYBRID     — partial / mixed evidence: some claims source-grounded, others
               emerged from training. Cache or search hit is partially trusted.
- UNGROUNDED — no recoverable proof of grounding. Keyword (FTS5) hits land
               here by default; LLM answers fall here when no double-quoted
               span, sentence, or proper-noun phrase verifies against context.
               The substrate's name for this mode was VISUAL (no formal
               guarantees attached); Arborist renames it to UNGROUNDED so the
               RAG semantics are explicit.
"""

from __future__ import annotations

import enum
import sqlite3
from abc import ABC, abstractmethod
from dataclasses import dataclass


class AuditMode(str, enum.Enum):
    """Grounding level attached to a search hit.

    Members are also ``str`` instances, so each one compares equal to its
    own name (``AuditMode.STRICT == "STRICT"``).
    """

    # Merkle-verified evidence: claims cited verbatim against the source tree.
    STRICT = "STRICT"

    # Partial / mixed evidence: only some claims are source-grounded.
    HYBRID = "HYBRID"

    # No recoverable proof of grounding.
    UNGROUNDED = "UNGROUNDED"
@dataclass(frozen=True)
class Hit:
    """One immutable search result together with its audit mode.

    The instance is frozen: assigning to any field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Identifies the owning document.
    # NOTE(review): presumably the document's Merkle root — confirm.
    document_root: str
    # URI of the owning document.
    document_uri: str
    # Index of the matching chunk (presumably within the document — confirm).
    chunk_idx: int
    # Text excerpt shown for this hit.
    snippet: str
    # Relevance score; scale and ordering are backend-defined — TODO confirm.
    score: float
    # Grounding level claimed for this hit.
    audit_mode: AuditMode
    # Optional title; ``None`` when not supplied.
    title: str | None = None
class SearchBackend(ABC):
    """A search hook over the chunk store.

    Concrete backends subclass this and implement :meth:`search`. The class
    itself cannot be instantiated because ``search`` is abstract.
    """

    # Declared but not assigned here; presumably set by each concrete backend
    # as its identifier — confirm against subclasses.
    name: str
    # Default mode this backend reports.
    audit_mode: AuditMode

    def __init__(self, conn: sqlite3.Connection):
        """Keep the SQLite connection on ``self.conn`` for later queries."""
        self.conn = conn

    @abstractmethod
    def search(self, query: str, limit: int = 20) -> list[Hit]:
        """Return the hits matching ``query``.

        ``limit`` defaults to 20 and is presumably the maximum number of hits
        to return — confirm against the concrete backends.
        """