# Source code for arborist.search.base
"""Search backend ABC + Hit dataclass with explicit audit mode.
Every search hit carries an `audit_mode` so callers never overclaim. Arborist
adapts the Merkle-AGI v7 audit-mode trichotomy to the RAG layer:
- STRICT     — Merkle-verified evidence: every claim cited verbatim against
               the source-content tree.
- HYBRID     — partial / mixed evidence: some claims source-grounded, others
               emerged from training. Cache or search hit is partially trusted.
- UNGROUNDED — no recoverable proof of grounding. Keyword (FTS5) hits land
               here by default; LLM answers fall here when no double-quoted
               span, sentence, or proper-noun phrase verifies against context.
               Substrate name was VISUAL (no formal guarantees attached); we
               renamed it to UNGROUNDED so the RAG semantic is explicit.
"""
from __future__ import annotations
import enum
import sqlite3
from abc import ABC, abstractmethod
from dataclasses import dataclass
class AuditMode(str, enum.Enum):
    """Grounding level reported alongside a search hit.

    Each member is also a ``str`` whose value equals its name, so a member
    compares equal to the plain string (``AuditMode.STRICT == "STRICT"``) and
    can be looked up from it (``AuditMode("STRICT")``).
    """

    # Short glosses; the module docstring carries the full definitions.
    STRICT = "STRICT"  # Merkle-verified evidence
    HYBRID = "HYBRID"  # partial / mixed evidence
    UNGROUNDED = "UNGROUNDED"  # no recoverable proof of grounding
@dataclass(frozen=True)
class Hit:
    """A single search result, immutable once constructed.

    The dataclass is frozen, so assigning to any field after construction
    raises ``dataclasses.FrozenInstanceError``. All fields are required and
    positional in declaration order except ``title``, which defaults to
    ``None``.
    """

    # NOTE(review): the comments below are read off the field names and the
    # module docstring; confirm exact semantics against the chunk store.
    document_root: str  # presumably the root identifier of the source document
    document_uri: str  # presumably where the source document lives
    chunk_idx: int  # presumably the index of the matching chunk in the document
    snippet: str  # text excerpt returned for the hit
    score: float  # relevance score; scale and ordering are backend-defined
    audit_mode: AuditMode  # grounding level claimed for this hit
    title: str | None = None  # optional title; None when not supplied
class SearchBackend(ABC):
    """A search hook over the chunk store.

    Abstract base class: it cannot be instantiated until a subclass
    implements :meth:`search`. ``name`` and ``audit_mode`` are declared here
    as annotations only (no value is assigned), so each concrete backend is
    expected to provide them.
    """

    name: str
    audit_mode: AuditMode  # default mode this backend reports

    def __init__(self, conn: sqlite3.Connection):
        """Store ``conn`` on the instance as ``self.conn``.

        The connection is only stored here; it is not opened, closed, or
        queried by this base class.
        """
        self.conn = conn

    @abstractmethod
    def search(self, query: str, limit: int = 20) -> list[Hit]:
        """Return hits for ``query``; must be implemented by subclasses.

        Args:
            query: The search string. Its syntax is backend-specific.
            limit: Defaults to 20. Presumably an upper bound on the number of
                hits returned; the base class does not enforce it.

        Returns:
            A list of ``Hit`` objects.
        """
        ...