# Source code for arborist.qa.runner

"""Q&A runner: cache-first lookup -> inference fallback -> provable record.

Implements the v9.8 admissibility invariant: no record reused unless
all 8 cache_key dimensions match AND state is 'live' (not
failed/stale/quarantined).

- Cache hit  -> persisted audit_mode (STRICT/HYBRID/UNGROUNDED).
- Cache miss -> call ChatClient, run faithfulness check, classify,
  store record, audit event.
"""

from __future__ import annotations

import json
import sqlite3
import time

from arborist import (
    CANONICALIZATION_VERSION,
    SCHEMA_VERSION,
)
from arborist.compress import unpack_chunk
from arborist.merkle import MerkleTree, proof_to_dict
from arborist.qa.client import ChatClient
from arborist.qa.prompts import (
    CLAIM_LATTICE_GROUNDING_REMINDER,
    CLAIM_LATTICE_JSON_GROUNDING_REMINDER,
    CLAIM_LATTICE_JSON_SYSTEM_PROMPT,
    CLAIM_LATTICE_SYSTEM_PROMPT,
)
from arborist.qa.keys import (
    DEFAULT_FIDELITY,
    DEFAULT_QUESTION_DEDUP,
    FIDELITY_MODES,
    QUESTION_DEDUP_MODES,
    cache_key,
    canonical_question,
    conversation_hash,
    governance_policy_hash,
    model_profile_hash,
    question_hash,
    verifier_policy_hash,
)
from arborist.qa.dag import build_run_dag
from arborist.qa.evidence import (
    build_evidence_map,
    evidence_map_root,
    render_evidence_map,
    render_evidence_map_for_json,
)
from arborist.qa.repair import mechanical_repair, reprompt_repair
from arborist.qa.verify import (
    ANSWER_MODES,
    CLAIM_LATTICE_JSON_SCHEMA,
    DEFAULT_ANSWER_MODE,
    verify_claim_lattice,
    verify_claim_lattice_json,
    verify_quotes,
)
from arborist.store import append_audit, transaction

try:
    from arborist.wikitext import BASE_VERSION as _WIKITEXT_BASE_VERSION
    from arborist.wikitext import to_base as _wikitext_to_base
except ImportError:  # pragma: no cover
    _WIKITEXT_BASE_VERSION = None
    _wikitext_to_base = None


# Default policy for ``ask``. ``ask`` falls back to this very dict object (not
# a copy) whenever the caller passes no policy (``policy or DEFAULT_POLICY``),
# so treat it as read-only: mutating it changes the defaults for every later
# call in the process. Several comments below state that a key "folds into"
# governance_policy_hash / verifier_policy_hash; the hash functions themselves
# live in arborist.qa.keys and are not visible here.
DEFAULT_POLICY = {
    # Ticket #000007 — query-layer hyphen-fold marker. See
    # arborist/qa/query.py:DEFAULT_QUERY_POLICY for rationale.
    "hyphen_fold_v1": True,
    # Ticket #000006 amend 2026-05-02b (Rule 9). See
    # arborist/qa/query.py:DEFAULT_QUERY_POLICY for full rationale.
    "claim_lattice_subject_tokens_absent_threshold": 3,
    # System prompt for the default "quote" answer mode (used by ask() when
    # answer_mode is neither claim_lattice_pointer nor claim_lattice).
    "system_prompt": (
        "Answer the user's question based ONLY on the document below. "
        "For EVERY factual claim, include a verbatim quote from the "
        "document enclosed in double quotes (\"...\"). The quoted span "
        "must appear word-for-word. Make a claim only when a verbatim "
        "quote in the document directly supports it. "
        "If the answer is in the document, write it. "
        "If the answer is absent from the document, say 'I don't know "
        "based on the provided document.' and stop there. "
        "Stay inside the document at all times."
    ),
    # Restated rule fired as a user message right before the document +
    # question arrive. See arborist/qa/query.py for the rationale (recent
    # user-turn instructions outweigh decayed system-turn rules in 8B
    # instruction-tuned models).
    "grounding_reminder": (
        "REMINDER: wrap every factual claim in double quotes (\"...\") "
        "and the quoted span must appear word-for-word in the "
        "document. Each claim earns a verbatim quote. "
        "Now answer the question on the next message."
    ),
    # Sampling / generation parameters.
    # NOTE(review): presumably forwarded to the ChatClient call, which is
    # outside the visible part of this file — confirm.
    "temperature": 0.1,
    "top_p": 1.0,
    "max_tokens": 512,
    # NOTE(review): the entity_* keys are not read in the visible part of
    # this file; semantics presumably live with the verifier / query.py —
    # confirm before changing.
    "entity_policy": "proximity",
    "entity_proximity_n": 3,
    "entity_proximity_window": 300,
    # Mechanical answer repair after first verify. Off by default; see
    # arborist/qa/query.py for semantics.
    "repair_enabled": False,
    "repair_max_reprompts": 0,
    # Strip wikitext markup before the LLM ever sees the context. Lets
    # Hermes quote prose verbatim and shrinks token bills (~43% on
    # Wikipedia chunks). Bumps governance_policy_hash so prior cached
    # answers under raw-wikitext policy stay distinct on lookup. Set
    # via the wikitext extras; no-op if mwparserfromhell isn't installed
    # (the import fallback above leaves _WIKITEXT_BASE_VERSION as None, and
    # ask() only converts when this value is truthy).
    "base_version": _WIKITEXT_BASE_VERSION,
    # G0 / CTI — claim-lattice-pointer answer mode. "quote" (default):
    # existing behavior, model writes prose with verbatim quotes inline.
    # "claim_lattice_pointer": runtime builds an evidence map and shows
    # the model short pointer ids (E1, E2, …); model writes natural
    # prose with bracket pointer tags ("Claim. [E12]") instead of
    # quoting source text. Renderer interpolates literal spans at
    # display time. Synthetic-elision-by-construction-impossible: the
    # model never types the quote string. Two-layer id discipline keeps
    # the cache & run-DAG keyed on content-addressed evidence_ids.
    # Folds into governance_policy_hash so two modes write under
    # different cache_keys and never alias. No iterative repair in
    # pointer mode (one-shot benchmark discipline).
    "answer_mode": DEFAULT_ANSWER_MODE,
    "claim_lattice_system_prompt": CLAIM_LATTICE_SYSTEM_PROMPT,
    "claim_lattice_grounding_reminder": CLAIM_LATTICE_GROUNDING_REMINDER,
    # Allowed source roles for claim-lattice verification. Roles outside
    # this set get classified SOURCE_ROLE_BLOCKED and downgrade the
    # verdict. Mirrors arborist.qa.verify.DEFAULT_ALLOWED_SOURCE_ROLES;
    # noisy_background_source / sequel_background_source are excluded by
    # default. Folds into governance_policy_hash on change.
    "claim_lattice_allowed_source_roles": [
        "primary_answer_source",
        "secondary_context_source",
        "background_source",
        "unclassified",
        # Self-promoted providence records (`arborist://providence/`
        # URI scheme). Trusted-as-fact substrate per the
        # self-reference design — STRICT live records past the
        # kindergarten window. See
        # docs/self-reference-design.md for the
        # falsification trust model.
        "self_reference_source",
    ],
    # Hard cap on pointer ids per claim line — mirrors prompt Rule 9.
    # Lines exceeding this cap classify as SCHEMA_INVALID and the
    # verdict can no longer reach STRICT. Folds into
    # governance_policy_hash so changing the cap invalidates prior
    # cached records.
    "claim_lattice_max_pointers_per_claim": 2,
    # Minimum claim-token coverage required for the citation-overlap
    # check (Rule 6) to pass. Pre-2026-04-30 the threshold was implicit
    # at "≥1 shared token", which let through lazy-anchored claims
    # whose only overlap was a single topical word (e.g. "Yale
    # University... [E9]" cited to a highway-data span containing only
    # "Connecticut"). 0.30 means a 10-token claim needs ≥3 of its
    # content tokens to appear in the cited span. Short claims (≤3
    # content tokens) keep the old ≥1-token floor so narrow factoids
    # like "Steve Jobs co-founded Apple" still pass. Folds into
    # governance_policy_hash on change.
    "claim_lattice_min_citation_coverage": 0.30,
    # Bare-name claim guard. A claim with fewer than this many content
    # tokens (>=4 chars, post-spotlight-stopword) is rejected as
    # SCHEMA_INVALID. Catches the JP-dinosaurs lazy-anchor where
    # "Triceratops. [E16]" passes lexical overlap on a single token
    # even when E16 is a video-game tie-in chunk rather than the film
    # article. Default 2: bare-entity-name claims (one content token
    # after stopword strip) fail; sentence-shape claims pass. Note
    # ``_content_tokens`` already filters "appears", "shown", etc. so
    # "Trex appears" → 1 content token (filtered), "Trex appears in
    # the film" → 2 content tokens (passes). Folds into
    # governance_policy_hash on change.
    "claim_lattice_min_claim_content_tokens": 2,
    # Lazy-anchor smell auto-demote. When >= threshold of verified
    # pointer-pairs cite a single pointer AND there are >= min_pairs
    # total, cap audit_mode at HYBRID. The smell sidecar was advisory
    # only pre-2026-04-30; now it's load-bearing. STRICT requires
    # diverse anchoring across pointers.
    "claim_lattice_lazy_anchor_demote_threshold": 0.5,
    "claim_lattice_lazy_anchor_demote_min_pairs": 3,
    # Warrant-lite — relation-question hard check (Ticket H from
    # feedback-3, 2026-05-01). Detects relation-shape questions
    # ("who is X's boss?", "who founded Y?") and requires the cited
    # span to contain at least one named answer entity (proper-noun
    # phrase) from the claim. Catches the Homer-Simpson lazy-anchor
    # case where claim asserts "Mr. Burns" but cited span is the
    # voice-actor bio. WARRANT_MISSING violations cap audit_mode
    # at HYBRID. See arborist/qa/warrant.py.
    "claim_lattice_warrant_check_enabled": True,
    # NOTE(review): no rationale recorded here for the deflection check;
    # presumably a sibling of the warrant check — confirm in the verifier.
    "claim_lattice_deflection_check_enabled": True,
    # Format-collapse check (pointer-mode only): when the model emits
    # ≥5 meaningful prose lines with zero `[E\d+]` pointer tags, it
    # abandoned the claim_lattice_pointer protocol entirely. Soft-demote
    # so audit display surfaces "format collapsed" vs "graceful per-
    # claim refusal" — different failure shapes, same UNGROUNDED rung.
    # JSON-mode collapse already shows up as SCHEMA_INVALID so this
    # check is redundant there. Surfaced 2026-05-02 by fox's "Winners
    # of all major sports?" case where Hermes dumped 50+ free-form
    # sentences.
    "claim_lattice_format_collapse_check_enabled": True,
    # Quantifier preflight guard (Ticket #000008 Phase 2). Per-call
    # claim cap derived from the question's quantifier intensity and
    # the configured model profile (arborist/qa/model_profiles.py).
    # Phase 2 lands the lookup wiring with apply_caps=False per
    # §10.11.3 dry-run discipline — claim_cap_applied is computed
    # and reported on the result dict, but the verifier still uses
    # claim_lattice_max_claims_per_answer as the actual cap.
    # Operator flips quantifier_guard_apply_caps=True after dry-run
    # bench review confirms classifier output across the full
    # question set.
    #
    # Six-level disable hierarchy (§10.11.2).
    # NOTE(review): only four levels are listed below — confirm the
    # remaining two against §10.11.2 (quantifier_apply_caps_modes and
    # quantifier_reminder_enabled further down are plausible candidates).
    #   - quantifier_guard_enabled: master kill (False = no
    #     classifier output, no cap lookup, no telemetry).
    #   - quantifier_guard_apply_caps: dry-run gate (True = cap
    #     applied; False = cap reported but not applied).
    #   - quantifier_caps_by_intensity: per-call override dict;
    #     wins over the model_profiles.py table when present.
    #   - quantifier_guard_modes: list of answer_modes the guard
    #     applies to. Quote mode opts out by default — already
    #     stable HYBRID 0.455 on baseline, different failure shape.
    "quantifier_guard_enabled": True,
    "quantifier_guard_apply_caps": False,
    # When apply_caps flips True, this allowlist gates which modes
    # actually have caps applied. n=5 verification 2026-05-03 (#000008
    # §12.10): cap on claim_lattice (JSON) wins +14pp on STRICT-rate;
    # cap on claim_lattice_pointer fires TOO_MANY_CLAIMS 20× without
    # moving the verdict floor (still 0 STRICT). Default to JSON only
    # so flipping the master switch doesn't add wasted cap-noise on
    # pointer mode. Empty list / None = honor quantifier_guard_modes
    # (legacy fallback).
    "quantifier_apply_caps_modes": ["claim_lattice"],
    "quantifier_caps_by_intensity": {},
    "quantifier_guard_modes": ["claim_lattice_pointer", "claim_lattice"],
    # Phase 3 — broad-quantifier reminder injection. Default ON for
    # lattice modes (gated via quantifier_guard_modes) per the
    # 2026-05-03 bench A/B (#000008 §12). Reminder eliminates
    # FORMAT_COLLAPSED (2→0), reduces NO_EVIDENCE_POINTER 33%,
    # boosts mean ratio +17pp on pointer / +21pp on JSON, and
    # rescues JSON UNGROUNDED 7→1. n=5 verification 2026-05-03
    # confirms the compound effect with cap survives at higher
    # sample size. Quote mode is mode-gated off (different failure
    # shape; paraphrase verifier doesn't need pointer-tag reminders).
    # Caveat: ask() reads this key with a fallback of False, so a
    # caller-supplied policy that omits the key gets no reminder even
    # though the default here is True.
    "quantifier_reminder_enabled": True,
    # Phase 4 — strict reject for broad-unbounded queries. When True
    # AND intensity ∈ {ALL, COMPREHENSIVE, OPEN_REQUEST} AND
    # scope_bound_hint == "unbounded", query()/ask() return UNGROUNDED
    # before the LLM call with a BROAD_QUANTIFIER_REJECTED violation.
    # Saves the ~10-15s LLM call on rejected runs. Default OFF — opt-in
    # via --reject-broad CLI flag or per-call policy override.
    # Bounded universals (e.g. all members of the Beatles, year-anchored
    # questions) are NOT rejected per §10.1.
    "quantifier_reject_broad": False,
    # Ticket #000010 — Meta-Cognition Preflight Guard (M0 / MCTL).
    # Pure deterministic detectors (temporal, contradiction,
    # false-premise-lite, out-of-corpus) wrap the #000008 quantifier
    # classifier and surface a QuestionState on the result dict.
    # Master switch ON by default — detectors are pure-on-question
    # so cost is negligible. Each sub-detector has its own enable
    # switch for granular A/B. block_on_contradiction defaults False
    # (label-only by default; opt-in to hard-block — false-positive
    # rate not yet bench-validated).
    "metacognition_enabled": True,
    "metacognition_temporal_check": True,
    "metacognition_contradiction_check": True,
    "metacognition_false_premise_check": True,
    "metacognition_out_of_corpus_check": True,
    "metacognition_block_on_contradiction": False,
    # Ticket #000011 — soft preflight sidecar. Default OFF —
    # adds one short LLM round-trip (~200ms median) so cost is
    # operator-opt-in only. NEVER enters the verifier proof path
    # (D1); produces only SOFT_* labels that surface as advisory
    # hints alongside the deterministic detector output.
    "soft_preflight_enabled": False,
    # Claim-count ceiling. Bench finding (2026-04-30 york-england):
    # "tell me all there is to know about X" prompted Hermes to spam
    # 26-59 encyclopedic claims sourced from training, only 2-4 of
    # which grounded in retrieval. Atomic-claim prompt rule (b5925c8)
    # cut this to ~10, but a hard structural cap is defense in depth.
    # Cap of 12 admits typical entity-list questions (5-7 dinosaurs,
    # Simpsons + pets) while flagging the runaway shape. Folds into
    # governance_policy_hash on change.
    "claim_lattice_max_claims_per_answer": 12,
    # JSON variant — `answer_mode="claim_lattice"`. Mirrors the
    # multi-source query path. Pairs with grammar-constrained inference
    # (vLLM guided_json, Claude/GPT-4 native JSON, Qwen 3.6 reasoner).
    # Lenient pre-parser in verify_claim_lattice_json keeps the path
    # survivable on inference paths without grammar guidance.
    "claim_lattice_json_system_prompt": CLAIM_LATTICE_JSON_SYSTEM_PROMPT,
    "claim_lattice_json_grounding_reminder": CLAIM_LATTICE_JSON_GROUNDING_REMINDER,
    "claim_lattice_use_guided_json": True,
    # JSON-mode stop sequences. Hermes-3-8B sometimes spams whitespace
    # / newlines after the closing brace on broad-descriptive shapes
    # ("plot of X", "tell me about Y") — the response runs out the
    # max_tokens budget and the lenient parser sees truncated JSON.
    # Stopping on a blank line cuts the runaway. JSON-mode output
    # never legitimately contains a blank line (single object, single
    # line) so this is a safe filter. Folds into
    # governance_policy_hash on change.
    "claim_lattice_json_stop_sequences": ["\n\n"],
    # Verifier content-token rules version (#000053). "v2-acronym-aware"
    # = `arborist.qa.evidence._content_tokens` keeps all-caps 2-3-char
    # acronyms (CPU/GPU/DNA/FBI…) as content tokens; pre-#000053 dropped
    # every <4-char token, so a CPU/GPU claim cited to a "CPU foo" /
    # "GPU bar" article tripped TITLE_MISMATCH spuriously. A pure
    # marker — it doesn't gate code (the tokenizer change is
    # unconditional), it exists so the change folds into
    # `verifier_policy_hash` and prior cache records orphan on lookup.
    "content_token_rules": "v2-acronym-aware",
}


#: Per-verdict cost-class economics, one row per audit_mode:
#: ``(audit_mode, Δ5F utility signal, capital cost)``. Both lookup tables
#: below are derived from this single table so they always cover the same
#: set of audit modes, in the same order.
_AUDIT_MODE_ECONOMICS = (
    ("STRICT", 0.05, 1.0),
    ("CANONICAL_PROJECTION", 0.10, 0.05),
    ("HYBRID", 0.00, 0.8),
    ("UNGROUNDED", -0.10, 0.4),
)

#: Verdict audit_mode → Δ5F utility signal fed to the advisory controller
#: step. Intended to match the shape the dry-run simulator uses
#: (``bench/scripts/prometheus_sigma_sweep_dryrun.py``) so the live-runtime
#: advisory log and the offline sweep dry-run agree on cost-class economics.
_AUDIT_MODE_DELTA_5F = {
    mode: delta_5f for mode, delta_5f, _cost in _AUDIT_MODE_ECONOMICS
}

#: Verdict audit_mode → capital cost charged to the advisory controller
#: branch for that verdict.
_AUDIT_MODE_CAPITAL_COST = {
    mode: cost for mode, _delta_5f, cost in _AUDIT_MODE_ECONOMICS
}


def _emit_qa_controller_advisory(
    conn: sqlite3.Connection, cache_key: str, verdict: dict
) -> None:
    """Persist an advisory controller decision for one QA verdict (#000037 Phase 2).

    Builds a single-branch ``ControllerInput`` from ``verdict``, runs
    ``controller_decide`` on it, and hands the resulting decision to
    ``emit_controller_events`` together with ``conn``.

    Args:
        conn: SQLite connection passed through to ``emit_controller_events``.
        cache_key: Cache key of the QA record. Its first 16 characters name
            the branch (``qa:<prefix>``); the full key names the organism
            root (``qa:<key>``). Inside this function the parameter shadows
            the module-level ``cache_key`` import.
        verdict: Verdict mapping. Keys read: ``audit_mode`` (missing/empty
            is treated as ``"UNGROUNDED"``), ``n_quotes`` and
            ``unverified_quotes`` (a list, or a JSON-encoded string that is
            decoded here; anything that does not end up as a list counts
            as zero unverified quotes).

    The witness divergence is ``len(unverified_quotes) / n_quotes``, or
    ``0.0`` when ``n_quotes`` is not positive. Audit modes missing from
    the lookup tables fall back to a Δ5F of ``0.0`` and a capital cost of
    ``1.0``.

    Design notes carried over from the original docstring (properties of
    the substrate modules, not verifiable from this module): the rows go
    to the sibling ``controller_events`` table and do not enter the
    ``audit_events.event_hash`` preimage, and emission is idempotent
    under retry via ``UNIQUE(event_kind, body_hash)``.

    The substrate modules are imported inside the function so that calls
    which never reach this helper do not pay their load cost.
    """
    from arborist.substrate.prometheus import (
        BatteryDeltas,
        ControllerBranch,
        ControllerInput,
        controller_decide,
        safe_weights,
    )
    from arborist.substrate.prometheus_audit import emit_controller_events

    mode = (verdict.get("audit_mode") or "UNGROUNDED").upper()
    quote_total = int(verdict.get("n_quotes") or 0)

    # unverified_quotes may arrive already decoded (list) or as the JSON
    # text persisted in the cache row; normalise to a list where possible.
    pending = verdict.get("unverified_quotes") or []
    if isinstance(pending, str):
        try:
            pending = json.loads(pending)
        except (TypeError, ValueError):
            pending = []
    unverified_total = len(pending) if isinstance(pending, list) else 0

    if quote_total > 0:
        divergence = unverified_total / quote_total
    else:
        divergence = 0.0

    branch_label = f"qa:{cache_key[:16]}"
    # Only the Δ5F component carries signal; the other battery deltas are
    # fixed at zero for this advisory branch.
    battery = BatteryDeltas(
        delta_5s=0.0,
        delta_5t=0.0,
        delta_5f=_AUDIT_MODE_DELTA_5F.get(mode, 0.0),
        delta_5r=0.0,
    )
    single_branch = ControllerBranch(
        branch_id=branch_label,
        deltas=battery,
        witness_divergence=divergence,
        capital_cost=_AUDIT_MODE_CAPITAL_COST.get(mode, 1.0),
        payoff_b=1.0,
    )

    root = f"qa:{cache_key}"
    controller_input = ControllerInput(
        organism_root=root,
        branches=(single_branch,),
        budget=1,
        hermes_utilization=0,
        weights=safe_weights(),
        difficulty=1.0,
        divergence_rate=divergence,
    )
    outcome = controller_decide(controller_input)
    emit_controller_events(conn, outcome, organism_root=root)


def _ms_since(t: float) -> float:
    """Return the milliseconds elapsed since ``t``, rounded to 0.1 ms.

    ``t`` must be a reading of ``time.monotonic()``; the result is the
    difference to the current monotonic clock, converted from seconds to
    milliseconds.
    """
    elapsed_seconds = time.monotonic() - t
    return round(elapsed_seconds * 1000, 1)



# [docs]
def ask(
    conn: sqlite3.Connection,
    *,
    document_root: str,
    question: str,
    client: ChatClient,
    model_id: str,
    revision: str = "",
    quantization: str = "",
    policy: dict | None = None,
    chain: str = "private",
    fidelity: str | None = None,
) -> dict:
    """Look up cached answer or run inference. Returns a result dict.

    See ``arborist.qa.query.query`` for `fidelity` semantics — it
    controls lookup tolerance: ``"strict"`` only checks the cache_key
    matching the call's ``policy["question_dedup"]``; the default
    ``"equivalence_class"`` falls back to the alternate dedup mode's
    cache_key on miss so a fast-cache agent can reuse records written
    under either mode. Result includes ``lookup_path``.
    """
    policy = policy or DEFAULT_POLICY
    if fidelity is None:
        fidelity = policy.get("fidelity", DEFAULT_FIDELITY)
    if fidelity not in FIDELITY_MODES:
        raise ValueError(
            f"fidelity must be one of {FIDELITY_MODES}, got {fidelity!r}"
        )
    # Quantifier preflight (Ticket #000008 Phase 1+2). Same wiring
    # as query() — see arborist/qa/query.py for the rationale and
    # disable hierarchy.
    from arborist.qa.model_profiles import cap_for_intensity
    from arborist.qa.quantifier import classify_question_quantifier
    answer_mode_for_guard = policy.get("answer_mode", "quote")
    quantifier_guard_on = bool(policy.get("quantifier_guard_enabled", True))
    quantifier_guard_modes = policy.get(
        "quantifier_guard_modes",
        ["claim_lattice_pointer", "claim_lattice"],
    )
    quantifier_mode_gated = answer_mode_for_guard in (quantifier_guard_modes or [])
    if quantifier_guard_on:
        quantifier = classify_question_quantifier(question)
    else:
        quantifier = {
            "intensity": None,
            "matched_token": None,
            "explicit_count": None,
            "is_broad": False,
            "operational_shape": None,
            "scope_bound_hint": "unknown",
            "classifier_version": None,
        }
    if quantifier_guard_on and quantifier_mode_gated and quantifier["intensity"]:
        claim_cap_lookup = cap_for_intensity(
            model_profile_id=model_id,
            intensity=quantifier["intensity"],
            explicit_count=quantifier["explicit_count"],
            policy_overrides=policy.get("quantifier_caps_by_intensity") or None,
        )
    else:
        claim_cap_lookup = None
    quantifier_apply_caps = bool(policy.get("quantifier_guard_apply_caps", False))
    quantifier_apply_caps_modes = policy.get(
        "quantifier_apply_caps_modes",
        quantifier_guard_modes,  # legacy fallback
    ) or quantifier_guard_modes
    quantifier_caps_mode_gated = answer_mode_for_guard in (
        quantifier_apply_caps_modes or []
    )
    _policy_max_claims = int(policy.get("claim_lattice_max_claims_per_answer", 12))
    if (
        quantifier_apply_caps
        and quantifier_caps_mode_gated
        and claim_cap_lookup is not None
    ):
        effective_max_claims = int(claim_cap_lookup)
    else:
        effective_max_claims = _policy_max_claims
    # Ticket #000010 — meta-cognition preflight (mirror of query()).
    from arborist.qa.metacognition import preflight_question
    question_state = preflight_question(
        question,
        model_profile_id=model_id,
        reference_frames=(),
        policy=policy,
    )
    t_start = time.monotonic()

    doc = conn.execute(
        "SELECT document_uri, chunking_version FROM documents "
        "WHERE document_root = ?",
        (document_root,),
    ).fetchone()
    if doc is None:
        return {"status": "unknown_document"}

    chunk_rows = conn.execute(
        "SELECT idx, leaf_hash, content FROM chunks "
        "WHERE document_root = ? ORDER BY idx ASC",
        (document_root,),
    ).fetchall()
    if not chunk_rows:
        return {"status": "unknown_document"}
    if any(r["content"] is None for r in chunk_rows):
        return {"status": "source_cold", "msg": "rehydrate before asking"}

    answer_mode = policy.get("answer_mode", DEFAULT_ANSWER_MODE)
    if answer_mode not in ANSWER_MODES:
        raise ValueError(
            f"policy['answer_mode'] must be one of {ANSWER_MODES}, got {answer_mode!r}"
        )

    chunk_texts = [unpack_chunk(r["content"]) for r in chunk_rows]
    document_text = "\n\n".join(chunk_texts)

    # Wikitext → prose before the LLM sees it. The model can then quote
    # verbatim against the prose form; the verifier compares like-against-
    # like. Idempotent if context is already plain prose. Gated on
    # policy["base_version"] so this is part of governance_policy_hash.
    if policy.get("base_version") and _wikitext_to_base is not None:
        document_text = _wikitext_to_base(document_text)
        chunk_texts = [_wikitext_to_base(t) for t in chunk_texts]

    evidence_map = []
    if answer_mode == "claim_lattice_pointer":
        # Quote-by-pointer: one evidence object per chunk. The model sees
        # the literal spans labeled with content-addressed IDs and is
        # instructed to reference IDs, not type quote text. Synthetic
        # elision is impossible by construction — the model never produces
        # the quote string.
        chunks_for_map = [
            {
                "source_root": document_root,
                "document_uri": doc["document_uri"],
                "title": None,
                "chunk_idx": r["idx"],
                "chunk_root": r["leaf_hash"],
                "span": chunk_texts[i],
                "source_role": "primary_answer_source",
            }
            for i, r in enumerate(chunk_rows)
        ]
        evidence_map = build_evidence_map(chunks_for_map)
        sys_prompt = policy["claim_lattice_system_prompt"]
        grounding_reminder = policy.get("claim_lattice_grounding_reminder")
        rendered_evidence = render_evidence_map(evidence_map)

        def _user_payload(q: str) -> str:
            return f"EVIDENCE:\n\n{rendered_evidence}\n\n---\n\nQUESTION: {q}"
    elif answer_mode == "claim_lattice":
        # JSON variant — same per-chunk evidence map as pointer mode,
        # blocks labeled with content-addressed evidence_id (long hex)
        # since the model emits IDs in JSON. Pairs with grammar-
        # constrained inference; lenient pre-parser handles drift.
        chunks_for_map = [
            {
                "source_root": document_root,
                "document_uri": doc["document_uri"],
                "title": None,
                "chunk_idx": r["idx"],
                "chunk_root": r["leaf_hash"],
                "span": chunk_texts[i],
                "source_role": "primary_answer_source",
            }
            for i, r in enumerate(chunk_rows)
        ]
        evidence_map = build_evidence_map(chunks_for_map)
        sys_prompt = policy.get(
            "claim_lattice_json_system_prompt",
            policy["claim_lattice_system_prompt"],
        )
        grounding_reminder = policy.get(
            "claim_lattice_json_grounding_reminder",
            policy.get("claim_lattice_grounding_reminder"),
        )
        rendered_evidence = render_evidence_map_for_json(evidence_map)

        def _user_payload(q: str) -> str:
            return f"EVIDENCE:\n\n{rendered_evidence}\n\n---\n\nQUESTION: {q}"
    else:
        sys_prompt = policy["system_prompt"]
        grounding_reminder = policy.get("grounding_reminder")

        def _user_payload(q: str) -> str:
            return f"Document:\n\n{document_text}\n\n---\n\nQuestion: {q}"

    # System sets the policy; a user-turn reminder restates the rule one
    # message before the payload arrives. Payload (document or evidence
    # map + question) lands last as the most-recent tokens before
    # generation.
    messages = [{"role": "system", "content": sys_prompt}]
    if grounding_reminder:
        messages.append({"role": "user", "content": grounding_reminder})
    # Quantifier-specific reminder (Ticket #000008 Phase 3, default
    # off). When the question is broad (ALL/COMPREHENSIVE/OPEN_
    # REQUEST) and the operator opted in via
    # quantifier_reminder_enabled=True, append a one-line reminder
    # restating the cap and the no-prior-enumeration rule.
    # quantifier_reminder_enabled defaults to False because Hermes-3-8B
    # already ignores parts of the existing reminder under enumeration
    # pressure (§3 Option B con); empirical effect requires bench
    # measurement before flipping default-on (§10.8 decision tree).
    if (
        quantifier_guard_on
        and quantifier_mode_gated
        and quantifier.get("is_broad")
        and bool(policy.get("quantifier_reminder_enabled", False))
    ):
        from arborist.qa.quantifier_reminder import broad_quantifier_reminder
        broad = broad_quantifier_reminder(
            intensity=quantifier["intensity"],
            cap=effective_max_claims,
            scope_bound_hint=quantifier["scope_bound_hint"],
        )
        if broad:
            messages.append({"role": "user", "content": broad})
    messages.append({"role": "user", "content": _user_payload(question)})

    mhash = model_profile_hash(model_id, revision, quantization)

    # Dedup-mode-aware cache_key. See arborist/qa/query.py for rationale —
    # policy_variant matches the alternate mode so governance_policy_hash
    # agrees with what an agent under that mode would have written,
    # enabling cross-silo fallback.
    def _ckey_for_mode(mode: str) -> str:
        canon_q = canonical_question(question, mode=mode)
        canon_msgs = list(messages[:-1]) + [
            {"role": "user", "content": _user_payload(canon_q)},
        ]
        policy_variant = dict(policy, question_dedup=mode)
        return cache_key(
            document_root,
            question_hash(question, mode=mode),
            mhash,
            conversation_hash(canon_msgs),
            governance_policy_hash(policy_variant),
            SCHEMA_VERSION,
            CANONICALIZATION_VERSION,
            doc["chunking_version"],
            verifier_policy_hash(policy_variant),
        )

    ghash = governance_policy_hash(policy)  # for the legacy INSERT below

    primary_dedup = policy.get("question_dedup", DEFAULT_QUESTION_DEDUP)
    if primary_dedup not in QUESTION_DEDUP_MODES:
        raise ValueError(
            f"policy['question_dedup'] must be one of {QUESTION_DEDUP_MODES}, "
            f"got {primary_dedup!r}"
        )
    # Re-derive the per-mode hashes for use in the INSERT below. _ckey_for_mode
    # already builds them, but the legacy INSERT references qhash/chash by name.
    qhash = question_hash(question, mode=primary_dedup)
    canonical_q_primary = canonical_question(question, mode=primary_dedup)
    canonical_messages_primary = list(messages[:-1]) + [
        {"role": "user", "content": _user_payload(canonical_q_primary)},
    ]
    chash = conversation_hash(canonical_messages_primary)
    primary_ckey = _ckey_for_mode(primary_dedup)
    ckey = primary_ckey  # legacy name for the rest of the function

    t_lookup = time.monotonic()
    cached = conn.execute(
        "SELECT * FROM providence_cache "
        "WHERE cache_key = ? AND falsification_state = 'live'",
        (primary_ckey,),
    ).fetchone()
    hit_ckey = primary_ckey
    lookup_path = primary_dedup if cached is not None else None
    if cached is None and fidelity == "equivalence_class":
        other_mode = (
            "equivalence_class" if primary_dedup == "strict" else "strict"
        )
        other_ckey = _ckey_for_mode(other_mode)
        if other_ckey != primary_ckey:
            cached = conn.execute(
                "SELECT * FROM providence_cache "
                "WHERE cache_key = ? AND falsification_state = 'live'",
                (other_ckey,),
            ).fetchone()
            if cached is not None:
                hit_ckey = other_ckey
                lookup_path = f"{other_mode}_fallback"
    cache_lookup_ms = _ms_since(t_lookup)
    if cached is not None:
        with transaction(conn):
            now = int(time.time())
            conn.execute(
                "UPDATE providence_cache "
                "SET hit_count = hit_count + 1, last_hit_at = ? "
                "WHERE cache_key = ?",
                (now, hit_ckey),
            )
        return {
            "status": "cache_hit",
            "audit_mode": cached["audit_mode"],
            "cache_key": hit_ckey,
            "lookup_path": lookup_path,
            "source_root": document_root,
            "answer_text": cached["answer_text"],
            "merkle_proof": json.loads(cached["merkle_proof"]),
            "n_quotes": cached["n_quotes"],
            "n_verified": cached["n_verified"],
            "verifier_method": cached["verifier_method"],
            "unverified_quotes": (
                json.loads(cached["unverified_quotes"])
                if cached["unverified_quotes"]
                else []
            ),
            "partially_verified_quotes": [],
            # Quantifier preflight (Ticket #000008 Phase 1+2). Pure
            # on the question string, so cache hits re-classify
            # cheaply and carry the same schema as miss-path rows.
            "quantifier_intensity": quantifier["intensity"],
            "quantifier_matched_token": quantifier["matched_token"],
            "scope_bound_hint": quantifier["scope_bound_hint"],
            "quantifier_explicit_count": quantifier["explicit_count"],
            "claim_cap_applied": claim_cap_lookup,
            # Ticket #000010 — meta-cognition QuestionState.
            "question_state": question_state.to_dict(),
            "timings": {
                "cache_lookup_ms": cache_lookup_ms,
                "llm_ms": None,
                "total_ms": _ms_since(t_start),
            },
        }

    t_llm = time.monotonic()
    # JSON mode: pass structured-output extras under all three engine
    # conventions (vLLM `guided_json`, llama.cpp `json_schema`, OpenAI-
    # spec `response_format`) so the same call site enforces the schema
    # whether the model is on vLLM, llama.cpp, or OpenAI-spec. Engines
    # silently drop unknown keys. Pre-2026-05-19 this was vLLM-only
    # (`guided_json` alone); llama.cpp Qwen got NO enforcement and
    # relied on the lenient pre-parser to clean drift — the parse-
    # tolerant fallback still handles whatever drift remains.
    from arborist.qa.verify import claim_lattice_structured_output_extras
    extra_body: dict | None = None
    stop_seqs: list[str] | None = None
    if answer_mode == "claim_lattice" and policy.get(
        "claim_lattice_use_guided_json", True
    ):
        extra_body = claim_lattice_structured_output_extras()
    if answer_mode == "claim_lattice":
        # JSON-mode token-runaway guard. On broad-descriptive /
        # comparison questions Hermes-3-8B sometimes spams whitespace
        # / newlines after the closing brace until max_tokens
        # exhausts; the resulting truncated payload won't parse and
        # the run lands UNGROUNDED 0/0 at 12-15s instead of 2-4s.
        # Stopping on a blank line (\n\n) cuts the runaway —
        # well-formed JSON-mode output never contains a blank line
        # since the model emits a single object on one line (or
        # with simple internal newlines).
        stop_seqs = list(policy.get(
            "claim_lattice_json_stop_sequences", ["\n\n"]
        ))
    raw_answer = client.chat_completion(
        messages,
        model=model_id,
        temperature=policy["temperature"],
        max_tokens=policy["max_tokens"],
        top_p=policy.get("top_p", 1.0),
        extra_body=extra_body,
        stop=stop_seqs,
    )
    llm_ms = _ms_since(t_llm)

    repair_changes: list[dict] = []
    pre_repair_verdict: dict | None = None

    # Phase 3 of #000031: load the warrant-chain core_root set once
    # from the conn's shards directory. The verifier consults this
    # set to suppress WARRANT_MISSING when the cited chunk's document
    # has a warrant-resolver derivation row (Merkle-bound primary-
    # source backing). Empty set if the shard has no derivations
    # rows yet — fully backward-compatible.
    _warrant_chain_roots: frozenset[str] = frozenset()
    try:
        from pathlib import Path as _Path

        from arborist.qa.warrant_chain import warrant_chain_lookup as _wcl

        _db_path = conn.execute("PRAGMA database_list").fetchall()
        # PRAGMA database_list rows: (seq, name, file). Main DB is
        # the first row with name='main'.
        _main_row = next((r for r in _db_path if r[1] == "main"), None)
        if _main_row and _main_row[2]:
            _warrant_chain_roots = _wcl(_Path(_main_row[2]).parent)
    except Exception:
        # Fail-closed: empty set means no suppression, behavior
        # identical to pre-Phase-3.
        _warrant_chain_roots = frozenset()

    if answer_mode == "claim_lattice_pointer":
        verdict = verify_claim_lattice(
            raw_answer,
            evidence_map,
            allowed_source_roles=tuple(
                policy.get(
                    "claim_lattice_allowed_source_roles",
                    [
                        "primary_answer_source",
                        "secondary_context_source",
                        "background_source",
                        "unclassified",
                    ],
                )
            ),
            max_pointers_per_claim=int(policy.get(
                "claim_lattice_max_pointers_per_claim", 2
            )),
            min_citation_coverage=float(policy.get(
                "claim_lattice_min_citation_coverage", 0.30
            )),
            min_claim_content_tokens=int(policy.get(
                "claim_lattice_min_claim_content_tokens", 3
            )),
            lazy_anchor_demote_threshold=float(policy.get(
                "claim_lattice_lazy_anchor_demote_threshold", 0.5
            )),
            lazy_anchor_demote_min_pairs=int(policy.get(
                "claim_lattice_lazy_anchor_demote_min_pairs", 3
            )),
            max_claims_per_answer=effective_max_claims,
            subject_tokens_absent_threshold=int(policy.get(
                "claim_lattice_subject_tokens_absent_threshold", 3
            )),
            question=question,
            warrant_check_enabled=bool(policy.get(
                "claim_lattice_warrant_check_enabled", True
            )),
            deflection_check_enabled=bool(policy.get(
                "claim_lattice_deflection_check_enabled", True
            )),
            format_collapse_check_enabled=bool(policy.get(
                "claim_lattice_format_collapse_check_enabled", True
            )),
            warrant_chain_roots=_warrant_chain_roots,
        )
        # Rendered prose (literal spans interpolated) is the user-facing
        # answer text — never the model's raw pointer-line output. If
        # rendering produced nothing (no valid claims), persist the raw
        # output so an operator can see what the model actually said.
        rendered = verdict["rendered_text"]
        answer_text = rendered if rendered else raw_answer
    elif answer_mode == "claim_lattice":
        verdict = verify_claim_lattice_json(
            raw_answer,
            evidence_map,
            allowed_source_roles=tuple(
                policy.get(
                    "claim_lattice_allowed_source_roles",
                    [
                        "primary_answer_source",
                        "secondary_context_source",
                        "background_source",
                        "unclassified",
                    ],
                )
            ),
            max_evidence_per_claim=int(policy.get(
                "claim_lattice_max_pointers_per_claim", 2
            )),
            min_citation_coverage=float(policy.get(
                "claim_lattice_min_citation_coverage", 0.30
            )),
            max_claims_per_answer=effective_max_claims,
            subject_tokens_absent_threshold=int(policy.get(
                "claim_lattice_subject_tokens_absent_threshold", 3
            )),
            question=question,
            warrant_check_enabled=bool(policy.get(
                "claim_lattice_warrant_check_enabled", True
            )),
            deflection_check_enabled=bool(policy.get(
                "claim_lattice_deflection_check_enabled", True
            )),
            warrant_chain_roots=_warrant_chain_roots,
        )
        rendered = verdict["rendered_text"]
        answer_text = rendered if rendered else raw_answer
    else:
        answer_text = raw_answer
        verdict = verify_quotes(
            answer_text,
            document_text,
            entity_policy=policy.get("entity_policy", "hybrid"),
            proximity_n=policy.get("entity_proximity_n", 3),
            proximity_window=policy.get("entity_proximity_window", 300),
        )

        def _verify(text: str) -> dict:
            """Run the quote verifier on ``text`` against the source document.

            The entity policy and proximity settings are read from ``policy``
            on every call, falling back to ``"hybrid"``, ``3`` and ``300``
            when the corresponding keys are absent.
            """
            entity_settings = {
                "entity_policy": policy.get("entity_policy", "hybrid"),
                "proximity_n": policy.get("entity_proximity_n", 3),
                "proximity_window": policy.get("entity_proximity_window", 300),
            }
            return verify_quotes(text, document_text, **entity_settings)

        if (
            policy.get("repair_enabled")
            and verdict["audit_mode"] != "STRICT"
            and verdict.get("unverified_quotes")
        ):
            repair_result = mechanical_repair(
                answer_text, verdict["unverified_quotes"], document_text
            )
            if repair_result["changes"]:
                new_verdict = _verify(repair_result["repaired_text"])
                if new_verdict["n_verified"] >= verdict["n_verified"]:
                    pre_repair_verdict = verdict
                    answer_text = repair_result["repaired_text"]
                    verdict = new_verdict
                    repair_changes = list(repair_result["changes"])

            max_reprompts = int(policy.get("repair_max_reprompts", 0))
            for _ in range(max_reprompts):
                if (
                    verdict["audit_mode"] == "STRICT"
                    or not verdict.get("unverified_quotes")
                ):
                    break
                new_text = reprompt_repair(
                    chat_client=client,
                    model_id=model_id,
                    original_messages=messages,
                    original_answer=answer_text,
                    failed_quotes=verdict["unverified_quotes"],
                    policy=policy,
                )
                if not new_text:
                    break
                new_verdict = _verify(new_text)
                if new_verdict["n_verified"] > verdict["n_verified"]:
                    if pre_repair_verdict is None:
                        pre_repair_verdict = verdict
                    answer_text = new_text
                    verdict = new_verdict
                    repair_changes.append({
                        "action": "reprompt_rewrite",
                        "diagnosis": "model_feedback_loop",
                    })
                else:
                    break

    unverified_blob = (
        json.dumps(verdict["unverified_quotes"], separators=(",", ":"))
        if verdict["unverified_quotes"]
        else None
    )

    leaves = [bytes.fromhex(r["leaf_hash"]) for r in chunk_rows]
    tree = MerkleTree.build(leaves)
    proof_obj = {
        "document_root": document_root,
        "chunk_0_proof": proof_to_dict(tree.proof(0)),
    }
    proof_blob = json.dumps(proof_obj, separators=(",", ":"))

    # Per-run Merkle-DAG (see arborist/qa/dag.py). Single-doc shape:
    # the only "source" is document_root. Quote mode: 7 stages base
    # (8 with #000009 preflight). Pointer mode: 9 stages base (10 with
    # preflight); context drops out and answer splits into raw_answer
    # / parsed_claim_lattice / render.
    ev_root = evidence_map_root(evidence_map) if evidence_map else None
    parsed_lattice = None
    is_lattice_mode = answer_mode in ("claim_lattice_pointer", "claim_lattice")
    if is_lattice_mode:
        # Per-claim list of {claim_text, content-addressed evidence_ids}
        # for the parsed_claim_lattice node hash. Pointer ids are
        # run-dependent; evidence_ids are content-addressed → the run-
        # DAG hashes the run-stable form. Same shape for JSON and
        # pointer; verifier already returns evidence_id_pairs.
        evidence_id_pairs = verdict.get("evidence_id_pairs") or []
        parsed_lattice = [
            {
                "claim_text": cs.get("text", ""),
                "evidence_ids": evidence_id_pairs[i] if i < len(evidence_id_pairs) else [],
            }
            for i, cs in enumerate(verdict.get("claim_statuses") or [])
        ]
    # Ticket #000009 — preflight node binding (mirror of query();
    # nested CTI clauses per ticket §8 / 2026-05-04 feedback).
    from arborist.qa.dag import preflight_node_hash
    # verifier_policy_hash + model_profile_hash imported at module
    # top; do NOT re-import locally (free-variable shadowing).
    ghash_for_dag = verifier_policy_hash(policy)
    claim_cap_actually_applied = (
        claim_cap_lookup
        if (quantifier_apply_caps
            and quantifier_caps_mode_gated
            and claim_cap_lookup is not None)
        else None
    )
    reminder_eligible = (
        quantifier_guard_on
        and quantifier_mode_gated
        and quantifier.get("is_broad", False)
    )
    reminder_enabled = bool(policy.get("quantifier_reminder_enabled", False))
    reminder_injected = reminder_eligible and reminder_enabled
    reminder_template_id = None
    if reminder_injected:
        reminder_template_id = (
            "broad-quantifier-bounded-v1"
            if quantifier.get("scope_bound_hint") == "bounded"
            else "broad-quantifier-unbounded-v1"
        )
    # Build payload + hash separately so we can persist both into
    # run_dag_blob (Ticket #000009 §7.2 — `arborist providence
    # --show-preflight` renders the full clause set).
    from arborist.qa.dag import (
        _canonical_json as _runner_canon,
        _sha256_hex as _runner_sha,
        build_preflight_node_payload as _runner_build_payload,
    )
    _runner_preflight_payload = _runner_build_payload(
        question_state=question_state.to_dict(),
        quantifier=quantifier,
        answer_contract={
            "guard_enabled": quantifier_guard_on,
            "mode_gated": quantifier_mode_gated,
            "apply_caps_active": quantifier_apply_caps,
            "apply_caps_mode_gated": quantifier_caps_mode_gated,
            "claim_cap_resolved": claim_cap_lookup,
            "claim_cap_applied": claim_cap_actually_applied,
            "manual_quotes_allowed": False,
            "evidence_pointer_required": is_lattice_mode,
            "allow_unbounded_enumeration": False,
            "reject_broad_active": bool(
                policy.get("quantifier_reject_broad", False)
            ),
            "metacognition_enabled": bool(
                policy.get("metacognition_enabled", True)
            ),
            "block_on_contradiction": bool(
                policy.get("metacognition_block_on_contradiction", False)
            ),
        },
        prompt_contract={
            "reminder_enabled": reminder_enabled,
            "reminder_injected": reminder_injected,
            "reminder_template_id": reminder_template_id,
        },
        evidence_contract={
            "max_evidence_ids_exposed": int(policy.get(
                "claim_lattice_max_pointers_per_claim", 2
            )),
            "one_claim_per_line": is_lattice_mode,
        },
        policy_refs={
            "governance_policy_hash": ghash_for_dag,
            "model_profile_hash": mhash,
            "answer_mode": answer_mode,
        },
    )
    preflight_hash = _runner_sha(_runner_canon(_runner_preflight_payload))
    run_dag = build_run_dag(
        question_hash=qhash,
        sources=[{
            "document_root": document_root,
            "source_role": "primary_answer_source",
            "score": None,
            "chunk_idx": None,
        }],
        context_root=document_root,
        conversation_hash=chash,
        answer_text=answer_text,
        audit_mode=verdict["audit_mode"],
        verifier_method=verdict["verifier_method"],
        n_quotes=verdict["n_quotes"],
        n_verified=verdict["n_verified"],
        claim_statuses=verdict.get("claim_statuses", []),
        lookup_path="miss",
        evidence_map_root=ev_root,
        answer_mode=answer_mode if answer_mode != "quote" else None,
        violations=verdict.get("violations"),
        raw_answer_text=raw_answer if is_lattice_mode else None,
        parsed_lattice=parsed_lattice,
        rendered_text=answer_text if is_lattice_mode else None,
        preflight_hash=preflight_hash,
        preflight_payload=_runner_preflight_payload,
    )
    run_dag_blob = json.dumps(run_dag, separators=(",", ":"))

    now = int(time.time())
    with transaction(conn):
        if repair_changes and pre_repair_verdict is not None:
            append_audit(
                conn,
                event_type="providence_repair",
                subject_root=ckey,
                body={
                    "kind": "mechanical",
                    "n_changes": len(repair_changes),
                    "changes": repair_changes,
                    "pre_audit_mode": pre_repair_verdict["audit_mode"],
                    "post_audit_mode": verdict["audit_mode"],
                    "pre_n_verified": pre_repair_verdict["n_verified"],
                    "post_n_verified": verdict["n_verified"],
                },
                ts=now,
            )
        event_hash = append_audit(
            conn,
            event_type="providence_write",
            subject_root=ckey,
            body={
                "source_root": document_root,
                "model_id": model_id,
                "revision": revision,
                "quantization": quantization,
                "chunks_in_context": len(chunk_rows),
                "answer_chars": len(answer_text),
                "audit_mode": verdict["audit_mode"],
                "n_quotes": verdict["n_quotes"],
                "n_verified": verdict["n_verified"],
                "verifier_method": verdict["verifier_method"],
            },
            ts=now,
        )
        # Capital ledger (ticket #000020). Sibling table; advisory.
        from arborist.capital import profile_for_op, record as capital_record

        capital_profile, capital_inputs = profile_for_op(
            "qa",
            {
                "cache_hit": False,
                "answer_chars": len(answer_text),
                "llm_seconds": llm_ms / 1000.0,
            },
        )
        capital_record(
            conn,
            audit_event_hash=event_hash,
            op_type="qa",
            profile=capital_profile,
            estimator_inputs=capital_inputs,
            ts=now,
        )
        conn.execute(
            # ON CONFLICT(cache_key) DO NOTHING: the cache lookup above runs
            # outside this transaction, so two concurrent ask()s on the same
            # cache_key can both miss and both reach this INSERT — the loser
            # no-ops instead of raising UNIQUE constraint failed (its answer is
            # equivalent: same question/model/policy ⇒ same cache_key).
            "INSERT INTO providence_cache "
            "(cache_key, source_root, document_uri, question_hash, question_text, "
            " answer_text, merkle_proof, model_profile_hash, conversation_hash, "
            " governance_policy_hash, schema_version, canonicalization_version, "
            " chunking_version, falsification_state, chain, audit_event_hash, "
            " created_at, hit_count, audit_mode, n_quotes, n_verified, "
            " unverified_quotes, verifier_method, run_dag_root, run_dag_blob) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'live', ?, ?, ?, 0, "
            " ?, ?, ?, ?, ?, ?, ?) "
            "ON CONFLICT(cache_key) DO NOTHING",
            (
                ckey,
                document_root,
                doc["document_uri"],
                qhash,
                question,
                answer_text,
                proof_blob,
                mhash,
                chash,
                ghash,
                SCHEMA_VERSION,
                CANONICALIZATION_VERSION,
                doc["chunking_version"],
                chain,
                event_hash,
                now,
                verdict["audit_mode"],
                verdict["n_quotes"],
                verdict["n_verified"],
                unverified_blob,
                verdict["verifier_method"],
                run_dag["root"],
                run_dag_blob,
            ),
        )

    # #000037 Phase 2 — emit advisory controller_events for this QA
    # cycle. Single-branch decision, byte-cheap. Wrapped so any
    # controller-emit error never blocks the QA return. Audit-only.
    try:
        _emit_qa_controller_advisory(conn, ckey, verdict)
    except Exception:  # pragma: no cover — advisory must not break QA
        pass

    from arborist.qa.dag import localize_failure as _localize
    failure_stage = _localize(
        audit_mode=verdict["audit_mode"],
        n_sources=1,  # ask() runs against one document
        n_quotes=verdict["n_quotes"],
        n_verified=verdict["n_verified"],
    )
    return {
        "status": "cache_miss_then_written",
        "audit_mode": verdict["audit_mode"],
        "cache_key": ckey,
        "run_dag_root": run_dag["root"],
        "lookup_path": "miss",
        "failure_stage": failure_stage,
        "repair_changes": repair_changes,
        "pre_repair_audit_mode": (
            pre_repair_verdict["audit_mode"] if pre_repair_verdict else None
        ),
        "source_root": document_root,
        "answer_text": answer_text,
        "merkle_proof": proof_obj,
        "n_quotes": verdict["n_quotes"],
        "n_verified": verdict["n_verified"],
        "verifier_method": verdict["verifier_method"],
        "unverified_quotes": verdict["unverified_quotes"],
        "partially_verified_quotes": verdict.get("partially_verified_quotes") or [],
        # Quantifier preflight (Ticket #000008 Phase 1+2). See query.py
        # for full rationale; runner.ask carries the same schema for
        # CLI-side `arborist ask` parity with `arborist query`.
        "quantifier_intensity": quantifier["intensity"],
        "quantifier_matched_token": quantifier["matched_token"],
        "scope_bound_hint": quantifier["scope_bound_hint"],
        "quantifier_explicit_count": quantifier["explicit_count"],
        "claim_cap_applied": claim_cap_lookup,
        # Ticket #000010 — meta-cognition QuestionState.
        "question_state": question_state.to_dict(),
        # Sidecar smell signals (claim_lattice mode only) — render-
        # layer; never persisted, never in run_dag_root.
        "pointer_id_distribution": verdict.get("pointer_id_distribution"),
        "lazy_anchor_ratio": verdict.get("lazy_anchor_ratio"),
        "timings": {
            "cache_lookup_ms": cache_lookup_ms,
            "llm_ms": llm_ms,
            "total_ms": _ms_since(t_start),
        },
    }