"""Post-LLM faithfulness check: did the answer ground its claims in context?
Three layered strategies, tried in order. The first one that finds evidence
classifies the answer. ``verifier_method`` on the result records which path
fired so the audit chain stays diagnostic.
1. **quote** — model wrapped claims in double quotes per system prompt.
Strongest signal — explicit, verbatim, model-asserted.
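2. **span** — no quotes, but bullet/sentence-level lines from the answer
appear verbatim in context. Catches models that quote inline without
``"..."`` marks. A span that misses the verbatim test can still verify
through the token-coverage paraphrase fallback; when that happens
``verifier_method`` records ``paraphrase`` instead of ``span``.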
3. **entity** — no quotes and no span match, but multi-word proper-noun
phrases from the answer appear verbatim in context. Catches the
Wikipedia-infobox-to-prose case: the model paraphrases structure so
spans diverge, but every named entity is intact and grounded.
Each strategy classifies into v9.8's audit-mode trichotomy (RAG-adapted
vocabulary; substrate calls UNGROUNDED "VISUAL"):
- **STRICT** — every evidence unit (>=1) verifies verbatim against context
- **HYBRID** — some verify, others do not (mixed source / emergent)
- **UNGROUNDED** — no evidence, or none verify (purely emergent)
``unverified_quotes`` (kept under that name for schema continuity) collects
spans the model produced that don't appear in any source — the
corpus-growth signal mined by `arborist emergent`.
Hard rule (CLAUDE.md "soft hash vs hard hash"): every check is a lexical
substring test under norm-v1 + lowercase canonicalization. No embeddings,
no semantic similarity, no fuzzy alignment. The contract is "this token
sequence either is or isn't in the context."
Wikitext context is run through ``arborist.wikitext.to_base`` before the
substring test. The corpus stores raw wikitext (so the link graph is
recoverable from any page), but the LLM produces clean prose. Without
the strip, every wikilink-carrying source paragraph compares as
"different surface form" and the verifier wrongly reports UNGROUNDED on
genuine source-grounded quotes. With the strip, paraphrases of *markup*
(``[[Cloud]]`` vs ``Cloud``) verify, while paraphrases of *prose* still
flag honestly. mwparserfromhell is an optional dep; if absent, the
strip is a no-op and verification falls back to today's behavior.
"""
from __future__ import annotations
import re
import unicodedata
from arborist.qa.warrant import warrant_check
# Deferred import: arborist.qa.inspect imports arborist.compress &
# arborist.store at module load. The verifier doesn't need either
# until deflection actually runs, so defer to call-site to keep
# import order clean if anything else imports verify.py.
try:
from arborist.wikitext import to_base as _wikitext_to_base
except ImportError: # pragma: no cover
_wikitext_to_base = None
# Locate every double-quote character (ASCII or curly). Sequential
# pairing in extract_quotes() turns these into intentional (open, close)
# pairs: 1st & 2nd char, 3rd & 4th, etc. Pure-regex pairing fails on
# adjacent quote pairs like `"title" prose "quote"` — the regex captures
# `prose` as a "quoted span" because every `"` looks like both an opener
# and a closer to it.
# Matches one double-quote character: ASCII `"` or either curly form.
_QUOTE_CHAR_RE = re.compile(r'["“”]')
# Bullet markers at line start: -, *, +, •, 1., 2), etc.
# The marker must be followed by at least one whitespace character.
_BULLET_RE = re.compile(r'^\s*(?:[-*+•]|\d+[.)])\s+')
# Sentence boundary: punctuation + whitespace + capital letter.
# Zero-width on both sides, so only the whitespace run itself matches.
# The lookahead is ASCII-only ([A-Z]): a sentence that starts with a
# digit, a quote, or a non-ASCII capital is not treated as a boundary.
_SENT_RE = re.compile(r'(?<=[.!?])\s+(?=[A-Z])')
# Multi-word capitalized phrase. Two or more whitespace-separated tokens,
# each starting with a capital letter (allowing initials like "A.", hyphens
# like "Carrie-Anne", and trailing lowercase like "Smith"). Matches "Keanu
# Reeves", "Thomas A. Anderson", "Carrie-Anne Moss", "Agent Smith". Skips
# single capitalized words to avoid sentence-starter false positives.
_PROPER_NOUN_RE = re.compile(
    # Token gap is non-newline whitespace so a phrase never crosses a
    # paragraph break. Caught a real case where "Joe Pantoliano\n\nThe
    # sources" matched as one phrase via \s+.
    r"\b[A-Z][A-Za-z'’\-]*(?:[ \t]+(?:[A-Z]\.|[A-Z][A-Za-z'’\-]+))+\b"
)
# Length floors for extracted evidence units. NOTE(review): presumably
# applied by extract_quotes() / extract_claim_spans(), which are defined
# outside this section — confirm the exact comparison (>= vs >) there.
MIN_QUOTE_CHARS = 8
MIN_SPAN_CHARS = 12
# Trailing parenthetical the model often appends to verbatim source
# prose, defeating substring match even when the prose itself IS in
# the corpus. Examples we strip:
#
# "...lightning-based attacks. (Source: https://...)"
# "...invented in 1976. (citing Wikipedia)"
# "...protagonist of the game. (see Pikachu_(character))"
# "...released in 1997 (https://en.wikipedia.org/...)"
#
# Conservative regex: only strips a SINGLE trailing parenthetical at
# end-of-string after optional whitespace, and only when the contents
# either start with a recognized citation cue OR contain a URL. Refuses
# to strip parentheticals that look like genuine prose (e.g.
# "Pikachu (a Pokémon species)").
_TRAILING_CITATION_RE = re.compile(
r"""
\s*\(\s*
(?: # one of:
(?:source|src|citing|see|ref|reference|from) # citation cue word
\s*:?\s*[^()]* # optional content
| # OR
https?://\S+\s*[^()]* # URL-led parenthetical
)
\s*\)\s*$
""",
re.IGNORECASE | re.VERBOSE,
)
def _strip_trailing_citation(text: str) -> str:
"""Drop a trailing `(Source: ...)`-like parenthetical, if any.
Idempotent: applies once. The model rarely chains citations, so we
don't loop. Returns the input unchanged if no match.
"""
return _TRAILING_CITATION_RE.sub("", text).rstrip()
# Paraphrase strategy thresholds. Paraphrase is the fallback inside the
# span strategy: a span that fails the verbatim substring test is still
# accepted as "paraphrase-verified" when token coverage in the source
# context crosses ``DEFAULT_PARAPHRASE_COVERAGE`` and the span has at
# least ``DEFAULT_PARAPHRASE_MIN_TOKENS`` content tokens (>= 4 chars,
# stopwords excluded). The entity strategy is tried only when no span
# verifies either way.
#
# Soft-signal note: token overlap is a heuristic, NOT byte-equivalence.
# The hard chain still records `verifier_method = 'paraphrase'` so an
# auditor can distinguish lexical-verbatim from paraphrase-overlap.
# A span verified via paraphrase contributes to `n_verified` but the
# audit_mode trichotomy stays unchanged: STRICT requires all units
# (quote/span/entity/paraphrase) to verify; HYBRID = some verify some
# don't; UNGROUNDED = none.
# Minimum fraction of a span's content tokens that must occur in the
# normalized context for the paraphrase fallback to accept the span.
DEFAULT_PARAPHRASE_COVERAGE = 0.85
# Minimum number of content tokens a span must have before its coverage
# score is trusted at all.
DEFAULT_PARAPHRASE_MIN_TOKENS = 4
# Entity-path policies. Entity-existence in source is circumstantial, not
# claim-level proof — the model could correctly name entities while making
# claims around them that came from training. These four policies span the
# trade-off: max-trust to no-trust.
#
# strict all entities verify → STRICT (legacy behavior; overclaims)
# hybrid any entity verifies → HYBRID (honest cap; safe default)
# drop skip entity path entirely → UNGROUNDED (most conservative)
# proximity STRICT only if N verified entities cluster within W chars
# of each other in context (e.g. an infobox cast list).
# Otherwise demotes to HYBRID/UNGROUNDED based on partial match.
# Accepted values for verify_quotes(entity_policy=...); any other value
# raises ValueError there.
ENTITY_POLICIES = ("strict", "hybrid", "drop", "proximity")
DEFAULT_ENTITY_POLICY = "proximity"
# Proximity tuning. N entities within W chars of each other.
# W is measured between match offsets in the normalized context.
DEFAULT_PROXIMITY_N = 3
DEFAULT_PROXIMITY_WINDOW = 300
# Framing phrases that aren't real claims even when they're in source. We
# strip these from the answer before span/entity extraction so they don't
# become noise in unverified_quotes.
# Entries are compared with str.startswith against _normalize() output,
# so they must stay lowercase with single spaces. Because the test is a
# prefix test, "based on the provided sources" and "according to the
# sources" are already covered by their singular forms; they are kept
# for readability.
_FRAMING_PREFIXES = (
    "based on the provided sources",
    "based on the provided source",
    "based on the source",
    "based on the document",
    "the sources do not",
    "the source does not",
    "the document does not",
    "according to the sources",
    "according to the source",
)
def _normalize(s: str) -> str:
"""norm-v1 + lowercase. Same canonicalization as chunk leaf hashing,
plus case-folding so the verifier doesn't fail on capitalization drift
between source prose and the model's quoted span."""
s = unicodedata.normalize("NFC", s)
s = " ".join(s.split())
return s.lower()
def _is_framing(span: str) -> bool:
    """Return True when ``span``, once normalized, opens with a framing phrase.

    Framing phrases are the entries of ``_FRAMING_PREFIXES``.
    """
    # str.startswith accepts a tuple of candidate prefixes.
    return _normalize(span).startswith(_FRAMING_PREFIXES)
def _classify(verified: list[str], unverified: list[str]) -> str:
if verified and not unverified:
return "STRICT"
if verified:
return "HYBRID"
return "UNGROUNDED"
def _has_entity_cluster(
    verified_entities: list[str],
    norm_ctx: str,
    n: int,
    window: int,
) -> bool:
    """Do at least ``n`` distinct verified entities sit within ``window`` chars?

    Crude proxy for "the source has a section about these entities"
    (cast list, infobox, roster) versus "the source incidentally
    mentions them in scattered prose."

    Entities are de-duplicated on their normalized form first. Without
    that, one entity repeated in the answer would contribute several
    identical offsets and could satisfy ``n`` alone, with a span of zero.

    Only the first occurrence of each entity in ``norm_ctx`` is located,
    so a cluster formed purely by later mentions is not detected.

    Args:
        verified_entities: Entities that already substring-matched the context.
        norm_ctx: Context after ``_normalize``.
        n: Minimum number of distinct entities that must cluster.
        window: Maximum distance, in characters of ``norm_ctx``, between
            the first and last of the ``n`` match offsets.

    Returns:
        True when some run of ``n`` offsets spans at most ``window``.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct = dict.fromkeys(_normalize(entity) for entity in verified_entities)
    if len(distinct) < n:
        return False

    offsets = sorted(
        offset
        for offset in (norm_ctx.find(key) for key in distinct)
        if offset >= 0
    )
    if len(offsets) < n:
        return False

    # Offsets are sorted, so the tightest group of n that starts at i is
    # offsets[i : i + n]; checking its endpoints is sufficient.
    for i in range(len(offsets) - n + 1):
        if offsets[i + n - 1] - offsets[i] <= window:
            return True
    return False
def _check_each(items: list[str], norm_ctx: str) -> tuple[list[str], list[str]]:
    """Split ``items`` by whether each one occurs verbatim in the context.

    An item counts as verified when its normalized form is a substring of
    ``norm_ctx``. Input order is preserved inside each bucket.

    Returns:
        ``(verified, unverified)``.
    """
    hits: list[str] = []
    misses: list[str] = []
    for candidate in items:
        bucket = hits if _normalize(candidate) in norm_ctx else misses
        bucket.append(candidate)
    return hits, misses
def _token_coverage(
    span: str,
    norm_ctx: str,
    *,
    min_token_len: int = 4,
) -> tuple[float, int]:
    """Share of a span's content tokens that occur anywhere in ``norm_ctx``.

    The span is normalized and split on whitespace, and each token has
    its edge punctuation (``_TOKEN_PUNCT_STRIP``) removed so ``batman,``
    lines up with a bare ``batman`` in the context. That stripping is
    done here rather than in ``_normalize`` because the substring path
    needs punctuation preserved.

    A token counts as content only when it:

    1. has at least ``min_token_len`` characters, which drops short
       function words (``the``, ``of``, ``is``), and
    2. is not in ``_ENGLISH_STOPWORDS``, which drops longer filler
       (``from``, ``with``, ``which``) that matches nearly any English
       text and would inflate the score.

    Presence is a plain substring test of the token against the whole
    context string. Both filters apply to numerator and denominator
    alike, so a span with no content tokens yields ``(0.0, 0)`` instead
    of dividing by zero.

    Returns:
        ``(coverage_fraction, content_token_count)``.
    """
    bare_tokens = (
        piece.strip(_TOKEN_PUNCT_STRIP) for piece in _normalize(span).split()
    )
    content = [
        tok
        for tok in bare_tokens
        if len(tok) >= min_token_len and tok not in _ENGLISH_STOPWORDS
    ]
    if not content:
        return (0.0, 0)
    found = sum(1 for tok in content if tok in norm_ctx)
    return (found / len(content), len(content))


# Per-token punctuation stripped before the coverage check. Tokens like
# `wayne,` and `bruce!` line up with bare `wayne` / `bruce` in context.
# We don't strip apostrophes (``'``) so possessives stay distinct:
# ``batman's`` is a different content token from ``batman``.
_TOKEN_PUNCT_STRIP = ".,;:!?\"()[]{}"
# Digit-run extractor for the paraphrase numeric-agreement gate. A run is
# one or more digits with optional *internal* thousands-comma separators
# ("8,849"); the comma is stripped for comparison so "8,849" and "8849"
# collapse, but "300" stays distinct from "300000" (← "300,000"). Decimal
# fractions split at the dot ("3.14" → {"3", "14"}) — fine for the
# near-miss patterns this gate targets (years / counts / magnitudes).
_DIGIT_RUN_RE = re.compile(r"\d[\d,]*\d|\d")
def _numeric_signature(text: str) -> frozenset[str]:
"""Comma-stripped digit-runs in ``text``.
Used by :func:`_check_each_with_paraphrase`: a span that token-covers
the source but asserts a *number the source does not contain* is not
paraphrase-grounded — it's a near-miss the lexical coverage check
can't see ("Water boils at 50 degrees" vs source "...100 degrees..."
token-covers 100% because "50"/"100" aren't content tokens). The gate
is conservative on legitimate paraphrases: a number written with or
without thousands commas matches; a number absent from the source
(a rounding, a magnitude error, a swapped year) does not — and a
rounding-paraphrase demoting from paraphrase-grounded to unverified
is the honest call (it isn't a verbatim grounding).
"""
return frozenset(m.group(0).replace(",", "") for m in _DIGIT_RUN_RE.finditer(text))
_SENTENCE_BREAK_RE = re.compile(r"[.!?](?=\s)")
def _is_single_sentence(text: str) -> bool:
"""One sentence — no internal ``. `` / ``! `` / ``? `` break (a trailing
terminator with nothing after it is fine). A numbered/bulleted list
("1. … 2. …") or a multi-sentence summary is not single; "Insulin
was discovered by Alexander Fleming." is. Conservative: a borderline
abbreviation ("8,849 m. tall.") reads as not-single, so the gate it
guards simply doesn't fire — the safe direction."""
return not _SENTENCE_BREAK_RE.search(text.strip())
def _entity_salient_disagrees(answer_text: str, norm_ctx: str) -> bool:
    """Entity-strategy gate (ticket #000048): does the answer assert a
    salient token or number that the source lacks?

    Two checks, in order:

    1. Numbers: every digit run in the normalized answer must also occur
       in ``norm_ctx`` (comma-insensitive, via ``_numeric_signature``).
    2. Salient words: every answer token that is longer than 4
       characters after edge-punctuation stripping, starts with an
       uppercase letter, and is not a stopword must occur (lowercased)
       as a substring of ``norm_ctx``.

    ``verify_quotes`` consults this only for the weakest entity
    grounding: a single-sentence answer with at most one verified
    entity and no cluster. There, "Insulin was discovered by Alexander
    Fleming" against a source about penicillin matches on the shared
    name while the swapped subject ("Insulin") is the falsehood, so the
    grounding is declined.

    The answer is NFC-composed before tokenizing. ``norm_ctx`` is
    already NFC (see ``_normalize``), so without this step a token
    typed with decomposed accents would never substring-match and
    would be reported as a disagreement.

    Returns:
        True when the answer contains a number or salient capitalized
        token that is absent from ``norm_ctx``.
    """
    answer_numbers = _numeric_signature(_normalize(answer_text))
    if not answer_numbers <= _numeric_signature(norm_ctx):
        return True

    for raw in unicodedata.normalize("NFC", answer_text).split():
        core = raw.strip(_TOKEN_PUNCT_STRIP)
        # The length gate also guarantees core[0] exists.
        if len(core) <= 4 or not core[0].isupper():
            continue
        low = core.lower()
        if low in _ENGLISH_STOPWORDS:
            continue
        if low not in norm_ctx:
            return True
    return False
# Common English stopwords of length >= 4 chars. Hand-curated rather
# than imported from NLTK to keep arborist dependency-light and the
# behavior pinned to a known set. Tokens are normalized form
# (lowercase, NFC). Includes auxiliaries, prepositions, pronouns,
# wh-words, conjunctions, and high-frequency adverbs/quantifiers that
# carry little topical signal.
_ENGLISH_STOPWORDS = frozenset({
# auxiliaries / be-forms (>= 4)
"have", "been", "being", "were", "will", "would", "could", "should",
"might", "must", "shall",
# prepositions / particles (>= 4)
"from", "with", "into", "onto", "upon", "over", "under", "after",
"before", "between", "through", "across", "above", "below", "behind",
"beside", "beyond", "during", "without", "within", "until", "since",
"about", "around", "along", "among",
# demonstratives / pronouns (>= 4)
"this", "that", "these", "those", "their", "them", "they", "there",
"here", "your", "yours", "ours", "mine",
# wh-words (>= 4)
"what", "when", "where", "which", "while", "whom", "whose",
"whoever", "whatever", "wherever", "whenever",
# conjunctions (>= 4)
"because", "although", "however", "therefore", "though", "unless",
# adverbs / quantifiers (>= 4)
"also", "very", "much", "many", "more", "most", "less", "some",
"such", "even", "ever", "just", "only", "than", "then", "still",
"again", "always", "never", "often", "rather", "really", "quite",
"very", "well", "back", "next",
# auxiliaries / discourse (>= 4)
"also", "both", "each", "every", "into", "like",
})
def _is_prose_span(span: str, *, min_lowercase_content: int = 2) -> bool:
"""Heuristic: does this span look like prose (eligible for paraphrase
matching) vs. a list of proper nouns (better matched by entity)?
Counts tokens in the ORIGINAL (pre-normalize) span whose first
letter is lowercase and whose length is >= 4 chars. A pure list
like ``"Keanu Reeves, Laurence Fishburne"`` has zero such tokens —
those go through the entity path. A real sentence like ``"Pikachu
is a species of Pokémon creatures..."`` has multiple ("species",
"creatures", "from", etc.) and qualifies for paraphrase.
Threshold ``min_lowercase_content=2`` lets through "the cast..."
(1 lowercase content token: "cast") only if more lowercase prose
is present. Lists with a leading "The" don't sneak through.
"""
n_lower = 0
for tok in span.split():
# Strip leading/trailing punctuation for the case check (so
# "cast:" still counts as a lowercase token).
core = tok.strip(".,;:!?\"'()[]{}")
if len(core) < 4:
continue
if core[0].islower():
n_lower += 1
if n_lower >= min_lowercase_content:
return True
return False
def _build_claim_statuses(
*,
verified: list[str] | None = None,
paraphrase: list[str] | None = None,
unverified: list[str] | None = None,
method: str,
) -> list[dict]:
"""Per-evidence-unit status list. Three labels drawn from the toy-
Hermes taxonomy:
VERIFIED_QUOTE unit substring-matched in normalized context
(any of quote/span/entity strategies)
SUPPORTED_PARAPHRASE unit cleared the paraphrase token-coverage
threshold (>=85% topical tokens present)
UNSUPPORTED unit didn't match anything
Soft-signal labels (QUOTE_INTEGRITY_FAILED, SOURCE_MISMATCH,
FALSIFIED) live in the sidecar / falsification machinery, not on
the binary verifier output — see the
``feedback_verifier_no_diagnostics`` discipline. Order preserved so
callers can map back to the model's original answer ordering.
"""
out: list[dict] = []
for t in verified or []:
out.append({"text": t, "status": "VERIFIED_QUOTE", "method": method})
for t in paraphrase or []:
out.append(
{
"text": t,
"status": "SUPPORTED_PARAPHRASE",
"method": "paraphrase",
}
)
for t in unverified or []:
out.append({"text": t, "status": "UNSUPPORTED", "method": method})
return out
def _check_each_with_paraphrase(
    items: list[str],
    norm_ctx: str,
    *,
    paraphrase_coverage: float = DEFAULT_PARAPHRASE_COVERAGE,
    paraphrase_min_tokens: int = DEFAULT_PARAPHRASE_MIN_TOKENS,
) -> tuple[list[str], list[str], list[str]]:
    """Three-bucket variant of ``_check_each``.

    Each item lands in exactly one bucket:

    - verbatim: its normalized form is a substring of ``norm_ctx``.
    - paraphrase: not verbatim, but it is prose-shaped
      (``_is_prose_span``), has at least ``paraphrase_min_tokens``
      content tokens, reaches ``paraphrase_coverage`` token coverage,
      and every digit run it contains also occurs in ``norm_ctx``.
    - unverified: everything else, including non-prose items such as
      bare lists of proper nouns, which are left for the entity
      strategy.

    The numeric gate (#000046) covers what coverage cannot see: "Water
    boils at 50 degrees" against a source that says 100 has full token
    coverage, because the numbers are too short to be content tokens.

    Paraphrase acceptance is a heuristic. Callers that need verbatim
    evidence only should use ``_check_each``.

    Returns:
        ``(strict_verified, paraphrase_verified, unverified)``, each in
        input order.
    """
    verbatim: list[str] = []
    soft: list[str] = []
    rejected: list[str] = []
    source_numbers = _numeric_signature(norm_ctx)

    for item in items:
        norm_item = _normalize(item)
        if norm_item in norm_ctx:
            verbatim.append(item)
            continue

        accepted = False
        if _is_prose_span(item):
            coverage, n_content = _token_coverage(item, norm_ctx)
            accepted = (
                n_content >= paraphrase_min_tokens
                and coverage >= paraphrase_coverage
                and _numeric_signature(norm_item) <= source_numbers
            )
        (soft if accepted else rejected).append(item)

    return verbatim, soft, rejected
def _entity_audit_mode(
    entity_policy: str,
    verified: list[str],
    unverified: list[str],
    answer_text: str,
    norm_ctx: str,
    proximity_n: int,
    proximity_window: int,
) -> str:
    """Audit mode for the entity path; requires a non-empty ``verified``."""
    if entity_policy == "strict":
        # Legacy behavior: all match -> STRICT. Overclaims.
        return _classify(verified, unverified)
    if entity_policy == "hybrid":
        # Cap at HYBRID: entity-existence is not claim-existence.
        return "HYBRID"

    # proximity
    if _has_entity_cluster(verified, norm_ctx, proximity_n, proximity_window):
        return "HYBRID" if unverified else "STRICT"
    # #000048: weakest grounding — a single sentence whose lone matching
    # proper noun is not part of a cluster. If the answer also asserts a
    # salient capitalized token or number the source lacks (a swapped
    # subject, city or year), the shared proper noun does not ground it.
    if (
        len(verified) <= 1
        and _is_single_sentence(answer_text)
        and _entity_salient_disagrees(answer_text, norm_ctx)
    ):
        return "UNGROUNDED"
    # At least one entity verified (caller guarantee), so partial credit.
    return "HYBRID"


def verify_quotes(
    answer_text: str,
    context: str,
    *,
    entity_policy: str = DEFAULT_ENTITY_POLICY,
    proximity_n: int = DEFAULT_PROXIMITY_N,
    proximity_window: int = DEFAULT_PROXIMITY_WINDOW,
    paraphrase_coverage: float = DEFAULT_PARAPHRASE_COVERAGE,
    paraphrase_min_tokens: int = DEFAULT_PARAPHRASE_MIN_TOKENS,
) -> dict:
    """Classify an answer's grounding against its retrieved context.

    Strategies run in order and the first one that classifies wins:

    1. quote — if the answer contains double-quoted spans, they are
       checked verbatim and the result is returned, even when none
       verify.
    2. span — bullet/sentence spans are checked verbatim, then through
       the paraphrase (token-coverage) fallback. The result is returned
       only if at least one span verified; otherwise control falls
       through.
    3. entity — multi-word proper nouns, weighted by ``entity_policy``
       (see ``ENTITY_POLICIES``). Skipped for ``"drop"``. The result is
       returned only if at least one entity verified.

    If nothing classifies, the answer is reported UNGROUNDED with
    method ``"none"`` and empty evidence lists.

    Args:
        answer_text: The model's answer.
        context: Retrieved source text. It is passed through the
            wikitext stripper when that optional dependency imported.
        entity_policy: One of ``ENTITY_POLICIES``.
        proximity_n: Entities required for a cluster (proximity policy).
        proximity_window: Cluster width in normalized-context characters.
        paraphrase_coverage: Coverage threshold for the span fallback.
        paraphrase_min_tokens: Content-token floor for the span fallback.

    Returns:
        A dict with these keys::

            n_quotes: int            # evidence units extracted
            n_verified: int          # units that verified (verbatim or
                                     # paraphrase)
            audit_mode: str          # STRICT | HYBRID | UNGROUNDED
            unverified_quotes: [str] # units not grounded in context
            verifier_method: str     # 'quote' | 'span' | 'paraphrase'
                                     # | 'entity' | 'none'
            claim_statuses: [dict]   # per-unit text/status/method

    Raises:
        ValueError: ``entity_policy`` is not in ``ENTITY_POLICIES``.
    """
    if entity_policy not in ENTITY_POLICIES:
        raise ValueError(
            f"entity_policy must be one of {ENTITY_POLICIES}, got {entity_policy!r}"
        )

    # Wikitext markup -> plain prose. Skipped if mwparserfromhell isn't
    # installed (extras: pip install 'arborist[wikitext]').
    if _wikitext_to_base is not None:
        context = _wikitext_to_base(context)
    norm_ctx = _normalize(context)

    # Strategy 1: explicit double-quoted spans.
    quotes = extract_quotes(answer_text)
    if quotes:
        verified, unverified = _check_each(quotes, norm_ctx)
        return {
            "n_quotes": len(quotes),
            "n_verified": len(verified),
            "audit_mode": _classify(verified, unverified),
            "unverified_quotes": unverified,
            "verifier_method": "quote",
            "claim_statuses": _build_claim_statuses(
                verified=verified, unverified=unverified, method="quote"
            ),
        }

    # Strategy 2: bullet/sentence spans, verbatim first, then paraphrase.
    # The method label flips to "paraphrase" as soon as any unit was only
    # soft-verified, so an auditor can tell the two apart.
    spans = extract_claim_spans(answer_text)
    if spans:
        strict, paraphrase, unverified = _check_each_with_paraphrase(
            spans,
            norm_ctx,
            paraphrase_coverage=paraphrase_coverage,
            paraphrase_min_tokens=paraphrase_min_tokens,
        )
        verified = strict + paraphrase
        if verified:
            return {
                "n_quotes": len(spans),
                "n_verified": len(verified),
                "audit_mode": _classify(verified, unverified),
                "unverified_quotes": unverified,
                "verifier_method": "paraphrase" if paraphrase else "span",
                "claim_statuses": _build_claim_statuses(
                    verified=strict,
                    paraphrase=paraphrase,
                    unverified=unverified,
                    method="span",
                ),
            }

    # Strategy 3: multi-word proper nouns. Entity-existence is weaker
    # proof than a quote or span; the operator picks its weight.
    if entity_policy != "drop":
        entities = extract_proper_nouns(answer_text)
        if entities:
            verified, unverified = _check_each(entities, norm_ctx)
            if verified:
                mode = _entity_audit_mode(
                    entity_policy,
                    verified,
                    unverified,
                    answer_text,
                    norm_ctx,
                    proximity_n,
                    proximity_window,
                )
                return {
                    "n_quotes": len(entities),
                    "n_verified": len(verified),
                    "audit_mode": mode,
                    "unverified_quotes": unverified,
                    "verifier_method": "entity",
                    "claim_statuses": _build_claim_statuses(
                        verified=verified,
                        unverified=unverified,
                        method="entity",
                    ),
                }

    # Nothing extracted, or nothing verified. Truly emergent.
    return {
        "n_quotes": 0,
        "n_verified": 0,
        "audit_mode": "UNGROUNDED",
        "unverified_quotes": [],
        "verifier_method": "none",
        "claim_statuses": [],
    }
# ---------------------------------------------------------------------------
# Claim-lattice-pointer verifier — G0 / CTI Clause Lattice Intelligence.
#
# Companion to ``verify_quotes`` for ``policy["answer_mode"] ==
# "claim_lattice_pointer"``. The model emits weak natural-language
# pointer clauses (``Claim text. [E12]``); the runtime parses them
# into structured claim nodes and runs deterministic checks against
# the runtime-built evidence map.
#
# Why pointer-line, not JSON: small instruction-tuned models follow
# citation-style prose far more reliably than free-form JSON. JSON
# discipline failures generate spurious one-shot SCHEMA_INVALID
# verdicts even when the model knew the right answer; pointer-line
# stays inside the model's prose-generation distribution.
#
# Why decimal pointer ids (E1, E2, …) not hex (E1f8e4c2a):
# - One BPE token per id in standard tokenizers (8-hex tokenizes to
# 4–6 tokens of out-of-distribution noise that nudges the model
# toward DSL/code mode).
# - Citation style (footnotes, references) is heavily represented in
# training; random hex is not.
# - The runtime maps pointer ids back to content-addressed evidence
# ids (``evidence_map_by_pointer_id``) for cache, run-DAG, audit.
#
# Hard-soft boundary: only deterministic checks. No entailment, no
# completeness, no predicate compatibility. Those stay sidecar.
# ---------------------------------------------------------------------------
# Recognised answer-mode names. "quote" is the default; the pointer mode
# is "claim_lattice_pointer". NOTE(review): "claim_lattice" is presumably
# the JSON-emitting variant served by the lenient JSON parser in this
# module — confirm at the call site.
ANSWER_MODES = ("quote", "claim_lattice_pointer", "claim_lattice")
DEFAULT_ANSWER_MODE = "quote"
# JSON-mode pre-parser. 8B and small-context models drift on JSON
# discipline (markdown fences, prose preamble, smart quotes, trailing
# commas). Larger reasoning models (Qwen 3.6 reasoner, Claude, GPT-4)
# emit valid JSON natively; the pre-parser is the defensive belt that
# keeps the JSON path survivable across the inference-quality spectrum.
# Lenient on syntax, strict on semantics: parsed JSON still has to
# pass the schema check & the same hard verifier rules as pointer mode.
# Matches a string that is, apart from surrounding whitespace, a single
# fenced block (``` or ```json); group 1 is the text between the fence
# lines. DOTALL lets that body span multiple lines.
_JSON_FENCE_RE = re.compile(r"^\s*```(?:json)?\s*\n(.*?)\n\s*```\s*$", re.DOTALL)
# A comma followed only by whitespace and a closing `}` / `]`; group 1 is
# the whitespace plus closer, which the substitution keeps. The pattern
# is not string-aware, so it also matches inside a JSON string value.
_TRAILING_COMMA_RE = re.compile(r",(\s*[}\]])")
def _repair_truncated_json(text: str) -> tuple[str, list[str]]:
"""Best-effort completion of truncated JSON.
Walks ``text`` once tracking string state and bracket/brace stack.
At end-of-input, if the parse is unbalanced (stuck mid-string,
open ``[`` / ``{`` without matching close) or has a dangling
structural artifact (trailing comma, partial key), repair so the
result is parseable. Returns ``(repaired, fixups)``.
Targets the truncation pattern observed on Hermes-3-8B JSON-mode
output for broad-descriptive questions: the model writes one
long claim text and runs out of ``max_tokens`` mid-sentence,
yielding e.g.::
{"claims":[{"text":"The Apollo program was the United
States spaceflight effort which landed... <CUT>
No closing ``"``, no closing ``}``, no closing ``]``, no closing
outer ``}``. The lenient parse path can't recover any structure.
Self-healing closes the open string, balances the stack, and
drops trailing commas / partial keys so the partial content is
preserved as a single claim with whatever fields survived.
Fixups recorded:
- ``close_string`` — appended ``"`` to close an open string
- ``drop_partial_key`` — dropped a key without value
(``,"key":`` or ``,"key`` or ``"key":``)
- ``strip_trailing_comma`` — removed a comma immediately
before stack close (separate from the regex pass which
only handles structurally-correct trailing commas)
- ``close_brace`` / ``close_bracket`` — appended ``}`` / ``]``
per open frame on the stack
Conservative: never inserts content (no key names, no values,
no commas), only closes / drops. Worst case the repair is a
no-op or makes parsing fail in a different way; never silently
fabricates data.
"""
if not text or not text.strip():
return text, []
fixups: list[str] = []
stack: list[str] = []
in_string = False
escaped = False
for ch in text:
if escaped:
escaped = False
continue
if in_string:
if ch == "\\":
escaped = True
elif ch == '"':
in_string = False
else:
if ch == '"':
in_string = True
elif ch == "{" or ch == "[":
stack.append(ch)
elif ch == "}" or ch == "]":
if stack:
stack.pop()
if not stack and not in_string:
return text, fixups
repaired = text
if in_string:
repaired += '"'
fixups.append("close_string")
# Drop dangling structural fragments after the last legitimate
# value, walking back from end-of-string. Order matters: handle
# partial-key (`,"foo":` or `,"foo"` or `"foo":`) before
# trailing-comma so we don't strip the wrong comma.
while True:
rstripped = repaired.rstrip()
if not rstripped:
break
# Partial key: `..., "key": ` (colon at end after trim).
m = _PARTIAL_KEY_COLON_RE.search(rstripped)
if m and m.end() == len(rstripped):
repaired = rstripped[: m.start()]
if "drop_partial_key" not in fixups:
fixups.append("drop_partial_key")
continue
# Partial key: `..., "key"` (key without colon).
m = _PARTIAL_KEY_NO_COLON_RE.search(rstripped)
if m and m.end() == len(rstripped):
repaired = rstripped[: m.start()]
if "drop_partial_key" not in fixups:
fixups.append("drop_partial_key")
continue
# Trailing comma — strip when it would otherwise stick before close.
if rstripped.endswith(","):
repaired = rstripped[:-1]
if "strip_trailing_comma" not in fixups:
fixups.append("strip_trailing_comma")
continue
break
# Close stack in reverse, mapping `{` → `}`, `[` → `]`.
while stack:
opener = stack.pop()
if opener == "{":
repaired += "}"
fixups.append("close_brace")
else:
repaired += "]"
fixups.append("close_bracket")
return repaired, fixups
# Partial-key patterns for _repair_truncated_json. ``,"foo":`` or
# ``"foo":`` at end-of-string after rstrip = a key with no value.
_PARTIAL_KEY_COLON_RE = re.compile(r',?\s*"[^"]*"\s*:\s*$')
# ``,"foo"`` or ``"foo"`` with no following colon — also dangling.
_PARTIAL_KEY_NO_COLON_RE = re.compile(r',\s*"[^"]*"\s*$')
def _lenient_json_parse(raw: str) -> tuple[object, list[str]]:
    """Parse ``raw`` as JSON, peeling common model drift step by step.

    A strict parse is tried first. If it fails, the fixes below are
    applied in order, and a strict parse is retried after every fix
    that changed the text. The first parse that succeeds is returned.
    Later, more invasive rewrites therefore never touch text that is
    already valid. In particular, curly quotes inside a string value of
    fenced or prose-wrapped JSON are left as the model wrote them,
    instead of being turned into ASCII quotes that break the string.

    1. ``fence`` — strip a markdown code fence around the whole text.
    2. ``prose_trim`` — cut to the span from the first ``{`` / ``[`` to
       the last ``}`` / ``]``.
    3. ``curly_quotes`` — replace curly double/single quotes with ASCII.
    4. ``trailing_comma`` — drop a comma directly before a closer.
    5. Truncation repair via ``_repair_truncated_json`` (fixups
       ``close_string``, ``drop_partial_key``, ``strip_trailing_comma``,
       ``close_brace``, ``close_bracket``).

    Args:
        raw: Model output expected to contain one JSON value.

    Returns:
        ``(parsed_obj, fixups_applied)``. ``fixups_applied`` is empty
        when the first strict parse succeeded; otherwise it names, in
        order, every fix applied up to the successful parse.

    Raises:
        json.JSONDecodeError: the text still fails to parse after the
            final repair step.
    """
    import json as _json

    fixups: list[str] = []

    def _attempt(candidate: str) -> tuple[bool, object]:
        # Explicit success flag: a parsed JSON ``null`` is a valid
        # result and must not be mistaken for failure.
        try:
            return True, _json.loads(candidate)
        except _json.JSONDecodeError:
            return False, None

    ok, parsed = _attempt(raw)
    if ok:
        return parsed, fixups
    text = raw

    # 1. Strip markdown fence wrappers (```json\n...\n``` or ```\n...\n```).
    m = _JSON_FENCE_RE.match(text)
    if m:
        text = m.group(1)
        fixups.append("fence")
        ok, parsed = _attempt(text)
        if ok:
            return parsed, fixups

    # 2. Trim leading prose to the first `{` or `[` and trailing prose
    #    past the last `}` / `]`, e.g. "Here is the JSON: {...}\n\nLet me
    #    know if you need more."
    first_brace = min(
        (text.find(c) for c in "{[" if text.find(c) >= 0),
        default=-1,
    )
    last_brace = max(text.rfind("}"), text.rfind("]"))
    if first_brace > 0 or (last_brace >= 0 and last_brace < len(text) - 1):
        if first_brace >= 0 and last_brace >= first_brace:
            text = text[first_brace : last_brace + 1]
            fixups.append("prose_trim")
            ok, parsed = _attempt(text)
            if ok:
                return parsed, fixups

    # 3. Normalize curly quotes — “…” / ‘…’ become "…" / '…'. Reached
    #    only when the text still does not parse with them in place.
    if any(c in text for c in "“”‘’"):
        text = (
            text.replace("“", '"').replace("”", '"')
            .replace("‘", "'").replace("’", "'")
        )
        fixups.append("curly_quotes")
        ok, parsed = _attempt(text)
        if ok:
            return parsed, fixups

    # 4. Fix trailing commas before `}` or `]`. Conservative: only a
    #    comma immediately followed by whitespace + close bracket.
    if "," in text:
        new_text = _TRAILING_COMMA_RE.sub(r"\1", text)
        if new_text != text:
            text = new_text
            fixups.append("trailing_comma")
            ok, parsed = _attempt(text)
            if ok:
                return parsed, fixups

    # 5. Self-heal truncated JSON: close open strings, drop dangling
    #    partial keys / trailing commas, balance the bracket stack. The
    #    final parse is allowed to raise.
    repaired, repair_fixups = _repair_truncated_json(text)
    if repair_fixups:
        text = repaired
        fixups.extend(repair_fixups)
    return _json.loads(text), fixups
# Default allowed source roles for claim_lattice_pointer mode; also the
# default ``allowed_source_roles`` of the JSON-variant verifier. The
# verifier tests each resolved pointer's ``source_role`` for membership
# in this tuple; roles outside this set get classified as
# SOURCE_ROLE_BLOCKED. Mirrors the role classifications in
# arborist/qa/query.py:_classify_source_role;
# "noisy_background_source" and "sequel_background_source" are
# deliberately excluded by default.
DEFAULT_ALLOWED_SOURCE_ROLES = (
    "primary_answer_source",
    "secondary_context_source",
    "background_source",
    "unclassified",
)
def _has_manual_quote(text: str) -> bool:
"""Strict no-quote rule: ANY double-quote character in claim text
is a violation.
The premise of claim-lattice-pointer mode is that models do not type
quote text — period. Even a 3-char quoted span (``"hi"``) is a
model-asserted verbatim citation that the runtime didn't authorize.
Catching every quote keeps the discipline honest: the model is
forbidden, not just length-discouraged.
Covers ASCII (``"``) and curly quotes (``“`` / ``”``) — same set
``_QUOTE_CHAR_RE`` recognizes for the legacy quote verifier.
"""
return any(ch in text for ch in ('"', '“', '”'))
# Minimum share of a claim's content tokens that must appear in a cited
# evidence span (Rule 5). Default ``min_coverage`` of
# ``_claim_textually_overlaps_evidence``; claims with ≤3 content tokens
# pass on a single matching token regardless of this value.
DEFAULT_MIN_CITATION_COVERAGE = 0.30
# Premise-parroting / generic-vocab-ride-along threshold. When ≥ this
# many tokens shared by the question AND the claim are ABSENT from the
# union of cited evidence spans, the claim is parroting the question's
# subject without anchoring it. Surfaced by the 200-cycle bench-emergent
# delta on `steer/reply/correcter` (Ticket #000006 amend 2026-05-02b):
# claim affirmed three question-distinctive tokens (correcter, steer,
# reply) that appeared ZERO times in the cited 33.5K-char glossary. The
# generic linguistic vocabulary (language, communication, terms,
# relationships) carried Rule 5's coverage check on its own.
#
# Threshold of 3 keeps the signal unambiguous: a single absent parroted
# token is often a stem-variant near-miss; three or more is the
# parroting fingerprint. Folds into verifier_policy_hash.
# A value of 0 (or less) disables the check in verify_claim_lattice.
DEFAULT_SUBJECT_TOKENS_ABSENT_THRESHOLD = 3
def _claim_textually_overlaps_evidence(
    claim_text: str,
    evidence_span: str,
    *,
    min_coverage: float = DEFAULT_MIN_CITATION_COVERAGE,
) -> bool:
    """Decide whether a cited span lexically supports a claim.

    The claim is reduced to content tokens via ``_content_tokens``; each
    token counts as matched when it occurs as a substring of the
    lowercased ``evidence_span`` (assumes ``_content_tokens`` yields
    lowercase tokens — TODO confirm). Purely lexical: no NER, no
    embeddings.

    Outcomes:

    * No content tokens at all → ``True`` (vacuous pass; nothing
      topical to check, other hard checks own that case).
    * At most 3 content tokens and at least one match → ``True``, so
      narrow single-fact claims do not fail on a ratio technicality.
    * Otherwise → ``True`` iff matched / total ≥ ``min_coverage``.

    This is the per-(claim, pointer) citation check that catches the
    lazy-anchor failure: a long claim cited to a span sharing only one
    incidental token (e.g. 1/10 = 10% coverage against the 30% default)
    is rejected and surfaces as CITATION_MISMATCH.
    """
    from arborist.qa.evidence import _content_tokens

    content = _content_tokens(claim_text)
    if not content:
        return True

    haystack = evidence_span.lower()
    hits = 0
    for token in content:
        if token in haystack:
            hits += 1

    # Short-claim floor: one shared token is enough for ≤3-token claims.
    if hits >= 1 and len(content) <= 3:
        return True
    return hits / len(content) >= min_coverage
def _parroted_subject_tokens_absent(
    question_text: str | None,
    claim_text: str,
    cited_spans: list[str],
) -> set[str]:
    """Find question tokens the claim echoes but the evidence never mentions.

    Premise-parroting / generic-vocab-ride-along detector (Ticket
    #000006 amend 2026-05-02b). Computes the content tokens common to
    the question and the claim, then keeps those that do NOT occur as a
    substring of the lowercased, space-joined cited spans. Substring
    matching makes the test stem-tolerant ("rare" is found inside
    "rarely") at the cost of also matching inside unrelated longer
    words ("polar" inside "bipolar").

    Returns:
        * empty set when either text is missing/empty, or when question
          and claim share no content tokens (the claim is not parroting);
        * the full shared set when the cited spans are empty or
          whitespace-only (no grounding at all);
        * otherwise the shared tokens absent from the cited spans.

    ``None`` entries in ``cited_spans`` are treated as empty strings.
    """
    from arborist.qa.evidence import _content_tokens

    if not (question_text and claim_text):
        return set()

    question_tokens = set(_content_tokens(question_text))
    echoed = question_tokens.intersection(_content_tokens(claim_text))
    if not echoed:
        return set()

    haystack = " ".join((span or "").lower() for span in cited_spans)
    if not haystack.strip():
        return echoed

    missing: set[str] = set()
    for token in echoed:
        if token not in haystack:
            missing.add(token)
    return missing
# Pointer cap per claim (Rule 6). Matches the prompt's "1 or 2 pointers
# per claim" rule; verify_claim_lattice trims extra pointers and records
# a POINTER_OVERFLOW_TRIMMED violation rather than failing the claim.
DEFAULT_MAX_POINTERS_PER_CLAIM = 2
# Bare-name claim guard: a claim with fewer content tokens than this is
# classified SCHEMA_INVALID by verify_claim_lattice.
DEFAULT_MIN_CLAIM_CONTENT_TOKENS = 2
# Lazy-anchor demotion: a STRICT verdict is capped at HYBRID when the
# most-cited pointer's share of counted pointers is ≥ THRESHOLD and at
# least MIN_PAIRS pointers were counted.
DEFAULT_LAZY_ANCHOR_DEMOTE_THRESHOLD = 0.5
DEFAULT_LAZY_ANCHOR_DEMOTE_MIN_PAIRS = 3
# Claim-count ceiling. Bench evidence (2026-04-30 york-england run):
# pre-atomic-claim-rule, JSON mode emitted 26-59 claim-pointer pairs
# of which only 2-4 verified — the model treats "tell me all there
# is to know about X" as a license to spam encyclopedic claims from
# training. Atomic-claim prompt rule (commit b5925c8) reduced this
# to ~10 well-formed claims, but defence-in-depth: any answer with
# more than this many claims is structurally suspect regardless of
# how each claim verifies. Default 12 chosen to comfortably admit
# entity-list questions ("dinosaurs in jurassic park" → 5; "simpsons
# family + pets" → 5-7) while catching the runaway shape.
DEFAULT_MAX_CLAIMS_PER_ANSWER = 12
# Title-relevance check (Rule 8). Cited evidence's source title must
# share at least one stemmed content token with the claim text.
# Catches the retrieval-driven hallucination class fox surfaced
# 2026-05-02 on "explain spin glass modeling & tensors?": claim
# tokens {spin, glass, modeling, tensor, ...} cited to a chunk from
# the *Quantum chromodynamics* article whose title tokens are
# {quantum, chromodynamics} — zero overlap. Token-coverage check
# inside the chunk passed accidentally on shared physics vocabulary;
# the SOURCE was never about the claim's subject.
def _claim_title_overlap(claim_text: str, source_title: str | None) -> bool:
"""Return True iff the source title shares ≥1 stemmed content
token with the claim text. Vacuous-pass when either side has no
extractable tokens (defensive — prevents the rule from firing
on degenerate inputs)."""
if not source_title or not claim_text:
return True
from arborist.qa.evidence import _content_tokens as _ct
claim_tokens = _ct(claim_text)
title_tokens = _ct((source_title or "").replace("_", " "))
if not claim_tokens or not title_tokens:
return True
# Reuse the retrieval-side stem helper so possessive / plural
# collapse the same way ('movies' vs 'movie', 'simpsons' vs
# 'simpson'). Defined in qa/query.py to avoid an import cycle:
# inline a minimal copy here instead.
def _stem(t: str) -> str:
if len(t) > 4 and t.endswith("s") and not t.endswith("ss"):
return t[:-1]
return t
claim_stems = {_stem(t) for t in claim_tokens}
title_stems = {_stem(t) for t in title_tokens}
return bool(claim_stems & title_stems)
[docs]
def verify_claim_lattice(
    answer_text: str,
    evidence_map,
    *,
    allowed_source_roles: tuple[str, ...] = DEFAULT_ALLOWED_SOURCE_ROLES,
    max_pointers_per_claim: int = DEFAULT_MAX_POINTERS_PER_CLAIM,
    min_citation_coverage: float = DEFAULT_MIN_CITATION_COVERAGE,
    min_claim_content_tokens: int = DEFAULT_MIN_CLAIM_CONTENT_TOKENS,
    lazy_anchor_demote_threshold: float = DEFAULT_LAZY_ANCHOR_DEMOTE_THRESHOLD,
    lazy_anchor_demote_min_pairs: int = DEFAULT_LAZY_ANCHOR_DEMOTE_MIN_PAIRS,
    max_claims_per_answer: int = DEFAULT_MAX_CLAIMS_PER_ANSWER,
    subject_tokens_absent_threshold: int = DEFAULT_SUBJECT_TOKENS_ABSENT_THRESHOLD,
    question: str | None = None,
    warrant_check_enabled: bool = True,
    deflection_check_enabled: bool = True,
    format_collapse_check_enabled: bool = True,
    warrant_chain_roots: frozenset[str] = frozenset(),
) -> dict:
    """Deterministic verifier for ``answer_mode="claim_lattice_pointer"``.

    The model wrote pointer-line prose (``Claim text. [E12]``); the
    parser pulled (claim_text, [pointer_ids]) pairs from each non-empty
    line. This verifier maps each pointer id back to its
    content-addressed evidence object and runs six hard per-claim
    checks, followed by answer-level demotions (listed further below):

    1. Parser succeeded — ``parse_status == "PARSED"`` (line had a
       bracket tag). NO_EVIDENCE_POINTER claims (prose without tag)
       count toward the denominator and downgrade the verdict.
    2. Pointer id resolves to an entry in the runtime-built evidence
       map. No model-invented ids.
    3. Resolved entry's ``source_role`` is in ``allowed_source_roles``.
    4. Claim text non-empty after tag strip, and it carries at least
       ``min_claim_content_tokens`` content tokens (bare-name guard);
       otherwise the claim is SCHEMA_INVALID.
    5. Claim's content tokens textually overlap the cited evidence
       span at coverage ≥ ``min_citation_coverage`` (per-pair, lexical
       only — see ``_claim_textually_overlaps_evidence``). Catches the
       magnet-chunk lazy-anchor where the model cites an evidence
       pointer whose text contains few claim-content tokens.
    6. Pointer count per claim does not exceed ``max_pointers_per_claim``
       (default 2 — matches the prompt's "1 or 2 pointers per claim"
       rule). When exceeded, the claim is TRIMMED to the first N
       pointers and verification proceeds normally; a
       ``POINTER_OVERFLOW_TRIMMED`` violation is recorded so STRICT is
       no longer reachable (audit_mode caps at HYBRID for the run).
       Trim-and-verify (vs hard fail) protects correct claims that
       were over-cited (e.g. "Leonardo painted the Mona Lisa.
       [E2,...,E14]") while keeping the over-citation pattern
       surfaced. The dropped pointers count toward ``n_quotes`` so
       the denominator reflects what the model emitted.

    Answer-level checks. Each appends a violation record; unless noted
    otherwise they only cap a STRICT verdict at HYBRID:

    - ``TOO_MANY_CLAIMS`` — more than ``max_claims_per_answer`` claims
      were parsed. Recorded before the per-claim loop; nothing is
      truncated.
    - ``LAZY_ANCHOR_DEMOTE`` — the most-cited pointer's share is
      ≥ ``lazy_anchor_demote_threshold`` with at least
      ``lazy_anchor_demote_min_pairs`` pointers counted.
    - ``WARRANT_MISSING`` — only when ``warrant_check_enabled``;
      ``warrant_check`` rejected a resolving claim against its cited
      spans. Suppressed (claim recorded as warrant-proven instead) when
      any cited evidence's ``source_root`` is in ``warrant_chain_roots``.
    - ``TITLE_MISMATCH`` (Rule 8) — no cited source title shares a
      stemmed content token with the claim. If EVERY resolving claim
      mismatches, audit_mode is forced to UNGROUNDED.
    - ``SUBJECT_TOKENS_ABSENT`` (Rule 9) — only when ``question`` is
      given and ``subject_tokens_absent_threshold`` > 0.
    - ``DEFLECTION_DETECTED`` — only when ``deflection_check_enabled``,
      ``question`` is given and there is rendered text.
    - ``FORMAT_COLLAPSED`` — only when ``format_collapse_check_enabled``;
      ≥5 lines longer than 20 stripped characters and no pointer tag in
      the raw answer.

    Removed 2026-04-30: the strict no-double-quote rule. The model
    routinely paraphrases source prose but copies named-quoted phrases
    verbatim (e.g. ``"Constitution State"`` from a Connecticut span).
    Hard-rejecting claims that contained any ``"`` char was rejecting
    factually correct, source-grounded claims for cosmetic punctuation.
    The coverage threshold (Rule 5) and pointer cap (Rule 6) carry the
    weight of catching synthetic-quote / mega-claim failures the old
    rule was meant to catch. ``_has_manual_quote`` is still defined and
    used by ``verify_claim_lattice_json``.

    Returns a verdict in the same shape as ``verify_quotes`` + extras::

        n_quotes                  total claim-pointer pairs (denominator)
        n_verified                pairs where pointer resolved AND
                                  source_role allowed AND coverage met
                                  AND claim text non-empty
        audit_mode                STRICT / HYBRID / UNGROUNDED
        unverified_quotes         claim texts that didn't reach
                                  EVIDENCE_LINKED -- kept under that name
                                  for schema continuity with verify_quotes
        partially_verified_quotes claim texts with status
                                  EVIDENCE_LINKED_PARTIAL
        verifier_method           "claim_lattice"
        claim_statuses            per-claim {claim_idx, text,
                                  evidence_ids, pointer_ids, status,
                                  reasons[]}; status in
                                  {EVIDENCE_LINKED, EVIDENCE_LINKED_PARTIAL,
                                  UNKNOWN_EVIDENCE_ID, SOURCE_ROLE_BLOCKED,
                                  CITATION_MISMATCH, NO_EVIDENCE_POINTER,
                                  SCHEMA_INVALID}
        violations                structured violation records for the
                                  run-DAG / sidecar
        rendered_text             human-readable prose with literal spans
                                  interpolated; what the runner persists
                                  as answer_text
        evidence_id_pairs         per-claim list of resolved
                                  content-addressed evidence_ids
                                  (run-stable form). Used to thread the
                                  parsed lattice into the run-DAG.
        pointer_id_distribution   pointer_id -> citation count across
                                  linked / partially linked claims
        lazy_anchor_ratio         largest single-pointer share of that
                                  distribution (0.0 when empty)
        lazy_anchor_demoted       True when LAZY_ANCHOR_DEMOTE fired
        warrant_missing_claim_idxs  claim indices with WARRANT_MISSING
        warrant_proven_claim_idxs   claim indices whose warrant failure
                                    was suppressed via warrant_chain_roots
        title_mismatch_claim_idxs   claim indices with TITLE_MISMATCH
        deflection_detected       True when DEFLECTION_DETECTED fired
        format_collapsed          True when FORMAT_COLLAPSED fired
    """
    from arborist.qa.evidence import (
        evidence_map_by_pointer_id as _by_pointer,
        render_claim_lattice as _render,
    )
    from arborist.qa.parse_claims import parse_pointer_claims

    by_pointer = _by_pointer(evidence_map)
    violations: list[dict] = []
    claim_statuses: list[dict] = []
    # Three-bucket rendering. ``unverified`` holds claims whose
    # status reached neither EVIDENCE_LINKED nor EVIDENCE_LINKED_PARTIAL
    # — i.e. fully failed (no pointer verified). ``partially_verified``
    # holds EVIDENCE_LINKED_PARTIAL claims (some pointers ok, some
    # failed). The renderer shows them as their own section so a
    # claim never appears in BOTH a verified bullet and the unverified
    # footer; that previously happened for partial-status claims and
    # read as "is it grounded or not?". Per-pointer detail lives in
    # ``claim_statuses`` for audit.
    unverified: list[str] = []
    partially_verified: list[str] = []
    raw_claims = parse_pointer_claims(answer_text or "")
    # Claim-count ceiling — see DEFAULT_MAX_CLAIMS_PER_ANSWER. Records
    # the violation but doesn't truncate; the per-claim loop below
    # still verifies every claim so the operator sees full evidence
    # of the runaway. Demotes verdict via the violation list.
    if len(raw_claims) > max_claims_per_answer:
        violations.append({
            "kind": "TOO_MANY_CLAIMS",
            "n_claims": len(raw_claims),
            "max": max_claims_per_answer,
        })
    n_pairs = 0
    n_pairs_verified = 0
    # Renderer claims: pointer-id form so the human display still shows
    # the short tags the model used.
    valid_claims: list[dict] = []
    # Evidence-id pairs: content-addressed form for the run-DAG &
    # cache. Per-claim list so the parsed_claim_lattice node hashes the
    # run-stable handle, not the run-dependent pointer-id.
    evidence_id_pairs: list[list[str]] = []

    for idx, c in enumerate(raw_claims):
        claim_text = c.claim_text
        pointer_ids = c.pointer_ids
        parse_status = c.parse_status
        if parse_status == "NO_EVIDENCE_POINTER":
            violations.append({
                "kind": "NO_EVIDENCE_POINTER",
                "claim_idx": idx,
                "claim_text": claim_text,
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": claim_text,
                "pointer_ids": [],
                "evidence_ids": [],
                "status": "NO_EVIDENCE_POINTER",
                "reasons": ["no [E\\d+] tag on line"],
            })
            unverified.append(claim_text)
            evidence_id_pairs.append([])
            # NO_EVIDENCE_POINTER counts as one denominator pair so the
            # verdict reflects the failure rate.
            n_pairs += 1
            continue
        if not claim_text:
            # Tag present but nothing left after stripping it. Every
            # pointer still counts toward the denominator; there is no
            # text to add to ``unverified``.
            violations.append({
                "kind": "SCHEMA_INVALID",
                "claim_idx": idx,
                "reason": "tag with no claim text",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": "",
                "pointer_ids": pointer_ids,
                "evidence_ids": [],
                "status": "SCHEMA_INVALID",
                "reasons": ["tag with no claim text"],
            })
            n_pairs += len(pointer_ids)
            evidence_id_pairs.append([])
            continue
        # Bare-name claim guard. A claim like "Tyrannosaurus rex. [E15]"
        # has 1 content token; the citation passes any span that mentions
        # T-rex anywhere, even when E15 is a video-game-behavior chunk
        # rather than a film-context one. Forcing a sentence-shape claim
        # ("Tyrannosaurus rex appeared in the first JP film") raises the
        # token-coverage bar so an off-topic chunk can no longer satisfy
        # the citation. Folds into governance_policy_hash via
        # ``claim_lattice_min_claim_content_tokens``.
        from arborist.qa.evidence import _content_tokens as _ct
        claim_content_tokens = _ct(claim_text)
        if len(claim_content_tokens) < min_claim_content_tokens:
            violations.append({
                "kind": "SCHEMA_INVALID",
                "claim_idx": idx,
                "reason": (
                    f"bare-name claim ({len(claim_content_tokens)} content "
                    f"tokens < {min_claim_content_tokens}); write a sentence"
                ),
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": claim_text,
                "pointer_ids": pointer_ids,
                "evidence_ids": [],
                "status": "SCHEMA_INVALID",
                "reasons": ["bare-name claim — write a sentence with predicate"],
            })
            n_pairs += len(pointer_ids)
            evidence_id_pairs.append([])
            unverified.append(claim_text)
            continue
        # Pointer-count cap (Rule 6). Catches the encyclopedic-mega-
        # claim where the model produces one line citing every
        # pointer at once. Counts every pointer toward the denominator
        # so the failure is loud in n_quotes. 2026-04-30: trim-and-
        # verify rather than hard-fail. A correct claim cited with too
        # many pointers ("Leonardo da Vinci painted the Mona Lisa.
        # [E2,...,E14]") deserves to count if its first N pointers
        # actually verify; the violation still blocks STRICT (audit_mode
        # caps at HYBRID) so the over-citation pattern stays surfaced.
        # Hard SCHEMA_INVALID would have nuked correct answers for a
        # cosmetic over-cite. The dropped pointers count toward
        # n_pairs so the denominator reflects what the model emitted.
        # NOTE(review): ``pointer_overflow_trimmed`` is assigned here but
        # not read again anywhere in this function.
        pointer_overflow_trimmed = False
        if len(pointer_ids) > max_pointers_per_claim:
            dropped = pointer_ids[max_pointers_per_claim:]
            n_pairs += len(dropped)
            pointer_ids = pointer_ids[:max_pointers_per_claim]
            pointer_overflow_trimmed = True
            violations.append({
                "kind": "POINTER_OVERFLOW_TRIMMED",
                "claim_idx": idx,
                "kept": list(pointer_ids),
                "dropped": dropped,
                "max_pointers_per_claim": max_pointers_per_claim,
            })
        # Per-pointer verification. Checks run in a fixed order and the
        # first failure decides the pointer's ``kind``: resolve →
        # source-role allow-list → textual coverage.
        per_id_results: list[dict] = []
        resolved_evidence_ids: list[str] = []
        for pid in pointer_ids:
            n_pairs += 1
            obj = by_pointer.get(pid)
            if obj is None:
                violations.append({
                    "kind": "UNKNOWN_EVIDENCE_ID",
                    "claim_idx": idx,
                    "pointer_id": pid,
                })
                per_id_results.append({
                    "pid": pid, "ok": False, "kind": "UNKNOWN_EVIDENCE_ID",
                })
                continue
            if obj.source_role not in allowed_source_roles:
                violations.append({
                    "kind": "SOURCE_ROLE_BLOCKED",
                    "claim_idx": idx,
                    "pointer_id": pid,
                    "evidence_id": obj.evidence_id,
                    "source_role": obj.source_role,
                })
                per_id_results.append({
                    "pid": pid, "ok": False, "kind": "SOURCE_ROLE_BLOCKED",
                })
                continue
            if not _claim_textually_overlaps_evidence(
                claim_text, obj.span, min_coverage=min_citation_coverage
            ):
                # The claim's content-token coverage in the cited span
                # is below ``min_citation_coverage`` (and the short-claim
                # floor did not apply). Lazy-anchor signal promoted to
                # a hard fail — the model cited a magnet chunk that
                # doesn't textually support its claim.
                violations.append({
                    "kind": "CITATION_MISMATCH",
                    "claim_idx": idx,
                    "pointer_id": pid,
                    "evidence_id": obj.evidence_id,
                })
                per_id_results.append({
                    "pid": pid, "ok": False, "kind": "CITATION_MISMATCH",
                })
                continue
            per_id_results.append({"pid": pid, "ok": True, "evidence_id": obj.evidence_id})
            resolved_evidence_ids.append(obj.evidence_id)
            n_pairs_verified += 1
        # Roll per-pointer results up into one claim status. When no
        # pointer verified, precedence is CITATION_MISMATCH >
        # UNKNOWN_EVIDENCE_ID > SOURCE_ROLE_BLOCKED; SCHEMA_INVALID is
        # the fallback when there were no per-pointer results at all.
        ok_pids = [r["pid"] for r in per_id_results if r["ok"]]
        bad_kinds = sorted({r["kind"] for r in per_id_results if not r["ok"]})
        if ok_pids and not bad_kinds:
            status = "EVIDENCE_LINKED"
        elif ok_pids:
            status = "EVIDENCE_LINKED_PARTIAL"
        elif "CITATION_MISMATCH" in bad_kinds:
            status = "CITATION_MISMATCH"
        elif "UNKNOWN_EVIDENCE_ID" in bad_kinds:
            status = "UNKNOWN_EVIDENCE_ID"
        elif "SOURCE_ROLE_BLOCKED" in bad_kinds:
            status = "SOURCE_ROLE_BLOCKED"
        else:
            status = "SCHEMA_INVALID"
        claim_statuses.append({
            "claim_idx": idx,
            "text": claim_text,
            "pointer_ids": pointer_ids,
            "evidence_ids": resolved_evidence_ids,
            "status": status,
            "reasons": bad_kinds,
        })
        evidence_id_pairs.append(resolved_evidence_ids)
        if status in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
            # Only the pointers that verified are handed to the renderer.
            valid_claims.append({"text": claim_text, "pointer_ids": ok_pids})
        if status == "EVIDENCE_LINKED_PARTIAL":
            partially_verified.append(claim_text)
        elif status != "EVIDENCE_LINKED":
            unverified.append(claim_text)

    rendered_text = _render(valid_claims, by_pointer) if valid_claims else ""
    # Aggregate. STRICT requires ≥1 verified pair AND zero violations of
    # any kind recorded so far (too many claims, schema, missing or
    # unknown pointer, blocked role, citation mismatch, pointer
    # overflow). HYBRID = some pairs verified, but at least one
    # violation. UNGROUNDED = no verified pairs (no parseable claims,
    # or every claim failed at least one check).
    if n_pairs_verified > 0 and not violations:
        audit_mode = "STRICT"
    elif n_pairs_verified > 0:
        audit_mode = "HYBRID"
    else:
        audit_mode = "UNGROUNDED"
    # Anchor-smell sidecar (render-layer only — never persisted as a
    # v9.8 field). Counts how many distinct pointer_ids the model used
    # across the verified-or-partial claims. ``lazy_anchor_ratio`` is
    # the max share any single pointer claimed: 1.0 = every claim cites
    # the same pointer (Hermes-3-8B's lazy-anchor habit on the JP-
    # dinosaurs benchmark), 1/N = every claim cites a unique pointer.
    # The distribution is recoverable from ``claim_statuses`` which
    # IS persisted in run_dag_blob; we surface the derived numbers in
    # the verdict for the human renderer, but they never thread back
    # into ``build_run_dag``'s verify_payload, so ``run_dag_root``
    # stays clean.
    # NOTE(review): for EVIDENCE_LINKED_PARTIAL claims this counts every
    # (post-trim) pointer on the claim, including pointers that failed
    # verification — confirm that is intended.
    pointer_distribution: dict[str, int] = {}
    for cs in claim_statuses:
        if cs["status"] not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
            continue
        for pid in cs.get("pointer_ids") or []:
            pointer_distribution[pid] = pointer_distribution.get(pid, 0) + 1
    total_pointers = sum(pointer_distribution.values())
    lazy_anchor_ratio = (
        max(pointer_distribution.values()) / total_pointers
        if total_pointers
        else 0.0
    )
    # Smell → demote. Pre-2026-04-30 the lazy-anchor signal was advisory
    # only; the verdict could still be STRICT while every claim cited
    # the same magnet chunk. Now: when ratio ≥ threshold AND total pairs
    # ≥ floor, cap audit_mode at HYBRID. STRICT becomes unreachable for
    # answers where one pointer carries every claim — that pattern is
    # almost never honest verbatim grounding. UNGROUNDED is left alone
    # (a verdict with zero verified pairs has no smell to flag).
    lazy_anchor_demoted = False
    if (
        audit_mode == "STRICT"
        and total_pointers >= lazy_anchor_demote_min_pairs
        and lazy_anchor_ratio >= lazy_anchor_demote_threshold
    ):
        audit_mode = "HYBRID"
        lazy_anchor_demoted = True
        violations.append({
            "kind": "LAZY_ANCHOR_DEMOTE",
            "ratio": round(lazy_anchor_ratio, 3),
            "min_pairs": lazy_anchor_demote_min_pairs,
            "threshold": lazy_anchor_demote_threshold,
        })
    # Warrant-lite — relation-question hard check (Ticket H from
    # feedback-3, 2026-05-01). Claim-cited spans must contain at
    # least one of the claim's named answer entities (proper-noun
    # phrases). Catches the Homer-Simpson lazy-anchor case fox
    # surfaced — claim asserts "Mr. Burns" but cited span is
    # Castellaneta voice-actor prose. See arborist/qa/warrant.py
    # for the lexical algorithm and rationale (deterministic,
    # not NLI). Fires only when the question shape suggests a
    # relation lookup AND the lookup is enabled by policy
    # (warrant_check_enabled). Per-claim WARRANT_MISSING violations
    # cap audit_mode at HYBRID via the same demote pattern as
    # lazy_anchor_demoted.
    warrant_missing_claims: list[int] = []
    warrant_proven_claim_idxs: list[int] = []
    if warrant_check_enabled:
        for cs in claim_statuses:
            if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
                continue
            cited_eids = cs.get("evidence_ids") or []
            # Resolve each cited evidence_id back to its object,
            # silently dropping ids that no longer resolve.
            cited_evidence_objs = [
                obj
                for eid in cited_eids
                for obj in [evidence_map_by_evidence_id_local(evidence_map, eid)]
                if obj is not None
            ]
            cited_spans = [obj.span for obj in cited_evidence_objs]
            ok, missing = warrant_check(
                cs.get("text") or "", cited_spans, question=question
            )
            if not ok:
                # Phase 3 of #000031: if any cited evidence's source
                # document has a warrant-resolver derivation row (i.e.,
                # the cited chunk is a claim-pack record with a
                # Merkle-bound primary-source backing), suppress
                # WARRANT_MISSING — the warrant chain DOES exist, just
                # not at the lexical-anchor level. Track on a separate
                # `warrant_proven_claim_idxs` field for render-layer
                # transparency.
                cited_source_roots = [obj.source_root for obj in cited_evidence_objs]
                if warrant_chain_roots and any(
                    r in warrant_chain_roots for r in cited_source_roots
                ):
                    warrant_proven_claim_idxs.append(cs.get("claim_idx"))
                    continue
                warrant_missing_claims.append(cs.get("claim_idx"))
                violations.append({
                    "kind": "WARRANT_MISSING",
                    "claim_idx": cs.get("claim_idx"),
                    "missing_anchors": missing,
                    "rationale": (
                        "claim asserts an answer entity or specific date "
                        "not present in any cited span — pointer-linked "
                        "but warrant missing"
                    ),
                })
        if warrant_missing_claims and audit_mode == "STRICT":
            audit_mode = "HYBRID"
    # Rule 8 — Title-relevance check. For each claim that resolved,
    # at least one cited evidence's source title must share a
    # stemmed content token with the claim. Catches the
    # retrieval-driven hallucination class (2026-05-02 spin-glass
    # case): claim about spin glass cited to a chunk from
    # *Quantum chromodynamics* — token-coverage check passed on
    # incidental physics vocabulary, but the SOURCE was never about
    # the claim's subject.
    title_mismatch_claims: list[int] = []
    for cs in claim_statuses:
        if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
            continue
        cited_eids = cs.get("evidence_ids") or []
        cited_titles = [
            obj.title
            for eid in cited_eids
            for obj in [evidence_map_by_evidence_id_local(evidence_map, eid)]
            if obj is not None
        ]
        if not cited_titles:
            continue
        # An empty/None title passes vacuously inside
        # ``_claim_title_overlap``, so it counts as overlap here.
        any_overlap = any(
            _claim_title_overlap(cs.get("text") or "", t)
            for t in cited_titles
        )
        if not any_overlap:
            title_mismatch_claims.append(cs.get("claim_idx"))
            violations.append({
                "kind": "TITLE_MISMATCH",
                "claim_idx": cs.get("claim_idx"),
                "claim_text": (cs.get("text") or "")[:200],
                "cited_titles": cited_titles,
                "rationale": (
                    "no cited source's title shares a content token "
                    "with the claim — pointer-linked but the cited "
                    "document is structurally unrelated to the claim"
                ),
            })
    if title_mismatch_claims and audit_mode == "STRICT":
        audit_mode = "HYBRID"
    # Tightening (2026-05-02 emergent-log finding): when EVERY resolving
    # claim has TITLE_MISMATCH, the substrate has zero structural
    # grounding for the user's question — every cited source is
    # title-irrelevant. The cashback case ("widescreens offer cashback"
    # cited to a generic Coupon article) had n_verified=1 but the
    # citation was meaningless; HYBRID overclaimed. Demote to
    # UNGROUNDED so the four-rung ladder maps it to UNGROUNDED, not
    # POINTER-LINKED-PARTIAL.
    n_resolving = sum(
        1
        for cs in claim_statuses
        if cs.get("status") in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL")
    )
    if (
        title_mismatch_claims
        and n_resolving > 0
        and len(title_mismatch_claims) == n_resolving
    ):
        audit_mode = "UNGROUNDED"
    # Rule 9 — Subject-tokens-absent / premise-parroting check (Ticket
    # #000006 amend 2026-05-02b, surfaced by `steer/reply/correcter`
    # 200-cycle bench-emergent finding). For each resolving claim,
    # collect the union of cited evidence spans and check whether ≥
    # subject_tokens_absent_threshold tokens shared by question AND
    # claim are absent from that union. If so, the claim is parroting
    # the question's distinctive subject without anchoring it — the
    # citation rode in on overlapping generic vocabulary while the
    # actual subject went unverified.
    # NOTE(review): unlike the warrant / title index lists,
    # ``subject_absent_claims`` has no key in the returned dict; the
    # affected claims are only recoverable from the
    # SUBJECT_TOKENS_ABSENT entries in ``violations`` — confirm intended.
    subject_absent_claims: list[int] = []
    if question and subject_tokens_absent_threshold > 0:
        for cs in claim_statuses:
            if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
                continue
            cited_eids = cs.get("evidence_ids") or []
            cited_spans = []
            for eid in cited_eids:
                obj = evidence_map_by_evidence_id_local(evidence_map, eid)
                if obj is not None and obj.span:
                    cited_spans.append(obj.span)
            if not cited_spans:
                continue
            absent = _parroted_subject_tokens_absent(
                question, cs.get("text") or "", cited_spans
            )
            if len(absent) >= subject_tokens_absent_threshold:
                subject_absent_claims.append(cs.get("claim_idx"))
                violations.append({
                    "kind": "SUBJECT_TOKENS_ABSENT",
                    "claim_idx": cs.get("claim_idx"),
                    "claim_text": (cs.get("text") or "")[:200],
                    "absent_tokens": sorted(absent),
                    "rationale": (
                        f"{len(absent)} question-distinctive tokens echoed in "
                        f"the claim are absent from cited evidence — claim "
                        f"parrots question premise without anchoring it"
                    ),
                })
        if subject_absent_claims and audit_mode == "STRICT":
            audit_mode = "HYBRID"
    # Deflection check (soft demote, promoted from sidecar 2026-05-02).
    # When the question's subject anchor is missing from the answer,
    # the model deflected — answered an adjacent grounded question
    # instead of the user's specific one. Caught the live cases:
    # "who burns the amazon river?" → answered about Amazon
    # Rainforest deforestation, "river" never in answer
    # "what culture burns the amazon rain forest?" → answered
    # "what causes burning", "culture" never in answer
    # Both passed every other check but the user's question wasn't
    # structurally answered. DEFLECTION_DETECTED downgrades
    # EVIDENCE-WARRANTED → ANCHOR-WARRANTED via the soft-demote
    # path. Render-layer ladder picks this up automatically.
    # The check inspects ``rendered_text`` (verified claims only), so it
    # is skipped when nothing rendered.
    deflection_detected = False
    if deflection_check_enabled and question and rendered_text:
        # Deferred import to avoid pulling arborist.compress + arborist.store
        # at verify.py module-load time when callers may not need them.
        from arborist.qa.inspect import diagnose_deflection
        signal = diagnose_deflection(question, rendered_text)
        if signal.get("kind") == "deflection":
            deflection_detected = True
            violations.append({
                "kind": "DEFLECTION_DETECTED",
                "subject_anchor": signal.get("subject_anchor"),
                "overlap_ratio": signal.get("overlap_ratio"),
                "rationale": (
                    "answer's content tokens omit the question's "
                    "subject anchor — model answered an adjacent "
                    "grounded question rather than the user's "
                    "specific one"
                ),
            })
    if deflection_detected and audit_mode == "STRICT":
        audit_mode = "HYBRID"
    # Format-collapse check (FORMAT_COLLAPSED soft demote).
    # The "winners of all major sports?" case fox surfaced 2026-05-02:
    # Hermes melted under an under-specified broad question, dumped
    # 50+ free-form prose claims with ZERO `[E\d+]` pointer tags. The
    # parser found 2 line-shaped fragments to count as claims; both
    # ungrounded → UNGROUNDED 0/2. Verifier was honest, but operators
    # couldn't tell from the audit line whether UNGROUNDED meant
    # "tried to ground & failed" vs "abandoned the protocol entirely."
    # This sidecar separates those two failure shapes by inspecting
    # the raw answer text for the absence of bracket tags amid
    # multiple meaningful prose lines.
    # "Meaningful" = more than 20 characters after stripping; the signal
    # needs at least 5 such lines and zero ``[E<digits>`` occurrences.
    format_collapsed = False
    if format_collapse_check_enabled and answer_text:
        meaningful_lines = [
            line for line in answer_text.splitlines()
            if len(line.strip()) > 20
        ]
        bracket_count = len(re.findall(r"\[E\d+", answer_text))
        if len(meaningful_lines) >= 5 and bracket_count == 0:
            format_collapsed = True
            violations.append({
                "kind": "FORMAT_COLLAPSED",
                "meaningful_lines": len(meaningful_lines),
                "bracket_count": bracket_count,
                "rationale": (
                    "model emitted multi-line prose with zero [E\\d+] "
                    "pointer tags — abandoned the claim_lattice_pointer "
                    "protocol entirely. UNGROUNDED below this signal is "
                    "format collapse, not graceful per-claim refusal."
                ),
            })
    if format_collapsed and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    return {
        "n_quotes": n_pairs,
        "n_verified": n_pairs_verified,
        "audit_mode": audit_mode,
        "unverified_quotes": unverified,
        "partially_verified_quotes": partially_verified,
        "verifier_method": "claim_lattice",
        "claim_statuses": claim_statuses,
        "violations": violations,
        "rendered_text": rendered_text,
        "evidence_id_pairs": evidence_id_pairs,
        "pointer_id_distribution": pointer_distribution,
        "lazy_anchor_ratio": lazy_anchor_ratio,
        "lazy_anchor_demoted": lazy_anchor_demoted,
        "warrant_missing_claim_idxs": warrant_missing_claims,
        "warrant_proven_claim_idxs": warrant_proven_claim_idxs,
        "title_mismatch_claim_idxs": title_mismatch_claims,
        "deflection_detected": deflection_detected,
        "format_collapsed": format_collapsed,
    }
[docs]
def evidence_map_by_evidence_id_local(evidence_map, eid: str):
    """Find the first evidence entry whose ``evidence_id`` equals ``eid``.

    Local linear scan kept in this module so the hot path does not need
    to import `evidence_map_by_evidence_id` (import-cycle risk). The
    O(N) walk is acceptable because evidence maps stay small (<30
    entries).

    Returns the matching entry, or ``None`` when ``evidence_map`` is
    falsy (``None`` / empty) or no entry matches.
    """
    if not evidence_map:
        return None
    matches = (entry for entry in evidence_map if entry.evidence_id == eid)
    return next(matches, None)
# ---------------------------------------------------------------------------
# JSON variant — `answer_mode="claim_lattice"`. Same lattice semantics as
# the pointer variant, but the model emits a structured JSON object
# {"claims":[{"text":str,"evidence_ids":[str,...]}]}. Since 2026-04-30
# the ``evidence_ids`` entries are pointer_ids (E1, E2, …); the verifier
# resolves each one to its content-addressed evidence_id internally.
# Pairs naturally with grammar-constrained inference (vLLM guided_json,
# Claude/GPT-4 native JSON mode, Qwen 3.6 reasoner) where schema-
# conformance is enforced at generation time. The lenient pre-parser
# above keeps the path survivable on inference paths without grammar
# guidance.
# ---------------------------------------------------------------------------
# JSON Schema for the model's claim-lattice output, composed bottom-up.
# Key order is kept identical to the nested-literal form so any
# serialization of the schema is unchanged.

# ``evidence_ids``: an array of strings.
_CLAIM_EVIDENCE_IDS_SCHEMA = {"type": "array", "items": {"type": "string"}}

# One claim: exactly ``text`` and ``evidence_ids``, nothing else.
_CLAIM_OBJECT_SCHEMA = {
    "type": "object",
    "properties": {
        "text": {"type": "string"},
        "evidence_ids": _CLAIM_EVIDENCE_IDS_SCHEMA,
    },
    "required": ["text", "evidence_ids"],
    "additionalProperties": False,
}

# Top level: ``{"claims": [<claim>, ...]}`` with no extra keys.
CLAIM_LATTICE_JSON_SCHEMA = {
    "type": "object",
    "properties": {
        "claims": {"type": "array", "items": _CLAIM_OBJECT_SCHEMA},
    },
    "required": ["claims"],
    "additionalProperties": False,
}
[docs]
def verify_claim_lattice_json(
    answer_json_text: str,
    evidence_map,
    *,
    allowed_source_roles: tuple[str, ...] = DEFAULT_ALLOWED_SOURCE_ROLES,
    max_evidence_per_claim: int = DEFAULT_MAX_POINTERS_PER_CLAIM,
    min_citation_coverage: float = DEFAULT_MIN_CITATION_COVERAGE,
    max_claims_per_answer: int = DEFAULT_MAX_CLAIMS_PER_ANSWER,
    subject_tokens_absent_threshold: int = DEFAULT_SUBJECT_TOKENS_ABSENT_THRESHOLD,
    question: str | None = None,
    warrant_check_enabled: bool = True,
    deflection_check_enabled: bool = True,
    warrant_chain_roots: frozenset[str] = frozenset(),
) -> dict:
    """Deterministic verifier for ``answer_mode="claim_lattice"`` (JSON).

    Parses the model's JSON output (lenient pre-parser handles markdown
    fences / preamble / curly quotes / trailing commas), validates the
    schema, then runs the same hard checks as ``verify_claim_lattice``
    but reading ``evidence_ids`` from the JSON claim objects.

    2026-04-30: switched from content-addressed evidence_ids
    (``Eed1b6e396``) to pointer_ids (``E1``, ``E2``, …) in the prompt
    & JSON output. Hermes-3-8B was fabricating plausible content-
    addressed IDs (``E1b6e396``-style near-misses) on cross-document
    relationship questions; the verifier correctly rejected them as
    UNKNOWN_EVIDENCE_ID but the answer text was often factually
    correct, leaving us with honest UNGROUNDED on right answers.
    Pointer IDs are short, enumerable, and fabrication-obvious. The
    runtime still resolves each pointer_id to its content-addressed
    evidence_id internally and stores that in ``evidence_id_pairs``
    (cache/run-DAG continuity); only the prompt-facing surface
    changes.

    Hard checks:

    1. JSON parses (lenient). Failure → SCHEMA_INVALID, UNGROUNDED.
    2. Top-level is ``{"claims": [...]}``.
    3. Each claim is ``{"text": str, "evidence_ids": [str, ...]}``.
    4. Each evidence_id resolves in the runtime-built evidence map
       (no model-invented IDs).
    5. Resolved entry's ``source_role`` is in ``allowed_source_roles``.
    6. Claim text contains no double-quote characters anywhere.
    7. Claim text non-empty.
    8. Claim's content tokens textually overlap the cited evidence span.
    9. ``len(evidence_ids) <= max_evidence_per_claim``.

    After the base STRICT / HYBRID / UNGROUNDED classification, four
    sidecar checks run over the claims that resolved at least one id
    (warrant, title relevance, subject-tokens-absent, deflection). Each
    can demote STRICT to HYBRID; all-claims TITLE_MISMATCH demotes to
    UNGROUNDED.

    Parameters:
        answer_json_text: raw model output; ``None``/empty is parsed as
            the empty string.
        evidence_map: runtime-built evidence entries; each entry is read
            for ``evidence_id``, ``source_role``, ``span``, ``title`` and
            ``source_root``.
        allowed_source_roles: roles a cited entry may have (check 5).
        max_evidence_per_claim: ceiling for check 9; exceeding it adds a
            TOO_MANY_EVIDENCE_IDS violation but the ids are still checked.
        min_citation_coverage: forwarded as ``min_coverage`` to the
            claim/evidence overlap test (check 8).
        max_claims_per_answer: exceeding it adds TOO_MANY_CLAIMS; claims
            are still verified individually.
        subject_tokens_absent_threshold: minimum number of absent tokens
            that flags SUBJECT_TOKENS_ABSENT; ``0`` disables the check.
        question: the user's question; the subject-tokens and deflection
            checks are skipped when it is falsy.
        warrant_check_enabled: toggles the warrant check.
        deflection_check_enabled: toggles the deflection check.
        warrant_chain_roots: source roots for which a failed lexical
            warrant is recorded as proven instead of missing.

    Returns a verdict in the same shape as ``verify_claim_lattice`` plus
    a ``json_fixups`` field naming any drift the lenient parser had to
    peel (``"fence"`` / ``"prose_trim"`` / ``"curly_quotes"`` /
    ``"trailing_comma"``). Empty list = strict JSON parse on first try.
    ``n_quotes`` counts ``max(1, len(evidence_ids))`` per claim that
    reached the quote check or id resolution; ``n_verified`` counts ids
    that passed every per-id check. Every ``claim_statuses`` row carries
    ``claim_idx`` (position in the model's ``claims`` array) so the
    sidecar violations and ``*_claim_idxs`` lists point at a real claim.
    """
    from arborist.qa.evidence import (
        evidence_map_by_pointer_id as _by_pointer,
        render_claim_lattice as _render,
    )

    linked_statuses = ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL")

    def _cited_objects(status_row: dict) -> list:
        # Resolve a status row's content-addressed evidence_ids back to
        # their evidence entries, dropping ids that no longer resolve.
        resolved = (
            evidence_map_by_evidence_id_local(evidence_map, cited_eid)
            for cited_eid in status_row.get("evidence_ids") or []
        )
        return [entry for entry in resolved if entry is not None]

    by_pointer = _by_pointer(evidence_map)
    violations: list[dict] = []
    claim_statuses: list[dict] = []
    unverified: list[str] = []
    json_fixups: list[str] = []

    # --- Parse + top-level schema -------------------------------------
    parsed = None
    try:
        parsed, json_fixups = _lenient_json_parse(answer_json_text or "")
    except Exception as exc:
        # Boundary catch: any parser failure becomes a recorded
        # SCHEMA_INVALID violation rather than crashing the verifier.
        violations.append({
            "kind": "SCHEMA_INVALID",
            "reason": f"json parse: {str(exc)[:200]}",
        })
    if parsed is not None and not isinstance(parsed, dict):
        violations.append({
            "kind": "SCHEMA_INVALID",
            "reason": f"top-level not object (got {type(parsed).__name__})",
        })
        parsed = None
    raw_claims = (parsed or {}).get("claims") if parsed is not None else None
    if parsed is not None and not isinstance(raw_claims, list):
        violations.append({
            "kind": "SCHEMA_INVALID",
            "reason": "missing or non-list 'claims'",
        })
        raw_claims = None

    # Claim-count ceiling — same defense-in-depth signal as the
    # pointer verifier. A "tell me all there is to know" prompt
    # shape can spam encyclopedic claims; cap demotes the verdict
    # so the runaway is operator-visible regardless of per-claim
    # verification success.
    if isinstance(raw_claims, list) and len(raw_claims) > max_claims_per_answer:
        violations.append({
            "kind": "TOO_MANY_CLAIMS",
            "n_claims": len(raw_claims),
            "max": max_claims_per_answer,
        })

    # --- Per-claim checks ---------------------------------------------
    n_pairs = 0
    n_pairs_verified = 0
    valid_claims: list[dict] = []
    evidence_id_pairs: list[list[str]] = []
    for idx, c in enumerate(raw_claims or []):
        if not isinstance(c, dict):
            violations.append({
                "kind": "SCHEMA_INVALID",
                "claim_idx": idx,
                "reason": f"claim not object (got {type(c).__name__})",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": "", "evidence_ids": [],
                "status": "SCHEMA_INVALID", "reasons": ["not_object"],
            })
            continue
        claim_text = c.get("text") or ""
        eids = c.get("evidence_ids") or []
        if not isinstance(claim_text, str) or not isinstance(eids, list):
            violations.append({
                "kind": "SCHEMA_INVALID", "claim_idx": idx,
                "reason": "claim shape: text=str, evidence_ids=list[str]",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": str(claim_text)[:200], "evidence_ids": [],
                "status": "SCHEMA_INVALID", "reasons": ["bad_field_types"],
            })
            continue
        # Manual-quote prohibition (same rule as pointer mode).
        if _has_manual_quote(claim_text):
            violations.append({
                "kind": "MANUAL_QUOTE_VIOLATION", "claim_idx": idx,
                "claim_text": claim_text[:200],
            })
            unverified.append(claim_text)
            claim_statuses.append({
                "claim_idx": idx,
                "text": claim_text, "evidence_ids": eids,
                "status": "MANUAL_QUOTE_VIOLATION",
                "reasons": ["double_quote_in_text"],
            })
            n_pairs += max(1, len(eids))
            continue
        if not claim_text.strip():
            violations.append({
                "kind": "SCHEMA_INVALID", "claim_idx": idx,
                "reason": "empty claim text",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": "", "evidence_ids": eids,
                "status": "SCHEMA_INVALID", "reasons": ["empty_text"],
            })
            continue
        if len(eids) > max_evidence_per_claim:
            # Recorded as a violation only; the ids are still resolved
            # below so the audit trail shows what each one pointed at.
            violations.append({
                "kind": "TOO_MANY_EVIDENCE_IDS", "claim_idx": idx,
                "claim_text": claim_text[:200],
                "n_ids": len(eids), "max": max_evidence_per_claim,
            })
        # Per-id resolution + checks. ``eids`` are pointer_ids
        # (E1, E2, …) emitted by the model; we resolve each to its
        # EvidenceObject and capture the content-addressed
        # ``evidence_id`` for the cache/run-DAG handle. Pointer-style
        # IDs make fabrication obvious — if only E1-E10 were shown,
        # an emitted "E27" reads as a hallucination at the schema
        # check, not as a near-miss content-addressed string.
        per_id_results = []
        verified_pointer_ids: list[str] = []
        verified_evidence_ids: list[str] = []
        for eid in eids:
            if not isinstance(eid, str):
                per_id_results.append({"eid": str(eid), "ok": False, "kind": "SCHEMA_INVALID"})
                # Every other per-id failure records a violation; without
                # this one a claim mixing a verified id with a non-string
                # id could still be classified STRICT.
                violations.append({
                    "kind": "SCHEMA_INVALID",
                    "claim_idx": idx,
                    "reason": f"evidence_id not string (got {type(eid).__name__})",
                })
                continue
            obj = by_pointer.get(eid)
            if obj is None:
                per_id_results.append({"eid": eid, "ok": False, "kind": "UNKNOWN_EVIDENCE_ID"})
                violations.append({
                    "kind": "UNKNOWN_EVIDENCE_ID",
                    "claim_idx": idx, "evidence_id": eid,
                })
                continue
            if obj.source_role not in allowed_source_roles:
                per_id_results.append({"eid": eid, "ok": False, "kind": "SOURCE_ROLE_BLOCKED"})
                violations.append({
                    "kind": "SOURCE_ROLE_BLOCKED",
                    "claim_idx": idx, "evidence_id": obj.evidence_id,
                    "pointer_id": eid,
                    "source_role": obj.source_role,
                })
                continue
            if not _claim_textually_overlaps_evidence(
                claim_text, obj.span, min_coverage=min_citation_coverage
            ):
                per_id_results.append({"eid": eid, "ok": False, "kind": "CITATION_MISMATCH"})
                violations.append({
                    "kind": "CITATION_MISMATCH",
                    "claim_idx": idx, "evidence_id": obj.evidence_id,
                    "pointer_id": eid,
                    "claim_text": claim_text[:200],
                })
                continue
            per_id_results.append({"eid": eid, "ok": True})
            verified_pointer_ids.append(eid)
            verified_evidence_ids.append(obj.evidence_id)
        # ``max(1, …)`` already counts an id-less claim as one unverified
        # pair, so the NO_EVIDENCE_POINTER branch below must not add
        # another (it previously did, counting such claims twice).
        n_pairs += max(1, len(eids))
        n_pairs_verified += len(verified_pointer_ids)
        if not eids:
            claim_statuses.append({
                "claim_idx": idx,
                "text": claim_text, "evidence_ids": [],
                "status": "NO_EVIDENCE_POINTER",
                "reasons": ["no_evidence_ids"],
            })
            unverified.append(claim_text)
            continue
        if len(verified_pointer_ids) == len(eids):
            status = "EVIDENCE_LINKED"
        elif verified_pointer_ids:
            status = "EVIDENCE_LINKED_PARTIAL"
        else:
            # No id verified: the claim status takes the first per-id
            # failure kind, in the order the model listed the ids.
            kinds = [r["kind"] for r in per_id_results if not r["ok"]]
            status = kinds[0] if kinds else "UNKNOWN_EVIDENCE_ID"
            unverified.append(claim_text)
        # claim_statuses records BOTH ids: pointer (what model wrote)
        # and content-addressed (run-stable handle). Keeps the audit
        # trail legible at both layers. ``claim_idx`` is what the
        # sidecar checks below read back to label their violations.
        claim_statuses.append({
            "claim_idx": idx,
            "text": claim_text,
            "pointer_ids": list(eids),
            "evidence_ids": list(verified_evidence_ids),
            "status": status,
            "reasons": [r["kind"] for r in per_id_results if not r["ok"]],
        })
        if verified_pointer_ids:
            # Renderer takes the pointer-id form (model's view) and the
            # by_pointer index; cache/run-DAG get the content-addressed
            # evidence_ids (run-stable form).
            valid_claims.append({
                "text": claim_text,
                "pointer_ids": verified_pointer_ids,
            })
            evidence_id_pairs.append(list(verified_evidence_ids))

    rendered_text = _render(valid_claims, by_pointer) if valid_claims else ""

    # --- Base classification ------------------------------------------
    if n_pairs_verified > 0 and not violations:
        audit_mode = "STRICT"
    elif n_pairs_verified > 0:
        audit_mode = "HYBRID"
    else:
        audit_mode = "UNGROUNDED"

    # Warrant-lite — same relation-question hard check as the pointer
    # variant. See verify_claim_lattice for the rationale (Ticket H,
    # 2026-05-01). Identical demote-to-HYBRID semantics; the JSON
    # variant carries the same WARRANT_MISSING violations & the same
    # warrant_missing_claim_idxs field on the verdict.
    warrant_missing_claims: list[int] = []
    warrant_proven_claim_idxs: list[int] = []
    if warrant_check_enabled:
        for cs in claim_statuses:
            if cs.get("status") not in linked_statuses:
                continue
            cited_evidence_objs = _cited_objects(cs)
            cited_spans = [obj.span for obj in cited_evidence_objs]
            ok, missing = warrant_check(
                cs.get("text") or "", cited_spans, question=question
            )
            if ok:
                continue
            # Phase 3 of #000031: see verify_claim_lattice for
            # the full rationale. Same suppression logic — if
            # the cited chunk's document has a warrant-resolver
            # derivation row, the warrant chain exists at the
            # Merkle level even if the lexical anchor doesn't
            # fire.
            cited_source_roots = [obj.source_root for obj in cited_evidence_objs]
            if warrant_chain_roots and any(
                r in warrant_chain_roots for r in cited_source_roots
            ):
                warrant_proven_claim_idxs.append(cs.get("claim_idx"))
                continue
            warrant_missing_claims.append(cs.get("claim_idx"))
            violations.append({
                "kind": "WARRANT_MISSING",
                "claim_idx": cs.get("claim_idx"),
                "missing_anchors": missing,
                "rationale": (
                    "claim asserts an answer entity or specific date "
                    "not present in any cited span — pointer-linked "
                    "but warrant missing"
                ),
            })
    if warrant_missing_claims and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    # Rule 8 — Title-relevance check (mirrors the pointer variant).
    # See verify_claim_lattice for rationale (2026-05-02 spin-glass
    # case). Demote-to-HYBRID semantics; JSON variant emits the same
    # TITLE_MISMATCH violation kind & title_mismatch_claim_idxs field.
    title_mismatch_claims: list[int] = []
    for cs in claim_statuses:
        if cs.get("status") not in linked_statuses:
            continue
        cited_titles = [obj.title for obj in _cited_objects(cs)]
        if not cited_titles:
            continue
        any_overlap = any(
            _claim_title_overlap(cs.get("text") or "", t)
            for t in cited_titles
        )
        if not any_overlap:
            title_mismatch_claims.append(cs.get("claim_idx"))
            violations.append({
                "kind": "TITLE_MISMATCH",
                "claim_idx": cs.get("claim_idx"),
                "claim_text": (cs.get("text") or "")[:200],
                "cited_titles": cited_titles,
                "rationale": (
                    "no cited source's title shares a content token "
                    "with the claim — pointer-linked but the cited "
                    "document is structurally unrelated to the claim"
                ),
            })
    if title_mismatch_claims and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    # Tightening (2026-05-02): mirrors the pointer-variant promotion.
    # When EVERY resolving claim has TITLE_MISMATCH, demote to
    # UNGROUNDED — the substrate has zero structural grounding for
    # the user's question. See verify_claim_lattice for full rationale.
    n_resolving = sum(
        1 for cs in claim_statuses if cs.get("status") in linked_statuses
    )
    if (
        title_mismatch_claims
        and n_resolving > 0
        and len(title_mismatch_claims) == n_resolving
    ):
        audit_mode = "UNGROUNDED"

    # Rule 9 — Subject-tokens-absent / premise-parroting check. See
    # `verify_claim_lattice` for the full rationale.
    subject_absent_claims: list[int] = []
    if question and subject_tokens_absent_threshold > 0:
        for cs in claim_statuses:
            if cs.get("status") not in linked_statuses:
                continue
            cited_spans = [obj.span for obj in _cited_objects(cs) if obj.span]
            if not cited_spans:
                continue
            absent = _parroted_subject_tokens_absent(
                question, cs.get("text") or "", cited_spans
            )
            if len(absent) >= subject_tokens_absent_threshold:
                subject_absent_claims.append(cs.get("claim_idx"))
                violations.append({
                    "kind": "SUBJECT_TOKENS_ABSENT",
                    "claim_idx": cs.get("claim_idx"),
                    "claim_text": (cs.get("text") or "")[:200],
                    "absent_tokens": sorted(absent),
                    "rationale": (
                        f"{len(absent)} question-distinctive tokens echoed in "
                        f"the claim are absent from cited evidence — claim "
                        f"parrots question premise without anchoring it"
                    ),
                })
    if subject_absent_claims and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    # Deflection check (parallel to pointer-variant promotion).
    deflection_detected = False
    if deflection_check_enabled and question and rendered_text:
        # Deferred import to avoid pulling arborist.compress + arborist.store
        # at verify.py module-load time when callers may not need them.
        from arborist.qa.inspect import diagnose_deflection

        signal = diagnose_deflection(question, rendered_text)
        if signal.get("kind") == "deflection":
            deflection_detected = True
            violations.append({
                "kind": "DEFLECTION_DETECTED",
                "subject_anchor": signal.get("subject_anchor"),
                "overlap_ratio": signal.get("overlap_ratio"),
                "rationale": (
                    "answer's content tokens omit the question's "
                    "subject anchor — model answered an adjacent "
                    "grounded question rather than the user's "
                    "specific one"
                ),
            })
    if deflection_detected and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    # Same `verifier_method` as the pointer variant ("claim_lattice")
    # so the providence_cache CHECK constraint accepts both. The mode
    # is disambiguated downstream via `answer_mode` on the run-DAG &
    # via the JSON-only `json_fixups` field on this verdict.
    return {
        "n_quotes": n_pairs,
        "n_verified": n_pairs_verified,
        "audit_mode": audit_mode,
        "unverified_quotes": unverified,
        "verifier_method": "claim_lattice",
        "claim_statuses": claim_statuses,
        "violations": violations,
        "rendered_text": rendered_text,
        "evidence_id_pairs": evidence_id_pairs,
        "json_fixups": json_fixups,
        "warrant_missing_claim_idxs": warrant_missing_claims,
        "warrant_proven_claim_idxs": warrant_proven_claim_idxs,
        "title_mismatch_claim_idxs": title_mismatch_claims,
        # Computed above and returned by the pointer variant too; it was
        # previously dropped from this verdict.
        "deflection_detected": deflection_detected,
    }