# Source code for arborist.qa.verify

"""Post-LLM faithfulness check: did the answer ground its claims in context?

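Three layered strategies, tried in order. The first one that finds evidence
classifies the answer. ``verifier_method`` on the result records which path
fired so the audit chain stays diagnostic (``'paraphrase'`` when the span
strategy accepted at least one span through its token-coverage fallback,
``'none'`` when no strategy found evidence).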

1. **quote** — model wrapped claims in double quotes per system prompt.
   Strongest signal — explicit, verbatim, model-asserted.
2. **span** — no quotes, but bullet/sentence-level lines from the answer
   appear verbatim in context. Catches models that quote inline without
   ``"..."`` marks.
3. **entity** — no quotes and no span match, but multi-word proper-noun
   phrases from the answer appear verbatim in context. Catches the
   Wikipedia-infobox-to-prose case: the model paraphrases structure so
   spans diverge, but every named entity is intact and grounded.

Each strategy classifies into v9.8's audit-mode trichotomy (RAG-adapted
vocabulary; substrate calls UNGROUNDED "VISUAL"):

- **STRICT** — every evidence unit (>=1) verifies verbatim against context
- **HYBRID** — some verify, others do not (mixed source / emergent)
- **UNGROUNDED** — no evidence, or none verify (purely emergent)

``unverified_quotes`` (kept under that name for schema continuity) collects
spans the model produced that don't appear in any source — the
corpus-growth signal mined by `arborist emergent`.

Hard rule (CLAUDE.md "soft hash vs hard hash"): every check is a lexical
substring test under norm-v1 + lowercase canonicalization. No embeddings,
no semantic similarity, no fuzzy alignment. The contract is "this token
sequence either is or isn't in the context."

Wikitext context is run through ``arborist.wikitext.to_base`` before the
substring test. The corpus stores raw wikitext (so the link graph is
recoverable from any page), but the LLM produces clean prose. Without
the strip, every wikilink-carrying source paragraph compares as
"different surface form" and the verifier wrongly reports UNGROUNDED on
genuine source-grounded quotes. With the strip, paraphrases of *markup*
(``[[Cloud]]`` vs ``Cloud``) verify, while paraphrases of *prose* still
flag honestly. mwparserfromhell is an optional dep; if absent, the
strip is a no-op and verification falls back to today's behavior.
"""

from __future__ import annotations

import re
import unicodedata

from arborist.qa.warrant import warrant_check

# Deferred import: arborist.qa.inspect imports arborist.compress &
# arborist.store at module load. The verifier doesn't need either
# until deflection actually runs, so defer to call-site to keep
# import order clean if anything else imports verify.py.
try:
    from arborist.wikitext import to_base as _wikitext_to_base
except ImportError:  # pragma: no cover
    _wikitext_to_base = None


# Locate every double-quote character (ASCII or curly). Sequential
# pairing in extract_quotes() turns these into intentional (open, close)
# pairs: 1st & 2nd char, 3rd & 4th, etc. Pure-regex pairing fails on
# adjacent quote pairs like `"title" prose "quote"` — the regex captures
# `prose` as a "quoted span" because every `"` looks like both an opener
# and a closer to it. Single quotes / apostrophes are not in the class,
# so contractions and possessives never open or close a span.
_QUOTE_CHAR_RE = re.compile(r'["“”]')

# Bullet markers at line start: -, *, +, •, 1., 2), etc. At least one
# whitespace character must follow the marker, so a leading "-5" or
# "*bold*" is not treated as a bullet.
_BULLET_RE = re.compile(r'^\s*(?:[-*+•]|\d+[.)])\s+')

# Sentence boundary: punctuation + whitespace + capital letter. Both
# sides are zero-width assertions, so splitting consumes only the
# whitespace: the terminator stays on the preceding sentence. Only ASCII
# capitals (A-Z) count as a sentence start.
_SENT_RE = re.compile(r'(?<=[.!?])\s+(?=[A-Z])')

# Multi-word capitalized phrase. Two or more whitespace-separated tokens,
# each starting with a capital letter (allowing initials like "A.", hyphens
# like "Carrie-Anne", and trailing lowercase like "Smith"). Matches "Keanu
# Reeves", "Thomas A. Anderson", "Carrie-Anne Moss", "Agent Smith". Skips
# single capitalized words to avoid sentence-starter false positives.
# Because of the closing \b, an initial can only sit mid-phrase: a phrase
# cannot END on "A." unless a word character follows the dot directly.
_PROPER_NOUN_RE = re.compile(
    # Token gap is non-newline whitespace so a phrase never crosses a
    # paragraph break. Caught a real case where "Joe Pantoliano\n\nThe
    # sources" matched as one phrase via \s+.
    r"\b[A-Z][A-Za-z'’\-]*(?:[ \t]+(?:[A-Z]\.|[A-Z][A-Za-z'’\-]+))+\b"
)

# Minimum length, in characters, for a quoted span to count as evidence
# (checked in extract_quotes after the trailing citation is stripped).
MIN_QUOTE_CHARS = 8
# Minimum length, in characters, for a bullet/sentence span to count as a
# claim unit (checked in extract_claim_spans).
MIN_SPAN_CHARS = 12

# Trailing parenthetical the model often appends to verbatim source
# prose, defeating substring match even when the prose itself IS in
# the corpus. Examples we strip:
#
#     "...lightning-based attacks. (Source: https://...)"
#     "...invented in 1976. (citing Wikipedia)"
#     "...protagonist of the game. (see Pikachu_(character))"
#     "...released in 1997 (https://en.wikipedia.org/...)"
#
# Conservative regex: only strips a SINGLE trailing parenthetical at
# end-of-string after optional whitespace, and only when the contents
# either start with a recognized citation cue OR contain a URL. Refuses
# to strip parentheticals that look like genuine prose (e.g.
# "Pikachu (a Pokémon species)").
_TRAILING_CITATION_RE = re.compile(
    r"""
    \s*\(\s*
    (?:                                           # one of:
        (?:source|src|citing|see|ref|reference|from)  # citation cue word
        \s*:?\s*[^()]*                            #   optional content
      |                                           # OR
        https?://\S+\s*[^()]*                     # URL-led parenthetical
    )
    \s*\)\s*$
    """,
    re.IGNORECASE | re.VERBOSE,
)


def _strip_trailing_citation(text: str) -> str:
    """Drop a trailing `(Source: ...)`-like parenthetical, if any.

    Idempotent: applies once. The model rarely chains citations, so we
    don't loop. Returns the input unchanged if no match.
    """
    return _TRAILING_CITATION_RE.sub("", text).rstrip()


# Paraphrase strategy thresholds. The paraphrase check is a fallback
# INSIDE the span strategy (see _check_each_with_paraphrase): it is tried
# per span, only for spans that failed the verbatim substring test, and
# before the entity strategy runs. It accepts a span as
# "paraphrase-verified" when token coverage in the source context reaches
# ``DEFAULT_PARAPHRASE_COVERAGE`` and the span has at least
# ``DEFAULT_PARAPHRASE_MIN_TOKENS`` content tokens (length >= 4 chars,
# stopwords excluded — see _token_coverage).
#
# Soft-signal note: token overlap is a heuristic, NOT byte-equivalence.
# The hard chain still records `verifier_method = 'paraphrase'` so an
# auditor can distinguish lexical-verbatim from paraphrase-overlap.
# A span verified via paraphrase contributes to `n_verified` but the
# audit_mode trichotomy stays unchanged: STRICT requires all units
# (quote/span/entity/paraphrase) to verify; HYBRID = some verify some
# don't; UNGROUNDED = none.
DEFAULT_PARAPHRASE_COVERAGE = 0.85
DEFAULT_PARAPHRASE_MIN_TOKENS = 4

# Entity-path policies. Entity-existence in source is circumstantial, not
# claim-level proof — the model could correctly name entities while making
# claims around them that came from training. These four policies span the
# trade-off: max-trust to no-trust.
#
#   strict     all entities verify → STRICT (legacy behavior; overclaims)
#   hybrid     any entity verifies → HYBRID (honest cap; safe default)
#   drop       skip entity path entirely → UNGROUNDED (most conservative)
#   proximity  STRICT only if N verified entities cluster within W chars
#              of each other in context (e.g. an infobox cast list).
#              Otherwise demotes to HYBRID/UNGROUNDED based on partial match.
#
# verify_quotes() rejects any policy name not listed here with ValueError.
ENTITY_POLICIES = ("strict", "hybrid", "drop", "proximity")
DEFAULT_ENTITY_POLICY = "proximity"

# Proximity tuning. N entities within W chars of each other, measured
# between first-occurrence offsets in the normalized context (see
# _has_entity_cluster).
DEFAULT_PROXIMITY_N = 3
DEFAULT_PROXIMITY_WINDOW = 300

# Framing phrases that aren't real claims even when they're in source. We
# strip these from the answer before span/entity extraction so they don't
# become noise in unverified_quotes.
_FRAMING_PREFIXES = (
    "based on the provided sources",
    "based on the provided source",
    "based on the source",
    "based on the document",
    "the sources do not",
    "the source does not",
    "the document does not",
    "according to the sources",
    "according to the source",
)


def _normalize(s: str) -> str:
    """norm-v1 + lowercase. Same canonicalization as chunk leaf hashing,
    plus case-folding so the verifier doesn't fail on capitalization drift
    between source prose and the model's quoted span."""
    s = unicodedata.normalize("NFC", s)
    s = " ".join(s.split())
    return s.lower()


def _is_framing(span: str) -> bool:
    n = _normalize(span)
    return any(n.startswith(p) for p in _FRAMING_PREFIXES)


def extract_quotes(answer_text: str) -> list[str]:
    """Collect double-quoted spans of at least MIN_QUOTE_CHARS characters.

    Quote characters are paired sequentially — (1st, 2nd), (3rd, 4th), … —
    and each pair brackets one quoted span. Text between two pairs is the
    model's own framing prose and is never captured, which is what keeps
    `"title" prose "quote"` from yielding `prose` as a phantom quote (and
    dragging the classification down to HYBRID). An unpaired final quote
    character is ignored.
    """
    marks = [hit.start() for hit in _QUOTE_CHAR_RE.finditer(answer_text)]
    kept: list[str] = []
    for opener, closer in zip(marks[0::2], marks[1::2]):
        # Remove a model-appended citation BEFORE the length gate, so a
        # span that is only long enough because of its "(Source: ...)"
        # tail does not slip past the minimum.
        body = _strip_trailing_citation(answer_text[opener + 1:closer])
        if len(body) >= MIN_QUOTE_CHARS:
            kept.append(body)
    return kept
def extract_claim_spans(answer_text: str) -> list[str]:
    """Split the answer into claim units for substring testing.

    Each line has its bullet marker removed and is split into sentences.
    Every sentence is trimmed of trailing ``.:,;`` punctuation and of a
    model-appended ``(Source: ...)`` parenthetical. Sentences shorter than
    MIN_SPAN_CHARS or opening with a framing phrase are discarded.

    Returns the surviving spans in answer order.
    """
    trailing_punct = ".:,;"
    spans: list[str] = []
    for line in answer_text.splitlines():
        stripped = _BULLET_RE.sub("", line).strip()
        if not stripped:
            continue
        for sent in _SENT_RE.split(stripped):
            sent = sent.strip().rstrip(trailing_punct)
            # Drop model-appended `(Source: ...)` parentheticals so a
            # verbatim sentence doesn't fail substring match because of
            # an inserted citation.
            sent = _strip_trailing_citation(sent)
            # Trim punctuation again: in "…attacks. (Source: …)" the
            # period sits BEFORE the citation, so the first trim cannot
            # see it. Without this the span keeps a terminator that the
            # same sentence without a citation would have lost.
            sent = sent.rstrip(trailing_punct).rstrip()
            if len(sent) < MIN_SPAN_CHARS:
                continue
            if _is_framing(sent):
                continue
            spans.append(sent)
    return spans
def extract_proper_nouns(answer_text: str) -> list[str]:
    """Return the distinct multi-word capitalized phrases, in first-seen order.

    Only multi-word phrases qualify: a lone capitalized word at the start
    of a sentence ("Based", "Now", "However") is too noisy, whereas
    "Keanu Reeves" or "Thomas A. Anderson" are dependable proper-noun
    candidates that substring-test cleanly against prose or wikitext.
    """
    # The pattern has no capturing groups, so findall yields whole matches;
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(_PROPER_NOUN_RE.findall(answer_text)))
def _classify(verified: list[str], unverified: list[str]) -> str: if verified and not unverified: return "STRICT" if verified: return "HYBRID" return "UNGROUNDED" def _has_entity_cluster( verified_entities: list[str], norm_ctx: str, n: int, window: int, ) -> bool: """Do at least `n` distinct verified entities appear within `window` chars of each other in `norm_ctx`? Crude proxy for "the source has a section about these entities" (cast list, infobox, roster) vs "the source incidentally mentions them in scattered prose." """ if len(verified_entities) < n: return False positions: list[int] = [] for e in verified_entities: idx = norm_ctx.find(_normalize(e)) if idx >= 0: positions.append(idx) if len(positions) < n: return False positions.sort() for i in range(len(positions) - n + 1): if positions[i + n - 1] - positions[i] <= window: return True return False def _check_each(items: list[str], norm_ctx: str) -> tuple[list[str], list[str]]: verified: list[str] = [] unverified: list[str] = [] for it in items: if _normalize(it) in norm_ctx: verified.append(it) else: unverified.append(it) return verified, unverified def _token_coverage( span: str, norm_ctx: str, *, min_token_len: int = 4, ) -> tuple[float, int]: """Fraction of meaningful tokens from `span` that appear anywhere in `norm_ctx`. Returns ``(coverage_fraction, content_token_count)``. Two filters narrow the token set to topical content: 1. ``len(t) >= min_token_len`` (default 4). Excludes ``the / a / an / of / to / is / in / on / at`` etc. — short function words. 2. Stopword exclusion. Common English filler 4+ chars long (``from``, ``with``, ``have``, ``this``, ``that``, ``which``, ``where``, ``their``, ``would``, ``could``, etc.) match almost any English text & inflate coverage scores for cases where the *topical* content is missing. 
Filtering them tightens the signal: a span that paraphrases stylistic choice (replaces ``with`` → ``from``) still scores 1.0 if topical tokens match; a span where topical tokens are missing scores lower because the denominator dropped. The 0.85 paraphrase-coverage threshold is calibrated for THIS cleaner signal — lowering the threshold would promote fabrications (Q1 Batman case: "wealthy/businessman/resides" missing, model- invented). Tightening the denominator instead keeps the threshold stable and the discrimination crisp. Strip from numerator AND denominator so a span composed entirely of stopwords (e.g. ``"have been there"``) returns coverage 0.0 rather than dividing by zero. """ nspan = _normalize(span) # Strip per-token leading/trailing punctuation BEFORE the length # gate and the stopword filter so `batman,` and `wayne.` line up # with bare `batman`/`wayne` in context. We don't apply this in # _normalize() because the substring path needs punctuation- # preserving canonicalization (a span ending in `."` is a # different surface from one ending without). tokens: list[str] = [] for raw in nspan.split(): t = raw.strip(_TOKEN_PUNCT_STRIP) if len(t) < min_token_len: continue if t in _ENGLISH_STOPWORDS: continue tokens.append(t) if not tokens: return (0.0, 0) present = sum(1 for t in tokens if t in norm_ctx) return (present / len(tokens), len(tokens)) # Per-token punctuation stripped before the coverage check. Tokens like # `wayne,` and `bruce!` line up with bare `wayne` / `bruce` in context. # We don't strip apostrophes (``'``) so possessives stay distinct: # ``batman's`` is a different content token from ``batman``. _TOKEN_PUNCT_STRIP = ".,;:!?\"()[]{}" # Digit-run extractor for the paraphrase numeric-agreement gate. A run is # one or more digits with optional *internal* thousands-comma separators # ("8,849"); the comma is stripped for comparison so "8,849" and "8849" # collapse, but "300" stays distinct from "300000" (← "300,000"). 
Decimal # fractions split at the dot ("3.14" → {"3", "14"}) — fine for the # near-miss patterns this gate targets (years / counts / magnitudes). _DIGIT_RUN_RE = re.compile(r"\d[\d,]*\d|\d") def _numeric_signature(text: str) -> frozenset[str]: """Comma-stripped digit-runs in ``text``. Used by :func:`_check_each_with_paraphrase`: a span that token-covers the source but asserts a *number the source does not contain* is not paraphrase-grounded — it's a near-miss the lexical coverage check can't see ("Water boils at 50 degrees" vs source "...100 degrees..." token-covers 100% because "50"/"100" aren't content tokens). The gate is conservative on legitimate paraphrases: a number written with or without thousands commas matches; a number absent from the source (a rounding, a magnitude error, a swapped year) does not — and a rounding-paraphrase demoting from paraphrase-grounded to unverified is the honest call (it isn't a verbatim grounding). """ return frozenset(m.group(0).replace(",", "") for m in _DIGIT_RUN_RE.finditer(text)) _SENTENCE_BREAK_RE = re.compile(r"[.!?](?=\s)") def _is_single_sentence(text: str) -> bool: """One sentence — no internal ``. `` / ``! `` / ``? `` break (a trailing terminator with nothing after it is fine). A numbered/bulleted list ("1. … 2. …") or a multi-sentence summary is not single; "Insulin was discovered by Alexander Fleming." is. Conservative: a borderline abbreviation ("8,849 m. tall.") reads as not-single, so the gate it guards simply doesn't fire — the safe direction.""" return not _SENTENCE_BREAK_RE.search(text.strip()) def _entity_salient_disagrees(answer_text: str, norm_ctx: str) -> bool: """Entity-strategy gate (ticket #000048): does the answer assert a *salient near-proper-noun token* — a > 4-char Capitalized content token, stopword-filtered — or a digit-number that the source lacks? 
Used in :func:`verify_quotes` *only* on the weakest entity grounding — a single short sentence whose lone matching multi-word proper noun is not part of a cluster (caller checks ``len(verified) <= 1`` and :func:`_is_single_sentence`). There, "Insulin was discovered by Alexander Fleming" against "Penicillin was discovered by Alexander Fleming" matches on the shared "Alexander Fleming" while the *swapped subject* ("Insulin", capitalized, absent from the source) is exactly the falsehood — so we decline the grounding. The narrow caller-gate is what keeps a structured multi-claim summary (the Matrix cast list — many entities, a tight cluster — or the TMNT answer — a numbered list with parenthetical nicknames the source omits) untouched: model-added accurate detail in a real summary isn't a contradiction, and only the single-sentence-one-weak-match shape is. """ if not (_numeric_signature(_normalize(answer_text)) <= _numeric_signature(norm_ctx)): return True for raw in answer_text.split(): core = raw.strip(_TOKEN_PUNCT_STRIP) if len(core) <= 4 or not core[0].isupper(): continue low = core.lower() if low in _ENGLISH_STOPWORDS: continue if low not in norm_ctx: return True return False # Common English stopwords of length >= 4 chars. Hand-curated rather # than imported from NLTK to keep arborist dependency-light and the # behavior pinned to a known set. Tokens are normalized form # (lowercase, NFC). Includes auxiliaries, prepositions, pronouns, # wh-words, conjunctions, and high-frequency adverbs/quantifiers that # carry little topical signal. 
_ENGLISH_STOPWORDS = frozenset({
    # auxiliaries / be-forms (>= 4)
    "have", "been", "being", "were", "will", "would", "could", "should",
    "might", "must", "shall",
    # prepositions / particles (>= 4)
    "from", "with", "into", "onto", "upon", "over", "under", "after",
    "before", "between", "through", "across", "above", "below", "behind",
    "beside", "beyond", "during", "without", "within", "until", "since",
    "about", "around", "along", "among",
    # demonstratives / pronouns (>= 4)
    "this", "that", "these", "those", "their", "them", "they", "there",
    "here", "your", "yours", "ours", "mine",
    # wh-words (>= 4)
    "what", "when", "where", "which", "while", "whom", "whose", "whoever",
    "whatever", "wherever", "whenever",
    # conjunctions (>= 4)
    "because", "although", "however", "therefore", "though", "unless",
    # adverbs / quantifiers (>= 4)
    "also", "very", "much", "many", "more", "most", "less", "some", "such",
    "even", "ever", "just", "only", "than", "then", "still", "again",
    "always", "never", "often", "rather", "really", "quite", "well",
    "back", "next",
    # discourse (>= 4)
    "both", "each", "every", "like",
})


def _is_prose_span(span: str, *, min_lowercase_content: int = 2) -> bool:
    """Heuristic: is this span prose (paraphrase-eligible) rather than a
    list of proper nouns (better served by the entity strategy)?

    Looks at the ORIGINAL, un-normalized span and counts tokens that are
    at least 4 characters long and begin with a lowercase letter. A bare
    list such as ``"Keanu Reeves, Laurence Fishburne"`` has none and is
    left to the entity path; a sentence such as ``"Pikachu is a species
    of Pokémon creatures..."`` has several and qualifies.

    With the default threshold of 2, a fragment like "the cast..." (one
    lowercase content token) only qualifies when more lowercase prose is
    present, so a list with a leading "The" does not sneak through.
    """
    hits = 0
    for piece in span.split():
        # Trim surrounding punctuation first so "cast:" still counts.
        word = piece.strip(".,;:!?\"'()[]{}")
        if len(word) >= 4 and word[0].islower():
            hits += 1
            if hits >= min_lowercase_content:
                return True
    return False


def _build_claim_statuses(
    *,
    verified: list[str] | None = None,
    paraphrase: list[str] | None = None,
    unverified: list[str] | None = None,
    method: str,
) -> list[dict]:
    """Build the per-evidence-unit status list.

    Three labels, drawn from the toy-Hermes taxonomy:

        VERIFIED_QUOTE        unit substring-matched the normalized
                              context (quote, span, or entity strategy)
        SUPPORTED_PARAPHRASE  unit cleared the paraphrase token-coverage
                              threshold (>=85% topical tokens present)
        UNSUPPORTED           unit matched nothing

    Soft-signal labels (QUOTE_INTEGRITY_FAILED, SOURCE_MISMATCH,
    FALSIFIED) belong to the sidecar / falsification machinery, not to
    the binary verifier output — see the
    ``feedback_verifier_no_diagnostics`` discipline.

    Entries are emitted bucket by bucket (verified, paraphrase,
    unverified), keeping input order inside each bucket. Paraphrase
    entries always carry ``method="paraphrase"``; the other two buckets
    carry the caller's ``method``.
    """
    rows: list[dict] = [
        {"text": unit, "status": "VERIFIED_QUOTE", "method": method}
        for unit in (verified or [])
    ]
    rows += [
        {"text": unit, "status": "SUPPORTED_PARAPHRASE", "method": "paraphrase"}
        for unit in (paraphrase or [])
    ]
    rows += [
        {"text": unit, "status": "UNSUPPORTED", "method": method}
        for unit in (unverified or [])
    ]
    return rows


def _check_each_with_paraphrase(
    items: list[str],
    norm_ctx: str,
    *,
    paraphrase_coverage: float = DEFAULT_PARAPHRASE_COVERAGE,
    paraphrase_min_tokens: int = DEFAULT_PARAPHRASE_MIN_TOKENS,
) -> tuple[list[str], list[str], list[str]]:
    """Three-bucket variant of ``_check_each``.

    Each item lands in exactly one bucket:

    - STRICT — its normalized form is a substring of ``norm_ctx``.
    - PARAPHRASE — it is prose-shaped, its meaningful-token coverage is
      ``>= paraphrase_coverage``, it has at least
      ``paraphrase_min_tokens`` content tokens, AND every digit-run it
      asserts appears (comma-normalized) in ``norm_ctx``.
    - UNVERIFIED — everything else.

    The numeric-agreement gate (#000046) catches the near-miss that the
    coverage check is blind to: all content tokens present but the
    *number* wrong — "Water boils at 50 degrees" against a source saying
    100 covers 100% because "50"/"100" are too short to be content
    tokens. A rounding-paraphrase dropping to UNVERIFIED is the honest
    verdict; a number written with vs without thousands commas still
    matches.

    Returns ``(strict_verified, paraphrase_verified, unverified)``, each
    in input order.

    Soft-signal note: paraphrase verification is heuristic. Callers that
    need byte-equivalence (proof export to other peers, audit chain
    claims) must use ``_check_each`` directly.
    """
    exact: list[str] = []
    soft: list[str] = []
    missed: list[str] = []
    source_numbers = _numeric_signature(norm_ctx)
    for item in items:
        canon = _normalize(item)
        if canon in norm_ctx:
            exact.append(item)
        elif not _is_prose_span(item):
            # Lists of proper nouns (e.g. "Keanu Reeves, Laurence
            # Fishburne") are not paraphrase candidates; they fall
            # through to the entity strategy, where the proximity policy
            # can tell tight clusters from scattered mentions.
            missed.append(item)
        else:
            coverage, n_content = _token_coverage(item, norm_ctx)
            numbers_agree = _numeric_signature(canon) <= source_numbers
            accepted = (
                n_content >= paraphrase_min_tokens
                and coverage >= paraphrase_coverage
                and numbers_agree
            )
            (soft if accepted else missed).append(item)
    return exact, soft, missed
def verify_quotes(
    answer_text: str,
    context: str,
    *,
    entity_policy: str = DEFAULT_ENTITY_POLICY,
    proximity_n: int = DEFAULT_PROXIMITY_N,
    proximity_window: int = DEFAULT_PROXIMITY_WINDOW,
) -> dict:
    """Classify an answer's grounding against its retrieved context.

    Tries quote → span (verbatim, then paraphrase fallback) → entity
    verification in sequence. The quote strategy classifies as soon as
    any quote is extracted; the span and entity strategies classify only
    when at least one unit verifies, otherwise control falls through to
    the next strategy.

    ``entity_policy`` controls how the entity path classifies — see
    ``ENTITY_POLICIES``. The quote and span paths are unaffected; they
    are explicit-claim evidence and always classify per the trichotomy.
    ``proximity_n`` / ``proximity_window`` are only consulted under the
    ``"proximity"`` policy.

    Returns a dict with these keys::

        n_quotes:           int    # evidence units extracted (winning path)
        n_verified:         int    # of those, how many were verified
        audit_mode:         str    # STRICT | HYBRID | UNGROUNDED
        unverified_quotes:  [str]  # units we couldn't ground in context
        verifier_method:    str    # 'quote' | 'span' | 'paraphrase' |
                                   # 'entity' | 'none'
        claim_statuses:     [dict] # per-unit {text, status, method}

    Raises:
        ValueError: if ``entity_policy`` is not one of ``ENTITY_POLICIES``.
    """
    if entity_policy not in ENTITY_POLICIES:
        raise ValueError(
            f"entity_policy must be one of {ENTITY_POLICIES}, got {entity_policy!r}"
        )

    # Wikitext markup → plain prose. Identity if mwparserfromhell isn't
    # installed (extras: pip install 'arborist[wikitext]').
    if _wikitext_to_base is not None:
        context = _wikitext_to_base(context)
    norm_ctx = _normalize(context)

    # Strategy 1: explicit double-quoted spans.
    quotes = extract_quotes(answer_text)
    if quotes:
        verified, unverified = _check_each(quotes, norm_ctx)
        return {
            "n_quotes": len(quotes),
            "n_verified": len(verified),
            "audit_mode": _classify(verified, unverified),
            "unverified_quotes": unverified,
            "verifier_method": "quote",
            "claim_statuses": _build_claim_statuses(
                verified=verified, unverified=unverified, method="quote"
            ),
        }

    # Strategy 2: bullet/sentence spans. Try verbatim substring first;
    # then fall back to paraphrase (token-coverage) for items that didn't
    # substring-match. Spans verified via paraphrase contribute to
    # n_verified — the verifier_method label flips to "paraphrase" when
    # any soft-verified items are present so an auditor can tell.
    spans = extract_claim_spans(answer_text)
    if spans:
        strict, paraphrase, unverified = _check_each_with_paraphrase(
            spans, norm_ctx
        )
        verified = strict + paraphrase
        if verified:
            method = "paraphrase" if paraphrase else "span"
            return {
                "n_quotes": len(spans),
                "n_verified": len(verified),
                "audit_mode": _classify(verified, unverified),
                "unverified_quotes": unverified,
                "verifier_method": method,
                "claim_statuses": _build_claim_statuses(
                    verified=strict,
                    paraphrase=paraphrase,
                    unverified=unverified,
                    method="span",
                ),
            }

    # Strategy 3: multi-word proper nouns (entity grounding) — gated by
    # `entity_policy`. Entity-existence is weaker proof than quote or span;
    # the operator chooses how much weight to give it. Under "drop" the
    # entity path is skipped entirely and we fall through to UNGROUNDED.
    if entity_policy != "drop":
        entities = extract_proper_nouns(answer_text)
        if entities:
            verified, unverified = _check_each(entities, norm_ctx)
            if verified:
                if entity_policy == "strict":
                    # Legacy behavior: all match → STRICT. Overclaims.
                    mode = _classify(verified, unverified)
                elif entity_policy == "hybrid":
                    # Cap at HYBRID. Honest middle: evidence exists, but
                    # entity-existence ≠ claim-existence.
                    mode = "HYBRID"
                else:  # proximity
                    cluster = _has_entity_cluster(
                        verified, norm_ctx, proximity_n, proximity_window
                    )
                    if cluster:
                        mode = "HYBRID" if unverified else "STRICT"
                    elif (
                        # #000048: weakest grounding — a single short
                        # sentence whose lone matching multi-word proper
                        # noun isn't part of a cluster. If the claim
                        # asserts a salient capitalized token / number the
                        # source lacks (a swapped subject / city / ocean /
                        # year — "Insulin was discovered by Alexander
                        # Fleming" vs "Penicillin was…"), the shared
                        # proper noun doesn't ground it → UNGROUNDED.
                        len(verified) <= 1
                        and _is_single_sentence(answer_text)
                        and _entity_salient_disagrees(answer_text, norm_ctx)
                    ):
                        mode = "UNGROUNDED"
                    else:
                        # `verified` is non-empty here (guarded above), so
                        # a scattered partial match caps at HYBRID.
                        mode = "HYBRID"
                return {
                    "n_quotes": len(entities),
                    "n_verified": len(verified),
                    "audit_mode": mode,
                    "unverified_quotes": unverified,
                    "verifier_method": "entity",
                    "claim_statuses": _build_claim_statuses(
                        verified=verified,
                        unverified=unverified,
                        method="entity",
                    ),
                }

    # Nothing extracted, or nothing verified. Truly emergent. Note that
    # spans/entities that were extracted but failed to verify are NOT
    # reported on this path: the counts are zero and the lists are empty.
    return {
        "n_quotes": 0,
        "n_verified": 0,
        "audit_mode": "UNGROUNDED",
        "unverified_quotes": [],
        "verifier_method": "none",
        "claim_statuses": [],
    }
# --------------------------------------------------------------------------- # Claim-lattice-pointer verifier — G0 / CTI Clause Lattice Intelligence. # # Companion to ``verify_quotes`` for ``policy["answer_mode"] == # "claim_lattice_pointer"``. The model emits weak natural-language # pointer clauses (``Claim text. [E12]``); the runtime parses them # into structured claim nodes and runs deterministic checks against # the runtime-built evidence map. # # Why pointer-line, not JSON: small instruction-tuned models follow # citation-style prose far more reliably than free-form JSON. JSON # discipline failures generate spurious one-shot SCHEMA_INVALID # verdicts even when the model knew the right answer; pointer-line # stays inside the model's prose-generation distribution. # # Why decimal pointer ids (E1, E2, …) not hex (E1f8e4c2a): # - One BPE token per id in standard tokenizers (8-hex tokenizes to # 4–6 tokens of out-of-distribution noise that nudges the model # toward DSL/code mode). # - Citation style (footnotes, references) is heavily represented in # training; random hex is not. # - The runtime maps pointer ids back to content-addressed evidence # ids (``evidence_map_by_pointer_id``) for cache, run-DAG, audit. # # Hard-soft boundary: only deterministic checks. No entailment, no # completeness, no predicate compatibility. Those stay sidecar. # --------------------------------------------------------------------------- ANSWER_MODES = ("quote", "claim_lattice_pointer", "claim_lattice") DEFAULT_ANSWER_MODE = "quote" # JSON-mode pre-parser. 8B and small-context models drift on JSON # discipline (markdown fences, prose preamble, smart quotes, trailing # commas). Larger reasoning models (Qwen 3.6 reasoner, Claude, GPT-4) # emit valid JSON natively; the pre-parser is the defensive belt that # keeps the JSON path survivable across the inference-quality spectrum. 
# Lenient on syntax, strict on semantics: parsed JSON still has to
# pass the schema check & the same hard verifier rules as pointer mode.
_JSON_FENCE_RE = re.compile(r"^\s*```(?:json)?\s*\n(.*?)\n\s*```\s*$", re.DOTALL)
_TRAILING_COMMA_RE = re.compile(r",(\s*[}\]])")

# Partial-key patterns for _repair_truncated_json. ``,"foo":`` or
# ``"foo":`` at end-of-string after rstrip = a key with no value.
_PARTIAL_KEY_COLON_RE = re.compile(r',?\s*"[^"]*"\s*:\s*$')
# ``,"foo"`` or ``{"foo"`` with no following colon — also dangling. The
# string has to sit directly after a comma or an opening brace so that a
# string *value* (which follows a colon) is never mistaken for a key.
_PARTIAL_KEY_NO_COLON_RE = re.compile(r'(?:,|(?<=\{))\s*"[^"]*"\s*$')
# Escape sequence cut off by truncation: a lone backslash, or ``\u``
# followed by fewer than four hex digits.
_PARTIAL_ESCAPE_RE = re.compile(r"\\(?:u[0-9a-fA-F]{0,3})?")


def _repair_truncated_json(text: str) -> tuple[str, list[str]]:
    """Best-effort completion of truncated JSON.

    Walks ``text`` once tracking string state and bracket/brace stack.
    At end-of-input, if the parse is unbalanced (stuck mid-string, open
    ``[`` / ``{`` without matching close) or has a dangling structural
    artifact (trailing comma, partial key), repair so the result is
    parseable. Returns ``(repaired, fixups)``; blank input and input
    that is already balanced are returned unchanged with no fixups.

    Targets the truncation pattern observed on Hermes-3-8B JSON-mode
    output for broad-descriptive questions: the model writes one long
    claim text and runs out of ``max_tokens`` mid-sentence, yielding
    e.g.::

        {"claims":[{"text":"The Apollo program was the United States
        spaceflight effort which landed... <CUT>

    No closing ``"``, no closing ``}``, no closing ``]``, no closing
    outer ``}``. The lenient parse path can't recover any structure.
    Self-healing closes the open string, balances the stack, and drops
    trailing commas / partial keys so the partial content is preserved
    as a single claim with whatever fields survived.

    Fixups recorded:

    - ``drop_partial_escape`` — input ended inside an escape sequence
      (lone ``\\`` or incomplete ``\\uXXXX``); the fragment is dropped
      first, because appending ``"`` right after it would yield an
      escaped quote or an invalid escape and the string would stay open
    - ``close_string`` — appended ``"`` to close an open string
    - ``drop_partial_key`` — dropped a key without value (``,"key":``,
      ``"key":``, ``,"key`` or ``{"key``). The no-colon forms are only
      dropped when the innermost open container is not an array: inside
      ``[...]`` a trailing string is an element, not a key, and is kept
      (a cut element is kept in its closed, truncated form, the same
      way a cut object value is)
    - ``strip_trailing_comma`` — removed a comma immediately before
      stack close (separate from the regex pass which only handles
      structurally-correct trailing commas)
    - ``close_brace`` / ``close_bracket`` — appended ``}`` / ``]`` per
      open frame on the stack

    Conservative: never inserts content (no key names, no values, no
    commas), only closes / drops. Worst case the repair is a no-op or
    makes parsing fail in a different way; never silently fabricates
    data.
    """
    if not text or not text.strip():
        return text, []

    fixups: list[str] = []
    stack: list[str] = []
    in_string = False
    escaped = False
    # Index of the most recent escape-introducing backslash inside a
    # string; lets us tell whether the input stops mid-escape.
    escape_start = -1
    for pos, ch in enumerate(text):
        if escaped:
            escaped = False
            continue
        if in_string:
            if ch == "\\":
                escaped = True
                escape_start = pos
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch in "{[":
            stack.append(ch)
        elif ch in "}]":
            if stack:
                stack.pop()

    if not stack and not in_string:
        return text, fixups

    repaired = text
    if in_string:
        if escape_start >= 0 and _PARTIAL_ESCAPE_RE.fullmatch(text, escape_start):
            repaired = text[:escape_start]
            fixups.append("drop_partial_escape")
        repaired += '"'
        fixups.append("close_string")

    # The innermost unclosed frame decides what a trailing string is:
    # an element when we are inside an array, otherwise a dangling key.
    in_array = bool(stack) and stack[-1] == "["

    def _note(name: str) -> None:
        if name not in fixups:
            fixups.append(name)

    # Drop dangling structural fragments after the last legitimate
    # value, walking back from end-of-string. Order matters: handle
    # partial-key before trailing-comma so we don't strip the wrong
    # comma.
    while True:
        trimmed = repaired.rstrip()
        if not trimmed:
            break
        # Partial key: `..., "key": ` (colon at end after trim).
        m = _PARTIAL_KEY_COLON_RE.search(trimmed)
        if not (m and m.end() == len(trimmed)) and not in_array:
            # Partial key: `..., "key"` / `{"key"` (key without colon).
            m = _PARTIAL_KEY_NO_COLON_RE.search(trimmed)
        if m and m.end() == len(trimmed):
            repaired = trimmed[: m.start()]
            _note("drop_partial_key")
            continue
        # Trailing comma — strip when it would otherwise stick before close.
        if trimmed.endswith(","):
            repaired = trimmed[:-1]
            _note("strip_trailing_comma")
            continue
        break

    # Close stack innermost-first, mapping `{` → `}`, `[` → `]`.
    for opener in reversed(stack):
        if opener == "{":
            repaired += "}"
            fixups.append("close_brace")
        else:
            repaired += "]"
            fixups.append("close_bracket")
    return repaired, fixups


def _lenient_json_parse(raw: str) -> tuple[object, list[str]]:
    """Parse ``raw`` as JSON, defensively peeling common model drift.

    Returns ``(parsed_obj, fixups_applied)`` — fixups list is empty when
    strict parse succeeded, otherwise names what we had to peel
    (``"fence"``, ``"prose_trim"``, ``"curly_quotes"``,
    ``"trailing_comma"``) or repair (``"drop_partial_escape"``,
    ``"close_string"``, ``"close_brace"``, ``"close_bracket"``,
    ``"drop_partial_key"``, ``"strip_trailing_comma"``). Raises
    ``json.JSONDecodeError`` if the lenient pass also fails.

    Candidates are tried from least to most invasive and the first one
    that parses wins, so only the fixups that were actually needed are
    reported and string contents are not rewritten unnecessarily
    (typographic quotes or a ``, ]`` *inside* a claim text survive when
    stripping the fence alone already yields valid JSON):

    1. fence / prose peel only
    2. + trailing-comma removal
    3. + curly-quote normalization
    4. + curly-quote normalization and trailing-comma removal
    5. truncation repair of (2), then of (4)

    The fixups list lands in the verify payload so an agent can observe
    model drift across runs & decide whether the inference path is
    JSON-clean enough to keep using.
    """
    import json as _json

    def _attempt(candidate: str) -> tuple[bool, object]:
        # (ok, value) pair because ``null`` legitimately parses to None.
        try:
            return True, _json.loads(candidate)
        except _json.JSONDecodeError:
            return False, None

    ok, parsed = _attempt(raw)
    if ok:
        return parsed, []

    peeled: list[str] = []
    text = raw

    # 1. Strip markdown fence wrappers (```json\n...\n``` or ```\n...\n```).
    fenced = _JSON_FENCE_RE.match(text)
    if fenced:
        text = fenced.group(1)
        peeled.append("fence")

    # 2. Trim leading prose to first `{` or `[`; trailing prose past last
    #    `}`/`]`. Preserves the JSON object even when the model writes
    #    "Here is the JSON: {...}\n\nLet me know if you need more."
    first_brace = min(
        (text.find(c) for c in "{[" if text.find(c) >= 0),
        default=-1,
    )
    last_brace = max(text.rfind("}"), text.rfind("]"))
    if first_brace > 0 or (0 <= last_brace < len(text) - 1):
        if 0 <= first_brace <= last_brace:
            text = text[first_brace : last_brace + 1]
            peeled.append("prose_trim")

    # 3. Curly-quote variant — model-emitted “…” / ‘…’ become "…" / '…'.
    #    Tried only after the untouched variant, since the replacement
    #    also hits quotes that are legitimate string content.
    straight = (
        text.replace("“", '"').replace("”", '"')
        .replace("‘", "'").replace("’", "'")
    )
    variants: list[tuple[str, list[str]]] = [(text, [])]
    if straight != text:
        variants.append((straight, ["curly_quotes"]))

    # 4./5. Per variant: strict parse as-is, then with trailing commas
    #    before `}` / `]` removed (conservative: only a comma immediately
    #    followed by whitespace + close bracket).
    finals: list[tuple[str, list[str]]] = []
    for body, names in variants:
        ok, parsed = _attempt(body)
        if ok:
            return parsed, peeled + names
        decomma = _TRAILING_COMMA_RE.sub(r"\1", body)
        if decomma != body:
            body, names = decomma, names + ["trailing_comma"]
            ok, parsed = _attempt(body)
            if ok:
                return parsed, peeled + names
        finals.append((body, names))

    # 6. Self-heal truncated JSON: close open strings, drop dangling
    #    partial keys / trailing commas, balance the bracket stack. The
    #    model ran out of max_tokens mid-output; close what we can and
    #    parse the partial structure rather than failing the whole run.
    last_text, last_names = raw, list(peeled)
    for body, names in finals:
        repaired, repair_fixups = _repair_truncated_json(body)
        last_names = peeled + names + repair_fixups
        if not repair_fixups:
            last_text = body
            continue
        last_text = repaired
        ok, parsed = _attempt(repaired)
        if ok:
            return parsed, last_names

    # Every candidate failed: surface the decoder error for the last,
    # most-normalized text that was tried.
    return _json.loads(last_text), last_names


# Default allowed source roles for claim_lattice_pointer mode. Roles
# outside this set get classified as SOURCE_ROLE_BLOCKED. Mirrors the
# role classifications in arborist/qa/query.py:_classify_source_role;
# "noisy_background_source" and "sequel_background_source" are
# deliberately excluded by default.
DEFAULT_ALLOWED_SOURCE_ROLES = (
    "primary_answer_source",
    "secondary_context_source",
    "background_source",
    "unclassified",
)


def _has_manual_quote(text: str) -> bool:
    """Strict no-quote rule: ANY double-quote character in claim text
    is a violation.

    The premise of claim-lattice-pointer mode is that models do not
    type quote text — period. Even a 3-char quoted span (``"hi"``) is a
    model-asserted verbatim citation that the runtime didn't authorize.
    Catching every quote keeps the discipline honest: the model is
    forbidden, not just length-discouraged.

    Covers ASCII (``"``) and curly quotes (``“`` / ``”``) — same set
    ``_QUOTE_CHAR_RE`` recognizes for the legacy quote verifier.
    """
    return any(ch in text for ch in ('"', '“', '”'))


DEFAULT_MIN_CITATION_COVERAGE = 0.30

# Premise-parroting / generic-vocab-ride-along threshold.
# When ≥ this many tokens shared by the question AND the claim are ABSENT
# from the union of cited evidence spans, the claim is parroting the
# question's subject without anchoring it. Surfaced by the 200-cycle
# bench-emergent delta on `steer/reply/correcter` (Ticket #000006 amend
# 2026-05-02b): claim affirmed three question-distinctive tokens
# (correcter, steer, reply) that appeared ZERO times in the cited
# 33.5K-char glossary. The generic linguistic vocabulary (language,
# communication, terms, relationships) carried Rule 5's coverage check
# on its own.
#
# Threshold of 3 keeps the signal unambiguous: a single absent parroted
# token is often a stem-variant near-miss; three or more is the
# parroting fingerprint. Folds into verifier_policy_hash.
DEFAULT_SUBJECT_TOKENS_ABSENT_THRESHOLD = 3


def _claim_textually_overlaps_evidence(
    claim_text: str,
    evidence_span: str,
    *,
    min_coverage: float = DEFAULT_MIN_CITATION_COVERAGE,
) -> bool:
    """Tell whether enough of the claim's content tokens occur in
    ``evidence_span`` (case-insensitive substring match) to reach
    ``min_coverage``.

    Hard check on a single (claim, pointer) pair. It targets the
    lazy-anchor failure: the model cites a pointer whose text barely
    touches the claim's subject — e.g. claim "Yale University in New
    Haven and the University of Connecticut..." cited to a highway-data
    span containing only the token ``connecticut`` (1/10 = 10%
    coverage; below the 30% default threshold → CITATION_MISMATCH).

    Before 2026-04-30 a single shared content token was enough, which
    let through claims whose only overlap was a common topical word.
    The coverage ratio scales with claim length: short claims (1-3
    content tokens) still pass on 1 match, longer claims need a
    proportional share. Lexical only, no NER, no embeddings; stays
    inside the soft/hard boundary.

    A claim with no content tokens (nothing survives the token
    extractor's filter) passes vacuously — there is nothing topical to
    check, and the verifier's other hard checks already own that case
    (claim_text_non_empty, no_manual_quotes, etc.).
    """
    from arborist.qa.evidence import _content_tokens

    content = _content_tokens(claim_text)
    if not content:
        return True

    haystack = evidence_span.lower()
    total = len(content)
    hits = sum(tok in haystack for tok in content)

    # Floor for short claims (≤3 content tokens): one shared token is
    # enough, so single-fact narrow claims like "Steve Jobs co-founded
    # Apple" don't fail on a coverage technicality. The ratio only
    # bites on prose-shaped multi-token claims, where 1/10 overlap is
    # the lazy-anchor signature.
    if hits and total <= 3:
        return True
    return hits / total >= min_coverage


def _parroted_subject_tokens_absent(
    question_text: str | None,
    claim_text: str,
    cited_spans: list[str],
) -> set[str]:
    """Collect the content tokens shared by question and claim that do
    NOT occur anywhere in the cited evidence spans.

    Premise-parroting / generic-vocab-ride-along detector (Ticket
    #000006 amend 2026-05-02b). The model affirms the question's
    distinctive subject tokens in its claim, but those tokens are
    absent from the cited evidence — the citation rode in on
    overlapping generic vocabulary while the actual subject went
    unverified.

    Mechanism: substring match on lowercased text, mirroring Rule 5
    (`_claim_textually_overlaps_evidence`). Stem-tolerant via the
    substring rule — "polar" matches inside "bipolar", "rare" matches
    "rarely", etc.

    Outcomes:

    - missing question or claim text → empty set (check skipped)
    - no question∩claim overlap → empty set (claim isn't parroting)
    - no usable cited span text → the full parroted set (defensive; no
      grounding at all is its own failure mode caught elsewhere)
    """
    from arborist.qa.evidence import _content_tokens

    if not (question_text and claim_text):
        return set()

    shared = set(_content_tokens(question_text)).intersection(
        _content_tokens(claim_text)
    )
    if not shared:
        return set()

    # One lowercase haystack for all spans; None / empty spans
    # contribute nothing.
    haystack = " ".join([(span or "").lower() for span in cited_spans])
    if not haystack.strip():
        return shared
    return {tok for tok in shared if tok not in haystack}


DEFAULT_MAX_POINTERS_PER_CLAIM = 2
DEFAULT_MIN_CLAIM_CONTENT_TOKENS = 2
DEFAULT_LAZY_ANCHOR_DEMOTE_THRESHOLD = 0.5
DEFAULT_LAZY_ANCHOR_DEMOTE_MIN_PAIRS = 3

# Claim-count ceiling. Bench evidence (2026-04-30 york-england run):
# pre-atomic-claim-rule, JSON mode emitted 26-59 claim-pointer pairs
# of which only 2-4 verified — the model treats "tell me all there
# is to know about X" as a license to spam encyclopedic claims from
# training. Atomic-claim prompt rule (commit b5925c8) reduced this
# to ~10 well-formed claims, but defence-in-depth: any answer with
# more than this many claims is structurally suspect regardless of
# how each claim verifies. Default 12 chosen to comfortably admit
# entity-list questions ("dinosaurs in jurassic park" → 5; "simpsons
# family + pets" → 5-7) while catching the runaway shape.
DEFAULT_MAX_CLAIMS_PER_ANSWER = 12

# Title-relevance check (Rule 8). Cited evidence's source title must
# share at least one stemmed content token with the claim text.
# Catches the retrieval-driven hallucination class fox surfaced
# 2026-05-02 on "explain spin glass modeling & tensors?": claim
# tokens {spin, glass, modeling, tensor, ...} cited to a chunk from
# the *Quantum chromodynamics* article whose title tokens are
# {quantum, chromodynamics} — zero overlap. Token-coverage check
# inside the chunk passed accidentally on shared physics vocabulary;
# the SOURCE was never about the claim's subject.
def _claim_title_overlap(claim_text: str, source_title: str | None) -> bool: """Return True iff the source title shares ≥1 stemmed content token with the claim text. Vacuous-pass when either side has no extractable tokens (defensive — prevents the rule from firing on degenerate inputs).""" if not source_title or not claim_text: return True from arborist.qa.evidence import _content_tokens as _ct claim_tokens = _ct(claim_text) title_tokens = _ct((source_title or "").replace("_", " ")) if not claim_tokens or not title_tokens: return True # Reuse the retrieval-side stem helper so possessive / plural # collapse the same way ('movies' vs 'movie', 'simpsons' vs # 'simpson'). Defined in qa/query.py to avoid an import cycle: # inline a minimal copy here instead. def _stem(t: str) -> str: if len(t) > 4 and t.endswith("s") and not t.endswith("ss"): return t[:-1] return t claim_stems = {_stem(t) for t in claim_tokens} title_stems = {_stem(t) for t in title_tokens} return bool(claim_stems & title_stems)
[docs] def verify_claim_lattice( answer_text: str, evidence_map, *, allowed_source_roles: tuple[str, ...] = DEFAULT_ALLOWED_SOURCE_ROLES, max_pointers_per_claim: int = DEFAULT_MAX_POINTERS_PER_CLAIM, min_citation_coverage: float = DEFAULT_MIN_CITATION_COVERAGE, min_claim_content_tokens: int = DEFAULT_MIN_CLAIM_CONTENT_TOKENS, lazy_anchor_demote_threshold: float = DEFAULT_LAZY_ANCHOR_DEMOTE_THRESHOLD, lazy_anchor_demote_min_pairs: int = DEFAULT_LAZY_ANCHOR_DEMOTE_MIN_PAIRS, max_claims_per_answer: int = DEFAULT_MAX_CLAIMS_PER_ANSWER, subject_tokens_absent_threshold: int = DEFAULT_SUBJECT_TOKENS_ABSENT_THRESHOLD, question: str | None = None, warrant_check_enabled: bool = True, deflection_check_enabled: bool = True, format_collapse_check_enabled: bool = True, warrant_chain_roots: frozenset[str] = frozenset(), ) -> dict: """Deterministic verifier for ``answer_mode="claim_lattice_pointer"``. The model wrote pointer-line prose (``Claim text. [E12]``); the parser pulled (claim_text, [pointer_ids]) pairs from each non-empty line. This verifier maps each pointer id back to its content-addressed evidence object and runs six hard checks: 1. Parser succeeded — ``parse_status == "PARSED"`` (line had a bracket tag). NO_EVIDENCE_POINTER claims (prose without tag) count toward the denominator and downgrade the verdict. 2. Pointer id resolves to an entry in the runtime-built evidence map. No model-invented ids. 3. Resolved entry's ``source_role`` is in ``allowed_source_roles``. 4. Claim text non-empty after tag strip. 5. Claim's content tokens textually overlap the cited evidence span at coverage ≥ ``min_citation_coverage`` (per-pair, lexical only — see ``_claim_textually_overlaps_evidence``). Catches the magnet-chunk lazy-anchor where the model cites an evidence pointer whose text contains few claim-content tokens. 6. Pointer count per claim does not exceed ``max_pointers_per_claim`` (default 2 — matches the prompt's "1 or 2 pointers per claim" rule). 
When exceeded, the claim is TRIMMED to the first N pointers and verification proceeds normally; a ``POINTER_OVERFLOW_TRIMMED`` violation is recorded so STRICT is no longer reachable (audit_mode caps at HYBRID for the run). Trim-and-verify (vs hard fail) protects correct claims that were over-cited (e.g. "Leonardo painted the Mona Lisa. [E2,...,E14]") while keeping the over-citation pattern surfaced. The dropped pointers count toward ``n_quotes`` so the denominator reflects what the model emitted. Removed 2026-04-30: the strict no-double-quote rule. The model routinely paraphrases source prose but copies named-quoted phrases verbatim (e.g. ``"Constitution State"`` from a Connecticut span). Hard-rejecting claims that contained any ``"`` char was rejecting factually correct, source-grounded claims for cosmetic punctuation. The coverage threshold (Rule 5) and pointer cap (Rule 6) carry the weight of catching synthetic-quote / mega-claim failures the old rule was meant to catch. ``_has_manual_quote`` is still defined and used by ``verify_claim_lattice_json``. Returns a verdict in the same shape as ``verify_quotes`` + extras:: n_quotes total claim-pointer pairs (denominator) n_verified pairs where pointer resolved AND source_role allowed AND coverage met AND claim text non-empty audit_mode STRICT / HYBRID / UNGROUNDED unverified_quotes claim texts that didn't reach EVIDENCE_LINKED -- kept under that name for schema continuity with verify_quotes verifier_method "claim_lattice" claim_statuses per-claim {text, evidence_ids, pointer_ids, status, reasons[]}; status in {EVIDENCE_LINKED, EVIDENCE_LINKED_PARTIAL, UNKNOWN_EVIDENCE_ID, SOURCE_ROLE_BLOCKED, CITATION_MISMATCH, NO_EVIDENCE_POINTER, SCHEMA_INVALID} violations structured violation records for the run-DAG / sidecar rendered_text human-readable prose with literal spans interpolated; what the runner persists as answer_text evidence_id_pairs per-claim list of resolved content-addressed evidence_ids (run-stable form). 
Used to thread the parsed lattice into the run-DAG. """ from arborist.qa.evidence import ( evidence_map_by_pointer_id as _by_pointer, render_claim_lattice as _render, ) from arborist.qa.parse_claims import parse_pointer_claims by_pointer = _by_pointer(evidence_map) violations: list[dict] = [] claim_statuses: list[dict] = [] # Three-bucket rendering. ``unverified`` holds claims whose # status reached neither EVIDENCE_LINKED nor EVIDENCE_LINKED_PARTIAL # — i.e. fully failed (no pointer verified). ``partially_verified`` # holds EVIDENCE_LINKED_PARTIAL claims (some pointers ok, some # failed). The renderer shows them as their own section so a # claim never appears in BOTH a verified bullet and the unverified # footer; that previously happened for partial-status claims and # read as "is it grounded or not?". Per-pointer detail lives in # ``claim_statuses`` for audit. unverified: list[str] = [] partially_verified: list[str] = [] raw_claims = parse_pointer_claims(answer_text or "") # Claim-count ceiling — see DEFAULT_MAX_CLAIMS_PER_ANSWER. Records # the violation but doesn't truncate; the per-claim loop below # still verifies every claim so the operator sees full evidence # of the runaway. Demotes verdict via the violation list. if len(raw_claims) > max_claims_per_answer: violations.append({ "kind": "TOO_MANY_CLAIMS", "n_claims": len(raw_claims), "max": max_claims_per_answer, }) n_pairs = 0 n_pairs_verified = 0 # Renderer claims: pointer-id form so the human display still shows # the short tags the model used. valid_claims: list[dict] = [] # Evidence-id pairs: content-addressed form for the run-DAG & # cache. Per-claim list so the parsed_claim_lattice node hashes the # run-stable handle, not the run-dependent pointer-id. 
evidence_id_pairs: list[list[str]] = [] for idx, c in enumerate(raw_claims): claim_text = c.claim_text pointer_ids = c.pointer_ids parse_status = c.parse_status if parse_status == "NO_EVIDENCE_POINTER": violations.append({ "kind": "NO_EVIDENCE_POINTER", "claim_idx": idx, "claim_text": claim_text, }) claim_statuses.append({ "claim_idx": idx, "text": claim_text, "pointer_ids": [], "evidence_ids": [], "status": "NO_EVIDENCE_POINTER", "reasons": ["no [E\\d+] tag on line"], }) unverified.append(claim_text) evidence_id_pairs.append([]) # NO_EVIDENCE_POINTER counts as one denominator pair so the # verdict reflects the failure rate. n_pairs += 1 continue if not claim_text: violations.append({ "kind": "SCHEMA_INVALID", "claim_idx": idx, "reason": "tag with no claim text", }) claim_statuses.append({ "claim_idx": idx, "text": "", "pointer_ids": pointer_ids, "evidence_ids": [], "status": "SCHEMA_INVALID", "reasons": ["tag with no claim text"], }) n_pairs += len(pointer_ids) evidence_id_pairs.append([]) continue # Bare-name claim guard. A claim like "Tyrannosaurus rex. [E15]" # has 1 content token; the citation passes any span that mentions # T-rex anywhere, even when E15 is a video-game-behavior chunk # rather than a film-context one. Forcing a sentence-shape claim # ("Tyrannosaurus rex appeared in the first JP film") raises the # token-coverage bar so an off-topic chunk can no longer satisfy # the citation. Folds into governance_policy_hash via # ``claim_lattice_min_claim_content_tokens``. 
from arborist.qa.evidence import _content_tokens as _ct claim_content_tokens = _ct(claim_text) if len(claim_content_tokens) < min_claim_content_tokens: violations.append({ "kind": "SCHEMA_INVALID", "claim_idx": idx, "reason": ( f"bare-name claim ({len(claim_content_tokens)} content " f"tokens < {min_claim_content_tokens}); write a sentence" ), }) claim_statuses.append({ "claim_idx": idx, "text": claim_text, "pointer_ids": pointer_ids, "evidence_ids": [], "status": "SCHEMA_INVALID", "reasons": ["bare-name claim — write a sentence with predicate"], }) n_pairs += len(pointer_ids) evidence_id_pairs.append([]) unverified.append(claim_text) continue # Pointer-count cap (Rule 9). Catches the encyclopedic-mega- # claim where the model produces one line citing every # pointer at once. Counts every pointer toward the denominator # so the failure is loud in n_quotes. 2026-04-30: trim-and- # verify rather than hard-fail. A correct claim cited with too # many pointers ("Leonardo da Vinci painted the Mona Lisa. # [E2,...,E14]") deserves to count if its first N pointers # actually verify; the violation still blocks STRICT (audit_mode # caps at HYBRID) so the over-citation pattern stays surfaced. # Hard SCHEMA_INVALID would have nuked correct answers for a # cosmetic over-cite. The dropped pointers count toward # n_pairs so the denominator reflects what the model emitted. 
pointer_overflow_trimmed = False if len(pointer_ids) > max_pointers_per_claim: dropped = pointer_ids[max_pointers_per_claim:] n_pairs += len(dropped) pointer_ids = pointer_ids[:max_pointers_per_claim] pointer_overflow_trimmed = True violations.append({ "kind": "POINTER_OVERFLOW_TRIMMED", "claim_idx": idx, "kept": list(pointer_ids), "dropped": dropped, "max_pointers_per_claim": max_pointers_per_claim, }) per_id_results: list[dict] = [] resolved_evidence_ids: list[str] = [] for pid in pointer_ids: n_pairs += 1 obj = by_pointer.get(pid) if obj is None: violations.append({ "kind": "UNKNOWN_EVIDENCE_ID", "claim_idx": idx, "pointer_id": pid, }) per_id_results.append({ "pid": pid, "ok": False, "kind": "UNKNOWN_EVIDENCE_ID", }) continue if obj.source_role not in allowed_source_roles: violations.append({ "kind": "SOURCE_ROLE_BLOCKED", "claim_idx": idx, "pointer_id": pid, "evidence_id": obj.evidence_id, "source_role": obj.source_role, }) per_id_results.append({ "pid": pid, "ok": False, "kind": "SOURCE_ROLE_BLOCKED", }) continue if not _claim_textually_overlaps_evidence( claim_text, obj.span, min_coverage=min_citation_coverage ): # Cited evidence span has zero textual overlap with any # content token from the claim. Strongest lazy-anchor # signal promoted to a hard fail — the model cited a # magnet chunk that doesn't textually support its claim. 
violations.append({ "kind": "CITATION_MISMATCH", "claim_idx": idx, "pointer_id": pid, "evidence_id": obj.evidence_id, }) per_id_results.append({ "pid": pid, "ok": False, "kind": "CITATION_MISMATCH", }) continue per_id_results.append({"pid": pid, "ok": True, "evidence_id": obj.evidence_id}) resolved_evidence_ids.append(obj.evidence_id) n_pairs_verified += 1 ok_pids = [r["pid"] for r in per_id_results if r["ok"]] bad_kinds = sorted({r["kind"] for r in per_id_results if not r["ok"]}) if ok_pids and not bad_kinds: status = "EVIDENCE_LINKED" elif ok_pids: status = "EVIDENCE_LINKED_PARTIAL" elif "CITATION_MISMATCH" in bad_kinds: status = "CITATION_MISMATCH" elif "UNKNOWN_EVIDENCE_ID" in bad_kinds: status = "UNKNOWN_EVIDENCE_ID" elif "SOURCE_ROLE_BLOCKED" in bad_kinds: status = "SOURCE_ROLE_BLOCKED" else: status = "SCHEMA_INVALID" claim_statuses.append({ "claim_idx": idx, "text": claim_text, "pointer_ids": pointer_ids, "evidence_ids": resolved_evidence_ids, "status": status, "reasons": bad_kinds, }) evidence_id_pairs.append(resolved_evidence_ids) if status in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"): valid_claims.append({"text": claim_text, "pointer_ids": ok_pids}) if status == "EVIDENCE_LINKED_PARTIAL": partially_verified.append(claim_text) elif status != "EVIDENCE_LINKED": unverified.append(claim_text) rendered_text = _render(valid_claims, by_pointer) if valid_claims else "" # Aggregate. STRICT requires ≥1 verified pair AND zero violations of # any kind (schema, unknown pointer, blocked role, manual quote, # missing pointer). HYBRID = some pairs verified, some failed. # UNGROUNDED = no verified pairs (no parseable claims, or every # claim failed at least one check). if n_pairs_verified > 0 and not violations: audit_mode = "STRICT" elif n_pairs_verified > 0: audit_mode = "HYBRID" else: audit_mode = "UNGROUNDED" # Anchor-smell sidecar (render-layer only — never persisted as a # v9.8 field). 
Counts how many distinct pointer_ids the model used # across the verified-or-partial claims. ``lazy_anchor_ratio`` is # the max share any single pointer claimed: 1.0 = every claim cites # the same pointer (Hermes-3-8B's lazy-anchor habit on the JP- # dinosaurs benchmark), 1/N = every claim cites a unique pointer. # The distribution is recoverable from ``claim_statuses`` which # IS persisted in run_dag_blob; we surface the derived numbers in # the verdict for the human renderer, but they never thread back # into ``build_run_dag``'s verify_payload, so ``run_dag_root`` # stays clean. pointer_distribution: dict[str, int] = {} for cs in claim_statuses: if cs["status"] not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"): continue for pid in cs.get("pointer_ids") or []: pointer_distribution[pid] = pointer_distribution.get(pid, 0) + 1 total_pointers = sum(pointer_distribution.values()) lazy_anchor_ratio = ( max(pointer_distribution.values()) / total_pointers if total_pointers else 0.0 ) # Smell → demote. Pre-2026-04-30 the lazy-anchor signal was advisory # only; the verdict could still be STRICT while every claim cited # the same magnet chunk. Now: when ratio ≥ threshold AND total pairs # ≥ floor, cap audit_mode at HYBRID. STRICT becomes unreachable for # answers where one pointer carries every claim — that pattern is # almost never honest verbatim grounding. UNGROUNDED is left alone # (a verdict with zero verified pairs has no smell to flag). lazy_anchor_demoted = False if ( audit_mode == "STRICT" and total_pointers >= lazy_anchor_demote_min_pairs and lazy_anchor_ratio >= lazy_anchor_demote_threshold ): audit_mode = "HYBRID" lazy_anchor_demoted = True violations.append({ "kind": "LAZY_ANCHOR_DEMOTE", "ratio": round(lazy_anchor_ratio, 3), "min_pairs": lazy_anchor_demote_min_pairs, "threshold": lazy_anchor_demote_threshold, }) # Warrant-lite — relation-question hard check (Ticket H from # feedback-3, 2026-05-01). 
Claim-cited spans must contain at # least one of the claim's named answer entities (proper-noun # phrases). Catches the Homer-Simpson lazy-anchor case fox # surfaced — claim asserts "Mr. Burns" but cited span is # Castellaneta voice-actor prose. See arborist/qa/warrant.py # for the lexical algorithm and rationale (deterministic, # not NLI). Fires only when the question shape suggests a # relation lookup AND the lookup is enabled by policy # (warrant_check_enabled). Per-claim WARRANT_MISSING violations # cap audit_mode at HYBRID via the same demote pattern as # lazy_anchor_demoted. warrant_missing_claims: list[int] = [] warrant_proven_claim_idxs: list[int] = [] if warrant_check_enabled: for cs in claim_statuses: if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"): continue cited_eids = cs.get("evidence_ids") or [] cited_evidence_objs = [ obj for eid in cited_eids for obj in [evidence_map_by_evidence_id_local(evidence_map, eid)] if obj is not None ] cited_spans = [obj.span for obj in cited_evidence_objs] ok, missing = warrant_check( cs.get("text") or "", cited_spans, question=question ) if not ok: # Phase 3 of #000031: if any cited evidence's source # document has a warrant-resolver derivation row (i.e., # the cited chunk is a claim-pack record with a # Merkle-bound primary-source backing), suppress # WARRANT_MISSING — the warrant chain DOES exist, just # not at the lexical-anchor level. Track on a separate # `warrant_proven_claim_idxs` field for render-layer # transparency. 
cited_source_roots = [obj.source_root for obj in cited_evidence_objs] if warrant_chain_roots and any( r in warrant_chain_roots for r in cited_source_roots ): warrant_proven_claim_idxs.append(cs.get("claim_idx")) continue warrant_missing_claims.append(cs.get("claim_idx")) violations.append({ "kind": "WARRANT_MISSING", "claim_idx": cs.get("claim_idx"), "missing_anchors": missing, "rationale": ( "claim asserts an answer entity or specific date " "not present in any cited span — pointer-linked " "but warrant missing" ), }) if warrant_missing_claims and audit_mode == "STRICT": audit_mode = "HYBRID" # Rule 8 — Title-relevance check. For each claim that resolved, # at least one cited evidence's source title must share a # stemmed content token with the claim. Catches the # retrieval-driven hallucination class (2026-05-02 spin-glass # case): claim about spin glass cited to a chunk from # *Quantum chromodynamics* — token-coverage check passed on # incidental physics vocabulary, but the SOURCE was never about # the claim's subject. 
title_mismatch_claims: list[int] = [] for cs in claim_statuses: if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"): continue cited_eids = cs.get("evidence_ids") or [] cited_titles = [ obj.title for eid in cited_eids for obj in [evidence_map_by_evidence_id_local(evidence_map, eid)] if obj is not None ] if not cited_titles: continue any_overlap = any( _claim_title_overlap(cs.get("text") or "", t) for t in cited_titles ) if not any_overlap: title_mismatch_claims.append(cs.get("claim_idx")) violations.append({ "kind": "TITLE_MISMATCH", "claim_idx": cs.get("claim_idx"), "claim_text": (cs.get("text") or "")[:200], "cited_titles": cited_titles, "rationale": ( "no cited source's title shares a content token " "with the claim — pointer-linked but the cited " "document is structurally unrelated to the claim" ), }) if title_mismatch_claims and audit_mode == "STRICT": audit_mode = "HYBRID" # Tightening (2026-05-02 emergent-log finding): when EVERY resolving # claim has TITLE_MISMATCH, the substrate has zero structural # grounding for the user's question — every cited source is # title-irrelevant. The cashback case ("widescreens offer cashback" # cited to a generic Coupon article) had n_verified=1 but the # citation was meaningless; HYBRID overclaimed. Demote to # UNGROUNDED so the four-rung ladder maps it to UNGROUNDED, not # POINTER-LINKED-PARTIAL. n_resolving = sum( 1 for cs in claim_statuses if cs.get("status") in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL") ) if ( title_mismatch_claims and n_resolving > 0 and len(title_mismatch_claims) == n_resolving ): audit_mode = "UNGROUNDED" # Rule 9 — Subject-tokens-absent / premise-parroting check (Ticket # #000006 amend 2026-05-02b, surfaced by `steer/reply/correcter` # 200-cycle bench-emergent finding). For each resolving claim, # collect the union of cited evidence spans and check whether ≥ # subject_tokens_absent_threshold tokens shared by question AND # claim are absent from that union. 
If so, the claim is parroting # the question's distinctive subject without anchoring it — the # citation rode in on overlapping generic vocabulary while the # actual subject went unverified. subject_absent_claims: list[int] = [] if question and subject_tokens_absent_threshold > 0: for cs in claim_statuses: if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"): continue cited_eids = cs.get("evidence_ids") or [] cited_spans = [] for eid in cited_eids: obj = evidence_map_by_evidence_id_local(evidence_map, eid) if obj is not None and obj.span: cited_spans.append(obj.span) if not cited_spans: continue absent = _parroted_subject_tokens_absent( question, cs.get("text") or "", cited_spans ) if len(absent) >= subject_tokens_absent_threshold: subject_absent_claims.append(cs.get("claim_idx")) violations.append({ "kind": "SUBJECT_TOKENS_ABSENT", "claim_idx": cs.get("claim_idx"), "claim_text": (cs.get("text") or "")[:200], "absent_tokens": sorted(absent), "rationale": ( f"{len(absent)} question-distinctive tokens echoed in " f"the claim are absent from cited evidence — claim " f"parrots question premise without anchoring it" ), }) if subject_absent_claims and audit_mode == "STRICT": audit_mode = "HYBRID" # Deflection check (soft demote, promoted from sidecar 2026-05-02). # When the question's subject anchor is missing from the answer, # the model deflected — answered an adjacent grounded question # instead of the user's specific one. Caught the live cases: # "who burns the amazon river?" → answered about Amazon # Rainforest deforestation, "river" never in answer # "what culture burns the amazon rain forest?" → answered # "what causes burning", "culture" never in answer # Both passed every other check but the user's question wasn't # structurally answered. DEFLECTION_DETECTED downgrades # EVIDENCE-WARRANTED → ANCHOR-WARRANTED via the soft-demote # path. Render-layer ladder picks this up automatically. 
deflection_detected = False if deflection_check_enabled and question and rendered_text: # Deferred import to avoid pulling arborist.compress + arborist.store # at verify.py module-load time when callers may not need them. from arborist.qa.inspect import diagnose_deflection signal = diagnose_deflection(question, rendered_text) if signal.get("kind") == "deflection": deflection_detected = True violations.append({ "kind": "DEFLECTION_DETECTED", "subject_anchor": signal.get("subject_anchor"), "overlap_ratio": signal.get("overlap_ratio"), "rationale": ( "answer's content tokens omit the question's " "subject anchor — model answered an adjacent " "grounded question rather than the user's " "specific one" ), }) if deflection_detected and audit_mode == "STRICT": audit_mode = "HYBRID" # Format-collapse check (FORMAT_COLLAPSED soft demote). # The "winners of all major sports?" case fox surfaced 2026-05-02: # Hermes melted under an under-specified broad question, dumped # 50+ free-form prose claims with ZERO `[E\d+]` pointer tags. The # parser found 2 line-shaped fragments to count as claims; both # ungrounded → UNGROUNDED 0/2. Verifier was honest, but operators # couldn't tell from the audit line whether UNGROUNDED meant # "tried to ground & failed" vs "abandoned the protocol entirely." # This sidecar separates those two failure shapes by inspecting # the raw answer text for the absence of bracket tags amid # multiple meaningful prose lines. 
format_collapsed = False if format_collapse_check_enabled and answer_text: meaningful_lines = [ line for line in answer_text.splitlines() if len(line.strip()) > 20 ] bracket_count = len(re.findall(r"\[E\d+", answer_text)) if len(meaningful_lines) >= 5 and bracket_count == 0: format_collapsed = True violations.append({ "kind": "FORMAT_COLLAPSED", "meaningful_lines": len(meaningful_lines), "bracket_count": bracket_count, "rationale": ( "model emitted multi-line prose with zero [E\\d+] " "pointer tags — abandoned the claim_lattice_pointer " "protocol entirely. UNGROUNDED below this signal is " "format collapse, not graceful per-claim refusal." ), }) if format_collapsed and audit_mode == "STRICT": audit_mode = "HYBRID" return { "n_quotes": n_pairs, "n_verified": n_pairs_verified, "audit_mode": audit_mode, "unverified_quotes": unverified, "partially_verified_quotes": partially_verified, "verifier_method": "claim_lattice", "claim_statuses": claim_statuses, "violations": violations, "rendered_text": rendered_text, "evidence_id_pairs": evidence_id_pairs, "pointer_id_distribution": pointer_distribution, "lazy_anchor_ratio": lazy_anchor_ratio, "lazy_anchor_demoted": lazy_anchor_demoted, "warrant_missing_claim_idxs": warrant_missing_claims, "warrant_proven_claim_idxs": warrant_proven_claim_idxs, "title_mismatch_claim_idxs": title_mismatch_claims, "deflection_detected": deflection_detected, "format_collapsed": format_collapsed, }
def evidence_map_by_evidence_id_local(evidence_map, eid: str):
    """Look up an evidence object by its ``evidence_id`` with a linear scan.

    Kept local to this module so the hot path does not have to import
    `evidence_map_by_evidence_id` (the original author cites import-cycle
    risk). The scan is O(N); the original author notes this is acceptable
    because evidence maps stay small (<30 entries).

    Args:
        evidence_map: Iterable of objects exposing an ``evidence_id``
            attribute. A falsy value (``None``, empty container) is treated
            as an empty map.
        eid: The ``evidence_id`` value to search for.

    Returns:
        The first object whose ``evidence_id`` equals ``eid``, or ``None``
        when nothing matches.
    """
    candidates = evidence_map if evidence_map else ()
    return next(
        (entry for entry in candidates if entry.evidence_id == eid),
        None,
    )
# --------------------------------------------------------------------------- # JSON variant — `answer_mode="claim_lattice"`. Same lattice semantics as # the pointer variant, but the model emits a structured JSON object # {"claims":[{"text":str,"evidence_ids":[str,...]}]} with content- # addressed evidence_ids directly. Pairs naturally with grammar- # constrained inference (vLLM guided_json, Claude/GPT-4 native JSON # mode, Qwen 3.6 reasoner) where schema-conformance is generation-time- # enforced. The lenient pre-parser above keeps the path survivable on # inference paths without grammar guidance. # --------------------------------------------------------------------------- CLAIM_LATTICE_JSON_SCHEMA = { "type": "object", "properties": { "claims": { "type": "array", "items": { "type": "object", "properties": { "text": {"type": "string"}, "evidence_ids": { "type": "array", "items": {"type": "string"}, }, }, "required": ["text", "evidence_ids"], "additionalProperties": False, }, }, }, "required": ["claims"], "additionalProperties": False, }
[docs] def claim_lattice_structured_output_extras( schema: dict | None = None, *, name: str = "claim_lattice", ) -> dict: """Multi-engine ``extra_body`` for JSON-schema enforcement on chat completions. Each inference engine recognises its own key and silently drops the others, so sending all three lets the same call site work across vLLM, llama.cpp, and OpenAI-spec endpoints without per-endpoint branching: * ``guided_json`` — vLLM grammar-constrained sampling * ``json_schema`` — llama.cpp native shorthand * ``response_format``— OpenAI-spec ``{type: json_schema, …}`` (honoured by llama.cpp and newer vLLM) Returns a dict you splat into ``client.chat_completion(extra_body=…)``. Defaults to the claim- lattice schema; pass an alternate schema to reuse the helper for other structured-output features. The ``name`` is required by OpenAI-spec ``response_format`` and is the user-visible label for the schema in some engines' error messages. Added 2026-05-19 to enable the Arborist arm to run with Qwen on llama.cpp (the old single-key ``guided_json`` was silently dropped on llama.cpp, leaving Qwen un-enforced and the parse-tolerant fallback doing all the work). Hermes/vLLM path is unchanged — it still picks up ``guided_json`` and ignores the other two.""" s = schema if schema is not None else CLAIM_LATTICE_JSON_SCHEMA return { "guided_json": s, "json_schema": s, "response_format": { "type": "json_schema", "json_schema": { "name": name, "schema": s, "strict": True, }, }, }
def verify_claim_lattice_json(
    answer_json_text: str,
    evidence_map,
    *,
    allowed_source_roles: tuple[str, ...] = DEFAULT_ALLOWED_SOURCE_ROLES,
    max_evidence_per_claim: int = DEFAULT_MAX_POINTERS_PER_CLAIM,
    min_citation_coverage: float = DEFAULT_MIN_CITATION_COVERAGE,
    max_claims_per_answer: int = DEFAULT_MAX_CLAIMS_PER_ANSWER,
    subject_tokens_absent_threshold: int = DEFAULT_SUBJECT_TOKENS_ABSENT_THRESHOLD,
    question: str | None = None,
    warrant_check_enabled: bool = True,
    deflection_check_enabled: bool = True,
    warrant_chain_roots: frozenset[str] = frozenset(),
) -> dict:
    """Deterministic verifier for ``answer_mode="claim_lattice"`` (JSON).

    Parses the model's JSON output (lenient pre-parser handles markdown
    fences / preamble / curly quotes / trailing commas), validates the
    schema, then runs the same hard checks as ``verify_claim_lattice``
    but reading ``evidence_ids`` from the JSON claim objects.

    2026-04-30: switched from content-addressed evidence_ids
    (``Eed1b6e396``) to pointer_ids (``E1``, ``E2``, …) in the prompt &
    JSON output. Hermes-3-8B was fabricating plausible content-addressed
    IDs (``E1b6e396``-style near-misses) on cross-document relationship
    questions; the verifier correctly rejected them as
    UNKNOWN_EVIDENCE_ID but the answer text was often factually correct,
    leaving us with honest UNGROUNDED on right answers. Pointer IDs are
    short, enumerable, and fabrication-obvious. The runtime still
    resolves each pointer_id to its content-addressed evidence_id
    internally and stores that in ``evidence_id_pairs`` (cache/run-DAG
    continuity); only the prompt-facing surface changes.

    Checks:

    1. JSON parses (lenient). Failure → SCHEMA_INVALID, UNGROUNDED.
    2. Top-level is ``{"claims": [...]}``.
    3. Each claim is ``{"text": str, "evidence_ids": [str, ...]}``.
    4. Each evidence_id resolves in the runtime-built evidence map
       (no model-invented IDs).
    5. Resolved entry's ``source_role`` is in ``allowed_source_roles``.
    6. Claim text contains no double-quote characters anywhere.
    7. Claim text non-empty.
    8. Claim's content tokens textually overlap the cited evidence span.
    9. ``len(evidence_ids) <= max_evidence_per_claim``.

    After the per-claim pass the verdict is STRICT (≥1 verified pair and
    no violations), HYBRID (≥1 verified pair, some violation) or
    UNGROUNDED (no verified pair). The warrant, title-relevance,
    subject-tokens-absent and deflection checks can then demote STRICT
    to HYBRID; title mismatch on every resolving claim demotes to
    UNGROUNDED.

    Accounting notes:

    * Every ``claim_statuses`` entry carries ``claim_idx`` (position in
      the ``claims`` array) so the post-checks and the ``*_claim_idxs``
      fields report real indices.
    * A claim with no ``evidence_ids`` counts as exactly one denominator
      pair and records a ``NO_EVIDENCE_POINTER`` violation, matching the
      pointer variant; it therefore blocks STRICT.
    * A non-string entry in ``evidence_ids`` records a
      ``SCHEMA_INVALID`` violation so a partially linked claim cannot
      leave the verdict at STRICT.

    Args:
        answer_json_text: Raw model output; ``None``/empty is parsed as
            the empty string.
        evidence_map: Runtime-built evidence objects for this answer.
        allowed_source_roles: Source roles a cited entry may have.
        max_evidence_per_claim: Ceiling on ids per claim; exceeding it
            records TOO_MANY_EVIDENCE_IDS (ids are still all checked).
        min_citation_coverage: Forwarded to the claim/evidence textual
            overlap check.
        max_claims_per_answer: Ceiling on claims; exceeding it records
            TOO_MANY_CLAIMS (claims are still all checked).
        subject_tokens_absent_threshold: Minimum number of absent
            question/claim tokens that triggers SUBJECT_TOKENS_ABSENT;
            ``0`` or less disables the check.
        question: The user's question; required for the subject-tokens
            and deflection checks and passed to the warrant check.
        warrant_check_enabled: Toggle for the warrant check.
        deflection_check_enabled: Toggle for the deflection check.
        warrant_chain_roots: Source roots that suppress WARRANT_MISSING
            for claims citing them.

    Returns:
        A verdict in the same shape as ``verify_claim_lattice`` plus a
        ``json_fixups`` field naming any drift the lenient parser had to
        peel (``"fence"`` / ``"prose_trim"`` / ``"curly_quotes"`` /
        ``"trailing_comma"``). Empty list = strict JSON parse on first
        try.
    """
    from arborist.qa.evidence import (
        evidence_map_by_pointer_id as _by_pointer,
        render_claim_lattice as _render,
    )

    by_pointer = _by_pointer(evidence_map)
    violations: list[dict] = []
    claim_statuses: list[dict] = []
    unverified: list[str] = []
    json_fixups: list[str] = []

    parsed = None
    try:
        parsed, json_fixups = _lenient_json_parse(answer_json_text or "")
    except Exception as exc:
        # Deliberately broad: any parser failure becomes a recorded
        # SCHEMA_INVALID violation rather than an exception for the caller.
        violations.append({
            "kind": "SCHEMA_INVALID",
            "reason": f"json parse: {str(exc)[:200]}",
        })

    if parsed is not None and not isinstance(parsed, dict):
        violations.append({
            "kind": "SCHEMA_INVALID",
            "reason": f"top-level not object (got {type(parsed).__name__})",
        })
        parsed = None

    raw_claims = (parsed or {}).get("claims") if parsed is not None else None
    if parsed is not None and not isinstance(raw_claims, list):
        violations.append({
            "kind": "SCHEMA_INVALID",
            "reason": "missing or non-list 'claims'",
        })
        raw_claims = None

    # Claim-count ceiling — same defense-in-depth signal as the
    # pointer verifier. A "tell me all there is to know" prompt
    # shape can spam encyclopedic claims; cap demotes the verdict
    # so the runaway is operator-visible regardless of per-claim
    # verification success.
    if isinstance(raw_claims, list) and len(raw_claims) > max_claims_per_answer:
        violations.append({
            "kind": "TOO_MANY_CLAIMS",
            "n_claims": len(raw_claims),
            "max": max_claims_per_answer,
        })

    n_pairs = 0
    n_pairs_verified = 0
    valid_claims: list[dict] = []
    evidence_id_pairs: list[list[str]] = []

    for idx, c in enumerate(raw_claims or []):
        if not isinstance(c, dict):
            violations.append({
                "kind": "SCHEMA_INVALID",
                "claim_idx": idx,
                "reason": f"claim not object (got {type(c).__name__})",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": "",
                "evidence_ids": [],
                "status": "SCHEMA_INVALID",
                "reasons": ["not_object"],
            })
            continue

        claim_text = c.get("text") or ""
        eids = c.get("evidence_ids") or []
        if not isinstance(claim_text, str) or not isinstance(eids, list):
            violations.append({
                "kind": "SCHEMA_INVALID",
                "claim_idx": idx,
                "reason": "claim shape: text=str, evidence_ids=list[str]",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": str(claim_text)[:200],
                "evidence_ids": [],
                "status": "SCHEMA_INVALID",
                "reasons": ["bad_field_types"],
            })
            continue

        # Manual-quote prohibition (same rule as pointer mode).
        if _has_manual_quote(claim_text):
            violations.append({
                "kind": "MANUAL_QUOTE_VIOLATION",
                "claim_idx": idx,
                "claim_text": claim_text[:200],
            })
            unverified.append(claim_text)
            claim_statuses.append({
                "claim_idx": idx,
                "text": claim_text,
                "evidence_ids": eids,
                "status": "MANUAL_QUOTE_VIOLATION",
                "reasons": ["double_quote_in_text"],
            })
            n_pairs += max(1, len(eids))
            continue

        if not claim_text.strip():
            violations.append({
                "kind": "SCHEMA_INVALID",
                "claim_idx": idx,
                "reason": "empty claim text",
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": "",
                "evidence_ids": eids,
                "status": "SCHEMA_INVALID",
                "reasons": ["empty_text"],
            })
            continue

        if len(eids) > max_evidence_per_claim:
            violations.append({
                "kind": "TOO_MANY_EVIDENCE_IDS",
                "claim_idx": idx,
                "claim_text": claim_text[:200],
                "n_ids": len(eids),
                "max": max_evidence_per_claim,
            })

        if not eids:
            # A claim with nothing cited counts as ONE denominator pair
            # and is recorded as a violation, so it cannot ride along
            # under a STRICT verdict earned by other claims.
            violations.append({
                "kind": "NO_EVIDENCE_POINTER",
                "claim_idx": idx,
                "claim_text": claim_text[:200],
            })
            claim_statuses.append({
                "claim_idx": idx,
                "text": claim_text,
                "evidence_ids": [],
                "status": "NO_EVIDENCE_POINTER",
                "reasons": ["no_evidence_ids"],
            })
            unverified.append(claim_text)
            n_pairs += 1
            continue

        # Per-id resolution + checks. ``eids`` are pointer_ids
        # (E1, E2, …) emitted by the model; we resolve each to its
        # EvidenceObject and capture the content-addressed
        # ``evidence_id`` for the cache/run-DAG handle. Pointer-style
        # IDs make fabrication obvious — if only E1-E10 were shown,
        # an emitted "E27" reads as a hallucination at the schema
        # check, not as a near-miss content-addressed string.
        per_id_results = []
        verified_pointer_ids: list[str] = []
        verified_evidence_ids: list[str] = []
        for eid in eids:
            if not isinstance(eid, str):
                per_id_results.append(
                    {"eid": str(eid), "ok": False, "kind": "SCHEMA_INVALID"}
                )
                # Recorded as a violation so a claim mixing a valid
                # pointer with a malformed one cannot stay STRICT.
                violations.append({
                    "kind": "SCHEMA_INVALID",
                    "claim_idx": idx,
                    "reason": (
                        f"evidence id not string (got {type(eid).__name__})"
                    ),
                })
                continue
            obj = by_pointer.get(eid)
            if obj is None:
                per_id_results.append(
                    {"eid": eid, "ok": False, "kind": "UNKNOWN_EVIDENCE_ID"}
                )
                violations.append({
                    "kind": "UNKNOWN_EVIDENCE_ID",
                    "claim_idx": idx,
                    "evidence_id": eid,
                })
                continue
            if obj.source_role not in allowed_source_roles:
                per_id_results.append(
                    {"eid": eid, "ok": False, "kind": "SOURCE_ROLE_BLOCKED"}
                )
                violations.append({
                    "kind": "SOURCE_ROLE_BLOCKED",
                    "claim_idx": idx,
                    "evidence_id": obj.evidence_id,
                    "pointer_id": eid,
                    "source_role": obj.source_role,
                })
                continue
            if not _claim_textually_overlaps_evidence(
                claim_text, obj.span, min_coverage=min_citation_coverage
            ):
                per_id_results.append(
                    {"eid": eid, "ok": False, "kind": "CITATION_MISMATCH"}
                )
                violations.append({
                    "kind": "CITATION_MISMATCH",
                    "claim_idx": idx,
                    "evidence_id": obj.evidence_id,
                    "pointer_id": eid,
                    "claim_text": claim_text[:200],
                })
                continue
            per_id_results.append({"eid": eid, "ok": True})
            verified_pointer_ids.append(eid)
            verified_evidence_ids.append(obj.evidence_id)

        # ``eids`` is non-empty here, so every emitted id is one pair.
        n_pairs += len(eids)
        n_pairs_verified += len(verified_pointer_ids)

        failed_kinds = [r["kind"] for r in per_id_results if not r["ok"]]
        if len(verified_pointer_ids) == len(eids):
            status = "EVIDENCE_LINKED"
        elif verified_pointer_ids:
            status = "EVIDENCE_LINKED_PARTIAL"
        else:
            # Nothing verified: the claim takes the FIRST failing id's
            # kind (emission order), not a severity ranking.
            status = failed_kinds[0] if failed_kinds else "UNKNOWN_EVIDENCE_ID"
            unverified.append(claim_text)

        # claim_statuses records BOTH ids: pointer (what model wrote)
        # and content-addressed (run-stable handle). Keeps the audit
        # trail legible at both layers.
        claim_statuses.append({
            "claim_idx": idx,
            "text": claim_text,
            "pointer_ids": list(eids),
            "evidence_ids": list(verified_evidence_ids),
            "status": status,
            "reasons": failed_kinds,
        })
        if verified_pointer_ids:
            # Renderer takes the pointer-id form (model's view) and the
            # by_pointer index; cache/run-DAG get the content-addressed
            # evidence_ids (run-stable form).
            valid_claims.append({
                "text": claim_text,
                "pointer_ids": verified_pointer_ids,
            })
            evidence_id_pairs.append(list(verified_evidence_ids))

    rendered_text = _render(valid_claims, by_pointer) if valid_claims else ""

    if n_pairs_verified > 0 and not violations:
        audit_mode = "STRICT"
    elif n_pairs_verified > 0:
        audit_mode = "HYBRID"
    else:
        audit_mode = "UNGROUNDED"

    # Warrant-lite — same relation-question hard check as the pointer
    # variant. See verify_claim_lattice for the rationale (Ticket H,
    # 2026-05-01). Identical demote-to-HYBRID semantics; the JSON
    # variant carries the same WARRANT_MISSING violations & the same
    # warrant_missing_claim_idxs field on the verdict.
    warrant_missing_claims: list[int] = []
    warrant_proven_claim_idxs: list[int] = []
    if warrant_check_enabled:
        for cs in claim_statuses:
            if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
                continue
            cited_eids = cs.get("evidence_ids") or []
            cited_evidence_objs = [
                obj for eid in cited_eids
                for obj in [evidence_map_by_evidence_id_local(evidence_map, eid)]
                if obj is not None
            ]
            cited_spans = [obj.span for obj in cited_evidence_objs]
            ok, missing = warrant_check(
                cs.get("text") or "", cited_spans, question=question
            )
            if not ok:
                # Phase 3 of #000031: see verify_claim_lattice for
                # the full rationale. Same suppression logic — if
                # the cited chunk's document has a warrant-resolver
                # derivation row, the warrant chain exists at the
                # Merkle level even if the lexical anchor doesn't
                # fire.
                cited_source_roots = [obj.source_root for obj in cited_evidence_objs]
                if warrant_chain_roots and any(
                    r in warrant_chain_roots for r in cited_source_roots
                ):
                    warrant_proven_claim_idxs.append(cs.get("claim_idx"))
                    continue
                warrant_missing_claims.append(cs.get("claim_idx"))
                violations.append({
                    "kind": "WARRANT_MISSING",
                    "claim_idx": cs.get("claim_idx"),
                    "missing_anchors": missing,
                    "rationale": (
                        "claim asserts an answer entity or specific date "
                        "not present in any cited span — pointer-linked "
                        "but warrant missing"
                    ),
                })
        if warrant_missing_claims and audit_mode == "STRICT":
            audit_mode = "HYBRID"

    # Rule 8 — Title-relevance check (mirrors the pointer variant).
    # See verify_claim_lattice for rationale (2026-05-02 spin-glass
    # case). Demote-to-HYBRID semantics; JSON variant emits the same
    # TITLE_MISMATCH violation kind & title_mismatch_claim_idxs field.
    title_mismatch_claims: list[int] = []
    for cs in claim_statuses:
        if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
            continue
        cited_eids = cs.get("evidence_ids") or []
        cited_titles = [
            obj.title for eid in cited_eids
            for obj in [evidence_map_by_evidence_id_local(evidence_map, eid)]
            if obj is not None
        ]
        if not cited_titles:
            continue
        any_overlap = any(
            _claim_title_overlap(cs.get("text") or "", t) for t in cited_titles
        )
        if not any_overlap:
            title_mismatch_claims.append(cs.get("claim_idx"))
            violations.append({
                "kind": "TITLE_MISMATCH",
                "claim_idx": cs.get("claim_idx"),
                "claim_text": (cs.get("text") or "")[:200],
                "cited_titles": cited_titles,
                "rationale": (
                    "no cited source's title shares a content token "
                    "with the claim — pointer-linked but the cited "
                    "document is structurally unrelated to the claim"
                ),
            })
    if title_mismatch_claims and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    # Tightening (2026-05-02): mirrors the pointer-variant promotion.
    # When EVERY resolving claim has TITLE_MISMATCH, demote to
    # UNGROUNDED — the substrate has zero structural grounding for
    # the user's question. See verify_claim_lattice for full rationale.
    n_resolving = sum(
        1 for cs in claim_statuses
        if cs.get("status") in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL")
    )
    if (
        title_mismatch_claims
        and n_resolving > 0
        and len(title_mismatch_claims) == n_resolving
    ):
        audit_mode = "UNGROUNDED"

    # Rule 9 — Subject-tokens-absent / premise-parroting check. See
    # `verify_claim_lattice` for the full rationale.
    subject_absent_claims: list[int] = []
    if question and subject_tokens_absent_threshold > 0:
        for cs in claim_statuses:
            if cs.get("status") not in ("EVIDENCE_LINKED", "EVIDENCE_LINKED_PARTIAL"):
                continue
            cited_eids = cs.get("evidence_ids") or []
            cited_spans = []
            for eid in cited_eids:
                obj = evidence_map_by_evidence_id_local(evidence_map, eid)
                if obj is not None and obj.span:
                    cited_spans.append(obj.span)
            if not cited_spans:
                continue
            absent = _parroted_subject_tokens_absent(
                question, cs.get("text") or "", cited_spans
            )
            if len(absent) >= subject_tokens_absent_threshold:
                subject_absent_claims.append(cs.get("claim_idx"))
                violations.append({
                    "kind": "SUBJECT_TOKENS_ABSENT",
                    "claim_idx": cs.get("claim_idx"),
                    "claim_text": (cs.get("text") or "")[:200],
                    "absent_tokens": sorted(absent),
                    "rationale": (
                        f"{len(absent)} question-distinctive tokens echoed in "
                        f"the claim are absent from cited evidence — claim "
                        f"parrots question premise without anchoring it"
                    ),
                })
        if subject_absent_claims and audit_mode == "STRICT":
            audit_mode = "HYBRID"

    # Deflection check (parallel to pointer-variant promotion).
    deflection_detected = False
    if deflection_check_enabled and question and rendered_text:
        # Deferred import to avoid pulling arborist.compress + arborist.store
        # at verify.py module-load time when callers may not need them.
        from arborist.qa.inspect import diagnose_deflection

        signal = diagnose_deflection(question, rendered_text)
        if signal.get("kind") == "deflection":
            deflection_detected = True
            violations.append({
                "kind": "DEFLECTION_DETECTED",
                "subject_anchor": signal.get("subject_anchor"),
                "overlap_ratio": signal.get("overlap_ratio"),
                "rationale": (
                    "answer's content tokens omit the question's "
                    "subject anchor — model answered an adjacent "
                    "grounded question rather than the user's "
                    "specific one"
                ),
            })
    if deflection_detected and audit_mode == "STRICT":
        audit_mode = "HYBRID"

    # Same `verifier_method` as the pointer variant ("claim_lattice")
    # so the providence_cache CHECK constraint accepts both. The mode
    # is disambiguated downstream via `answer_mode` on the run-DAG &
    # via the JSON-only `json_fixups` field on this verdict.
    return {
        "n_quotes": n_pairs,
        "n_verified": n_pairs_verified,
        "audit_mode": audit_mode,
        "unverified_quotes": unverified,
        "verifier_method": "claim_lattice",
        "claim_statuses": claim_statuses,
        "violations": violations,
        "rendered_text": rendered_text,
        "evidence_id_pairs": evidence_id_pairs,
        "json_fixups": json_fixups,
        "warrant_missing_claim_idxs": warrant_missing_claims,
        "warrant_proven_claim_idxs": warrant_proven_claim_idxs,
        "title_mismatch_claim_idxs": title_mismatch_claims,
    }