"""Helpers for iterative LLM coding prompts and response parsing."""
import json
from pydantic import BaseModel, Field
[docs]
class JudgeResponse(BaseModel):
    """Structured output returned by the LLM judge for one hierarchy level.

    Pairs a numeric confidence with a free-text justification. The 0-1
    range appears only in the field's description text; the field
    definition itself declares no numeric bounds.
    """

    # Numeric confidence reported by the judge for the evaluated assignment.
    score: float = Field(description="Confidence score between 0 and 1.")
    # Free-text justification accompanying the score.
    explanation: str = Field(description="Reason for this score.")
# System prompt for the per-level classification ("pick") call.
# It tells the model to answer with a JSON object of the form
# {"selected": [<integer indices>]}, which is the shape that
# _parse_selected_indices reads back. The text is runtime behaviour:
# any wording change alters what the model is asked to do.
_SYSTEM = """You are a classification agent for beneficiary feedback items.
Your task is to classify the feedback item using only the options provided at the current hierarchy level.
Goal:
Select the best-supported option(s) while balancing:
- precision: avoid clearly wrong labels
- recall: do not miss labels that are reasonably supported
Instructions:
- Use only the current-level options provided.
- Use the feedback text as the main evidence.
- Use the parent path context only to interpret the current level correctly and disambiguate meaning.
- Select an option if it is:
- clearly supported by the feedback text, or
- a reasonable interpretation that is strongly implied by the text
- Do not select an option if it is:
- only loosely related,
- a weak or doubtful match,
- dependent on speculation beyond the text,
- more general than what the text actually supports
- Multi-label is allowed, but only when the feedback contains multiple distinct ideas that separately support different options.
- Do not select multiple options that express the same underlying idea.
- Prefer the best-fitting option(s) rather than returning none.
- Return an empty list only when none of the options are meaningfully supported by the feedback.
Selection guidance:
- Most items should result in 1 selected option.
- Select 2 or more only when the text clearly contains multiple distinct classifiable ideas.
- Avoid broad over-selection.
Output rules:
- Output JSON only.
- Do not output markdown.
- Do not output explanations.
- Do not output any text other than the JSON object.
Return exactly this format:
{"selected":[<integer indices>]}"""
# Public alias of the prompt above; build_pick_messages returns it as the
# system message.
SYSTEM_PROMPT = _SYSTEM
def _build_user_message(
    *,
    feedback_text: str,
    current_level: str,
    labels: list[str],
    hierarchy_path: list[tuple[str, str]],
) -> str:
    """Render the user turn for one hierarchy-level pick.

    The message is assembled from up to four sections, in this order:
    the feedback text fenced by ``---`` lines, the already-selected
    parent path (omitted entirely when ``hierarchy_path`` is empty), the
    name of the current level, and the candidate labels numbered from 0.

    Parameters
    ----------
    feedback_text:
        Text of the feedback item to classify.
    current_level:
        Name of the hierarchy level being decided.
    labels:
        Candidate options for this level; each is shown as ``index: label``.
    hierarchy_path:
        ``(level_name, chosen_label)`` pairs for the levels above this one.

    Returns
    -------
    str
        The formatted user message.
    """
    sections = [f"Feedback:\n---\n{feedback_text}\n---\n"]

    # The parent-path section only appears when something was selected above.
    if hierarchy_path:
        chosen = "\n".join(f"{name}: {choice}" for name, choice in hierarchy_path)
        sections.append(f"Hierarchy path already selected:\n{chosen}\n\n")

    sections.append(f"Current level:\n{current_level}\n\n")

    # The numbers shown here are the indices the model is asked to return.
    numbered = "\n".join(f"{idx}: {text}" for idx, text in enumerate(labels))
    sections.append(f"Options:\n{numbered}")

    return "".join(sections)
def _parse_selected_indices(raw: str, num_options: int) -> list[int]:
    """Extract valid option indices from a ``{"selected": [...]}`` JSON reply.

    Parsing is best-effort: a reply that cannot be decoded, lacks the
    ``"selected"`` key, or holds a non-list under it yields an empty list
    rather than an exception. Individual entries that cannot be converted
    with ``int()`` are skipped, as are indices outside
    ``0 .. num_options - 1``.

    Parameters
    ----------
    raw:
        Raw model response text, expected to be a single JSON object.
    num_options:
        Number of options that were offered; valid indices are
        ``0 .. num_options - 1``.

    Returns
    -------
    list[int]
        In-range indices in first-seen order, with duplicates removed.
    """
    try:
        selected = json.loads(raw.strip())["selected"]
    except Exception:
        # Deliberately broad: model output is untrusted, and any failure to
        # decode or to find the key (bad JSON, non-object payload, non-string
        # input) is treated as "nothing selected".
        return []
    if not isinstance(selected, list):
        return []

    valid: list[int] = []
    for item in selected:
        try:
            index = int(item)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: json.loads decodes ``Infinity`` / ``1e999`` to
            # float inf, and int(inf) raises it. Skip the entry instead of
            # letting one bad element abort the whole parse.
            continue
        if 0 <= index < num_options:
            valid.append(index)

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(valid))
[docs]
def build_pick_messages(
    *,
    feedback_text: str,
    current_level: str,
    labels: list[str],
    hierarchy_path: list[tuple[str, str]] | None = None,
) -> tuple[str, str]:
    """Return the ``(system, user)`` message pair for one hierarchy-level pick.

    The system message is always ``SYSTEM_PROMPT``. The user message is
    rendered from the feedback, the optional parent path, the level name
    and the numbered labels. When ``labels`` is empty there is nothing to
    choose from, so the user message is the empty string.

    Parameters
    ----------
    feedback_text:
        Text of the feedback item to classify.
    current_level:
        Name of the hierarchy level being decided.
    labels:
        Candidate options for this level.
    hierarchy_path:
        ``(level_name, chosen_label)`` pairs already selected above this
        level; ``None`` is treated the same as an empty list.

    Returns
    -------
    tuple[str, str]
        ``(system_message, user_message)``.
    """
    user_message = ""
    if labels:
        user_message = _build_user_message(
            feedback_text=feedback_text,
            current_level=current_level,
            labels=labels,
            hierarchy_path=hierarchy_path if hierarchy_path else [],
        )
    return SYSTEM_PROMPT, user_message
[docs]
def parse_selected_indices(raw: str, num_options: int) -> list[int]:
    """Parse the model JSON response for one hierarchy-level pick.

    Public entry point that delegates directly to
    ``_parse_selected_indices``.

    Parameters
    ----------
    raw:
        Raw model response text, expected to be a JSON object of the form
        ``{"selected": [...]}``.
    num_options:
        Number of options that were offered; indices outside
        ``0 .. num_options - 1`` are discarded.

    Returns
    -------
    list[int]
        Unique in-range indices in first-seen order; an empty list when the
        response cannot be decoded or ``"selected"`` is not a list.
    """
    return _parse_selected_indices(raw, num_options)
# System prompt for the judge call: it asks the model to rate, on a
# continuous 0.0-1.0 scale, how well an assigned label fits a feedback
# record. build_judge_messages returns it as the system message. The text
# is runtime behaviour: any wording change alters what the model is asked.
_JUDGE_SYSTEM = """You are evaluating whether a code assignment fits a feedback record.
Context:
These feedback records are collected from community members by Red Cross / Red Crescent National Societies as part of humanitarian programs. Feedback is qualitative and unstructured. It may be:
- Short or incomplete (a few words or one sentence)
- Indirect or emotionally expressed rather than explicit
- Originally written in a local language and translated
- About services, access, staff behaviour, health, safety, or community concerns
Your task:
Assess how well the assigned code label at the requested level fits the feedback record, given the full code path (Type > Category > Code) as context.
Important:
- Do not penalise feedback for being brief or colloquial — short feedback is normal in this domain.
- Do not require exact keyword matches. Assess meaning and intent.
- A reasonable interpretation of ambiguous feedback can still warrant a high confidence score, as long as it is grounded in the text.
- Do not assign high confidence based on superficial similarity alone — the code must genuinely capture what the community member is expressing.
Scoring:
Assign a score from 0.0 to 1.0. Use the full continuous range — do not round to fixed values.
Reference anchors:
- 1.0: the feedback clearly and directly supports this assignment
- 0.75: the feedback reasonably supports this assignment
- 0.5: the assignment is plausible but uncertain
- 0.25: the fit is weak or speculative
- 0.0: the feedback does not support this assignment or the assignment is clearly wrong
Scores between anchors are expected and encouraged. For example, a strong but not perfect match might be 0.85."""
[docs]
def build_judge_messages(
    *,
    feedback_text: str,
    level: str,
    path: list[tuple[str, str]],
) -> tuple[str, str]:
    """Assemble the ``(system, user)`` message pair for judging one level.

    The user message has three sections separated by blank lines: the
    feedback text fenced by ``---`` lines, the code path rendered one
    ``name: label`` pair per line, and a closing instruction naming the
    level to evaluate.

    Parameters
    ----------
    feedback_text:
        Raw text of the feedback record being coded.
    level:
        The hierarchy level being evaluated: ``"Type"``, ``"Category"``,
        or ``"Code"``.
    path:
        Full code path up to and including the current level, as
        ``[(level_name, label), ...]``. For the Category judge, for
        example:
        ``[("Type", "Service Delivery"), ("Category", "Staff Behavior")]``.

    Returns
    -------
    tuple[str, str]
        ``(system_message, user_message)``; the system message is always
        ``_JUDGE_SYSTEM``.
    """
    rendered_path = "\n".join(f"{step}: {chosen}" for step, chosen in path)
    sections = (
        f"Feedback:\n---\n{feedback_text}\n---",
        f"Code path:\n{rendered_path}",
        f"Evaluate the {level} assignment.",
    )
    # Joining on a blank line reproduces the "\n\n" separators between sections.
    return _JUDGE_SYSTEM, "\n\n".join(sections)