# Source code for qfa.services.coding_classifier

"""Helpers for iterative LLM coding prompts and response parsing."""

import json

from pydantic import BaseModel, Field



# [docs]
class JudgeResponse(BaseModel):
    """Structured output returned by the LLM judge for one hierarchy level."""

    # NOTE(review): the class docstring and the Field descriptions below are
    # presumably surfaced in the schema handed to the LLM — confirm before
    # rewording any of them.

    # Judge's confidence that the assigned label fits the feedback. The 0..1
    # range is stated only in the description text; no ge/le bound is declared
    # on the field, so out-of-range floats are not rejected by this definition.
    score: float = Field(description="Confidence score between 0 and 1.")
    # Free-text justification accompanying the score.
    explanation: str = Field(description="Reason for this score.")



# System prompt for one hierarchy-level "pick" call. It instructs the model to
# choose among numbered options and to answer with JSON of the form
# {"selected": [<integer indices>]}, which is the shape that
# _parse_selected_indices reads. The text is sent verbatim; treat any edit to
# it as a behavior change.
_SYSTEM = """You are a classification agent for beneficiary feedback items.

Your task is to classify the feedback item using only the options provided at the current hierarchy level.

Goal:
Select the best-supported option(s) while balancing:
- precision: avoid clearly wrong labels
- recall: do not miss labels that are reasonably supported

Instructions:
- Use only the current-level options provided.
- Use the feedback text as the main evidence.
- Use the parent path context only to interpret the current level correctly and disambiguate meaning.
- Select an option if it is:
  - clearly supported by the feedback text, or
  - a reasonable interpretation that is strongly implied by the text
- Do not select an option if it is:
  - only loosely related,
  - a weak or doubtful match,
  - dependent on speculation beyond the text,
  - more general than what the text actually supports
- Multi-label is allowed, but only when the feedback contains multiple distinct ideas that separately support different options.
- Do not select multiple options that express the same underlying idea.
- Prefer the best-fitting option(s) rather than returning none.
- Return an empty list only when none of the options are meaningfully supported by the feedback.

Selection guidance:
- Most items should result in 1 selected option.
- Select 2 or more only when the text clearly contains multiple distinct classifiable ideas.
- Avoid broad over-selection.

Output rules:
- Output JSON only.
- Do not output markdown.
- Do not output explanations.
- Do not output any text other than the JSON object.

Return exactly this format:
{"selected":[<integer indices>]}"""

# Public name for the pick prompt; build_pick_messages returns this constant
# as the system message.
SYSTEM_PROMPT = _SYSTEM


def _build_user_message(
    *,
    feedback_text: str,
    current_level: str,
    labels: list[str],
    hierarchy_path: list[tuple[str, str]],
) -> str:
    """Format the user turn: feedback, optional path, current level, numbered options."""
    if hierarchy_path:
        path_lines = "\n".join(f"{label}: {value}" for label, value in hierarchy_path)
        path_block = f"Hierarchy path already selected:\n{path_lines}\n\n"
    else:
        path_block = ""
    options = "\n".join(f"{i}: {labels[i]}" for i in range(len(labels)))
    return (
        f"Feedback:\n---\n{feedback_text}\n---\n"
        f"{path_block}"
        f"Current level:\n{current_level}\n\n"
        f"Options:\n{options}"
    )


def _parse_selected_indices(raw: str, num_options: int) -> list[int]:
    """JSON ``{"selected": [...]}`` → unique indices in ``0 .. num_options-1``."""
    try:
        selected = json.loads(raw.strip())["selected"]
        if not isinstance(selected, list):
            return []
    except Exception:
        return []
    out: list[int] = []
    for x in selected:
        try:
            i = int(x)
        except (TypeError, ValueError):
            continue
        if 0 <= i < num_options:
            out.append(i)
    return list(dict.fromkeys(out))



# [docs]
def build_pick_messages(
    *,
    feedback_text: str,
    current_level: str,
    labels: list[str],
    hierarchy_path: list[tuple[str, str]] | None = None,
) -> tuple[str, str]:
    """Assemble the ``(system, user)`` message pair for a single-level pick.

    Parameters
    ----------
    feedback_text:
        The feedback item to classify.
    current_level:
        Name of the hierarchy level being decided.
    labels:
        Option labels offered at this level. When empty, the user message is
        the empty string.
    hierarchy_path:
        ``(level_name, selected_value)`` pairs chosen at parent levels;
        ``None`` is treated the same as an empty list.

    Returns
    -------
    tuple[str, str]
        ``SYSTEM_PROMPT`` and the formatted user message.
    """
    user_message = ""
    if labels:
        user_message = _build_user_message(
            feedback_text=feedback_text,
            current_level=current_level,
            labels=labels,
            hierarchy_path=hierarchy_path if hierarchy_path else [],
        )
    return SYSTEM_PROMPT, user_message




# [docs]
def parse_selected_indices(raw: str, num_options: int) -> list[int]:
    """Parse the model JSON response for one hierarchy-level pick.

    Public entry point; delegates to ``_parse_selected_indices``.

    Parameters
    ----------
    raw:
        Raw model output, expected to be JSON of the form
        ``{"selected": [...]}``.
    num_options:
        Number of options that were offered; only indices in
        ``0 .. num_options-1`` are kept.

    Returns
    -------
    list[int]
        Unique in-range indices in order of first appearance; an empty list
        when the response cannot be parsed or ``"selected"`` is not a list.
    """
    return _parse_selected_indices(raw, num_options)



# System prompt for the judge call; build_judge_messages returns it unchanged
# as the system message. It asks for a 0.0-1.0 fit score but does not itself
# specify an output format — presumably the response is structured via
# JudgeResponse; confirm at the call site. The text is sent verbatim, so any
# edit to it is a behavior change.
_JUDGE_SYSTEM = """You are evaluating whether a code assignment fits a feedback record.

Context:
These feedback records are collected from community members by Red Cross / Red Crescent National Societies as part of humanitarian programs. Feedback is qualitative and unstructured. It may be:
- Short or incomplete (a few words or one sentence)
- Indirect or emotionally expressed rather than explicit
- Originally written in a local language and translated
- About services, access, staff behaviour, health, safety, or community concerns

Your task:
Assess how well the assigned code label at the requested level fits the feedback record, given the full code path (Type > Category > Code) as context.

Important:
- Do not penalise feedback for being brief or colloquial — short feedback is normal in this domain.
- Do not require exact keyword matches. Assess meaning and intent.
- A reasonable interpretation of ambiguous feedback can still warrant a high confidence score, as long as it is grounded in the text.
- Do not assign high confidence based on superficial similarity alone — the code must genuinely capture what the community member is expressing.

Scoring:
Assign a score from 0.0 to 1.0. Use the full continuous range — do not round to fixed values.

Reference anchors:
- 1.0: the feedback clearly and directly supports this assignment
- 0.75: the feedback reasonably supports this assignment
- 0.5: the assignment is plausible but uncertain
- 0.25: the fit is weak or speculative
- 0.0: the feedback does not support this assignment or the assignment is clearly wrong

Scores between anchors are expected and encouraged. For example, a strong but not perfect match might be 0.85."""



# [docs]
def build_judge_messages(
    *,
    feedback_text: str,
    level: str,
    path: list[tuple[str, str]],
) -> tuple[str, str]:
    """Assemble the ``(system, user)`` message pair for one judge call.

    Parameters
    ----------
    feedback_text:
        Raw text of the feedback record being coded.
    level:
        Name of the hierarchy level under evaluation: ``"Type"``,
        ``"Category"``, or ``"Code"``. It is interpolated into the closing
        instruction of the user message.
    path:
        Code path up to and including the level being judged, given as
        ``[(level_name, label), ...]``. For a Category judge this could be
        ``[("Type", "Service Delivery"), ("Category", "Staff Behavior")]``.

    Returns
    -------
    tuple[str, str]
        ``_JUDGE_SYSTEM`` and the formatted user message.
    """
    rendered_path = "\n".join(f"{level_name}: {assigned}" for level_name, assigned in path)
    # Three sections, separated by a blank line each.
    sections = [
        f"Feedback:\n---\n{feedback_text}\n---",
        f"Code path:\n{rendered_path}",
        f"Evaluate the {level} assignment.",
    ]
    user_message = "\n\n".join(sections)
    return _JUDGE_SYSTEM, user_message