Spaces:
Running
Running
File size: 7,764 Bytes
af2657b 373ff24 af2657b 373ff24 af2657b 373ff24 af2657b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
"""LlamaIndex case index for semantic search over case documents.
Provides RAG capabilities for smolagents tools to query evidence and case details.
Uses Nebius embeddings via the decoupled embedding service.
"""
from typing import TYPE_CHECKING
from llama_index.core import VectorStoreIndex, Document
from services.embeddings import get_embedding_service
if TYPE_CHECKING:
from case_db.models import CriminalCase
class CaseIndex:
    """Semantic search over case documents for agent tool use.

    Creates a vector index from the case summary, charges, evidence, witness
    testimonies, defendant background, and both sides' arguments. Agents can
    query this index to find relevant information during deliberation.

    Uses Nebius embeddings (4096-dim) via the centralized embedding service.
    """

    def __init__(self, case: "CriminalCase", top_k: int = 3):
        """Initialize the case index.

        Args:
            case: The criminal case to index
            top_k: Number of documents to retrieve per query (default 3,
                matching the previous hard-coded behavior)
        """
        self.case = case
        # Initialize embedding service (configures LlamaIndex Settings)
        self._embedding_service = get_embedding_service()
        # Build the index once up front; the case documents do not change.
        self.index = self._build_index()
        # Use a retriever instead of a query_engine to avoid redundant LLM
        # calls — the CodeAgent reasons about the raw retrieved docs directly.
        self.retriever = self.index.as_retriever(similarity_top_k=top_k)

    def _build_index(self) -> VectorStoreIndex:
        """Build the vector index from every case document section."""
        documents = [
            *self._summary_and_charges_docs(),
            *self._evidence_docs(),
            *self._witness_docs(),
            *self._defendant_docs(),
            *self._argument_docs(),
        ]
        return VectorStoreIndex.from_documents(documents)

    def _summary_and_charges_docs(self) -> list[Document]:
        """Documents for the case summary and (when present) the charges."""
        docs = [Document(
            text=self.case.summary,
            metadata={
                "type": "summary",
                "case_id": self.case.case_id
            }
        )]
        if self.case.charges:
            docs.append(Document(
                text=f"Charges: {', '.join(self.case.charges)}",
                metadata={
                    "type": "charges",
                    "case_id": self.case.case_id
                }
            ))
        return docs

    def _evidence_docs(self) -> list[Document]:
        """One document per piece of evidence, flagging contestable items."""
        docs = []
        for evidence in self.case.evidence:
            doc_text = f"Evidence ({evidence.type}): {evidence.description}"
            if evidence.contestable and evidence.contest_reason:
                doc_text += f" [Contestable: {evidence.contest_reason}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "evidence",
                    "evidence_type": evidence.type,
                    "evidence_id": evidence.evidence_id,
                    "case_id": self.case.case_id,
                    "strength_prosecution": evidence.strength_prosecution,
                    "strength_defense": evidence.strength_defense,
                }
            ))
        return docs

    def _witness_docs(self) -> list[Document]:
        """One document per witness testimony, noting credibility issues."""
        docs = []
        for witness in self.case.witnesses:
            doc_text = (
                f"Witness {witness.name} ({witness.role}, {witness.side}): "
                f"{witness.testimony_summary}"
            )
            if witness.credibility_issues:
                doc_text += f" [Credibility issues: {', '.join(witness.credibility_issues)}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "witness",
                    "witness_id": witness.witness_id,
                    "witness_name": witness.name,
                    "witness_role": witness.role,
                    "witness_side": witness.side,
                    "case_id": self.case.case_id,
                }
            ))
        return docs

    def _defendant_docs(self) -> list[Document]:
        """A single document describing the defendant, if one is attached."""
        defendant = self.case.defendant
        if not defendant:
            return []
        defendant_text = f"Defendant: {defendant.name}"
        if defendant.age:
            defendant_text += f", age {defendant.age}"
        if defendant.occupation:
            defendant_text += f", {defendant.occupation}"
        if defendant.background:
            defendant_text += f". Background: {defendant.background}"
        if defendant.prior_record:
            defendant_text += f". Prior record: {', '.join(defendant.prior_record)}"
        return [Document(
            text=defendant_text,
            metadata={
                "type": "defendant",
                "case_id": self.case.case_id,
            }
        )]

    def _argument_docs(self) -> list[Document]:
        """One document per prosecution and per defense argument."""
        docs = []
        sides = (
            ("Prosecution argument", "prosecution_argument",
             self.case.prosecution_arguments),
            ("Defense argument", "defense_argument",
             self.case.defense_arguments),
        )
        for label, doc_type, arguments in sides:
            for i, arg in enumerate(arguments):
                docs.append(Document(
                    text=f"{label}: {arg}",
                    metadata={
                        "type": doc_type,
                        "argument_index": i,
                        "case_id": self.case.case_id,
                    }
                ))
        return docs

    def query(self, question: str) -> str:
        """Query the case index for relevant information.

        Args:
            question: Natural language question about the case

        Returns:
            Relevant case documents (raw text, no LLM synthesis)
        """
        nodes = self.retriever.retrieve(question)
        if not nodes:
            return "No relevant information found."
        # Return raw document text for CodeAgent to reason about
        return "\n\n".join(node.text for node in nodes)

    def query_evidence(self, query: str) -> str:
        """Query specifically for evidence-related information.

        Args:
            query: What evidence to search for

        Returns:
            Relevant evidence information
        """
        return self.query(f"Evidence related to: {query}")

    def query_witnesses(self, query: str) -> str:
        """Query specifically for witness testimony.

        Args:
            query: What witness information to search for

        Returns:
            Relevant witness testimony
        """
        return self.query(f"Witness testimony about: {query}")

    def get_all_evidence_summaries(self) -> list[str]:
        """Get list of all evidence summaries for quick reference."""
        return [
            f"[{e.evidence_id}] {e.type}: {e.description}"
            for e in self.case.evidence
        ]

    def get_all_witness_summaries(self) -> list[str]:
        """Get list of all witness summaries for quick reference.

        Testimonies longer than 100 characters are truncated with an
        ellipsis; shorter ones are shown in full (no spurious "...").
        """
        summaries = []
        for w in self.case.witnesses:
            testimony = w.testimony_summary
            if len(testimony) > 100:
                testimony = testimony[:100] + "..."
            summaries.append(f"[{w.witness_id}] {w.name} ({w.role}): {testimony}")
        return summaries
class CaseIndexFactory:
    """Factory that builds CaseIndex instances and memoizes them by case id."""

    _cache: dict[str, CaseIndex] = {}

    @classmethod
    def get_index(cls, case: "CriminalCase") -> CaseIndex:
        """Return the index for *case*, building it on first request.

        Indices are cached by ``case_id`` so repeated lookups for the same
        case never rebuild the vector store.

        Args:
            case: The criminal case to index

        Returns:
            CaseIndex for the case
        """
        # EAFP: the cache hit is the common path after the first call.
        try:
            return cls._cache[case.case_id]
        except KeyError:
            index = cls._cache[case.case_id] = CaseIndex(case)
            return index

    @classmethod
    def clear_cache(cls) -> None:
        """Drop every cached index so subsequent calls rebuild from scratch."""
        cls._cache.clear()
|