File size: 7,764 Bytes
af2657b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373ff24
 
 
af2657b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373ff24
af2657b
373ff24
 
 
 
 
af2657b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""LlamaIndex case index for semantic search over case documents.

Provides RAG capabilities for smolagents tools to query evidence and case details.
Uses Nebius embeddings via the decoupled embedding service.
"""

from typing import TYPE_CHECKING

from llama_index.core import VectorStoreIndex, Document

from services.embeddings import get_embedding_service

if TYPE_CHECKING:
    from case_db.models import CriminalCase


class CaseIndex:
    """Semantic search over case documents for agent tool use.

    Creates a vector index from case summary, charges, evidence, witness
    testimonies, defendant background, and both sides' arguments. Agents
    can query this index to find relevant information during deliberation.

    Uses Nebius embeddings (4096-dim) via the centralized embedding service.
    """

    def __init__(self, case: "CriminalCase"):
        """Initialize the case index.

        Args:
            case: The criminal case to index
        """
        self.case = case

        # Initialize embedding service (configures LlamaIndex Settings)
        self._embedding_service = get_embedding_service()

        # Build the index once up front; queries only hit the retriever.
        self.index = self._build_index()
        # Use a retriever instead of a query_engine to avoid redundant LLM
        # calls -- the CodeAgent reasons about the raw retrieved docs directly.
        self.retriever = self.index.as_retriever(similarity_top_k=3)

    def _build_index(self) -> "VectorStoreIndex":
        """Build the vector index from all case documents.

        Each document category is assembled by a dedicated helper so the
        indexing logic for one category can be changed in isolation.
        """
        documents = [
            *self._summary_and_charges_docs(),
            *self._evidence_docs(),
            *self._witness_docs(),
            *self._defendant_docs(),
            *self._argument_docs(),
        ]
        return VectorStoreIndex.from_documents(documents)

    def _summary_and_charges_docs(self) -> "list[Document]":
        """Documents for the case summary and, when present, the charges."""
        docs = [
            Document(
                text=self.case.summary,
                metadata={
                    "type": "summary",
                    "case_id": self.case.case_id,
                },
            )
        ]
        if self.case.charges:
            docs.append(Document(
                text=f"Charges: {', '.join(self.case.charges)}",
                metadata={
                    "type": "charges",
                    "case_id": self.case.case_id,
                },
            ))
        return docs

    def _evidence_docs(self) -> "list[Document]":
        """One document per piece of evidence, flagging contestable items."""
        docs = []
        for evidence in self.case.evidence:
            doc_text = f"Evidence ({evidence.type}): {evidence.description}"
            # Surface the contest reason so agents can challenge weak evidence.
            if evidence.contestable and evidence.contest_reason:
                doc_text += f" [Contestable: {evidence.contest_reason}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "evidence",
                    "evidence_type": evidence.type,
                    "evidence_id": evidence.evidence_id,
                    "case_id": self.case.case_id,
                    "strength_prosecution": evidence.strength_prosecution,
                    "strength_defense": evidence.strength_defense,
                },
            ))
        return docs

    def _witness_docs(self) -> "list[Document]":
        """One document per witness, including any credibility issues."""
        docs = []
        for witness in self.case.witnesses:
            doc_text = (
                f"Witness {witness.name} ({witness.role}, {witness.side}): "
                f"{witness.testimony_summary}"
            )
            if witness.credibility_issues:
                doc_text += f" [Credibility issues: {', '.join(witness.credibility_issues)}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "witness",
                    "witness_id": witness.witness_id,
                    "witness_name": witness.name,
                    "witness_role": witness.role,
                    "witness_side": witness.side,
                    "case_id": self.case.case_id,
                },
            ))
        return docs

    def _defendant_docs(self) -> "list[Document]":
        """Zero or one document describing the defendant's background."""
        defendant = self.case.defendant
        if not defendant:
            return []
        defendant_text = f"Defendant: {defendant.name}"
        # Optional fields are appended only when populated.
        if defendant.age:
            defendant_text += f", age {defendant.age}"
        if defendant.occupation:
            defendant_text += f", {defendant.occupation}"
        if defendant.background:
            defendant_text += f". Background: {defendant.background}"
        if defendant.prior_record:
            defendant_text += f". Prior record: {', '.join(defendant.prior_record)}"
        return [Document(
            text=defendant_text,
            metadata={
                "type": "defendant",
                "case_id": self.case.case_id,
            },
        )]

    def _argument_docs(self) -> "list[Document]":
        """One document per prosecution argument, then per defense argument."""
        docs = []
        sides = [
            ("Prosecution argument", "prosecution_argument", self.case.prosecution_arguments),
            ("Defense argument", "defense_argument", self.case.defense_arguments),
        ]
        for label, doc_type, arguments in sides:
            for i, arg in enumerate(arguments):
                docs.append(Document(
                    text=f"{label}: {arg}",
                    metadata={
                        "type": doc_type,
                        "argument_index": i,
                        "case_id": self.case.case_id,
                    },
                ))
        return docs

    def query(self, question: str) -> str:
        """Query the case index for relevant information.

        Args:
            question: Natural language question about the case

        Returns:
            Relevant case documents (raw text, no LLM synthesis), or a
            fixed "not found" message when nothing is retrieved.
        """
        nodes = self.retriever.retrieve(question)
        if not nodes:
            return "No relevant information found."
        # Return raw document text for the CodeAgent to reason about.
        return "\n\n".join(node.text for node in nodes)

    def query_evidence(self, query: str) -> str:
        """Query specifically for evidence-related information.

        Args:
            query: What evidence to search for

        Returns:
            Relevant evidence information
        """
        return self.query(f"Evidence related to: {query}")

    def query_witnesses(self, query: str) -> str:
        """Query specifically for witness testimony.

        Args:
            query: What witness information to search for

        Returns:
            Relevant witness testimony
        """
        return self.query(f"Witness testimony about: {query}")

    def get_all_evidence_summaries(self) -> list[str]:
        """Get list of all evidence summaries for quick reference."""
        return [
            f"[{e.evidence_id}] {e.type}: {e.description}"
            for e in self.case.evidence
        ]

    def get_all_witness_summaries(self) -> list[str]:
        """Get list of all witness summaries for quick reference.

        Testimony is truncated to 100 characters; the ellipsis is appended
        only when truncation actually occurred (previously it was appended
        unconditionally, which misrepresented short testimony).
        """
        summaries = []
        for w in self.case.witnesses:
            testimony = w.testimony_summary
            if len(testimony) > 100:
                testimony = testimony[:100] + "..."
            summaries.append(f"[{w.witness_id}] {w.name} ({w.role}): {testimony}")
        return summaries


class CaseIndexFactory:
    """Factory for creating and caching case indices."""

    # Process-wide cache of built indices, keyed by case_id.
    _cache: dict[str, CaseIndex] = {}

    @classmethod
    def get_index(cls, case: "CriminalCase") -> CaseIndex:
        """Get or create a case index.

        Caches indices by case_id to avoid rebuilding.

        Args:
            case: The criminal case to index

        Returns:
            CaseIndex for the case
        """
        cached = cls._cache.get(case.case_id)
        if cached is None:
            # Building the index is expensive, so do it at most once per case.
            cached = CaseIndex(case)
            cls._cache[case.case_id] = cached
        return cached

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the index cache."""
        cls._cache.clear()