Spaces:
Running
Running
File size: 7,764 Bytes
af2657b 373ff24 af2657b 373ff24 af2657b 373ff24 af2657b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
"""LlamaIndex case index for semantic search over case documents.
Provides RAG capabilities for smolagents tools to query evidence and case details.
Uses Nebius embeddings via the decoupled embedding service.
"""
from typing import TYPE_CHECKING
from llama_index.core import VectorStoreIndex, Document
from services.embeddings import get_embedding_service
if TYPE_CHECKING:
from case_db.models import CriminalCase
class CaseIndex:
    """Semantic search over case documents for agent tool use.

    Creates a vector index from the case summary, charges, evidence, witness
    testimonies, defendant background, and both sides' arguments. Agents can
    query this index to find relevant information during deliberation.

    Uses Nebius embeddings (4096-dim) via the centralized embedding service.
    """

    def __init__(self, case: "CriminalCase", top_k: int = 3):
        """Initialize the case index.

        Args:
            case: The criminal case to index
            top_k: Number of documents to retrieve per query (default 3,
                matching the previous hard-coded behavior)
        """
        self.case = case
        # Initialize embedding service (configures LlamaIndex Settings)
        self._embedding_service = get_embedding_service()
        # Build the index once up front; the case documents do not change.
        self.index = self._build_index()
        # Use a retriever instead of a query_engine to avoid redundant LLM
        # calls — the CodeAgent reasons about the raw retrieved docs directly.
        self.retriever = self.index.as_retriever(similarity_top_k=top_k)

    def _build_index(self) -> VectorStoreIndex:
        """Build the vector index from every case document section."""
        documents = [
            *self._summary_and_charges_docs(),
            *self._evidence_docs(),
            *self._witness_docs(),
            *self._defendant_docs(),
            *self._argument_docs(),
        ]
        return VectorStoreIndex.from_documents(documents)

    def _summary_and_charges_docs(self) -> list[Document]:
        """Documents for the case summary and (when present) the charges."""
        docs = [Document(
            text=self.case.summary,
            metadata={
                "type": "summary",
                "case_id": self.case.case_id
            }
        )]
        if self.case.charges:
            docs.append(Document(
                text=f"Charges: {', '.join(self.case.charges)}",
                metadata={
                    "type": "charges",
                    "case_id": self.case.case_id
                }
            ))
        return docs

    def _evidence_docs(self) -> list[Document]:
        """One document per piece of evidence, flagging contestable items."""
        docs = []
        for evidence in self.case.evidence:
            doc_text = f"Evidence ({evidence.type}): {evidence.description}"
            if evidence.contestable and evidence.contest_reason:
                doc_text += f" [Contestable: {evidence.contest_reason}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "evidence",
                    "evidence_type": evidence.type,
                    "evidence_id": evidence.evidence_id,
                    "case_id": self.case.case_id,
                    "strength_prosecution": evidence.strength_prosecution,
                    "strength_defense": evidence.strength_defense,
                }
            ))
        return docs

    def _witness_docs(self) -> list[Document]:
        """One document per witness testimony, noting credibility issues."""
        docs = []
        for witness in self.case.witnesses:
            doc_text = (
                f"Witness {witness.name} ({witness.role}, {witness.side}): "
                f"{witness.testimony_summary}"
            )
            if witness.credibility_issues:
                doc_text += f" [Credibility issues: {', '.join(witness.credibility_issues)}]"
            docs.append(Document(
                text=doc_text,
                metadata={
                    "type": "witness",
                    "witness_id": witness.witness_id,
                    "witness_name": witness.name,
                    "witness_role": witness.role,
                    "witness_side": witness.side,
                    "case_id": self.case.case_id,
                }
            ))
        return docs

    def _defendant_docs(self) -> list[Document]:
        """A single document describing the defendant, if one is attached."""
        defendant = self.case.defendant
        if not defendant:
            return []
        defendant_text = f"Defendant: {defendant.name}"
        if defendant.age:
            defendant_text += f", age {defendant.age}"
        if defendant.occupation:
            defendant_text += f", {defendant.occupation}"
        if defendant.background:
            defendant_text += f". Background: {defendant.background}"
        if defendant.prior_record:
            defendant_text += f". Prior record: {', '.join(defendant.prior_record)}"
        return [Document(
            text=defendant_text,
            metadata={
                "type": "defendant",
                "case_id": self.case.case_id,
            }
        )]

    def _argument_docs(self) -> list[Document]:
        """One document per prosecution and per defense argument."""
        docs = []
        sides = (
            ("Prosecution argument", "prosecution_argument",
             self.case.prosecution_arguments),
            ("Defense argument", "defense_argument",
             self.case.defense_arguments),
        )
        for label, doc_type, arguments in sides:
            for i, arg in enumerate(arguments):
                docs.append(Document(
                    text=f"{label}: {arg}",
                    metadata={
                        "type": doc_type,
                        "argument_index": i,
                        "case_id": self.case.case_id,
                    }
                ))
        return docs

    def query(self, question: str) -> str:
        """Query the case index for relevant information.

        Args:
            question: Natural language question about the case

        Returns:
            Relevant case documents (raw text, no LLM synthesis)
        """
        nodes = self.retriever.retrieve(question)
        if not nodes:
            return "No relevant information found."
        # Return raw document text for CodeAgent to reason about
        return "\n\n".join(node.text for node in nodes)

    def query_evidence(self, query: str) -> str:
        """Query specifically for evidence-related information.

        Args:
            query: What evidence to search for

        Returns:
            Relevant evidence information
        """
        return self.query(f"Evidence related to: {query}")

    def query_witnesses(self, query: str) -> str:
        """Query specifically for witness testimony.

        Args:
            query: What witness information to search for

        Returns:
            Relevant witness testimony
        """
        return self.query(f"Witness testimony about: {query}")

    def get_all_evidence_summaries(self) -> list[str]:
        """Get list of all evidence summaries for quick reference."""
        return [
            f"[{e.evidence_id}] {e.type}: {e.description}"
            for e in self.case.evidence
        ]

    def get_all_witness_summaries(self) -> list[str]:
        """Get list of all witness summaries for quick reference.

        Testimonies longer than 100 characters are truncated with an
        ellipsis; shorter ones are shown in full (no spurious "...").
        """
        summaries = []
        for w in self.case.witnesses:
            testimony = w.testimony_summary
            if len(testimony) > 100:
                testimony = testimony[:100] + "..."
            summaries.append(f"[{w.witness_id}] {w.name} ({w.role}): {testimony}")
        return summaries
class CaseIndexFactory:
    """Factory that builds CaseIndex instances and memoizes them by case id."""

    _cache: dict[str, CaseIndex] = {}

    @classmethod
    def get_index(cls, case: "CriminalCase") -> CaseIndex:
        """Return the index for *case*, building it on first request.

        Indices are cached by ``case_id`` so repeated lookups for the same
        case never rebuild the vector store.

        Args:
            case: The criminal case to index

        Returns:
            CaseIndex for the case
        """
        # EAFP: the cache hit is the common path after the first call.
        try:
            return cls._cache[case.case_id]
        except KeyError:
            index = cls._cache[case.case_id] = CaseIndex(case)
            return index

    @classmethod
    def clear_cache(cls) -> None:
        """Drop every cached index so subsequent calls rebuild from scratch."""
        cls._cache.clear()
|