Spaces:

kn29
/

rag-chat

Sleeping

App Files Files Community

Kartik Narang committed on Sep 22

Commit

fc6a53f

1 Parent(s): da63606

first commit

Browse files

Files changed (3) hide show

app.py +678 -0
rag.py +593 -0
requirements.txt +23 -0

app.py ADDED Viewed

	@@ -0,0 +1,678 @@

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import pymongo
+import os
+import numpy as np
+from datetime import datetime, timedelta
+import logging
+from typing import Dict, Any, Optional, List
+import base64
+import json
+import threading
+import time
+from collections import defaultdict
+import faiss
+# Import our simplified advanced RAG system
+import rag
+# Configure logging: INFO level; each record shows timestamp, logger name,
+# level and message.
+# NOTE: the format has no %(session_id)s field, so the 'session_id' extra
+# attached by create_session_logger() is not rendered in the output.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Initialize FastAPI app
+app = FastAPI(title="Advanced RAG Chat Service", version="1.0.0")
+# Add CORS middleware
+# NOTE(review): every origin, method and header is allowed while credentials
+# are enabled -- confirm this is acceptable before exposing the service.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Configure this properly in production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Global variables
+# MONGO_CLIENT / DB are assigned by connect_mongodb(); DB is the
+# "legal_rag_db" database handle used by every handler.
+MONGO_CLIENT = None
+DB = None
+# Set to True by initialize_rag() after rag.initialize_models() returns.
+RAG_INITIALIZED = False
+# In-memory session stores
+# Format: {session_id: {"chunks": [...], "embeddings_matrix": np.ndarray,
+#          "faiss_index": faiss.Index, "indexed": bool, "metadata": {...}}}
+SESSION_STORES = {}
+# Re-entrant lock taken around reads/writes of SESSION_STORES by the request
+# handlers and by the background cleanup thread.
+STORE_LOCK = threading.RLock()
+CLEANUP_INTERVAL = 3600  # 1 hour cleanup interval
+STORE_TTL = 24 * 3600  # 24 hours TTL for in-memory stores
+# Request/Response models
+# Request body for POST /chat/{session_id}.
+class ChatRequest(BaseModel):
+    # The user's question; the chat handler rejects it with HTTP 400 when it
+    # is empty after strip().
+    message: str
+# Response body for POST /chat/{session_id}.
+class ChatResponse(BaseModel):
+    success: bool
+    # Generated answer text taken from the RAG result.
+    answer: str
+    # One dict per retrieved source (chunk_id, title, section,
+    # relevance_score, text_preview, entities).
+    sources: List[Dict[str, Any]]
+    # Messages read back from the chats collection after this turn was saved.
+    chat_history: List[Dict[str, Any]]
+    # Wall-clock seconds spent in the handler (time.time() difference).
+    processing_time: float
+    session_id: str
+    # Optional diagnostics passed through from the RAG result.
+    query_analysis: Optional[Dict[str, Any]] = None
+    # Logged by the handler with a "%" suffix -- presumably a 0-100 value;
+    # TODO confirm against rag.query_documents.
+    confidence: Optional[float] = None
+# Request body for POST /init/{session_id}; it declares no fields, the
+# session is identified by the path parameter only.
+class InitRequest(BaseModel):
+    pass
+# Response body for POST /init/{session_id}.
+class InitResponse(BaseModel):
+    success: bool
+    session_id: str
+    # Human-readable status line built by the init handler.
+    message: str
+    # Number of chunks loaded with a usable embedding.
+    chunk_count: int
+    # Document title from the session metadata ("Document" when absent).
+    title: str
+    # filename / text_length / word_count / file_size /
+    # processing_completed_at copied from the session document.
+    document_info: Optional[Dict[str, Any]] = None
+# Response body for GET /health.
+class HealthResponse(BaseModel):
+    # "healthy" only when MongoDB answers a ping and RAG is initialized,
+    # otherwise "unhealthy".
+    status: str
+    mongodb_connected: bool
+    rag_initialized: bool
+    # Distinct session ids with a chat message in the last hour.
+    active_sessions: int
+    # loaded_sessions / indexed_sessions / store_ttl_hours for the in-memory
+    # stores (empty dict when the health check itself failed).
+    memory_usage: Dict[str, Any]
+def create_session_logger(session_id: str):
+    """Create a logger with session context"""
+    return logging.LoggerAdapter(logger, {'session_id': session_id})
+def connect_mongodb():
+    """Initialize MongoDB connection"""
+    global MONGO_CLIENT, DB
+    try:
+        mongodb_url = os.getenv("MONGODB_URL", "mongodb://localhost:27017/")
+        MONGO_CLIENT = pymongo.MongoClient(mongodb_url)
+        DB = MONGO_CLIENT["legal_rag_db"]
+        # Test connection
+        DB.command("ping")
+        # Create indexes for chats collection
+        logger.info("Creating MongoDB indexes for chats...")
+        DB.chats.create_index("session_id")
+        DB.chats.create_index("created_at", expireAfterSeconds=24*60*60)  # 24 hour TTL
+        DB.chats.create_index([("session_id", 1), ("created_at", 1)])  # Compound index
+        logger.info("MongoDB connected successfully")
+        return True
+    except Exception as e:
+        logger.error(f"MongoDB connection failed: {e}")
+        return False
+def initialize_rag():
+    """Initialize RAG system"""
+    global RAG_INITIALIZED
+    try:
+        model_id = os.getenv("EMBEDDING_MODEL_ID", "sentence-transformers/all-MiniLM-L6-v2")
+        groq_api_key = os.getenv("GROQ_API_KEY")
+        logger.info(f"Initializing RAG system with model: {model_id}")
+        rag.initialize_models(model_id, groq_api_key)
+        RAG_INITIALIZED = True
+        logger.info("RAG system initialized successfully")
+        return True
+    except Exception as e:
+        logger.error(f"RAG initialization failed: {e}")
+        return False
+def decode_embedding_from_storage(embedding_list: List[float]) -> np.ndarray:
+    """Convert embedding from MongoDB list back to numpy array"""
+    try:
+        return np.array(embedding_list, dtype=np.float32)
+    except Exception as e:
+        logger.error(f"Failed to decode embedding: {e}")
+        return np.array([])
+def load_session_from_mongodb(session_id: str) -> Dict[str, Any]:
+    """Load session data from MongoDB with precomputed embeddings"""
+    session_logger = create_session_logger(session_id)
+    try:
+        # Get session metadata
+        session_doc = DB.sessions.find_one({"session_id": session_id})
+        if not session_doc:
+            raise ValueError(f"Session {session_id} not found")
+        if session_doc.get("status") != "completed":
+            raise ValueError(f"Session {session_id} not completed yet (status: {session_doc.get('status')})")
+        session_logger.info("Loading session chunks with precomputed embeddings from MongoDB")
+        # Get all chunks for this session with embeddings
+        chunks_cursor = DB.chunks.find({"session_id": session_id}).sort("created_at", 1)
+        chunks_list = list(chunks_cursor)
+        if not chunks_list:
+            raise ValueError(f"No chunks found for session {session_id}")
+        session_logger.info(f"Found {len(chunks_list)} chunks with embeddings")
+        # Convert MongoDB chunks to format needed by RAG system
+        processed_chunks = []
+        embeddings_matrix = []
+        for i, chunk_doc in enumerate(chunks_list):
+            # Decode the precomputed embedding
+            embedding_list = chunk_doc.get('embedding', [])
+            if not embedding_list:
+                session_logger.warning(f"Chunk {chunk_doc.get('chunk_id', i)} missing embedding")
+                continue
+            embedding = decode_embedding_from_storage(embedding_list)
+            if embedding.size == 0:
+                session_logger.warning(f"Failed to decode embedding for chunk {chunk_doc.get('chunk_id', i)}")
+                continue
+            # Format chunk for RAG system
+            processed_chunk = {
+                'id': chunk_doc.get('chunk_id', f'chunk_{i}'),
+                'text': chunk_doc['text'],
+                'title': chunk_doc.get('title', session_doc.get('title', 'Document')),
+                'section_type': chunk_doc.get('section_type', 'content'),
+                'importance_score': chunk_doc.get('importance_score', 1.0),
+                'entities': chunk_doc.get('entities', []),
+                'embedding': embedding  # Precomputed embedding as numpy array
+            }
+            processed_chunks.append(processed_chunk)
+            embeddings_matrix.append(embedding)
+        if not processed_chunks:
+            raise ValueError(f"No valid chunks with embeddings found for session {session_id}")
+        # Stack embeddings for FAISS index
+        embeddings_matrix = np.vstack(embeddings_matrix).astype('float32')
+        session_store = {
+            "chunks": processed_chunks,
+            "embeddings_matrix": embeddings_matrix,
+            "faiss_index": None,  # Will be built in indexing step
+            "indexed": False,
+            "metadata": {
+                "session_id": session_id,
+                "title": session_doc.get("title", "Document"),
+                "chunk_count": len(processed_chunks),
+                "loaded_at": datetime.utcnow(),
+                "document_info": {
+                    "filename": session_doc.get("filename", "Unknown"),
+                    "text_length": session_doc.get("text_length", 0),
+                    "word_count": session_doc.get("word_count", 0),
+                    "file_size": session_doc.get("file_size", 0),
+                    "processing_completed_at": session_doc.get("processing_completed_at")
+                }
+            }
+        }
+        session_logger.info(f"Loaded {len(processed_chunks)} chunks with precomputed embeddings")
+        return session_store
+    except Exception as e:
+        session_logger.error(f"Failed to load session from MongoDB: {e}")
+        raise
+def build_faiss_index_from_embeddings(session_id: str) -> Dict[str, Any]:
+    """Build FAISS index from precomputed embeddings"""
+    session_logger = create_session_logger(session_id)
+    with STORE_LOCK:
+        if session_id not in SESSION_STORES:
+            raise ValueError(f"Session {session_id} not loaded")
+        store = SESSION_STORES[session_id]
+        if store["indexed"]:
+            session_logger.info("Session already indexed")
+            return store["metadata"]
+        chunks = store["chunks"]
+        embeddings_matrix = store["embeddings_matrix"]
+    try:
+        session_logger.info(f"Building FAISS index from {len(chunks)} precomputed embeddings...")
+        # Create FAISS index (Inner Product for normalized embeddings)
+        dimension = embeddings_matrix.shape[1]
+        faiss_index = faiss.IndexFlatIP(dimension)
+        # Add embeddings to FAISS index
+        faiss_index.add(embeddings_matrix)
+        # Set global RAG data for this session
+        rag.CHUNKS_DATA = chunks
+        rag.DENSE_INDEX = faiss_index
+        # Build other indices (BM25, concept graph, etc.) using precomputed chunks
+        session_logger.info("Building additional retrieval indices...")
+        # BM25 index for sparse retrieval
+        tokenized_corpus = [chunk['text'].lower().split() for chunk in chunks]
+        rag.BM25_INDEX = rag.BM25Okapi(tokenized_corpus)
+        # ColBERT-style token index
+        rag.TOKEN_TO_CHUNKS = defaultdict(set)
+        for i, chunk in enumerate(chunks):
+            tokens = chunk['text'].lower().split()
+            for token in tokens:
+                rag.TOKEN_TO_CHUNKS[token].add(i)
+        # Legal concept graph
+        import networkx as nx
+        rag.CONCEPT_GRAPH = nx.Graph()
+        for i, chunk in enumerate(chunks):
+            rag.CONCEPT_GRAPH.add_node(i, text=chunk['text'][:200], importance=chunk['importance_score'])
+            # Add edges between chunks with shared entities
+            for j, other_chunk in enumerate(chunks[i+1:], i+1):
+                shared_entities = set(e['text'] for e in chunk['entities']) & \
+                                set(e['text'] for e in other_chunk['entities'])
+                if shared_entities:
+                    rag.CONCEPT_GRAPH.add_edge(i, j, weight=len(shared_entities))
+        # Mark as indexed and store FAISS index
+        with STORE_LOCK:
+            SESSION_STORES[session_id]["faiss_index"] = faiss_index
+            SESSION_STORES[session_id]["indexed"] = True
+        session_logger.info(f"FAISS index built successfully from precomputed embeddings: {len(chunks)} chunks indexed")
+        return SESSION_STORES[session_id]["metadata"]
+    except Exception as e:
+        session_logger.error(f"Failed to build FAISS index from embeddings: {e}")
+        raise
+def save_chat_message(session_id: str, role: str, message: str):
+    """Save chat message to MongoDB"""
+    try:
+        chat_doc = {
+            "session_id": session_id,
+            "role": role,
+            "message": message,
+            "created_at": datetime.utcnow()
+        }
+        DB.chats.insert_one(chat_doc)
+    except Exception as e:
+        logger.error(f"Failed to save chat message for session {session_id}: {e}")
+def get_chat_history(session_id: str, limit: int = 50) -> List[Dict[str, Any]]:
+    """Get chat history for a session"""
+    try:
+        chats_cursor = DB.chats.find(
+            {"session_id": session_id}
+        ).sort("created_at", 1).limit(limit)
+        chat_history = []
+        for chat_doc in chats_cursor:
+            chat_history.append({
+                "role": chat_doc["role"],
+                "message": chat_doc["message"],
+                "timestamp": chat_doc["created_at"].isoformat()
+            })
+        return chat_history
+    except Exception as e:
+        logger.error(f"Failed to get chat history for session {session_id}: {e}")
+        return []
+def cleanup_old_stores():
+    """Background cleanup of old in-memory stores"""
+    while True:
+        try:
+            current_time = datetime.utcnow()
+            expired_sessions = []
+            with STORE_LOCK:
+                for session_id, store in SESSION_STORES.items():
+                    loaded_at = store["metadata"]["loaded_at"]
+                    if (current_time - loaded_at).total_seconds() > STORE_TTL:
+                        expired_sessions.append(session_id)
+                for session_id in expired_sessions:
+                    # Clean up FAISS index and other resources
+                    if SESSION_STORES[session_id].get("faiss_index"):
+                        del SESSION_STORES[session_id]["faiss_index"]
+                    del SESSION_STORES[session_id]
+                    logger.info(f"Cleaned up expired store for session: {session_id}")
+            if expired_sessions:
+                logger.info(f"Cleaned up {len(expired_sessions)} expired session stores")
+        except Exception as e:
+            logger.error(f"Cleanup error: {e}")
+        time.sleep(CLEANUP_INTERVAL)
+@app.on_event("startup")
+async def startup_event():
+    """Initialize connections on startup"""
+    logger.info("Starting up Advanced RAG Chat Service...")
+    # Connect to MongoDB
+    if not connect_mongodb():
+        logger.error("Failed to connect to MongoDB")
+        raise Exception("MongoDB connection failed")
+    # Initialize RAG system
+    if not initialize_rag():
+        logger.error("Failed to initialize RAG system")
+        raise Exception("RAG initialization failed")
+    # Start background cleanup thread
+    cleanup_thread = threading.Thread(target=cleanup_old_stores, daemon=True)
+    cleanup_thread.start()
+    logger.info("Background cleanup thread started")
+    logger.info("Startup completed successfully")
+@app.get("/health", response_model=HealthResponse)
+async def health_check():
+    """Health check endpoint"""
+    try:
+        # Check MongoDB connection
+        mongodb_connected = False
+        active_sessions = 0
+        if DB is not None:
+            try:
+                DB.command("ping")
+                mongodb_connected = True
+                # Count sessions with recent chats
+                one_hour_ago = datetime.utcnow() - timedelta(hours=1)
+                active_sessions = len(DB.chats.distinct("session_id", {"created_at": {"$gte": one_hour_ago}}))
+            except:
+                pass
+        # Memory usage info
+        with STORE_LOCK:
+            memory_sessions = len(SESSION_STORES)
+            indexed_sessions = sum(1 for store in SESSION_STORES.values() if store["indexed"])
+        return HealthResponse(
+            status="healthy" if mongodb_connected and RAG_INITIALIZED else "unhealthy",
+            mongodb_connected=mongodb_connected,
+            rag_initialized=RAG_INITIALIZED,
+            active_sessions=active_sessions,
+            memory_usage={
+                "loaded_sessions": memory_sessions,
+                "indexed_sessions": indexed_sessions,
+                "store_ttl_hours": STORE_TTL / 3600
+            }
+        )
+    except Exception as e:
+        logger.error(f"Health check failed: {e}")
+        return HealthResponse(
+            status="unhealthy",
+            mongodb_connected=False,
+            rag_initialized=False,
+            active_sessions=0,
+            memory_usage={}
+        )
+@app.post("/init/{session_id}", response_model=InitResponse)
+async def initialize_session(session_id: str, request: InitRequest):
+    """Initialize RAG context for a session using precomputed embeddings"""
+    session_logger = create_session_logger(session_id)
+    if DB is None:
+        raise HTTPException(status_code=503, detail="Database not connected")
+    if not RAG_INITIALIZED:
+        raise HTTPException(status_code=503, detail="RAG system not initialized")
+    # Check if already loaded and indexed
+    with STORE_LOCK:
+        if session_id in SESSION_STORES and SESSION_STORES[session_id]["indexed"]:
+            store = SESSION_STORES[session_id]
+            metadata = store["metadata"]
+            session_logger.info("Session already initialized and indexed with precomputed embeddings")
+            return InitResponse(
+                success=True,
+                session_id=session_id,
+                message="Session already initialized with precomputed embeddings",
+                chunk_count=metadata["chunk_count"],
+                title=metadata["title"],
+                document_info=metadata["document_info"]
+            )
+    try:
+        session_logger.info("Initializing session with precomputed embeddings from MongoDB")
+        # Load session data with precomputed embeddings from MongoDB
+        session_store = load_session_from_mongodb(session_id)
+        # Store in memory
+        with STORE_LOCK:
+            SESSION_STORES[session_id] = session_store
+        # Build FAISS index from precomputed embeddings (no re-embedding!)
+        metadata = build_faiss_index_from_embeddings(session_id)
+        session_logger.info(f"Session initialized with precomputed embeddings: {metadata['chunk_count']} chunks indexed")
+        return InitResponse(
+            success=True,
+            session_id=session_id,
+            message=f"Session initialized with precomputed embeddings: {metadata['chunk_count']} chunks ready for advanced RAG",
+            chunk_count=metadata["chunk_count"],
+            title=metadata["title"],
+            document_info=metadata["document_info"]
+        )
+    except ValueError as e:
+        session_logger.error(f"Session initialization failed: {e}")
+        raise HTTPException(status_code=404, detail=str(e))
+    except Exception as e:
+        session_logger.error(f"Session initialization error: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to initialize session: {str(e)}")
+@app.post("/chat/{session_id}", response_model=ChatResponse)
+async def chat_with_document(session_id: str, request: ChatRequest):
+    """Handle chat query with advanced RAG using precomputed embeddings"""
+    session_logger = create_session_logger(session_id)
+    start_time = time.time()
+    if DB is None:
+        raise HTTPException(status_code=503, detail="Database not connected")
+    if not RAG_INITIALIZED:
+        raise HTTPException(status_code=503, detail="RAG system not initialized")
+    # Validate request
+    if not request.message.strip():
+        raise HTTPException(status_code=400, detail="Empty message provided")
+    try:
+        session_logger.info(f"Processing advanced RAG query: {request.message[:100]}...")
+        # Check if session is initialized and indexed
+        with STORE_LOCK:
+            if session_id not in SESSION_STORES:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Session {session_id} not initialized. Call /init/{session_id} first."
+                )
+            if not SESSION_STORES[session_id]["indexed"]:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Session {session_id} not indexed. Call /init/{session_id} first."
+                )
+        # Query using advanced RAG system (now using precomputed embeddings)
+        result = rag.query_documents(request.message, top_k=5)
+        if 'error' in result:
+            raise HTTPException(status_code=500, detail=result['error'])
+        answer = result.get('answer', 'Unable to generate answer.')
+        sources = result.get('sources', [])
+        query_analysis = result.get('query_analysis', {})
+        confidence = result.get('confidence', 0.0)
+        # Save chat messages to MongoDB for persistence
+        save_chat_message(session_id, "user", request.message)
+        save_chat_message(session_id, "assistant", answer)
+        # Get updated chat history
+        chat_history = get_chat_history(session_id)
+        processing_time = time.time() - start_time
+        session_logger.info(f"Advanced RAG query processed in {processing_time:.2f}s with confidence {confidence:.1f}% using precomputed embeddings")
+        # Prepare sources for response
+        formatted_sources = [
+            {
+                "chunk_id": source.get("chunk_id", ""),
+                "title": source.get("title", ""),
+                "section": source.get("section", ""),
+                "relevance_score": source.get("relevance_score", 0.0),
+                "text_preview": source.get("excerpt", "")[:300] + "..." if len(source.get("excerpt", "")) > 300 else source.get("excerpt", ""),
+                "entities": source.get("entities", [])
+            }
+            for source in sources
+        ]
+        return ChatResponse(
+            success=True,
+            answer=answer,
+            sources=formatted_sources,
+            chat_history=chat_history,
+            processing_time=processing_time,
+            session_id=session_id,
+            query_analysis=query_analysis,
+            confidence=confidence
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        session_logger.error(f"Advanced RAG chat processing failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Chat processing failed: {str(e)}")
+@app.get("/history/{session_id}")
+async def get_session_history(session_id: str):
+    """Get chat history for a session"""
+    session_logger = create_session_logger(session_id)
+    if DB is None:
+        raise HTTPException(status_code=503, detail="Database not connected")
+    try:
+        chat_history = get_chat_history(session_id, limit=100)
+        session_logger.info(f"Retrieved {len(chat_history)} chat messages")
+        return {
+            "success": True,
+            "session_id": session_id,
+            "chat_history": chat_history,
+            "total_messages": len(chat_history)
+        }
+    except Exception as e:
+        session_logger.error(f"Failed to get chat history: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to retrieve chat history: {str(e)}")
+@app.delete("/session/{session_id}")
+async def cleanup_session(session_id: str):
+    """Clean up session from memory"""
+    session_logger = create_session_logger(session_id)
+    try:
+        # Remove from memory
+        with STORE_LOCK:
+            if session_id in SESSION_STORES:
+                # Clean up FAISS index
+                if SESSION_STORES[session_id].get("faiss_index"):
+                    del SESSION_STORES[session_id]["faiss_index"]
+                del SESSION_STORES[session_id]
+                session_logger.info("Session removed from memory")
+        return {
+            "success": True,
+            "message": f"Session {session_id} cleaned up successfully"
+        }
+    except Exception as e:
+        session_logger.error(f"Session cleanup failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to cleanup session: {str(e)}")
+@app.get("/sessions/active")
+async def get_active_sessions():
+    """Get information about active sessions in memory"""
+    try:
+        with STORE_LOCK:
+            active_sessions = []
+            for session_id, store in SESSION_STORES.items():
+                metadata = store["metadata"]
+                active_sessions.append({
+                    "session_id": session_id,
+                    "title": metadata["title"],
+                    "chunk_count": metadata["chunk_count"],
+                    "indexed": store["indexed"],
+                    "loaded_at": metadata["loaded_at"].isoformat(),
+                    "age_minutes": (datetime.utcnow() - metadata["loaded_at"]).total_seconds() / 60,
+                    "using_precomputed_embeddings": True
+                })
+        return {
+            "success": True,
+            "active_sessions": active_sessions,
+            "total_sessions": len(active_sessions)
+        }
+    except Exception as e:
+        logger.error(f"Failed to get active sessions: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get active sessions: {str(e)}")
+@app.get("/rag/status")
+async def get_rag_status():
+    """Get advanced RAG system status"""
+    try:
+        return {
+            "success": True,
+            "rag_initialized": RAG_INITIALIZED,
+            "optimization": {
+                "using_precomputed_embeddings": True,
+                "no_reembedding": True,
+                "persistent_faiss_index": True,
+                "mongodb_persistence": True
+            },
+            "features": {
+                "multi_stage_retrieval": True,
+                "dense_retrieval": "FAISS + Precomputed Legal-BERT Embeddings",
+                "sparse_retrieval": "BM25",
+                "entity_based_retrieval": "Legal NER + SpaCy",
+                "graph_based_retrieval": "Legal Concept Graph",
+                "query_analysis": "Legal Intent Classification",
+                "answer_generation": "Groq LLM with IRAC Method"
+            },
+            "active_techniques": [
+                "Dense Embedding Search (FAISS with Precomputed Embeddings)",
+                "BM25 Sparse Retrieval",
+                "ColBERT Token Matching",
+                "Legal Entity Matching",
+                "Concept Graph Expansion",
+                "HyDE Query Expansion",
+                "Multi-Query Retrieval",
+                "Legal Section Classification",
+                "Importance-based Ranking"
+            ]
+        }
+    except Exception as e:
+        logger.error(f"Failed to get RAG status: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get RAG status: {str(e)}")
+# Script entry point: serve the app with uvicorn on all interfaces.
+if __name__ == "__main__":
+    import uvicorn
+    # The PORT environment variable overrides the default port 7861.
+    port = int(os.getenv("PORT", 7861))
+    uvicorn.run(app, host="0.0.0.0", port=port)

rag.py ADDED Viewed

	@@ -0,0 +1,593 @@

+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModel
+from typing import List, Dict, Any, Tuple, Optional
+import faiss
+import hashlib
+from tqdm import tqdm
+from groq import Groq
+import re
+import nltk
+from sklearn.metrics.pairwise import cosine_similarity
+import networkx as nx
+from collections import defaultdict
+import spacy
+from rank_bm25 import BM25Okapi
+# Global variables for models
+# All five are assigned by initialize_models(); GROQ_CLIENT stays None when
+# no API key is given and NLP_MODEL stays None when spaCy fails to load.
+MODEL = None
+TOKENIZER = None
+GROQ_CLIENT = None
+NLP_MODEL = None
+DEVICE = None
+# Global indices
+# Assigned from app.py when a session is indexed; they hold one session's
+# data at a time.
+DENSE_INDEX = None
+BM25_INDEX = None
+CONCEPT_GRAPH = None
+TOKEN_TO_CHUNKS = None
+CHUNKS_DATA = []
+# Legal knowledge base
+# Category -> phrases. analyze_query() collects every phrase that occurs as
+# a substring of the lowercased query as a key concept.
+LEGAL_CONCEPTS = {
+    'liability': ['negligence', 'strict liability', 'vicarious liability', 'product liability'],
+    'contract': ['breach', 'consideration', 'offer', 'acceptance', 'damages', 'specific performance'],
+    'criminal': ['mens rea', 'actus reus', 'intent', 'malice', 'premeditation'],
+    'procedure': ['jurisdiction', 'standing', 'statute of limitations', 'res judicata'],
+    'evidence': ['hearsay', 'relevance', 'privilege', 'burden of proof', 'admissibility'],
+    'constitutional': ['due process', 'equal protection', 'free speech', 'search and seizure']
+}
+# Query type -> trigger substrings. analyze_query() walks this dict in
+# order and takes the first type with any trigger in the lowercased query.
+QUERY_PATTERNS = {
+    'precedent': ['case', 'precedent', 'ruling', 'held', 'decision'],
+    'statute_interpretation': ['statute', 'section', 'interpretation', 'meaning', 'definition'],
+    'factual': ['what happened', 'facts', 'circumstances', 'events'],
+    'procedure': ['how to', 'procedure', 'process', 'filing', 'requirements']
+}
+def initialize_models(model_id: str, groq_api_key: str = None):
+    """Initialize all models and components"""
+    global MODEL, TOKENIZER, GROQ_CLIENT, NLP_MODEL, DEVICE
+    try:
+        nltk.download('punkt', quiet=True)
+        nltk.download('stopwords', quiet=True)
+    except:
+        pass
+    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Using device: {DEVICE}")
+    print(f"Loading model: {model_id}")
+    TOKENIZER = AutoTokenizer.from_pretrained(model_id)
+    MODEL = AutoModel.from_pretrained(model_id).to(DEVICE)
+    MODEL.eval()
+    if groq_api_key:
+        GROQ_CLIENT = Groq(api_key=groq_api_key)
+    try:
+        NLP_MODEL = spacy.load("en_core_web_sm")
+    except:
+        print("SpaCy model not found, using basic NER")
+        NLP_MODEL = None
+def create_embedding(text: str) -> np.ndarray:
+    """Create dense embedding for text"""
+    inputs = TOKENIZER(text, padding=True, truncation=True,
+                      max_length=512, return_tensors='pt').to(DEVICE)
+    with torch.no_grad():
+        outputs = MODEL(**inputs)
+        attention_mask = inputs['attention_mask']
+        token_embeddings = outputs.last_hidden_state
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+        embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+        # Normalize embeddings
+        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+    return embeddings.cpu().numpy()[0]
+def extract_legal_entities(text: str) -> List[Dict[str, Any]]:
+    """Extract legal entities from text"""
+    entities = []
+    if NLP_MODEL:
+        doc = NLP_MODEL(text[:5000])  # Limit for performance
+        for ent in doc.ents:
+            if ent.label_ in ['PERSON', 'ORG', 'LAW', 'GPE']:
+                entities.append({
+                    'text': ent.text,
+                    'type': ent.label_,
+                    'importance': 1.0
+                })
+    # Legal citations
+    citation_pattern = r'\b\d+\s+[A-Z][a-z]+\.?\s+\d+\b'
+    for match in re.finditer(citation_pattern, text):
+        entities.append({
+            'text': match.group(),
+            'type': 'case_citation',
+            'importance': 2.0
+        })
+    # Statute references
+    statute_pattern = r'§\s*\d+[\.\d]*|\bSection\s+\d+'
+    for match in re.finditer(statute_pattern, text):
+        entities.append({
+            'text': match.group(),
+            'type': 'statute',
+            'importance': 1.5
+        })
+    return entities
+def analyze_query(query: str) -> Dict[str, Any]:
+    """Analyze query to understand intent"""
+    query_lower = query.lower()
+    # Classify query type
+    query_type = 'general'
+    for qtype, patterns in QUERY_PATTERNS.items():
+        if any(pattern in query_lower for pattern in patterns):
+            query_type = qtype
+            break
+    # Extract entities
+    entities = extract_legal_entities(query)
+    # Extract key concepts
+    key_concepts = []
+    for concept_category, concepts in LEGAL_CONCEPTS.items():
+        for concept in concepts:
+            if concept in query_lower:
+                key_concepts.append(concept)
+    # Generate expanded queries
+    expanded_queries = [query]
+    # Concept expansion
+    if key_concepts:
+        expanded_queries.append(f"{query} {' '.join(key_concepts[:3])}")
+    # Type-based expansion
+    if query_type == 'precedent':
+        expanded_queries.append(f"legal precedent case law {query}")
+    elif query_type == 'statute_interpretation':
+        expanded_queries.append(f"statutory interpretation meaning {query}")
+    # HyDE - Hypothetical document generation
+    if GROQ_CLIENT:
+        hyde_doc = generate_hypothetical_document(query)
+        if hyde_doc:
+            expanded_queries.append(hyde_doc)
+    return {
+        'original_query': query,
+        'query_type': query_type,
+        'entities': entities,
+        'key_concepts': key_concepts,
+        'expanded_queries': expanded_queries[:4]  # Limit to 4 queries
+    }
+def generate_hypothetical_document(query: str) -> Optional[str]:
+    """Generate hypothetical answer document (HyDE technique)"""
+    if not GROQ_CLIENT:
+        return None
+    try:
+        prompt = f"""Generate a brief hypothetical legal document excerpt that would answer this question: {query}
+        Write it as if it's from an actual legal case or statute. Be specific and use legal language.
+        Keep it under 100 words."""
+        response = GROQ_CLIENT.chat.completions.create(
+            messages=[
+                {"role": "system", "content": "You are a legal expert generating hypothetical legal text."},
+                {"role": "user", "content": prompt}
+            ],
+            model="llama-3.1-8b-instant",
+            temperature=0.3,
+            max_tokens=150
+        )
+        return response.choices[0].message.content
+    except:
+        return None
+def chunk_text_hierarchical(text: str, title: str = "") -> List[Dict[str, Any]]:
+    """Create hierarchical chunks with legal structure awareness"""
+    chunks = []
+    # Clean text
+    text = re.sub(r'\s+', ' ', text)
+    # Identify legal sections
+    section_patterns = [
+        (r'(?i)\bFACTS?\b[:\s]', 'facts'),
+        (r'(?i)\bHOLDING\b[:\s]', 'holding'),
+        (r'(?i)\bREASONING\b[:\s]', 'reasoning'),
+        (r'(?i)\bDISSENT\b[:\s]', 'dissent'),
+        (r'(?i)\bCONCLUSION\b[:\s]', 'conclusion')
+    ]
+    sections = []
+    for pattern, section_type in section_patterns:
+        matches = list(re.finditer(pattern, text))
+        for match in matches:
+            sections.append((match.start(), section_type))
+    sections.sort(key=lambda x: x[0])
+    # Split into sentences
+    import nltk
+    try:
+        sentences = nltk.sent_tokenize(text)
+    except:
+        sentences = text.split('. ')
+    # Create chunks
+    current_section = 'introduction'
+    section_sentences = []
+    chunk_size = 500  # words
+    for sent in sentences:
+        # Check section type
+        sent_pos = text.find(sent)
+        for pos, stype in sections:
+            if sent_pos >= pos:
+                current_section = stype
+        section_sentences.append(sent)
+        # Create chunk when we have enough content
+        chunk_text = ' '.join(section_sentences)
+        if len(chunk_text.split()) >= chunk_size or len(section_sentences) >= 10:
+            chunk_id = hashlib.md5(f"{title}_{len(chunks)}_{chunk_text[:50]}".encode()).hexdigest()[:12]
+            # Calculate importance
+            importance = 1.0
+            section_weights = {
+                'holding': 2.0, 'conclusion': 1.8, 'reasoning': 1.5,
+                'facts': 1.2, 'dissent': 0.8
+            }
+            importance *= section_weights.get(current_section, 1.0)
+            # Entity importance
+            entities = extract_legal_entities(chunk_text)
+            if entities:
+                entity_score = sum(e['importance'] for e in entities) / len(entities)
+                importance *= (1 + entity_score * 0.5)
+            chunks.append({
+                'id': chunk_id,
+                'text': chunk_text,
+                'title': title,
+                'section_type': current_section,
+                'importance_score': importance,
+                'entities': entities,
+                'embedding': None  # Will be filled during indexing
+            })
+            section_sentences = []
+    # Add remaining sentences
+    if section_sentences:
+        chunk_text = ' '.join(section_sentences)
+        chunk_id = hashlib.md5(f"{title}_{len(chunks)}_{chunk_text[:50]}".encode()).hexdigest()[:12]
+        chunks.append({
+            'id': chunk_id,
+            'text': chunk_text,
+            'title': title,
+            'section_type': current_section,
+            'importance_score': 1.0,
+            'entities': extract_legal_entities(chunk_text),
+            'embedding': None
+        })
+    return chunks
+def build_all_indices(chunks: List[Dict[str, Any]]):
+    """Build all retrieval indices"""
+    global DENSE_INDEX, BM25_INDEX, CONCEPT_GRAPH, TOKEN_TO_CHUNKS, CHUNKS_DATA
+    CHUNKS_DATA = chunks
+    print(f"Building indices for {len(chunks)} chunks...")
+    # 1. Dense embeddings + FAISS index
+    print("Building FAISS index...")
+    embeddings = []
+    for chunk in tqdm(chunks, desc="Creating embeddings"):
+        embedding = create_embedding(chunk['text'])
+        chunk['embedding'] = embedding
+        embeddings.append(embedding)
+    embeddings_matrix = np.vstack(embeddings)
+    DENSE_INDEX = faiss.IndexFlatIP(embeddings_matrix.shape[1])  # Inner product for normalized vectors
+    DENSE_INDEX.add(embeddings_matrix.astype('float32'))
+    # 2. BM25 index for sparse retrieval
+    print("Building BM25 index...")
+    tokenized_corpus = [chunk['text'].lower().split() for chunk in chunks]
+    BM25_INDEX = BM25Okapi(tokenized_corpus)
+    # 3. ColBERT-style token index
+    print("Building ColBERT token index...")
+    TOKEN_TO_CHUNKS = defaultdict(set)
+    for i, chunk in enumerate(chunks):
+        # Simple tokenization for token-level matching
+        tokens = chunk['text'].lower().split()
+        for token in tokens:
+            TOKEN_TO_CHUNKS[token].add(i)
+    # 4. Legal concept graph
+    print("Building legal concept graph...")
+    CONCEPT_GRAPH = nx.Graph()
+    for i, chunk in enumerate(chunks):
+        CONCEPT_GRAPH.add_node(i, text=chunk['text'][:200], importance=chunk['importance_score'])
+        # Add edges between chunks with shared entities
+        for j, other_chunk in enumerate(chunks[i+1:], i+1):
+            shared_entities = set(e['text'] for e in chunk['entities']) & \
+                            set(e['text'] for e in other_chunk['entities'])
+            if shared_entities:
+                CONCEPT_GRAPH.add_edge(i, j, weight=len(shared_entities))
+    print("All indices built successfully!")
+def multi_stage_retrieval(query_analysis: Dict[str, Any], top_k: int = 10) -> List[Tuple[Dict[str, Any], float]]:
+    """Perform multi-stage retrieval combining all techniques"""
+    candidates = {}
+    print("Performing multi-stage retrieval...")
+    # Stage 1: Dense retrieval with expanded queries
+    print("Stage 1: Dense retrieval...")
+    for query in query_analysis['expanded_queries'][:3]:
+        query_emb = create_embedding(query)
+        scores, indices = DENSE_INDEX.search(
+            query_emb.reshape(1, -1).astype('float32'),
+            top_k * 2
+        )
+        for idx, score in zip(indices[0], scores[0]):
+            if idx < len(CHUNKS_DATA):
+                chunk_id = CHUNKS_DATA[idx]['id']
+                if chunk_id not in candidates:
+                    candidates[chunk_id] = {'chunk': CHUNKS_DATA[idx], 'scores': {}}
+                candidates[chunk_id]['scores']['dense'] = float(score)
+    # Stage 2: Sparse retrieval (BM25)
+    print("Stage 2: Sparse retrieval...")
+    query_tokens = query_analysis['original_query'].lower().split()
+    bm25_scores = BM25_INDEX.get_scores(query_tokens)
+    top_bm25_indices = np.argsort(bm25_scores)[-top_k*2:][::-1]
+    for idx in top_bm25_indices:
+        if idx < len(CHUNKS_DATA):
+            chunk_id = CHUNKS_DATA[idx]['id']
+            if chunk_id not in candidates:
+                candidates[chunk_id] = {'chunk': CHUNKS_DATA[idx], 'scores': {}}
+            candidates[chunk_id]['scores']['bm25'] = float(bm25_scores[idx])
+    # Stage 3: Entity-based retrieval
+    print("Stage 3: Entity-based retrieval...")
+    for entity in query_analysis['entities']:
+        for chunk in CHUNKS_DATA:
+            chunk_entity_texts = [e['text'].lower() for e in chunk['entities']]
+            if entity['text'].lower() in chunk_entity_texts:
+                chunk_id = chunk['id']
+                if chunk_id not in candidates:
+                    candidates[chunk_id] = {'chunk': chunk, 'scores': {}}
+                candidates[chunk_id]['scores']['entity'] = \
+                    candidates[chunk_id]['scores'].get('entity', 0) + entity['importance']
+    # Stage 4: Graph-based retrieval
+    print("Stage 4: Graph-based retrieval...")
+    if candidates and CONCEPT_GRAPH:
+        seed_chunks = []
+        for chunk_id, data in list(candidates.items())[:5]:
+            for i, chunk in enumerate(CHUNKS_DATA):
+                if chunk['id'] == chunk_id:
+                    seed_chunks.append(i)
+                    break
+        for seed_idx in seed_chunks:
+            if seed_idx in CONCEPT_GRAPH:
+                neighbors = list(CONCEPT_GRAPH.neighbors(seed_idx))[:3]
+                for neighbor_idx in neighbors:
+                    if neighbor_idx < len(CHUNKS_DATA):
+                        chunk = CHUNKS_DATA[neighbor_idx]
+                        chunk_id = chunk['id']
+                        if chunk_id not in candidates:
+                            candidates[chunk_id] = {'chunk': chunk, 'scores': {}}
+                            candidates[chunk_id]['scores']['graph'] = 0.5
+    # Combine scores
+    print("Combining scores...")
+    weights = {'dense': 0.35, 'bm25': 0.25, 'entity': 0.25, 'graph': 0.15}
+    final_scores = []
+    for chunk_id, data in candidates.items():
+        chunk = data['chunk']
+        scores = data['scores']
+        final_score = 0
+        for method, weight in weights.items():
+            if method in scores:
+                # Normalize scores
+                if method == 'dense':
+                    normalized = (scores[method] + 1) / 2  # [-1, 1] to [0, 1]
+                elif method == 'bm25':
+                    normalized = min(scores[method] / 10, 1)
+                elif method == 'entity':
+                    normalized = min(scores[method] / 3, 1)
+                else:
+                    normalized = scores[method]
+                final_score += weight * normalized
+        # Boost by importance and section relevance
+        final_score *= chunk['importance_score']
+        if query_analysis['query_type'] == 'precedent' and chunk['section_type'] == 'holding':
+            final_score *= 1.5
+        elif query_analysis['query_type'] == 'factual' and chunk['section_type'] == 'facts':
+            final_score *= 1.5
+        final_scores.append((chunk, final_score))
+    # Sort and return top-k
+    final_scores.sort(key=lambda x: x[1], reverse=True)
+    return final_scores[:top_k]
+def generate_answer_with_reasoning(query: str, retrieved_chunks: List[Tuple[Dict[str, Any], float]]) -> Dict[str, Any]:
+    """Generate answer with legal reasoning"""
+    if not GROQ_CLIENT:
+        return {'error': 'Groq client not initialized'}
+    # Prepare context
+    context_parts = []
+    for i, (chunk, score) in enumerate(retrieved_chunks, 1):
+        entities = ', '.join([e['text'] for e in chunk['entities'][:3]])
+        context_parts.append(f"""
+Document {i} [{chunk['title']}] - Relevance: {score:.2f}
+Section: {chunk['section_type']}
+Key Entities: {entities}
+Content: {chunk['text'][:800]}
+""")
+    context = "\n---\n".join(context_parts)
+    system_prompt = """You are an expert legal analyst. Provide thorough legal analysis using the IRAC method:
+1. ISSUE: Identify the legal issue(s)
+2. RULE: State the applicable legal rules/precedents
+3. APPLICATION: Apply the rules to the facts
+4. CONCLUSION: Provide a clear conclusion
+CRITICAL: Base ALL responses on the provided document excerpts only. Quote directly when making claims.
+If information is not in the excerpts, state "This information is not provided in the available documents."
+"""
+    user_prompt = f"""Query: {query}
+Retrieved Legal Documents:
+{context}
+Please provide a comprehensive legal analysis using IRAC method. Cite the documents when making claims."""
+    try:
+        response = GROQ_CLIENT.chat.completions.create(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
+            model="llama-3.1-8b-instant",
+            temperature=0.1,
+            max_tokens=1000
+        )
+        answer = response.choices[0].message.content
+        # Calculate confidence
+        avg_score = sum(score for _, score in retrieved_chunks[:3]) / min(3, len(retrieved_chunks))
+        confidence = min(avg_score * 100, 100)
+        return {
+            'answer': answer,
+            'confidence': confidence,
+            'sources': [
+                {
+                    'chunk_id': chunk['id'],
+                    'title': chunk['title'],
+                    'section': chunk['section_type'],
+                    'relevance_score': float(score),
+                    'excerpt': chunk['text'][:200] + '...',
+                    'entities': [e['text'] for e in chunk['entities'][:5]]
+                }
+                for chunk, score in retrieved_chunks
+            ]
+        }
+    except Exception as e:
+        return {
+            'error': f'Error generating answer: {str(e)}',
+            'sources': [{'chunk': c['text'][:200], 'score': s} for c, s in retrieved_chunks[:3]]
+        }
+# Main functions for external use
+def process_documents(documents: List[Dict[str, str]]) -> Dict[str, Any]:
+    """Process documents and build indices"""
+    all_chunks = []
+    for doc in documents:
+        chunks = chunk_text_hierarchical(doc['text'], doc.get('title', 'Document'))
+        all_chunks.extend(chunks)
+    build_all_indices(all_chunks)
+    return {
+        'success': True,
+        'chunk_count': len(all_chunks),
+        'message': f'Processed {len(documents)} documents into {len(all_chunks)} chunks'
+    }
+def query_documents(query: str, top_k: int = 5) -> Dict[str, Any]:
+    """Main query function - takes query, returns answer with sources"""
+    if not CHUNKS_DATA:
+        return {'error': 'No documents indexed. Call process_documents first.'}
+    # Analyze query
+    query_analysis = analyze_query(query)
+    # Multi-stage retrieval
+    retrieved_chunks = multi_stage_retrieval(query_analysis, top_k)
+    if not retrieved_chunks:
+        return {
+            'error': 'No relevant documents found',
+            'query_analysis': query_analysis
+        }
+    # Generate answer
+    result = generate_answer_with_reasoning(query, retrieved_chunks)
+    result['query_analysis'] = query_analysis
+    return result
+def search_chunks_simple(query: str, top_k: int = 3) -> List[Dict[str, Any]]:
+    """Simple search function for compatibility"""
+    if not CHUNKS_DATA:
+        return []
+    query_analysis = analyze_query(query)
+    retrieved_chunks = multi_stage_retrieval(query_analysis, top_k)
+    results = []
+    for chunk, score in retrieved_chunks:
+        results.append({
+            'chunk': {
+                'id': chunk['id'],
+                'text': chunk['text'],
+                'title': chunk['title']
+            },
+            'score': score
+        })
+    return results
+def generate_conservative_answer(query: str, context_chunks: List[Dict[str, Any]]) -> str:
+    """Generate conservative answer - for compatibility"""
+    if not context_chunks:
+        return "No relevant information found."
+    # Convert format
+    retrieved_chunks = [(chunk['chunk'], chunk['score']) for chunk in context_chunks]
+    result = generate_answer_with_reasoning(query, retrieved_chunks)
+    if 'error' in result:
+        return result['error']
+    return result.get('answer', 'Unable to generate answer.')

requirements.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+# Hugging Face Spaces requirements
+gradio==4.44.0
+requests==2.31.0
+fastapi==0.115.6
+uvicorn==0.32.1
+python-multipart==0.0.9   # ✅ needed for FastAPI endpoints
+# Core ML/NLP
+torch==2.2.2
+transformers==4.44.2
+sentence-transformers==2.2.2
+spacy==3.8.2
+scikit-learn==1.5.2
+numpy==1.26.4
+pandas==2.2.3
+nltk==3.9.1
+# Retrieval / Search
+faiss-cpu==1.7.4
+rank-bm25==0.2.2
+# API clients
+groq==0.13.0