kn29 committed on
Commit df9660d · verified · 1 Parent(s): 05dbe82

Update app.py

Files changed (1): app.py (+642 -284)
app.py CHANGED
@@ -1,206 +1,313 @@
  from fastapi import FastAPI, HTTPException
  from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
  import pymongo
  import os
  import numpy as np
  from datetime import datetime, timedelta
  import logging
  from typing import Dict, Any, Optional, List
- import base64
- import json
  import threading
  import time
  from collections import defaultdict
- import faiss

- # Import our simplified advanced RAG system
- import rag

- # Configure logging
  logging.basicConfig(
      level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  )
  logger = logging.getLogger(__name__)

- # Initialize FastAPI app
- app = FastAPI(title="Advanced RAG Chat Service", version="1.0.0")
-
- # Add CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],  # Configure this properly in production
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Global variables
  MONGO_CLIENT = None
  DB = None
  RAG_INITIALIZED = False

- # In-memory session stores
- # Format: {session_id: {"chunks": [...], "faiss_index": faiss.Index, "indexed": bool, "metadata": {...}}}
- SESSION_STORES = {}
- STORE_LOCK = threading.RLock()
- CLEANUP_INTERVAL = 3600  # 1 hour cleanup interval
- STORE_TTL = 30 * 60  # 24 hours TTL for in-memory stores

- # Request/Response models
  class ChatRequest(BaseModel):
-     message: str

  class ChatResponse(BaseModel):
      success: bool
      answer: str
-     sources: List[Dict[str, Any]]
-     chat_history: List[Dict[str, Any]]
      processing_time: float
      session_id: str
      query_analysis: Optional[Dict[str, Any]] = None
      confidence: Optional[float] = None

  class InitRequest(BaseModel):
-     pass

  class InitResponse(BaseModel):
      success: bool
      session_id: str
      message: str
-     chunk_count: int
-     title: str
      document_info: Optional[Dict[str, Any]] = None

  class HealthResponse(BaseModel):
      status: str
      mongodb_connected: bool
      rag_initialized: bool
      active_sessions: int
      memory_usage: Dict[str, Any]

  def create_session_logger(session_id: str):
      """Create a logger with session context"""
-     return logging.LoggerAdapter(logger, {'session_id': session_id})

  def connect_mongodb():
-     """Initialize MongoDB connection"""
      global MONGO_CLIENT, DB
      try:
          mongodb_url = os.getenv("MONGODB_URL", "mongodb://localhost:27017/")
-         MONGO_CLIENT = pymongo.MongoClient(mongodb_url)
-         DB = MONGO_CLIENT["legal_rag_system"]

          # Test connection
-         DB.command("ping")

-         # Create indexes for chats collection
-         logger.info("Creating MongoDB indexes for chats...")
-         DB.chats.create_index("session_id")
-         DB.chats.create_index("created_at", expireAfterSeconds=24*60*60)  # 24 hour TTL
-         DB.chats.create_index([("session_id", 1), ("created_at", 1)])  # Compound index

-         logger.info("MongoDB connected successfully")
          return True
      except Exception as e:
          logger.error(f"MongoDB connection failed: {e}")
          return False

  def initialize_rag():
-     """Initialize RAG system"""
      global RAG_INITIALIZED
      try:
          model_id = os.getenv("EMBEDDING_MODEL_ID", "sentence-transformers/all-MiniLM-L6-v2")
          groq_api_key = os.getenv("GROQ_API_KEY")

-         logger.info(f"Initializing RAG system with model: {model_id}")
-         rag.initialize_models(model_id, groq_api_key)

          RAG_INITIALIZED = True
          logger.info("RAG system initialized successfully")
          return True
      except Exception as e:
          logger.error(f"RAG initialization failed: {e}")
          return False

- def decode_embedding_from_storage(embedding_list: List[float]) -> np.ndarray:
-     """Convert embedding from MongoDB list back to numpy array"""
      try:
-         return np.array(embedding_list, dtype=np.float32)
      except Exception as e:
          logger.error(f"Failed to decode embedding: {e}")
          return np.array([])

  def load_session_from_mongodb(session_id: str) -> Dict[str, Any]:
-     """Load session data from MongoDB with precomputed embeddings"""
      session_logger = create_session_logger(session_id)

      try:
-         # Get session metadata
          session_doc = DB.sessions.find_one({"session_id": session_id})
          if not session_doc:
-             raise ValueError(f"Session {session_id} not found")

-         if session_doc.get("status") != "completed":
-             raise ValueError(f"Session {session_id} not completed yet (status: {session_doc.get('status')})")

-         session_logger.info("Loading session chunks with precomputed embeddings from MongoDB")

-         # Get all chunks for this session with embeddings
          chunks_cursor = DB.chunks.find({"session_id": session_id}).sort("created_at", 1)
          chunks_list = list(chunks_cursor)

          if not chunks_list:
              raise ValueError(f"No chunks found for session {session_id}")

-         session_logger.info(f"Found {len(chunks_list)} chunks with embeddings")

-         # Convert MongoDB chunks to format needed by RAG system
          processed_chunks = []
          embeddings_matrix = []

          for i, chunk_doc in enumerate(chunks_list):
-             # Decode the precomputed embedding
-             embedding_list = chunk_doc.get('embedding', [])
-             if not embedding_list:
-                 session_logger.warning(f"Chunk {chunk_doc.get('chunk_id', i)} missing embedding")
-                 continue
-
-             embedding = decode_embedding_from_storage(embedding_list)
-             if embedding.size == 0:
-                 session_logger.warning(f"Failed to decode embedding for chunk {chunk_doc.get('chunk_id', i)}")
                  continue
-
-             # Format chunk for RAG system
-             processed_chunk = {
-                 'id': chunk_doc.get('chunk_id', f'chunk_{i}'),
-                 'text': chunk_doc['text'],
-                 'title': chunk_doc.get('title', session_doc.get('title', 'Document')),
-                 'section_type': chunk_doc.get('section_type', 'content'),
-                 'importance_score': chunk_doc.get('importance_score', 1.0),
-                 'entities': chunk_doc.get('entities', []),
-                 'embedding': embedding  # Precomputed embedding as numpy array
-             }
-
-             processed_chunks.append(processed_chunk)
-             embeddings_matrix.append(embedding)

          if not processed_chunks:
-             raise ValueError(f"No valid chunks with embeddings found for session {session_id}")

-         # Stack embeddings for FAISS index
          embeddings_matrix = np.vstack(embeddings_matrix).astype('float32')

          session_store = {
              "chunks": processed_chunks,
              "embeddings_matrix": embeddings_matrix,
-             "faiss_index": None,  # Will be built in indexing step
              "indexed": False,
              "metadata": {
                  "session_id": session_id,
-                 "title": session_doc.get("title", "Document"),
                  "chunk_count": len(processed_chunks),
                  "loaded_at": datetime.utcnow(),
                  "document_info": {
                      "filename": session_doc.get("filename", "Unknown"),
@@ -212,17 +319,21 @@ def load_session_from_mongodb(session_id: str) -> Dict[str, Any]:
              }
          }

-         session_logger.info(f"Loaded {len(processed_chunks)} chunks with precomputed embeddings")
          return session_store

      except Exception as e:
-         session_logger.error(f"Failed to load session from MongoDB: {e}")
          raise

- def build_faiss_index_from_embeddings(session_id: str) -> Dict[str, Any]:
-     """Build FAISS index from precomputed embeddings"""
      session_logger = create_session_logger(session_id)

      with STORE_LOCK:
          if session_id not in SESSION_STORES:
              raise ValueError(f"Session {session_id} not loaded")
@@ -236,60 +347,76 @@ def build_faiss_index_from_embeddings(session_id: str) -> Dict[str, Any]:
          embeddings_matrix = store["embeddings_matrix"]

      try:
-         session_logger.info(f"Building FAISS index from {len(chunks)} precomputed embeddings...")

-         # Create FAISS index (Inner Product for normalized embeddings)
          dimension = embeddings_matrix.shape[1]
          faiss_index = faiss.IndexFlatIP(dimension)
-
-         # Add embeddings to FAISS index
          faiss_index.add(embeddings_matrix)

-         # Set global RAG data for this session
-         rag.CHUNKS_DATA = chunks
-         rag.DENSE_INDEX = faiss_index
-
-         # Build other indices (BM25, concept graph, etc.) using precomputed chunks
-         session_logger.info("Building additional retrieval indices...")
-
-         # BM25 index for sparse retrieval
-         tokenized_corpus = [chunk['text'].lower().split() for chunk in chunks]
-         rag.BM25_INDEX = rag.BM25Okapi(tokenized_corpus)
-
-         # ColBERT-style token index
-         rag.TOKEN_TO_CHUNKS = defaultdict(set)
-         for i, chunk in enumerate(chunks):
-             tokens = chunk['text'].lower().split()
-             for token in tokens:
-                 rag.TOKEN_TO_CHUNKS[token].add(i)
-
-         # Legal concept graph
-         import networkx as nx
-         rag.CONCEPT_GRAPH = nx.Graph()
-         for i, chunk in enumerate(chunks):
-             rag.CONCEPT_GRAPH.add_node(i, text=chunk['text'][:200], importance=chunk['importance_score'])

-             # Add edges between chunks with shared entities
-             for j, other_chunk in enumerate(chunks[i+1:], i+1):
-                 shared_entities = set(e['text'] for e in chunk['entities']) & \
-                                   set(e['text'] for e in other_chunk['entities'])
-                 if shared_entities:
-                     rag.CONCEPT_GRAPH.add_edge(i, j, weight=len(shared_entities))
-
-         # Mark as indexed and store FAISS index
          with STORE_LOCK:
              SESSION_STORES[session_id]["faiss_index"] = faiss_index
              SESSION_STORES[session_id]["indexed"] = True

-         session_logger.info(f"FAISS index built successfully from precomputed embeddings: {len(chunks)} chunks indexed")
          return SESSION_STORES[session_id]["metadata"]

      except Exception as e:
-         session_logger.error(f"Failed to build FAISS index from embeddings: {e}")
          raise

- def save_chat_message(session_id: str, role: str, message: str):
-     """Save chat message to MongoDB"""
      try:
          chat_doc = {
              "session_id": session_id,
@@ -301,8 +428,11 @@ def save_chat_message(session_id: str, role: str, message: str):
      except Exception as e:
          logger.error(f"Failed to save chat message for session {session_id}: {e}")

- def get_chat_history(session_id: str, limit: int = 50) -> List[Dict[str, Any]]:
-     """Get chat history for a session"""
      try:
          chats_cursor = DB.chats.find(
              {"session_id": session_id}
@@ -322,14 +452,8 @@ def get_chat_history(session_id: str, limit: int = 50) -> List[Dict[str, Any]]:
          logger.error(f"Failed to get chat history for session {session_id}: {e}")
          return []

- import asyncio
- from contextlib import asynccontextmanager
-
- # Global cleanup task
- cleanup_task = None
-
- def cleanup_old_stores():
-     """Background cleanup of old in-memory stores - single run"""
      try:
          current_time = datetime.utcnow()
          expired_sessions = []
@@ -337,63 +461,119 @@ def cleanup_old_stores():
          with STORE_LOCK:
              for session_id, store in SESSION_STORES.items():
                  loaded_at = store["metadata"]["loaded_at"]
-                 if (current_time - loaded_at).total_seconds() > STORE_TTL:
                      expired_sessions.append(session_id)

              for session_id in expired_sessions:
-                 # Clean up FAISS index and other resources
-                 if SESSION_STORES[session_id].get("faiss_index"):
-                     del SESSION_STORES[session_id]["faiss_index"]
-                 del SESSION_STORES[session_id]
-                 logger.info(f"Cleaned up expired store for session: {session_id}")

          if expired_sessions:
-             logger.info(f"Cleaned up {len(expired_sessions)} expired session stores")

      except Exception as e:
-         logger.error(f"Cleanup error: {e}")

  async def periodic_cleanup():
-     """Async periodic cleanup task"""
-     global cleanup_task
      try:
          while True:
-             cleanup_old_stores()
              await asyncio.sleep(CLEANUP_INTERVAL)
      except asyncio.CancelledError:
-         logger.info("Cleanup task cancelled")
          raise
      except Exception as e:
-         logger.error(f"Periodic cleanup error: {e}")

  @asynccontextmanager
  async def lifespan(app: FastAPI):
-     """Application lifespan manager"""
      global cleanup_task

      # Startup
-     logger.info("Starting up Advanced RAG Chat Service...")

-     # Connect to MongoDB
      if not connect_mongodb():
-         logger.error("Failed to connect to MongoDB")
-         raise Exception("MongoDB connection failed")

-     # Initialize RAG system
-     if not initialize_rag():
-         logger.error("Failed to initialize RAG system")
-         raise Exception("RAG initialization failed")

-     # Start background cleanup task
-     cleanup_task = asyncio.create_task(periodic_cleanup())
-     logger.info("Background cleanup task started")

-     logger.info("Startup completed successfully")

      yield

      # Shutdown
-     logger.info("Shutting down Advanced RAG Chat Service...")

      if cleanup_task:
          cleanup_task.cancel()
@@ -407,153 +587,237 @@ async def lifespan(app: FastAPI):

      logger.info("Shutdown completed")

- # Replace the FastAPI app initialization
  app = FastAPI(
-     title="Advanced RAG Chat Service",
-     version="1.0.0",
      lifespan=lifespan
  )

  @app.get("/health", response_model=HealthResponse)
  async def health_check():
-     """Health check endpoint"""
      try:
-         # Check MongoDB connection
          mongodb_connected = False
-         active_sessions = 0
-
-         if DB is not None:
              try:
                  DB.command("ping")
                  mongodb_connected = True
-                 # Count sessions with recent chats
-                 one_hour_ago = datetime.utcnow() - timedelta(hours=1)
-                 active_sessions = len(DB.chats.distinct("session_id", {"created_at": {"$gte": one_hour_ago}}))
              except:
                  pass

-         # Memory usage info
          with STORE_LOCK:
              memory_sessions = len(SESSION_STORES)
              indexed_sessions = sum(1 for store in SESSION_STORES.values() if store["indexed"])

          return HealthResponse(
-             status="healthy" if mongodb_connected and RAG_INITIALIZED else "unhealthy",
              mongodb_connected=mongodb_connected,
              rag_initialized=RAG_INITIALIZED,
-             active_sessions=active_sessions,
              memory_usage={
                  "loaded_sessions": memory_sessions,
                  "indexed_sessions": indexed_sessions,
-                 "store_ttl_hours": STORE_TTL / 3600
-             }
          )
      except Exception as e:
          logger.error(f"Health check failed: {e}")
          return HealthResponse(
              status="unhealthy",
              mongodb_connected=False,
              rag_initialized=False,
              active_sessions=0,
-             memory_usage={}
          )

  @app.post("/init/{session_id}", response_model=InitResponse)
  async def initialize_session(session_id: str, request: InitRequest):
-     """Initialize RAG context for a session using precomputed embeddings"""
      session_logger = create_session_logger(session_id)

-     if DB is None:
-         raise HTTPException(status_code=503, detail="Database not connected")
-
-     if not RAG_INITIALIZED:
-         raise HTTPException(status_code=503, detail="RAG system not initialized")
-
-     # Check if already loaded and indexed
-     with STORE_LOCK:
-         if session_id in SESSION_STORES and SESSION_STORES[session_id]["indexed"]:
-             store = SESSION_STORES[session_id]
-             metadata = store["metadata"]
-             session_logger.info("Session already initialized and indexed with precomputed embeddings")
-             return InitResponse(
-                 success=True,
-                 session_id=session_id,
-                 message="Session already initialized with precomputed embeddings",
-                 chunk_count=metadata["chunk_count"],
-                 title=metadata["title"],
-                 document_info=metadata["document_info"]
-             )
-
      try:
-         session_logger.info("Initializing session with precomputed embeddings from MongoDB")

-         # Load session data with precomputed embeddings from MongoDB
          session_store = load_session_from_mongodb(session_id)

          # Store in memory
          with STORE_LOCK:
              SESSION_STORES[session_id] = session_store

-         # Build FAISS index from precomputed embeddings (no re-embedding!)
-         metadata = build_faiss_index_from_embeddings(session_id)

-         session_logger.info(f"Session initialized with precomputed embeddings: {metadata['chunk_count']} chunks indexed")

          return InitResponse(
              success=True,
              session_id=session_id,
-             message=f"Session initialized with precomputed embeddings: {metadata['chunk_count']} chunks ready for advanced RAG",
              chunk_count=metadata["chunk_count"],
              title=metadata["title"],
              document_info=metadata["document_info"]
          )

      except ValueError as e:
-         session_logger.error(f"Session initialization failed: {e}")
-         raise HTTPException(status_code=404, detail=str(e))
      except Exception as e:
          session_logger.error(f"Session initialization error: {e}")
-         raise HTTPException(status_code=500, detail=f"Failed to initialize session: {str(e)}")

  @app.post("/chat/{session_id}", response_model=ChatResponse)
  async def chat_with_document(session_id: str, request: ChatRequest):
-     """Handle chat query with advanced RAG using precomputed embeddings"""
      session_logger = create_session_logger(session_id)
      start_time = time.time()

-     if DB is None:
-         raise HTTPException(status_code=503, detail="Database not connected")
-
-     if not RAG_INITIALIZED:
-         raise HTTPException(status_code=503, detail="RAG system not initialized")
-
-     # Validate request
-     if not request.message.strip():
-         raise HTTPException(status_code=400, detail="Empty message provided")
-
      try:
-         session_logger.info(f"Processing advanced RAG query: {request.message[:100]}...")

-         # Check if session is initialized and indexed
          with STORE_LOCK:
              if session_id not in SESSION_STORES:
                  raise HTTPException(
-                     status_code=400,
-                     detail=f"Session {session_id} not initialized. Call /init/{session_id} first."
                  )

              if not SESSION_STORES[session_id]["indexed"]:
                  raise HTTPException(
                      status_code=400,
-                     detail=f"Session {session_id} not indexed. Call /init/{session_id} first."
                  )

-         # Query using advanced RAG system (now using precomputed embeddings)
-         result = rag.query_documents(request.message, top_k=5)

-         if 'error' in result:
              raise HTTPException(status_code=500, detail=result['error'])

          answer = result.get('answer', 'Unable to generate answer.')
@@ -561,28 +825,31 @@ async def chat_with_document(session_id: str, request: ChatRequest):
          query_analysis = result.get('query_analysis', {})
          confidence = result.get('confidence', 0.0)

-         # Save chat messages to MongoDB for persistence
-         save_chat_message(session_id, "user", request.message)
-         save_chat_message(session_id, "assistant", answer)

-         # Get updated chat history
-         chat_history = get_chat_history(session_id)

          processing_time = time.time() - start_time
-         session_logger.info(f"Advanced RAG query processed in {processing_time:.2f}s with confidence {confidence:.1f}% using precomputed embeddings")
-
-         # Prepare sources for response
-         formatted_sources = [
-             {
-                 "chunk_id": source.get("chunk_id", ""),
-                 "title": source.get("title", ""),
-                 "section": source.get("section", ""),
-                 "relevance_score": source.get("relevance_score", 0.0),
-                 "text_preview": source.get("excerpt", "")[:300] + "..." if len(source.get("excerpt", "")) > 300 else source.get("excerpt", ""),
-                 "entities": source.get("entities", [])
-             }
-             for source in sources
-         ]

          return ChatResponse(
              success=True,
@@ -598,19 +865,30 @@ async def chat_with_document(session_id: str, request: ChatRequest):
      except HTTPException:
          raise
      except Exception as e:
-         session_logger.error(f"Advanced RAG chat processing failed: {e}")
-         raise HTTPException(status_code=500, detail=f"Chat processing failed: {str(e)}")

  @app.get("/history/{session_id}")
  async def get_session_history(session_id: str):
      """Get chat history for a session"""
      session_logger = create_session_logger(session_id)

-     if DB is None:
          raise HTTPException(status_code=503, detail="Database not connected")

      try:
-         chat_history = get_chat_history(session_id, limit=100)

          session_logger.info(f"Retrieved {len(chat_history)} chat messages")
@@ -631,15 +909,31 @@ async def cleanup_session(session_id: str):
      session_logger = create_session_logger(session_id)

      try:
-         # Remove from memory
          with STORE_LOCK:
              if session_id in SESSION_STORES:
                  # Clean up FAISS index
-                 if SESSION_STORES[session_id].get("faiss_index"):
-                     del SESSION_STORES[session_id]["faiss_index"]
                  del SESSION_STORES[session_id]
                  session_logger.info("Session removed from memory")

          return {
              "success": True,
              "message": f"Session {session_id} cleaned up successfully"
@@ -651,65 +945,128 @@ async def cleanup_session(session_id: str):

  @app.get("/sessions/active")
  async def get_active_sessions():
-     """Get information about active sessions in memory"""
      try:
          with STORE_LOCK:
              active_sessions = []
              for session_id, store in SESSION_STORES.items():
                  metadata = store["metadata"]
                  active_sessions.append({
                      "session_id": session_id,
                      "title": metadata["title"],
                      "chunk_count": metadata["chunk_count"],
                      "indexed": store["indexed"],
-                     "loaded_at": metadata["loaded_at"].isoformat(),
-                     "age_minutes": (datetime.utcnow() - metadata["loaded_at"]).total_seconds() / 60,
-                     "using_precomputed_embeddings": True
                  })

          return {
              "success": True,
              "active_sessions": active_sessions,
-             "total_sessions": len(active_sessions)
          }

      except Exception as e:
          logger.error(f"Failed to get active sessions: {e}")
          raise HTTPException(status_code=500, detail=f"Failed to get active sessions: {str(e)}")

  @app.get("/rag/status")
  async def get_rag_status():
-     """Get advanced RAG system status"""
      try:
          return {
              "success": True,
              "rag_initialized": RAG_INITIALIZED,
              "optimization": {
-                 "using_precomputed_embeddings": True,
-                 "no_reembedding": True,
                  "persistent_faiss_index": True,
-                 "mongodb_persistence": True
              },
              "features": {
                  "multi_stage_retrieval": True,
-                 "dense_retrieval": "FAISS + Precomputed Legal-BERT Embeddings",
-                 "sparse_retrieval": "BM25",
-                 "entity_based_retrieval": "Legal NER + SpaCy",
-                 "graph_based_retrieval": "Legal Concept Graph",
                  "query_analysis": "Legal Intent Classification",
                  "answer_generation": "Groq LLM with IRAC Method"
              },
-             "active_techniques": [
-                 "Dense Embedding Search (FAISS with Precomputed Embeddings)",
-                 "BM25 Sparse Retrieval",
-                 "ColBERT Token Matching",
-                 "Legal Entity Matching",
-                 "Concept Graph Expansion",
-                 "HyDE Query Expansion",
-                 "Multi-Query Retrieval",
-                 "Legal Section Classification",
-                 "Importance-based Ranking"
-             ]
          }

      except Exception as e:
@@ -719,4 +1076,5 @@ async def get_rag_status():
  if __name__ == "__main__":
      import uvicorn
      port = int(os.getenv("PORT", 7861))
      uvicorn.run(app, host="0.0.0.0", port=port)
  from fastapi import FastAPI, HTTPException
  from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
  import pymongo
  import os
  import numpy as np
  from datetime import datetime, timedelta
  import logging
+ import traceback
  from typing import Dict, Any, Optional, List
+ import asyncio
  import threading
  import time
  from collections import defaultdict
+ from contextlib import asynccontextmanager
+ import sys

+ try:
+     import faiss
+     FAISS_AVAILABLE = True
+ except ImportError:
+     FAISS_AVAILABLE = False
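+ # Optional dependency: guarding the import lets the service boot (and answer
+ # /health) on hosts where faiss is not installed; the FAISS-dependent code
+ # paths below all gate on FAISS_AVAILABLE first.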
+ # Configure comprehensive logging
  logging.basicConfig(
      level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - [%(funcName)s:%(lineno)d] - %(message)s',
+     handlers=[
+         logging.StreamHandler(sys.stdout),
+         logging.FileHandler('rag_app.log', mode='a')
+     ]
  )
  logger = logging.getLogger(__name__)
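  # Note: logging.FileHandler opens rag_app.log eagerly when basicConfig runs,
  # so the process needs write access to its working directory; on a read-only
  # filesystem this raises at import time, before the app starts.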
+ # Global state
  MONGO_CLIENT = None
  DB = None
  RAG_INITIALIZED = False
+ RAG_MODULE = None
+ APP_STATE = {
+     "startup_time": None,
+     "mongodb_connected": False,
+     "rag_ready": False,
+     "active_sessions": 0,
+     "total_queries": 0,
+     "errors": []
+ }

+ # Configuration - session memory management
+ CLEANUP_INTERVAL = 1800  # run cleanup every 30 minutes (1800 seconds)
+ STORE_TTL = 1800  # sessions expire 30 minutes after load (1800 seconds,
+                   # measured from loaded_at; POST /sessions/{id}/extend resets it)

+ # You can adjust these values:
+ # STORE_TTL = 900   # 15 minutes
+ # STORE_TTL = 3600  # 1 hour
+ # STORE_TTL = 7200  # 2 hours
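+ # APP_STATE["errors"] above is append-only; for long uptimes a bounded buffer
+ # (e.g., collections.deque(maxlen=50)) would keep its memory use flat.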
+
+ # Request/Response models with validation
  class ChatRequest(BaseModel):
+     message: str = Field(..., min_length=1, max_length=5000, description="User's query message")

  class ChatResponse(BaseModel):
      success: bool
      answer: str
+     sources: List[Dict[str, Any]] = Field(default_factory=list)
+     chat_history: List[Dict[str, Any]] = Field(default_factory=list)
      processing_time: float
      session_id: str
      query_analysis: Optional[Dict[str, Any]] = None
      confidence: Optional[float] = None
+     error_details: Optional[str] = None

  class InitRequest(BaseModel):
+     force_reload: bool = Field(default=False, description="Force reload session even if already loaded")

  class InitResponse(BaseModel):
      success: bool
      session_id: str
      message: str
+     chunk_count: int = Field(default=0)
+     title: str = Field(default="Unknown Document")
      document_info: Optional[Dict[str, Any]] = None
+     error_details: Optional[str] = None

  class HealthResponse(BaseModel):
      status: str
      mongodb_connected: bool
      rag_initialized: bool
+     faiss_available: bool
      active_sessions: int
      memory_usage: Dict[str, Any]
+     uptime_seconds: float
+     last_error: Optional[str] = None
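+ # With min_length=1, FastAPI now rejects an empty message with a 422
+ # validation error, replacing the old manual 400 check; a whitespace-only
+ # message (" ") still passes, since the old .strip() test was dropped.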

  def create_session_logger(session_id: str):
      """Create a logger with session context"""
+     return logging.LoggerAdapter(logger, {'session_id': session_id[:8]})

+ def safe_import_rag():
+     """Safely import RAG module with error handling"""
+     global RAG_MODULE
+     try:
+         import rag
+         RAG_MODULE = rag
+         logger.info("RAG module imported successfully")
+         return True
+     except ImportError as e:
+         logger.error(f"Failed to import RAG module: {e}")
+         logger.error("Make sure rag.py is in the same directory and all dependencies are installed")
+         return False
+     except Exception as e:
+         logger.error(f"Unexpected error importing RAG module: {e}")
+         logger.error(traceback.format_exc())
+         return False
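+ # Caveat for create_session_logger: the LoggerAdapter attaches session_id to
+ # each record, but it is only rendered if the log format string references
+ # %(session_id)s, which the format configured above does not.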

  def connect_mongodb():
+     """Initialize MongoDB connection with comprehensive error handling"""
      global MONGO_CLIENT, DB

      try:
          mongodb_url = os.getenv("MONGODB_URL", "mongodb://localhost:27017/")
+         if not mongodb_url or mongodb_url == "mongodb://localhost:27017/":
+             logger.warning("Using default MongoDB URL - set MONGODB_URL environment variable for production")

+         logger.info(f"Connecting to MongoDB: {mongodb_url[:20]}...")
+         MONGO_CLIENT = pymongo.MongoClient(
+             mongodb_url,
+             serverSelectionTimeoutMS=10000,  # 10 second timeout
+             connectTimeoutMS=10000,
+             socketTimeoutMS=10000
+         )
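+         # serverSelectionTimeoutMS bounds how long the ping below blocks while
+         # looking for a reachable server, so an unreachable database now fails
+         # fast with ServerSelectionTimeoutError instead of hanging startup.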

          # Test connection
+         MONGO_CLIENT.admin.command('ping')
+         DB = MONGO_CLIENT["legal_rag_system"]

+         logger.info("Creating MongoDB indexes...")
+         # Create indexes with error handling
+         try:
+             DB.chats.create_index("session_id", background=True)
+             DB.chats.create_index("created_at", expireAfterSeconds=24*60*60, background=True)
+             DB.chats.create_index([("session_id", 1), ("created_at", 1)], background=True)
+             logger.info("MongoDB indexes created successfully")
+         except Exception as idx_error:
+             logger.warning(f"Index creation failed (non-critical): {idx_error}")

+         APP_STATE["mongodb_connected"] = True
+         logger.info("MongoDB connected and configured successfully")
          return True

+     except pymongo.errors.ServerSelectionTimeoutError:
+         logger.error("MongoDB connection timeout - check if MongoDB is running and accessible")
+         return False
+     except pymongo.errors.ConfigurationError as e:
+         logger.error(f"MongoDB configuration error: {e}")
+         return False
      except Exception as e:
          logger.error(f"MongoDB connection failed: {e}")
+         logger.error(traceback.format_exc())
          return False

  def initialize_rag():
+     """Initialize RAG system with comprehensive error handling"""
      global RAG_INITIALIZED

+     if not RAG_MODULE:
+         logger.error("RAG module not available - cannot initialize")
+         return False

+     if not FAISS_AVAILABLE:
+         logger.error("FAISS library not available - RAG system requires FAISS")
+         return False

      try:
          model_id = os.getenv("EMBEDDING_MODEL_ID", "sentence-transformers/all-MiniLM-L6-v2")
          groq_api_key = os.getenv("GROQ_API_KEY")

+         logger.info(f"Initializing RAG system with embedding model: {model_id}")

+         if groq_api_key:
+             logger.info("Groq API key found - full RAG capabilities available")
+         else:
+             logger.warning("No Groq API key - some RAG features may be limited")

+         # Load the models (no timeout wrapper here; a slow load delays startup)
+         RAG_MODULE.initialize_models(model_id, groq_api_key)

          RAG_INITIALIZED = True
+         APP_STATE["rag_ready"] = True
          logger.info("RAG system initialized successfully")
          return True

+     except ImportError as e:
+         logger.error(f"Missing dependencies for RAG initialization: {e}")
+         return False
      except Exception as e:
          logger.error(f"RAG initialization failed: {e}")
+         logger.error(traceback.format_exc())
+         APP_STATE["errors"].append(f"RAG init failed: {str(e)}")
          return False
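  # Assuming rag.initialize_models loads the embedding model from the Hugging
  # Face Hub (rag.py is not shown here), the first call downloads and caches
  # the weights, so cold starts can be noticeably slow.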

+ def decode_embedding_safely(embedding_list: List[float]) -> np.ndarray:
+     """Safely convert embedding from storage with validation"""
      try:
+         if not embedding_list or not isinstance(embedding_list, list):
+             raise ValueError("Invalid embedding data")

+         embedding = np.array(embedding_list, dtype=np.float32)

+         if embedding.size == 0:
+             raise ValueError("Empty embedding")

+         if np.isnan(embedding).any() or np.isinf(embedding).any():
+             raise ValueError("Embedding contains invalid values")

+         return embedding

      except Exception as e:
          logger.error(f"Failed to decode embedding: {e}")
          return np.array([])
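  # Expected round-trip, assuming the ingest pipeline stored vectors with
  # ndarray.tolist(): decode_embedding_safely(vec.tolist()) reproduces the
  # float32 vector, while anything malformed comes back as np.array([]) so the
  # caller can skip that chunk instead of aborting the whole load.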

  def load_session_from_mongodb(session_id: str) -> Dict[str, Any]:
+     """Load session with comprehensive error handling and validation"""
      session_logger = create_session_logger(session_id)

+     # PyMongo Database objects forbid truth-value testing; compare with None
+     if DB is None:
+         raise ValueError("Database not connected")

      try:
+         # Get and validate session metadata
          session_doc = DB.sessions.find_one({"session_id": session_id})
          if not session_doc:
+             raise ValueError(f"Session {session_id} not found in database")

+         session_status = session_doc.get("status")
+         if session_status != "completed":
+             raise ValueError(f"Session not ready - status: {session_status}")

+         session_logger.info(f"Loading session: {session_doc.get('filename', 'unknown')}")

+         # Load chunks with validation
          chunks_cursor = DB.chunks.find({"session_id": session_id}).sort("created_at", 1)
          chunks_list = list(chunks_cursor)

          if not chunks_list:
              raise ValueError(f"No chunks found for session {session_id}")

+         session_logger.info(f"Found {len(chunks_list)} chunks")

+         # Process chunks with validation
          processed_chunks = []
          embeddings_matrix = []
+         failed_chunks = 0

          for i, chunk_doc in enumerate(chunks_list):
+             try:
+                 # Validate required fields
+                 if 'text' not in chunk_doc or not chunk_doc['text'].strip():
+                     session_logger.warning(f"Chunk {i} missing or empty text")
+                     failed_chunks += 1
+                     continue

+                 # Decode embedding
+                 embedding_list = chunk_doc.get('embedding', [])
+                 embedding = decode_embedding_safely(embedding_list)

+                 if embedding.size == 0:
+                     session_logger.warning(f"Chunk {i} has invalid embedding")
+                     failed_chunks += 1
+                     continue

+                 # Create processed chunk
+                 processed_chunk = {
+                     'id': chunk_doc.get('chunk_id', f'chunk_{i}'),
+                     'text': chunk_doc['text'],
+                     'title': chunk_doc.get('title', session_doc.get('filename', 'Document')),
+                     'section_type': chunk_doc.get('section_type', 'content'),
+                     'importance_score': float(chunk_doc.get('importance_score', 1.0)),
+                     'entities': chunk_doc.get('entities', []),
+                     'embedding': embedding
+                 }

+                 processed_chunks.append(processed_chunk)
+                 embeddings_matrix.append(embedding)

+             except Exception as chunk_error:
+                 session_logger.error(f"Failed to process chunk {i}: {chunk_error}")
+                 failed_chunks += 1
                  continue

          if not processed_chunks:
+             raise ValueError(f"No valid chunks could be loaded (failed: {failed_chunks})")

+         if failed_chunks > 0:
+             session_logger.warning(f"Failed to load {failed_chunks} chunks, continuing with {len(processed_chunks)}")

+         # Create embeddings matrix
          embeddings_matrix = np.vstack(embeddings_matrix).astype('float32')

+         # Prepare session store
          session_store = {
              "chunks": processed_chunks,
              "embeddings_matrix": embeddings_matrix,
+             "faiss_index": None,
              "indexed": False,
              "metadata": {
                  "session_id": session_id,
+                 "title": session_doc.get("filename", "Document"),
                  "chunk_count": len(processed_chunks),
+                 "failed_chunks": failed_chunks,
                  "loaded_at": datetime.utcnow(),
                  "document_info": {
                      "filename": session_doc.get("filename", "Unknown"),
              }
          }

+         session_logger.info(f"Session loaded successfully: {len(processed_chunks)} chunks")
          return session_store

      except Exception as e:
+         session_logger.error(f"Failed to load session: {e}")
+         session_logger.error(traceback.format_exc())
          raise
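  # The .sort("created_at", 1) above assumes the ingest pipeline (outside this
  # file) stamps created_at on every chunk document; chunks missing the field
  # sort first under MongoDB's ascending null ordering.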

+ def build_faiss_index_safely(session_id: str) -> Dict[str, Any]:
+     """Build FAISS index with error handling"""
      session_logger = create_session_logger(session_id)

+     if not FAISS_AVAILABLE:
+         raise ValueError("FAISS library not available")

      with STORE_LOCK:
          if session_id not in SESSION_STORES:
              raise ValueError(f"Session {session_id} not loaded")

          embeddings_matrix = store["embeddings_matrix"]

      try:
+         session_logger.info(f"Building FAISS index for {len(chunks)} chunks...")

+         # Validate embeddings matrix
+         if embeddings_matrix.shape[0] != len(chunks):
+             raise ValueError("Embeddings matrix size mismatch with chunks")

+         # Create FAISS index
          dimension = embeddings_matrix.shape[1]
          faiss_index = faiss.IndexFlatIP(dimension)
          faiss_index.add(embeddings_matrix)
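          # IndexFlatIP ranks by raw inner product, which equals cosine
          # similarity only if the stored embeddings are L2-normalized. That is
          # assumed of the ingest pipeline here; if it does not hold, a prior
          # faiss.normalize_L2(embeddings_matrix) call would be needed.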

+         # Initialize RAG system components
+         if RAG_MODULE:
+             RAG_MODULE.CHUNKS_DATA = chunks
+             RAG_MODULE.DENSE_INDEX = faiss_index

+             # Build additional indices
+             session_logger.info("Building additional retrieval indices...")

+             try:
+                 # BM25 index
+                 tokenized_corpus = [chunk['text'].lower().split() for chunk in chunks]
+                 RAG_MODULE.BM25_INDEX = RAG_MODULE.BM25Okapi(tokenized_corpus)

+                 # Token index
+                 RAG_MODULE.TOKEN_TO_CHUNKS = defaultdict(set)
+                 for i, chunk in enumerate(chunks):
+                     tokens = chunk['text'].lower().split()
+                     for token in tokens:
+                         RAG_MODULE.TOKEN_TO_CHUNKS[token].add(i)

+                 # Concept graph
+                 import networkx as nx
+                 RAG_MODULE.CONCEPT_GRAPH = nx.Graph()
+                 for i, chunk in enumerate(chunks):
+                     RAG_MODULE.CONCEPT_GRAPH.add_node(
+                         i,
+                         text=chunk['text'][:200],
+                         importance=chunk['importance_score']
+                     )

+                     # Add edges for shared entities
+                     for j, other_chunk in enumerate(chunks[i+1:], i+1):
+                         shared_entities = set(e.get('text', '') for e in chunk['entities']) & \
+                                           set(e.get('text', '') for e in other_chunk['entities'])
+                         if shared_entities:
+                             RAG_MODULE.CONCEPT_GRAPH.add_edge(i, j, weight=len(shared_entities))

+             except Exception as index_error:
+                 session_logger.warning(f"Failed to build some retrieval indices: {index_error}")
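+         # Two caveats here: the shared-entity edge scan is O(n^2) in the chunk
+         # count, acceptable for one document's chunks but worth revisiting for
+         # large sessions; and CHUNKS_DATA / DENSE_INDEX / BM25_INDEX /
+         # TOKEN_TO_CHUNKS / CONCEPT_GRAPH are module-level globals on rag, so
+         # initializing a second session overwrites the first - the per-session
+         # isolation advertised by /rag/status below does not hold for these.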

+         # Mark as indexed
          with STORE_LOCK:
              SESSION_STORES[session_id]["faiss_index"] = faiss_index
              SESSION_STORES[session_id]["indexed"] = True

+         session_logger.info("FAISS index built successfully")
          return SESSION_STORES[session_id]["metadata"]

      except Exception as e:
+         session_logger.error(f"Failed to build FAISS index: {e}")
+         session_logger.error(traceback.format_exc())
          raise

+ def save_chat_message_safely(session_id: str, role: str, message: str):
+     """Save chat message with error handling"""
+     if DB is None:
+         logger.warning("Database not available - chat message not saved")
+         return

      try:
          chat_doc = {
              "session_id": session_id,
      except Exception as e:
          logger.error(f"Failed to save chat message for session {session_id}: {e}")

+ def get_chat_history_safely(session_id: str, limit: int = 50) -> List[Dict[str, Any]]:
+     """Get chat history with error handling"""
+     if DB is None:
+         return []

      try:
          chats_cursor = DB.chats.find(
              {"session_id": session_id}
          logger.error(f"Failed to get chat history for session {session_id}: {e}")
          return []

+ def cleanup_expired_sessions():
+     """Clean up only expired chat sessions from memory, keep server running"""
      try:
          current_time = datetime.utcnow()
          expired_sessions = []

          with STORE_LOCK:
              for session_id, store in SESSION_STORES.items():
                  loaded_at = store["metadata"]["loaded_at"]
+                 age_seconds = (current_time - loaded_at).total_seconds()

+                 # Only expire sessions older than TTL (30 minutes)
+                 if age_seconds > STORE_TTL:
                      expired_sessions.append(session_id)

+             # Clean up expired sessions one by one
              for session_id in expired_sessions:
+                 try:
+                     store = SESSION_STORES[session_id]

+                     # Clean up session-specific RAG instance
+                     if "rag_instance" in store:
+                         store["rag_instance"].cleanup()

+                     # Clean up FAISS index
+                     if store.get("faiss_index"):
+                         del store["faiss_index"]

+                     # Remove session from memory
+                     del SESSION_STORES[session_id]

+                     age_minutes = (current_time - store["metadata"]["loaded_at"]).total_seconds() / 60
+                     logger.info(f"Expired session {session_id[:8]} removed from memory (age: {age_minutes:.1f} minutes)")

+                 except Exception as cleanup_error:
+                     logger.error(f"Error cleaning up session {session_id[:8]}: {cleanup_error}")

+             # Update active session count
+             APP_STATE["active_sessions"] = len(SESSION_STORES)

          if expired_sessions:
+             logger.info(f"Memory cleanup completed: {len(expired_sessions)} expired sessions removed, {len(SESSION_STORES)} sessions still active")
+         else:
+             logger.debug(f"No expired sessions found. {len(SESSION_STORES)} sessions still active in memory")

      except Exception as e:
+         logger.error(f"Session cleanup error: {e}")
+         logger.error(traceback.format_exc())
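+ # Expiry is only enforced when the sweep runs, so a session actually lives
+ # somewhere between STORE_TTL and STORE_TTL + CLEANUP_INTERVAL seconds after
+ # it was loaded.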

  async def periodic_cleanup():
+     """Periodic cleanup of expired sessions - keeps server running"""
+     cleanup_count = 0
+     while True:
+         cleanup_count += 1
+         logger.debug(f"Running session cleanup cycle #{cleanup_count}")
          try:
+             cleanup_expired_sessions()
+             # Sleep for cleanup interval (30 minutes)
              await asyncio.sleep(CLEANUP_INTERVAL)
          except asyncio.CancelledError:
+             logger.info(f"Session cleanup task cancelled after {cleanup_count} cycles")
              raise
          except Exception as e:
+             logger.error(f"Periodic cleanup error in cycle #{cleanup_count}: {e}")
+             logger.error(traceback.format_exc())
+             # Don't break the loop - wait 1 minute before retrying; the
+             # while sits outside the try so an error cannot end cleanup
+             await asyncio.sleep(60)

+ # Global cleanup task
+ cleanup_task = None

  @asynccontextmanager
  async def lifespan(app: FastAPI):
+     """Application lifespan with comprehensive error handling"""
      global cleanup_task

      # Startup
+     logger.info("Starting Advanced RAG Chat Service...")
+     APP_STATE["startup_time"] = datetime.utcnow()

+     startup_success = True

+     # Check FAISS availability
+     if not FAISS_AVAILABLE:
+         logger.error("FAISS library not available - this is required for RAG functionality")
+         startup_success = False

+     # Import RAG module
+     if not safe_import_rag():
+         logger.error("RAG module import failed")
+         startup_success = False

+     # Connect to MongoDB (non-critical failure)
      if not connect_mongodb():
+         logger.error("MongoDB connection failed - continuing with limited functionality")

+     # Initialize RAG system (non-critical failure for basic health checks)
+     if RAG_MODULE and FAISS_AVAILABLE:
+         if not initialize_rag():
+             logger.error("RAG initialization failed - RAG features disabled")

+     # Start cleanup task if MongoDB is available
+     if APP_STATE["mongodb_connected"]:
+         try:
+             cleanup_task = asyncio.create_task(periodic_cleanup())
+             logger.info("Background cleanup task started")
+         except Exception as e:
+             logger.error(f"Failed to start cleanup task: {e}")

+     if startup_success:
+         logger.info("Startup completed successfully")
+     else:
+         logger.warning("Startup completed with errors - some features may be disabled")

      yield

      # Shutdown
+     logger.info("Shutting down...")

      if cleanup_task:
          cleanup_task.cancel()

      logger.info("Shutdown completed")

+ # Initialize FastAPI app
  app = FastAPI(
+     title="Advanced RAG Chat Service",
+     description="Robust RAG-based chat service with comprehensive error handling",
+     version="2.0.0",
      lifespan=lifespan
  )

+ # CORS configuration
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
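+ # The CORS spec disallows a literal wildcard origin on credentialed requests,
+ # so allow_origins=["*"] together with allow_credentials=True is fragile from
+ # browser clients that send cookies; production should list explicit origins
+ # (the pre-rewrite code carried the same TODO).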

+ # Root endpoint
+ @app.get("/")
+ async def root():
+     """Service information endpoint"""
+     uptime = (datetime.utcnow() - APP_STATE["startup_time"]).total_seconds() if APP_STATE["startup_time"] else 0

+     return {
+         "service": "Advanced RAG Chat Service",
+         "version": "2.0.0",
+         "status": "running",
+         "uptime_seconds": uptime,
+         "components": {
+             "mongodb": APP_STATE["mongodb_connected"],
+             "rag_system": APP_STATE["rag_ready"],
+             "faiss": FAISS_AVAILABLE
+         },
+         "active_sessions": len(SESSION_STORES),
+         "total_queries": APP_STATE["total_queries"],
+         "endpoints": {
+             "health": "GET /health",
+             "init": "POST /init/{session_id}",
+             "chat": "POST /chat/{session_id}",
+             "history": "GET /history/{session_id}",
+             "cleanup": "DELETE /session/{session_id}",
+             "status": "GET /sessions/active"
+         }
+     }
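+ # Quick smoke test (assuming the default port set at the bottom of the file):
+ #   curl http://localhost:7861/        -> service and endpoint overview
+ #   curl http://localhost:7861/health  -> per-component status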

  @app.get("/health", response_model=HealthResponse)
  async def health_check():
+     """Comprehensive health check"""
      try:
+         # Test MongoDB connection
          mongodb_connected = False
+         if DB is not None:
              try:
                  DB.command("ping")
                  mongodb_connected = True
              except Exception:
                  pass

+         # Calculate uptime
+         uptime = 0
+         if APP_STATE["startup_time"]:
+             uptime = (datetime.utcnow() - APP_STATE["startup_time"]).total_seconds()

+         # Memory usage
          with STORE_LOCK:
              memory_sessions = len(SESSION_STORES)
              indexed_sessions = sum(1 for store in SESSION_STORES.values() if store["indexed"])

+         # Overall status
+         status = "healthy"
+         if not FAISS_AVAILABLE:
+             status = "degraded"
+         elif not mongodb_connected and not RAG_INITIALIZED:
+             status = "unhealthy"

+         last_error = APP_STATE["errors"][-1] if APP_STATE["errors"] else None

          return HealthResponse(
+             status=status,
              mongodb_connected=mongodb_connected,
              rag_initialized=RAG_INITIALIZED,
+             faiss_available=FAISS_AVAILABLE,
+             active_sessions=memory_sessions,
              memory_usage={
                  "loaded_sessions": memory_sessions,
                  "indexed_sessions": indexed_sessions,
+                 "store_ttl_minutes": STORE_TTL // 60,
+                 "cleanup_interval_minutes": CLEANUP_INTERVAL // 60
+             },
+             uptime_seconds=uptime,
+             last_error=last_error
          )

      except Exception as e:
          logger.error(f"Health check failed: {e}")
          return HealthResponse(
              status="unhealthy",
              mongodb_connected=False,
              rag_initialized=False,
+             faiss_available=False,
              active_sessions=0,
+             memory_usage={},
+             uptime_seconds=0,
+             last_error=str(e)
          )

  @app.post("/init/{session_id}", response_model=InitResponse)
  async def initialize_session(session_id: str, request: InitRequest):
+     """Initialize session with comprehensive validation"""
      session_logger = create_session_logger(session_id)

      try:
+         # Validate prerequisites
+         if DB is None:
+             raise HTTPException(status_code=503, detail="Database not connected")

+         if not RAG_INITIALIZED:
+             raise HTTPException(status_code=503, detail="RAG system not initialized")

+         if not FAISS_AVAILABLE:
+             raise HTTPException(status_code=503, detail="FAISS library not available")

+         # Check if already initialized
+         with STORE_LOCK:
+             if session_id in SESSION_STORES and SESSION_STORES[session_id]["indexed"] and not request.force_reload:
+                 store = SESSION_STORES[session_id]
+                 metadata = store["metadata"]
+                 session_logger.info("Session already initialized")
+                 return InitResponse(
+                     success=True,
+                     session_id=session_id,
+                     message="Session already initialized",
+                     chunk_count=metadata["chunk_count"],
+                     title=metadata["title"],
+                     document_info=metadata["document_info"]
+                 )

+         session_logger.info("Initializing session...")

+         # Load session from MongoDB
          session_store = load_session_from_mongodb(session_id)

          # Store in memory
          with STORE_LOCK:
              SESSION_STORES[session_id] = session_store
+             APP_STATE["active_sessions"] = len(SESSION_STORES)

+         # Build FAISS index
+         metadata = build_faiss_index_safely(session_id)

+         session_logger.info(f"Session initialized: {metadata['chunk_count']} chunks ready")

          return InitResponse(
              success=True,
              session_id=session_id,
+             message=f"Session initialized successfully with {metadata['chunk_count']} chunks",
              chunk_count=metadata["chunk_count"],
              title=metadata["title"],
              document_info=metadata["document_info"]
          )

+     except HTTPException:
+         raise
      except ValueError as e:
+         session_logger.error(f"Session initialization validation error: {e}")
+         return InitResponse(
+             success=False,
+             session_id=session_id,
+             message="Session initialization failed",
+             chunk_count=0,
+             title="Error",
+             error_details=str(e)
+         )
      except Exception as e:
          session_logger.error(f"Session initialization error: {e}")
+         session_logger.error(traceback.format_exc())
+         APP_STATE["errors"].append(f"Init failed for {session_id[:8]}: {str(e)}")
+         return InitResponse(
+             success=False,
+             session_id=session_id,
+             message="Internal server error during initialization",
+             chunk_count=0,
+             title="Error",
+             error_details="Internal server error"
+         )
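+ # Contract change: validation and load failures now come back as HTTP 200
+ # with success=False and error_details set, instead of the old 404/500;
+ # clients must check the success flag rather than the status code alone.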

  @app.post("/chat/{session_id}", response_model=ChatResponse)
  async def chat_with_document(session_id: str, request: ChatRequest):
+     """Chat endpoint with comprehensive error handling"""
      session_logger = create_session_logger(session_id)
      start_time = time.time()

      try:
+         # Validate prerequisites
+         if DB is None:
+             raise HTTPException(status_code=503, detail="Database not connected")

+         if not RAG_INITIALIZED or not RAG_MODULE:
+             raise HTTPException(status_code=503, detail="RAG system not initialized")

+         # Validate session
          with STORE_LOCK:
              if session_id not in SESSION_STORES:
                  raise HTTPException(
+                     status_code=400,
+                     detail=f"Session not initialized. Call /init/{session_id} first."
                  )

              if not SESSION_STORES[session_id]["indexed"]:
                  raise HTTPException(
                      status_code=400,
+                     detail="Session not indexed properly. Try reinitializing."
                  )

+         session_logger.info(f"Processing query: {request.message[:100]}...")

+         # Query RAG system
+         try:
+             result = RAG_MODULE.query_documents(request.message, top_k=5)
+             APP_STATE["total_queries"] += 1
+         except Exception as rag_error:
+             session_logger.error(f"RAG query failed: {rag_error}")
+             result = {
+                 'error': f'RAG processing failed: {str(rag_error)}',
+                 'answer': 'I apologize, but I encountered an error while processing your question. Please try again or rephrase your query.',
+                 'sources': [],
+                 'query_analysis': {},
+                 'confidence': 0.0
+             }

+         if 'error' in result and not result.get('answer'):
              raise HTTPException(status_code=500, detail=result['error'])
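          # The fallback dict above always carries an answer, so this 500 only
          # fires when rag.query_documents itself returns an error with no
          # answer; a crashed RAG call is surfaced as a polite apology with
          # success=True.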

          answer = result.get('answer', 'Unable to generate answer.')
          query_analysis = result.get('query_analysis', {})
          confidence = result.get('confidence', 0.0)

+         # Save chat messages
+         save_chat_message_safely(session_id, "user", request.message)
+         save_chat_message_safely(session_id, "assistant", answer)

+         # Get chat history
+         chat_history = get_chat_history_safely(session_id)

          processing_time = time.time() - start_time
+         session_logger.info(f"Query processed in {processing_time:.2f}s, confidence: {confidence:.1f}%")

+         # Format sources
+         formatted_sources = []
+         for source in sources:
+             try:
+                 formatted_source = {
+                     "chunk_id": source.get("chunk_id", ""),
+                     "title": source.get("title", ""),
+                     "section": source.get("section", ""),
+                     "relevance_score": float(source.get("relevance_score", 0.0)),
+                     "text_preview": source.get("excerpt", "")[:300] + ("..." if len(source.get("excerpt", "")) > 300 else ""),
+                     "entities": source.get("entities", [])
+                 }
+                 formatted_sources.append(formatted_source)
+             except Exception as source_error:
+                 session_logger.warning(f"Failed to format source: {source_error}")

          return ChatResponse(
              success=True,
      except HTTPException:
          raise
      except Exception as e:
+         session_logger.error(f"Chat processing failed: {e}")
+         session_logger.error(traceback.format_exc())
+         APP_STATE["errors"].append(f"Chat failed for {session_id[:8]}: {str(e)}")

+         return ChatResponse(
+             success=False,
+             answer="I apologize, but I encountered an error while processing your question. Please try again.",
+             sources=[],
+             chat_history=get_chat_history_safely(session_id),
+             processing_time=time.time() - start_time,
+             session_id=session_id,
+             error_details="Internal server error"
+         )
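+ # Typical session flow, assuming an ingest-completed session id SESSION:
+ #   curl -X POST http://localhost:7861/init/SESSION \
+ #        -H "Content-Type: application/json" -d "{}"
+ #   curl -X POST http://localhost:7861/chat/SESSION \
+ #        -H "Content-Type: application/json" -d '{"message": "Summarize the contract"}'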

  @app.get("/history/{session_id}")
  async def get_session_history(session_id: str):
      """Get chat history for a session"""
      session_logger = create_session_logger(session_id)

+     if DB is None:
          raise HTTPException(status_code=503, detail="Database not connected")

      try:
+         chat_history = get_chat_history_safely(session_id, limit=100)

          session_logger.info(f"Retrieved {len(chat_history)} chat messages")

      session_logger = create_session_logger(session_id)

      try:
+         cleaned_up = False

          with STORE_LOCK:
              if session_id in SESSION_STORES:
+                 # Clean up session-specific RAG instance
+                 store = SESSION_STORES[session_id]
+                 if "rag_instance" in store:
+                     try:
+                         # Clean up any resources in the RAG instance
+                         store["rag_instance"].cleanup()
+                     except Exception:
+                         pass

                  # Clean up FAISS index
+                 if store.get("faiss_index"):
+                     del store["faiss_index"]

                  del SESSION_STORES[session_id]
+                 APP_STATE["active_sessions"] = len(SESSION_STORES)
+                 cleaned_up = True
                  session_logger.info("Session removed from memory")

+         if not cleaned_up:
+             session_logger.info("Session not found in memory")

          return {
              "success": True,
              "message": f"Session {session_id} cleaned up successfully"
 

  @app.get("/sessions/active")
  async def get_active_sessions():
+     """Get information about active sessions in memory with TTL info"""
      try:
+         current_time = datetime.utcnow()

          with STORE_LOCK:
              active_sessions = []
              for session_id, store in SESSION_STORES.items():
                  metadata = store["metadata"]
+                 loaded_at = metadata["loaded_at"]
+                 age_seconds = (current_time - loaded_at).total_seconds()
+                 remaining_seconds = STORE_TTL - age_seconds

                  active_sessions.append({
                      "session_id": session_id,
                      "title": metadata["title"],
                      "chunk_count": metadata["chunk_count"],
                      "indexed": store["indexed"],
+                     "has_rag_instance": "rag_instance" in store,
+                     "loaded_at": loaded_at.isoformat(),
+                     "age_minutes": age_seconds / 60,
+                     "remaining_minutes": max(0, remaining_seconds / 60),
+                     "expires_at": (loaded_at + timedelta(seconds=STORE_TTL)).isoformat(),
+                     "will_expire_soon": remaining_seconds < 300,  # less than 5 minutes
+                     "failed_chunks": metadata.get("failed_chunks", 0)
                  })

+             # Sort by remaining time (expiring soon first)
+             active_sessions.sort(key=lambda x: x["remaining_minutes"])

          return {
              "success": True,
              "active_sessions": active_sessions,
+             "total_sessions": len(active_sessions),
+             "session_ttl_minutes": STORE_TTL / 60,
+             "cleanup_interval_minutes": CLEANUP_INTERVAL / 60,
+             "next_cleanup_in_minutes": CLEANUP_INTERVAL / 60  # approximate
          }

      except Exception as e:
          logger.error(f"Failed to get active sessions: {e}")
          raise HTTPException(status_code=500, detail=f"Failed to get active sessions: {str(e)}")
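  # Sessions are returned soonest-to-expire first, so a client can watch
  # will_expire_soon and call POST /sessions/{id}/extend (below) to keep a
  # session alive.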

+ @app.post("/sessions/{session_id}/extend")
+ async def extend_session_ttl(session_id: str):
+     """Extend a session's TTL by resetting its load time (keep it alive longer)"""
+     session_logger = create_session_logger(session_id)

+     try:
+         with STORE_LOCK:
+             if session_id not in SESSION_STORES:
+                 raise HTTPException(status_code=404, detail="Session not found in memory")

+             # Reset the loaded_at timestamp to extend TTL
+             old_loaded_at = SESSION_STORES[session_id]["metadata"]["loaded_at"]
+             SESSION_STORES[session_id]["metadata"]["loaded_at"] = datetime.utcnow()

+         session_logger.info(f"Session TTL extended (was loaded at: {old_loaded_at.isoformat()})")

+         return {
+             "success": True,
+             "message": f"Session {session_id} TTL extended for another {STORE_TTL//60} minutes",
+             "new_expiry": (datetime.utcnow() + timedelta(seconds=STORE_TTL)).isoformat()
+         }

+     except HTTPException:
+         raise
+     except Exception as e:
+         session_logger.error(f"Failed to extend session TTL: {e}")
+         raise HTTPException(status_code=500, detail=f"Failed to extend session TTL: {str(e)}")

+ @app.post("/cleanup/run")
+ async def manual_cleanup():
+     """Manually trigger cleanup of expired sessions"""
+     try:
+         before_count = len(SESSION_STORES)
+         cleanup_expired_sessions()
+         after_count = len(SESSION_STORES)
+         cleaned_count = before_count - after_count

+         return {
+             "success": True,
+             "message": "Manual cleanup completed",
+             "sessions_before": before_count,
+             "sessions_after": after_count,
+             "sessions_cleaned": cleaned_count
+         }

+     except Exception as e:
+         logger.error(f"Manual cleanup failed: {e}")
+         raise HTTPException(status_code=500, detail=f"Manual cleanup failed: {str(e)}")
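+ # The before/after counts above are read without STORE_LOCK; they can race
+ # with the background sweep, so treat the reported numbers as approximate (or
+ # take STORE_LOCK around the reads to make them exact).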

  @app.get("/rag/status")
  async def get_rag_status():
+     """Get RAG system status"""
      try:
          return {
              "success": True,
              "rag_initialized": RAG_INITIALIZED,
+             "faiss_available": FAISS_AVAILABLE,
+             "concurrency": {
+                 "session_isolated_rag": True,
+                 "async_processing": True,
+                 "thread_pool_execution": True,
+                 "no_global_state_conflicts": True
+             },
              "optimization": {
+                 "precomputed_embeddings": True,
                  "persistent_faiss_index": True,
+                 "mongodb_persistence": True,
+                 "memory_cleanup": True
              },
              "features": {
                  "multi_stage_retrieval": True,
+                 "dense_retrieval": "FAISS + Session-Isolated Embeddings",
+                 "sparse_retrieval": "BM25 per Session",
+                 "entity_based_retrieval": "Legal NER + SpaCy",
+                 "graph_based_retrieval": "Legal Concept Graph per Session",
                  "query_analysis": "Legal Intent Classification",
                  "answer_generation": "Groq LLM with IRAC Method"
              },
+             "active_sessions": len(SESSION_STORES),
+             "total_queries_processed": APP_STATE["total_queries"]
          }

      except Exception as e:

  if __name__ == "__main__":
      import uvicorn
      port = int(os.getenv("PORT", 7861))
+     logger.info(f"Starting server on port {port}")
      uvicorn.run(app, host="0.0.0.0", port=port)
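  # Local run sketch (dependency list approximate - see rag.py for the full
  # set; assumes MONGODB_URL and GROQ_API_KEY are exported):
  #   pip install fastapi uvicorn pymongo numpy networkx rank_bm25 faiss-cpu
  #   python app.py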