import re
from pathlib import Path

import pandas as pd
import gradio as gr

from config import settings
from embed.embeddings import get_finetuned_embedding_model, get_openai_embedding_model
from embed.qdrant_vectorstore import create_qdrant_vectorstore
from pipeline.rag_chain import build_rag_chain

CACHE_DIR = Path(f"cache/{settings.DATASET_PREFIX}")
CACHE_DIR.mkdir(exist_ok=True, parents=True)

# ──────────────────────────────────────────────────────────────
# UTILS
# ──────────────────────────────────────────────────────────────

def save_df(df: pd.DataFrame, name: str):
    """Persist a DataFrame to the cache directory as CSV."""
    df.to_csv(CACHE_DIR / f"{name}.csv", index=False)


def load_df(name: str):
    """Load a cached DataFrame by name, or return None if the file is missing."""
    path = CACHE_DIR / f"{name}.csv"
    return pd.read_csv(path) if path.exists() else None


def highlight_bible_refs(text: str) -> str:
    """
    Replace book references like 'Romans 11:25-32' with Markdown hyperlinks
    to the corresponding BibleGateway passage.
    """
    # Optional leading book number (1-3), book name, then chapter:verse(s).
    # Verse ranges may use a hyphen or an en dash; the en dash is normalized
    # to a hyphen in linkify() below.
    pattern = r'([1-3]?\s?[A-Z][a-z]+)\s+(\d+):(\d+(?:[-–]\d+)?)'

    def linkify(match):
        book = match.group(1).replace(" ", "+")
        chapter = match.group(2)
        verses = match.group(3).replace("–", "-")
        ref = f"{match.group(1)} {chapter}:{verses}"
        url = f"https://www.biblegateway.com/passage/?search={book}+{chapter}%3A{verses}"
        return f"[{ref}]({url})"

    return re.sub(pattern, linkify, text)

# ──────────────────────────────────────────────────────────────
# STREAMING CHAIN RESPONSE
# ──────────────────────────────────────────────────────────────

def generate_stream_response(chain, question: str):
    """Stream the chain's intermediate steps and final answer as Markdown chunks."""
    try:
        yield f"\n\n# 🤔 Main Question\n\n{question}\n\n"
        full_answer = ""
        for step in chain.stream({"query": question}):
            sub_questions = step.get("decompose", {}).get("sub_questions", [])
            if sub_questions:
                yield "---"
                yield "\n\n# 🔹 Sub-Questions\n\n"
                for i, sub_q in enumerate(sub_questions):
                    yield f"{i + 1}. {sub_q}\n\n"

            retrieval_results = step.get("retrieve", {}).get("retrieval_results", [])
            if retrieval_results:
                yield "---"
                yield "\n\n# 🔹 Retrieval Results\n\n"
                for i, result in enumerate(retrieval_results):
                    yield f"**{i + 1}. {result.get('question', '')}**\n\n"
                    for doc in result.get("docs", []):
                        passage = highlight_bible_refs(doc.page_content)
                        source = highlight_bible_refs(doc.metadata.get("source", ""))
                        yield f"> {source}: {passage}\n\n"

            final_answer = step.get("summarize", {}).get("final_answer", "")
            if final_answer and final_answer != full_answer:
                full_answer = final_answer
                yield "---"
                yield "\n\n# 🧠 Final Answer\n\n"
                yield f"{highlight_bible_refs(final_answer)}\n\n"
    except Exception as e:
        yield f"❌ **Error:** {e}"

# ──────────────────────────────────────────────────────────────
# GRADIO UI
# ──────────────────────────────────────────────────────────────

with gr.Blocks(theme=gr.themes.Soft(), css="footer {display:none !important}") as demo:
    print("Loading chunked documents for ingestion...")
    chunked_docs_df = load_df("chunked_docs")
    if chunked_docs_df is None:
        raise FileNotFoundError(f"chunked_docs.csv is missing in {CACHE_DIR}")

    vectorstore = create_qdrant_vectorstore(
        documents=chunked_docs_df.to_dict("records"),
        embedding_model=get_finetuned_embedding_model(),
        collection_name=settings.COLLECTION_NAME_FINETUNED,
    )
    rag_chain = build_rag_chain(vectorstore, streaming=True)

    gr.Markdown("## 📖 Bible Explorer")
    chatbot = gr.Chatbot(label="Fine-Tuned RAG Chat", show_label=True, render_markdown=True, height=800)
    user_input = gr.Textbox(placeholder="Ask a question...", label="Your Question")
    send_button = gr.Button("Send")

    def chat_with_rag(user_msg):
        # Accumulate streamed chunks so the chatbot shows a growing answer.
        buffer = ""
        for chunk in generate_stream_response(rag_chain, user_msg):
            buffer += chunk
            yield [(user_msg, buffer)]

    send_button.click(
        fn=chat_with_rag,
        inputs=[user_input],
        outputs=[chatbot],
        concurrency_limit=3,
    )

# === LAUNCH ===
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
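
# ──────────────────────────────────────────────────────────────
# QUICK SELF-CHECK (sketch)
# ──────────────────────────────────────────────────────────────
# A minimal doctest-style illustration of `highlight_bible_refs`, assuming
# only the regex and BibleGateway URL format defined above; paste into a
# REPL to verify.
#
# >>> highlight_bible_refs("See Romans 11:25-32 for context.")
# 'See [Romans 11:25-32](https://www.biblegateway.com/passage/?search=Romans+11%3A25-32) for context.'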