ali1001 commited on
Commit
2f8cbf8
·
verified ·
1 Parent(s): 6ca13fa

Upload 12 files

Browse files
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import os

import streamlit as st

from modules.pdf_loader import load_pdf
from modules.vectorstore import create_vectorstore
from modules.llm_model import load_llm_pipeline
from modules.qa_chain import create_qa_chain

# Set Hugging Face token from Streamlit secrets (empty string if unset).
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets.get("HF_TOKEN", "")

st.set_page_config(page_title="Smart Business Report Assistant", layout="centered")
st.title("📊 Smart Business Report Assistant")

uploaded_files = st.file_uploader(
    "📎 Upload one or more PDF reports/invoices",
    type=["pdf"],
    accept_multiple_files=True,
)

if uploaded_files:
    # Streamlit reruns this whole script on every widget interaction,
    # including each question typed below.  Rebuilding the vectorstore and
    # reloading the LLM on every rerun is extremely slow, so cache the QA
    # chain in session_state, keyed by the uploaded files' names and sizes.
    upload_key = tuple((f.name, f.size) for f in uploaded_files)
    if st.session_state.get("upload_key") != upload_key:
        with st.spinner("🔄 Processing PDFs..."):
            all_docs = []
            for file in uploaded_files:
                all_docs.extend(load_pdf(file))

            vectorstore = create_vectorstore(all_docs)
            llm = load_llm_pipeline()
            st.session_state["qa_chain"] = create_qa_chain(llm, vectorstore)
            st.session_state["upload_key"] = upload_key

    qa_chain = st.session_state["qa_chain"]

    st.success("✅ Ready! Ask your questions below.")
    query = st.text_input("❓ Ask a question about the uploaded PDF(s)")

    if query:
        with st.spinner("💬 Thinking..."):
            try:
                result = qa_chain.invoke({"query": query})
                answer = result.get("result", "❌ No answer found. Try a different question.")
            except Exception as e:
                answer = f"⚠️ Error: {str(e)}"

        st.markdown("### 💡 Answer")
        # Escape the model output before embedding it in raw HTML:
        # with unsafe_allow_html=True, any markup in the LLM answer (or in
        # an exception message) would otherwise be rendered/injected.
        st.markdown(
            f"""
            <div style='background-color: #1e1e1e; padding: 12px; border-radius: 8px; color: white; font-size: 16px;'>
            {html.escape(answer)}
            </div>
            """,
            unsafe_allow_html=True,
        )
modules/__init__.py ADDED
File without changes
modules/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (171 Bytes). View file
 
modules/__pycache__/llm_model.cpython-313.pyc ADDED
Binary file (853 Bytes). View file
 
modules/__pycache__/pdf_loader.cpython-313.pyc ADDED
Binary file (1.07 kB). View file
 
modules/__pycache__/qa_chain.cpython-313.pyc ADDED
Binary file (1.1 kB). View file
 
modules/__pycache__/vectorstore.cpython-313.pyc ADDED
Binary file (567 Bytes). View file
 
modules/llm_model.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# langchain_community is the current home of HuggingFacePipeline; the old
# langchain.llms path is deprecated, and the rest of this project already
# imports from langchain_community.
from langchain_community.llms import HuggingFacePipeline


def load_llm_pipeline():
    """Load the instruction-tuned seq2seq model and wrap it for LangChain.

    Returns:
        HuggingFacePipeline: a LangChain LLM backed by a local
        text2text-generation pipeline for "declare-lab/flan-alpaca-large".
    """
    model_id = "declare-lab/flan-alpaca-large"  # instruction-tuned; better formatting
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        # max_new_tokens bounds only the *generated* text; the original
        # max_length=512 also counted the prompt, silently truncating
        # answers when the stuffed retrieval context was long.
        max_new_tokens=512,
        do_sample=True,
        temperature=0.5,
    )
    return HuggingFacePipeline(pipeline=pipe)
modules/pdf_loader.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tempfile
import os


def load_pdf(uploaded_file):
    """Persist a Streamlit upload to disk, parse it, and split it into chunks.

    PyPDFLoader only accepts a filesystem path, so the in-memory upload is
    first written to a temporary .pdf file; the file is always removed once
    parsing is done.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as handle:
        handle.write(uploaded_file.read())
        pdf_path = handle.name

    try:
        pages = PyPDFLoader(pdf_path).load()
        chunker = RecursiveCharacterTextSplitter(
            chunk_size=1000,     # roughly 200-300 tokens per chunk
            chunk_overlap=200,   # overlap preserves context across boundaries
        )
        return chunker.split_documents(pages)
    finally:
        # Always delete the temp file, even if PDF parsing raised.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
modules/qa_chain.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


def create_qa_chain(llm, vectorstore):
    """Build a RetrievalQA chain that answers questions from *vectorstore*.

    The "stuff" chain type concatenates every retrieved chunk into a single
    prompt before calling *llm*.
    """
    template = """
    You are an AI assistant helping users analyze multiple PDFs (such as resumes, reports, invoices).
    When answering questions, always speak from the user's perspective — say "your resume", not "my resume".

    Be concise, polite, and answer in bullet points or short structured text.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """

    qa_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        chain_type_kwargs={"prompt": qa_prompt},
    )
modules/vectorstore.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceInstructEmbeddings


def create_vectorstore(pages):
    """Embed *pages* with the instructor-base model and index them in FAISS.

    NOTE(review): HuggingFaceInstructEmbeddings needs the InstructorEmbedding
    and sentence-transformers packages at runtime — confirm both are listed
    in requirements.txt.
    """
    embedder = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    return FAISS.from_documents(pages, embedder)
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
1
streamlit
langchain
langchain-community
faiss-cpu
transformers
torch
huggingface-hub
pypdf
InstructorEmbedding
sentence-transformers