"""LangGraph tool-calling agent built from LangChain built-in and custom tools.

The module defines a handful of small custom tools (text reversal, arithmetic,
image text extraction, Excel loading, webpage fetching), bundles them with
LangChain's built-in search / shell / Python tools, and wires everything into
an assistant <-> tools loop with LangGraph.
"""

import base64
import math
import mimetypes
import re
from typing import Annotated, Any, Optional, TypedDict

import pandas as pd
import requests
# Built-in LangChain tools
from langchain_community.tools import (
    ArxivQueryRun,
    DuckDuckGoSearchRun,
    ShellTool,
    WikipediaQueryRun,
)
from langchain_community.utilities import (
    ArxivAPIWrapper,
    DuckDuckGoSearchAPIWrapper,
    WikipediaAPIWrapper,
)
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
from langchain_core.runnables.config import RunnableConfig
from langchain_core.tools import tool
from langchain_experimental.tools import PythonREPLTool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_ollama import ChatOllama
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.graph.state import CompiledStateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from markdownify import markdownify
from requests.exceptions import RequestException

# Vision-capable chat model used by `extract_text_multimodal`. It is created
# when the module is imported. To use a local Ollama model instead, replace the
# assignment below with:
#   vision_llm = ChatOllama(model="qwen2-vl:7b", base_url="http://localhost:11434")
vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Characters the `calculator` tool accepts; anything else is rejected before
# the expression is evaluated.
_CALCULATOR_CHARS = frozenset("0123456789+-*/.() ")

# Single-argument operations supported by `advanced_math`.
_UNARY_MATH_OPERATIONS = {
    "sqrt": math.sqrt,
    "log": math.log,
    "sin": math.sin,
    "cos": math.cos,
    "tan": math.tan,
}

# Upper bound on the number of characters `visit_webpage` returns for a page.
_MAX_WEBPAGE_CHARS = 40000

# Static tool overview embedded in the system prompt.
_TOOLS_DESCRIPTION = """
Available tools for the tasks:

WEB & SEARCH:
- duckduckgo_search: Search the web for information
- wikipedia_tool: Search Wikipedia for knowledge
- visit_webpage: Visit a webpage and extract readable markdown content
- arxiv_tool: Search arXiv for research papers

CALCULATIONS:
- calculator: Basic arithmetic operations (+, -, *, /, etc.)
- advanced_math: Advanced math functions (sqrt, log, trig)
- python_repl: Execute Python code for complex computations

TEXT PROCESSING:
- reverse_text: Reverse text character by character
- reverse_words: Reverse word order in text

IMAGE PROCESSING:
- extract_text_multimodal: Extract text using AI vision

DATA ANALYSIS:
- read_excel_file: Read and preview Excel files

SYSTEM:
- shell_tool: Execute shell commands (use carefully)
"""


# ============== CUSTOM TOOLS (not available in LangChain) ==============


@tool
def reverse_text(text: str) -> str:
    """Reverse the given text character by character."""
    return text[::-1]


@tool
def reverse_words(text: str) -> str:
    """Reverse the order of words in the given text."""
    # split() without arguments collapses runs of whitespace, so the result is
    # always joined with single spaces.
    return " ".join(text.split()[::-1])


@tool
def calculator(expression: str) -> str:
    """Perform mathematical calculations safely.

    Supports basic arithmetic operations: digits, decimal points, parentheses
    and the operators +, -, * and / (including the combinations ** and //).

    Args:
        expression: The arithmetic expression to evaluate, e.g. "(2 + 3) * 4".

    Returns:
        The result as a string, or an error message when the expression
        contains other characters or cannot be evaluated.
    """
    if not all(char in _CALCULATOR_CHARS for char in expression):
        return "Error: Invalid characters in expression"
    try:
        # NOTE(security): eval() on model-supplied text. The whitelist above
        # rules out names, attribute access and calls, and builtins are
        # removed as defence in depth. A very large power such as
        # "9**9**9**9" can still consume a lot of CPU and memory.
        result = eval(expression, {"__builtins__": {}}, {})
    except Exception as e:  # SyntaxError, ZeroDivisionError, OverflowError, ...
        return f"Calculation error: {str(e)}"
    return str(result)


@tool
def advanced_math(operation: str, num1: float, num2: Optional[float] = None) -> str:
    """Perform advanced math operations like sqrt, log, sin, cos, tan, power.

    Args:
        operation: One of "sqrt", "log" (natural logarithm), "sin", "cos",
            "tan" (the trigonometric functions take radians) or "power".
        num1: The operand, or the base when operation is "power".
        num2: The exponent; only used, and required, when operation is "power".

    Returns:
        The result as a string, or a message describing why it could not be
        computed.
    """
    try:
        if operation == "power":
            if num2 is None:
                return "power operation requires two numbers"
            return str(math.pow(num1, num2))
        unary_function = _UNARY_MATH_OPERATIONS.get(operation)
        if unary_function is None:
            return f"Unknown operation: {operation}"
        return str(unary_function(num1))
    except (ArithmeticError, TypeError, ValueError) as e:
        # e.g. sqrt/log of a negative number, overflow, or a non-numeric operand
        return f"Math error: {str(e)}"


def _content_to_text(content: Any) -> str:
    """Flatten chat-message content (a string or a list of blocks) to text."""
    if isinstance(content, str):
        return content
    pieces = []
    for block in content:
        if isinstance(block, str):
            pieces.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            pieces.append(block.get("text", ""))
    return "".join(pieces)


@tool
def extract_text_multimodal(img_path: str) -> str:
    """Extract text from image using multimodal LLM vision capabilities.

    Args:
        img_path: Path of the image file on the local filesystem.

    Returns:
        The text found in the image, or an error message when the file cannot
        be read or the vision model call fails.
    """
    try:
        if "vision_llm" not in globals():
            return "Error: Vision LLM not configured. Please uncomment and configure vision_llm."

        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the data URL with the file's real image type instead of always
        # claiming PNG; fall back to PNG when the extension is unrecognised.
        mime_type, _ = mimetypes.guess_type(img_path)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": "Extract all the text from this image. Return only the extracted text, no explanations.",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                    },
                ]
            )
        ]
        response = vision_llm.invoke(message)
        # Message content may be a plain string or a list of content blocks.
        return _content_to_text(response.content).strip()
    except Exception as e:
        # Tool boundary: report the failure to the model instead of raising.
        return f"Multimodal text extraction error: {str(e)}"


@tool
def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """Read Excel file and return a pandas DataFrame.

    Args:
        file_path: Path of the Excel workbook.
        sheet_name: Name of the sheet to load; the first sheet is loaded when
            omitted.

    Returns:
        The sheet contents. When loading fails, a one-row DataFrame with a
        single "error" column holding the error message.
    """
    # Only forward sheet_name when one was given: pandas treats sheet_name=None
    # as "load every sheet" and would return a dict rather than a DataFrame.
    read_kwargs = {"sheet_name": sheet_name} if sheet_name else {}
    try:
        return pd.read_excel(file_path, **read_kwargs)
    except Exception as e:
        # Keep the return type stable: the error is wrapped in a DataFrame.
        return pd.DataFrame({"error": [f"Excel reading error: {str(e)}"]})


@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Use this to browse and extract readable content from webpages.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page converted to markdown. Pages longer than 40000 characters are
        shortened to their first and last 20000 characters with a truncation
        marker in between. On failure an error message is returned instead.
    """
    try:
        response = requests.get(url, timeout=20)
        response.raise_for_status()
    except requests.exceptions.Timeout:
        return "Timeout while trying to access the webpage."
    except RequestException as e:
        return f"Request error: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"

    try:
        markdown_content = markdownify(response.text).strip()
        # Collapse runs of three or more newlines into a single blank line.
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
    except Exception as e:
        return f"Unexpected error: {str(e)}"

    if len(markdown_content) <= _MAX_WEBPAGE_CHARS:
        return markdown_content

    half = _MAX_WEBPAGE_CHARS // 2
    return (
        markdown_content[:half]
        + f"\n\n...[Content truncated to {_MAX_WEBPAGE_CHARS} chars]...\n\n"
        + markdown_content[-half:]
    )


def build_tool():
    """Initialize and return a list of built-in and custom LangChain tools.

    Returns:
        A list with the built-in Wikipedia, DuckDuckGo, arXiv, shell and
        Python REPL tools followed by the custom tools defined in this module.
    """
    # Built-in LangChain tools
    wikipedia_tool = WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=2000)
    )
    duckduckgo_search = DuckDuckGoSearchRun(
        api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=15)
    )
    arxiv_tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
    # NOTE(security): the next two tools let the model run arbitrary shell
    # commands and Python code on this machine.
    shell_tool = ShellTool()
    python_repl = PythonREPLTool()

    return [
        # Built-in LangChain tools
        wikipedia_tool,
        duckduckgo_search,
        arxiv_tool,
        shell_tool,
        python_repl,
        # Custom tools for specialized tasks
        reverse_text,
        reverse_words,
        calculator,
        advanced_math,
        extract_text_multimodal,
        read_excel_file,
        visit_webpage,
    ]


class AgentState(TypedDict):
    """State passed between the nodes of the agent graph."""

    # Value shown to the model as the "Currently loaded file"; may be None.
    input_file: Optional[str]
    # Conversation so far; new messages are merged in by `add_messages`.
    messages: Annotated[list[AnyMessage], add_messages]


def _build_system_prompt(input_file: Optional[str]) -> SystemMessage:
    """Return the system message for one assistant turn, naming *input_file*."""
    return SystemMessage(
        content=(
            "You are an intelligent AI agent designed to solve complex problems using the tools provided.\n\n"
            "=== Available Tools ===\n"
            f"{_TOOLS_DESCRIPTION}\n\n"
            "=== Optional Files ===\n"
            f"Currently loaded file: {input_file}\n\n"
            "=== Problem-Solving Process ===\n"
            "Follow these steps carefully when answering a question:\n"
            "1. Break the problem into smaller, manageable parts.\n"
            "2. Choose the most suitable tool for each part.\n"
            "3. Use multiple tools in sequence if needed.\n"
            "4. Verify your results and explain your reasoning clearly.\n\n"
            "Be precise and clear at every step. After your reasoning, provide ONLY the final answer.\n\n"
            "=== Final Answer Format Rules ===\n"
            "- For numbers: Use only digits (no commas or units) unless units are explicitly requested.\n"
            "- For strings: Do not use articles (a, an, the) or abbreviations. Spell out all digits.\n"
            "- For lists: Use commas to separate items. Apply the above number/string rules to each item.\n"
            "- If the answer is unknown: Respond exactly with \"do not know\"\n\n"
            "Example Question 1:\n\n"
            "If Eliud Kipchoge could maintain his marathon pace indefinitely, how many thousand hours would it take him to run from Earth to the Moon at its closest approach? Use the minimum perigee distance from Wikipedia and round to the nearest 1000 hours. Do not use commas.\n\n"
            "**Example Answer 1:**\n"
            "17\n\n"
            "**Example Reasoning Steps 1:**\n"
            "1. Found Eliud Kipchoge's marathon pace: 4 minutes 37 seconds per mile.\n"
            "2. Converted pace into hours per mile.\n"
            "3. Found Moon's closest distance: 225623 miles.\n"
            "4. Multiplied pace by distance to get total hours and rounded to nearest 1000.\n\n"
            "Example Question 2:\n\n"
            "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\n\n"
            "**Example Answer 2:**\n"
            "Yoshida, Uehara\n\n"
            "**Example Reasoning Steps 2:**\n"
            "1. Looked up Taishō Tamai on Wikipedia.\n"
            "2. Found the pitcher with number 18 is Kōsei Yoshida.\n"
            "3. Found the pitcher with number 20 is Kenta Uehara.\n\n"
            "Now answer the following questions:\n"
        )
    )


def _create_chat_model(provider: str, model: Optional[str], temperature: float):
    """Instantiate the chat model for *provider*, using its default model if none is given."""
    if provider == "google":
        return ChatGoogleGenerativeAI(
            model=model or "gemini-2.5-flash", temperature=temperature
        )
    if provider == "groq":
        return ChatGroq(model=model or "qwen/qwen3-32b", temperature=temperature)
    if provider == "huggingface":
        return ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                repo_id=model or "meta-llama/Llama-3.1-8B-Instruct",
                temperature=temperature,
            )
        )
    if provider == "ollama":
        return ChatOllama(
            model=model or "qwen3:4b",
            base_url="http://localhost:11434",
            temperature=temperature,
        )
    raise ValueError(
        "Unsupported provider. Choose from 'google', 'groq', 'huggingface', or 'ollama'."
    )


def build_langgraph(
    provider: str,
    model: Optional[str] = None,
    temperature: float = 0.1,
    all_tools: Optional[list[Any]] = None,
) -> CompiledStateGraph:
    """Builds and returns the LangGraph agent with the given provider.

    Args:
        provider: One of "google", "groq", "huggingface" or "ollama".
        model: Model identifier; each provider has its own default when omitted.
        temperature: Sampling temperature passed to the chat model.
        all_tools: Tools bound to the model and executed by the tool node.
            Defaults to no tools.

    Returns:
        The compiled graph: START -> assistant, assistant -> tools whenever the
        model requests a tool call, and tools -> assistant.

    Raises:
        ValueError: If *provider* is not one of the supported names.
    """
    if all_tools is None:
        all_tools = []

    llm_with_tools = _create_chat_model(provider, model, temperature).bind_tools(all_tools)

    def assistant(state: AgentState):
        """Run one model turn: system prompt plus the conversation so far."""
        # .get() so that callers who omit "input_file" do not hit a KeyError.
        input_file = state.get("input_file")
        sys_msg = _build_system_prompt(input_file)
        return {
            "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
            "input_file": input_file,
        }

    # Build the graph
    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(all_tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()


def main() -> None:
    """Run two demo questions through a Groq-backed agent and print the transcripts."""
    all_tools = build_tool()
    react_graph = build_langgraph("groq", all_tools=all_tools)
    separator = "\n" + "=" * 60 + "\n"

    print("🚀 GAIA Dataset Agent with LangChain Built-in Tools!")
    print(separator)

    # Example: Multi-step problem solving
    print("Testing calculation capabilities...")
    messages = [HumanMessage(content="Calculate the square root of 169, then multiply by 15")]
    result = react_graph.invoke({"messages": messages, "input_file": None})
    for m in result["messages"]:
        m.pretty_print()
    print(separator)

    # Example: Knowledge retrieval
    print("📚 Testing Wikipedia search...")
    messages = [
        HumanMessage(
            content="Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?"
        )
    ]
    # Limit how many graph steps this run may take.
    config = RunnableConfig(recursion_limit=10)
    result = react_graph.invoke({"messages": messages, "input_file": None}, config)
    for m in result["messages"]:
        m.pretty_print()
    print(separator)


if __name__ == "__main__":
    main()