Spaces:

ovinduG
/

gemmafunctionC

Running

App Files Files Community

gemmafunctionC / app.py

ovinduG

Update app.py

891728f verified 6 days ago

raw

history blame contribute delete

10.1 kB

	# ============================================================================
	# HUGGINGFACE SPACES DEPLOYMENT - FUNCTIONGEMMA CLASSIFIER
	# ============================================================================

	"""
	FunctionGemma Domain Classifier deployed on HuggingFace Spaces.
	Uses Spaces Secrets for authentication - no token pasting needed!

	SETUP INSTRUCTIONS:
	1. Go to your Space Settings → Repository secrets
	2. Click "New secret"
	3. Name: HF_TOKEN
	4. Value: your_huggingface_token_here
	5. Save and the space will automatically restart with the token!

	Get your token: https://huggingface.co/settings/tokens
	Accept license: https://huggingface.co/google/functiongemma-270m-it
	"""

	import os
	import sys
	import gradio as gr
	import torch
	import json
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import PeftModel
	from huggingface_hub import login

	# ============================================================================
	# CONFIGURATION
	# ============================================================================

	# Hub repository passed to PeftModel.from_pretrained for the adapter; it is
	# also the first place the tokenizer is loaded from.
	MODEL_REPO = "ovinduG/functiongemma-domain-classifier"
	# Base checkpoint the adapter is applied on top of, and the fallback source
	# for the tokenizer. The module docstring links to its license page.
	BASE_MODEL = "google/functiongemma-270m-it"

	# ============================================================================
	# AUTHENTICATION
	# ============================================================================

	print("="*80)
	print("🔐 HUGGINGFACE SPACES AUTHENTICATION")
	print("="*80)

	# The token is read from the HF_TOKEN environment variable (the module
	# docstring describes adding it as a Space repository secret). A missing or
	# whitespace-only value is treated as "not configured".
	HF_TOKEN = os.environ.get('HF_TOKEN', '').strip()

	if not HF_TOKEN:
	    # Abort startup with setup instructions instead of continuing without
	    # credentials. One print with sep="\n" emits the same lines as
	    # printing each message separately.
	    print(
	        "\n❌ ERROR: HF_TOKEN not found in Spaces secrets!",
	        "\n" + "="*80,
	        "📝 SETUP INSTRUCTIONS FOR HUGGINGFACE SPACES",
	        "="*80,
	        "\n1. Go to your Space Settings",
	        "2. Click on 'Repository secrets' tab",
	        "3. Click 'New secret'",
	        "4. Add:",
	        " Name: HF_TOKEN",
	        " Value: hf_your_token_here",
	        "5. Click 'Add secret'",
	        "6. Space will automatically restart with the token!",
	        "\n🔑 Get your token: https://huggingface.co/settings/tokens",
	        "📋 Accept license: https://huggingface.co/google/functiongemma-270m-it",
	        "="*80,
	        sep="\n",
	    )
	    sys.exit(1)

	# Confirm presence only: no characters of the secret are written to the
	# logs (the previous message echoed its first 10 and last 4 characters).
	print(f"✅ Token found ({len(HF_TOKEN)} characters)")

	# Login
	print("\n🔄 Logging in...")
	try:
	    login(token=HF_TOKEN, add_to_git_credential=False)
	except Exception as e:
	    # Top-level boundary: report the reason and stop the app.
	    print(f"❌ Login failed: {e}")
	    sys.exit(1)
	else:
	    print("✅ Logged in successfully!")

	print("="*80)

	# ============================================================================
	# LOAD MODEL
	# ============================================================================

	print("\n" + "="*80)
	print("🚀 LOADING MODEL")
	print("="*80)

	# Step 1: base checkpoint. Any failure is fatal for the app, so report it
	# and exit with a non-zero status.
	print("\n📥 Loading base model...")
	try:
	    base_model = AutoModelForCausalLM.from_pretrained(
	        BASE_MODEL,
	        torch_dtype=torch.bfloat16,
	        device_map="auto",
	        # NOTE(review): this permits code shipped in the model repo to be
	        # executed locally; confirm it is actually required for BASE_MODEL.
	        trust_remote_code=True,
	        token=HF_TOKEN,
	    )
	except Exception as e:
	    print(f"❌ Failed: {e}")
	    sys.exit(1)
	else:
	    print("✅ Base model loaded")

	# Step 2: wrap the base model with the adapter stored in MODEL_REPO.
	print("\n📥 Loading adapter...")
	try:
	    model = PeftModel.from_pretrained(base_model, MODEL_REPO, token=HF_TOKEN)
	except Exception as e:
	    print(f"❌ Failed: {e}")
	    sys.exit(1)
	else:
	    print("✅ Adapter loaded")

	# Step 3: tokenizer. Prefer the one published with the adapter; if that
	# cannot be loaded, fall back to the base model's tokenizer.
	print("\n📥 Loading tokenizer...")
	try:
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, token=HF_TOKEN)
	    print("✅ Tokenizer loaded")
	except Exception as adapter_err:
	    # Surface why the fallback was taken instead of hiding the error.
	    print(f"⚠️ Could not load tokenizer from {MODEL_REPO}: {adapter_err}")
	    try:
	        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
	    except Exception as base_err:
	        # Same fatal handling as the other load steps.
	        print(f"❌ Failed: {base_err}")
	        sys.exit(1)
	    print("✅ Base tokenizer loaded")

	# Switch to evaluation mode; the app only runs generation.
	model.eval()

	print("\n✅ Model ready!")
	print(f" Device: {model.device}")
	print("="*80)

	# ============================================================================
	# CLASSIFICATION FUNCTION
	# ============================================================================

	def create_function_schema():
	    """Return the tool definition offered to the model for classification.

	    The result describes a single function, ``classify_query_domain``,
	    whose parameters are the fields the caller later reads from the model's
	    answer: ``primary_domain``, ``primary_confidence``, ``is_multi_domain``
	    and ``secondary_domains``.

	    Returns:
	        A freshly built ``dict`` on every call, so callers may mutate it
	        without affecting later calls.
	    """
	    properties = {
	        "primary_domain": {"type": "string"},
	        "primary_confidence": {"type": "number"},
	        "is_multi_domain": {"type": "boolean"},
	        "secondary_domains": {"type": "array"},
	    }
	    return {
	        "type": "function",
	        "function": {
	            "name": "classify_query_domain",
	            "description": "Classify query into domains",
	            "parameters": {
	                "type": "object",
	                "properties": properties,
	            },
	        },
	    }

	def classify_query(text):
	    """Classify a query and return formatted results.

	    Uses the module-level ``tokenizer`` and ``model`` to generate a
	    response for ``text``, extracts the JSON object from that response and
	    renders it as two display strings.

	    Args:
	        text: The query entered by the user. ``None``, an empty string or a
	            whitespace-only string returns a warning without running the
	            model.

	    Returns:
	        A ``(primary_output, secondary_output)`` tuple of strings.
	        ``secondary_output`` is empty unless the parsed result marks the
	        query as multi-domain and lists secondary domains.
	    """
	    if not text or not text.strip():
	        return "⚠️ Please enter a query!", ""

	    # Prepare input
	    messages = [
	        {"role": "developer", "content": "You are a model that can do function calling"},
	        {"role": "user", "content": text.strip()},
	    ]
	    inputs = tokenizer.apply_chat_template(
	        messages,
	        tools=[create_function_schema()],
	        add_generation_prompt=True,
	        return_dict=True,
	        return_tensors="pt",
	    ).to(model.device)

	    # Generate (greedy decoding, no gradient tracking)
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=150,
	            do_sample=False,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # Decode only the newly generated tokens, skipping the prompt prefix.
	    prompt_length = inputs["input_ids"].shape[-1]
	    response = tokenizer.decode(
	        outputs[0][prompt_length:],
	        skip_special_tokens=True,
	    )

	    return _format_classification(_parse_classification(response))


	def _parse_classification(response):
	    """Extract the outermost ``{...}`` JSON object from the model response."""
	    fallback = {"primary_domain": "unknown", "primary_confidence": 0.0}
	    start = response.find("{")
	    if start == -1:
	        return fallback
	    end = response.rfind("}") + 1
	    try:
	        # An empty or malformed slice raises and falls through to fallback.
	        result = json.loads(response[start:end])
	    except ValueError:  # json.JSONDecodeError is a ValueError subclass
	        return fallback
	    return result if isinstance(result, dict) else fallback


	def _as_percent(value):
	    """Convert a 0-1 confidence to a percentage; unusable values become 0.0."""
	    try:
	        return float(value) * 100
	    except (TypeError, ValueError):
	        return 0.0


	def _secondary_line(entry):
	    """Render one secondary-domain entry as a bullet line."""
	    if isinstance(entry, dict):
	        domain = entry.get('domain', 'unknown')
	        return f"• {domain}: {_as_percent(entry.get('confidence', 0)):.1f}%\n"
	    # The schema does not constrain array items, so tolerate bare names.
	    return f"• {entry}\n"


	def _format_classification(result):
	    """Build the (primary, secondary) display strings from a parsed result."""
	    primary = result.get('primary_domain', 'unknown')
	    if primary is None:
	        primary = 'unknown'
	    confidence = _as_percent(result.get('primary_confidence', 0))
	    is_multi = result.get('is_multi_domain', False)

	    secondary = result.get('secondary_domains') or []
	    if isinstance(secondary, (str, dict)):
	        # A single entry that was not wrapped in a list.
	        secondary = [secondary]
	    elif not isinstance(secondary, (list, tuple)):
	        secondary = []

	    # Primary domain output
	    primary_output = (
	        f"🎯 Primary Domain: {str(primary).upper()}\n"
	        f"📊 Confidence: {confidence:.1f}%"
	    )

	    # Secondary domain output
	    secondary_output = ""
	    if is_multi and secondary:
	        secondary_output = (
	            "🔀 Multi-Domain Query Detected!\n\n"
	            "Secondary Domains:\n"
	            + "".join(_secondary_line(entry) for entry in secondary)
	        )

	    return primary_output, secondary_output

	# ============================================================================
	# GRADIO INTERFACE
	# ============================================================================

	# Custom CSS for styling; handed to gr.Blocks through its `css` argument.
	# NOTE(review): no component in the visible layout sets `elem_classes` to
	# "primary-box" or "secondary-box", so these rules may currently be unused.
	custom_css = """
	.primary-box {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 20px;
	border-radius: 10px;
	font-size: 18px;
	}
	.secondary-box {
	background: #f0f0f0;
	padding: 15px;
	border-radius: 10px;
	margin-top: 10px;
	}
	"""

	# Sample prompts offered in the UI. gr.Examples receives one row per
	# example, and each row is a one-element list because there is a single
	# input component.
	_EXAMPLE_QUERIES = (
	    "Write a Python function to reverse a linked list",
	    "Build ML model to predict customer churn and create REST API",
	    "What are the symptoms and treatment for diabetes?",
	    "Explain the theory of relativity in simple terms",
	    "Create a business plan for a coffee shop",
	    "Calculate the derivative of x^2 + 3x + 5",
	)
	examples = [[query] for query in _EXAMPLE_QUERIES]

	# Create Gradio interface.
	# Layout: intro text, an input row (textbox, button, examples), a results
	# row with two Markdown panes, then a footer. The multi-line Markdown
	# literals are kept at the file's base indentation so that no extra leading
	# whitespace ends up inside the rendered text.
	with gr.Blocks(css=custom_css, title="FunctionGemma Domain Classifier") as demo:
	    gr.Markdown(
	        """
	# 🎯 FunctionGemma Domain Classifier

	Classify queries into 15+ domains with multi-domain detection.
	Powered by FunctionGemma-270M fine-tuned with LoRA.

	Performance: 95.51% accuracy | 270M parameters | Fast inference
	"""
	    )

	    with gr.Row():
	        with gr.Column():
	            query_input = gr.Textbox(
	                label="Enter Your Query",
	                placeholder="e.g., Write a Python function to sort a list",
	                lines=3,
	            )

	            classify_btn = gr.Button("🔍 Classify", variant="primary", size="lg")

	            gr.Markdown("### 📝 Example Queries")
	            # Selecting an example fills the query textbox.
	            gr.Examples(
	                examples=examples,
	                inputs=query_input,
	                label=None,
	            )

	    with gr.Row():
	        with gr.Column():
	            primary_output = gr.Markdown(label="Classification Result")

	        with gr.Column():
	            secondary_output = gr.Markdown(label="Additional Domains")

	    gr.Markdown(
	        """
	---
	### 📊 Supported Domains

	`coding` • `api_generation` • `mathematics` • `data_analysis` • `science` • `medicine` •
	`business` • `law` • `technology` • `literature` • `creative_content` • `education` •
	`general_knowledge` • `ambiguous` • `sensitive`

	### 🔗 Links
	- [Model on HuggingFace](https://huggingface.co/ovinduG/functiongemma-domain-classifier)
	- [Base Model: FunctionGemma](https://huggingface.co/google/functiongemma-270m-it)

	Made with ❤️ by ovinduG
	"""
	    )

	    # Set up the classification action. Both events call classify_query
	    # with the textbox value and write its two return values to the panes.
	    classify_btn.click(
	        fn=classify_query,
	        inputs=query_input,
	        outputs=[primary_output, secondary_output],
	    )

	    # Also run the classifier when the textbox itself is submitted.
	    query_input.submit(
	        fn=classify_query,
	        inputs=query_input,
	        outputs=[primary_output, secondary_output],
	    )

	# ============================================================================
	# LAUNCH
	# ============================================================================

	# Start the web server only when the file is executed as a script, not
	# when it is imported.
	if __name__ == "__main__":
	    print("\n" + "="*80)
	    print("🌐 LAUNCHING GRADIO INTERFACE")
	    print("="*80)
	    demo.launch(
	        server_name="0.0.0.0",  # listen on all network interfaces
	        server_port=7860,
	        share=False,  # Set to True for temporary public link
	    )