|
|
import functools
import json
import os
import sys

import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoModel
|
|
|
|
|
def format_texts(texts):
    """Wrap each input string in the LionGuard classification prompt.

    Args:
        texts: Iterable of raw input strings.

    Returns:
        list[str]: One prompt-formatted string per input, in order.
    """
    template = "task: classification | query: {}"
    return [template.format(text) for text in texts]
|
|
|
|
|
@functools.cache
def _load_models():
    """Load and cache the classifier and embedding model.

    Loading happens once per process; subsequent ``infer`` calls reuse the
    same objects instead of re-downloading/re-initialising both models on
    every invocation (the original code reloaded both models per call).

    Returns:
        tuple: ``(classifier, embedder)`` — the LionGuard classification
        head and the sentence-embedding model it consumes.
    """
    # trust_remote_code executes code from the model repo — acceptable here
    # only because the repo is the intended, known source.
    classifier = AutoModel.from_pretrained(
        "govtech/lionguard-2-lite", trust_remote_code=True
    )
    embedder = SentenceTransformer("google/embeddinggemma-300m")
    return classifier, embedder


def infer(texts):
    """Run LionGuard moderation scoring over a batch of texts.

    Args:
        texts: Iterable of raw input strings to score.

    Returns:
        Mapping of category name -> per-text score array, as produced by
        the model's ``predict`` method.
    """
    classifier, embedder = _load_models()
    embeddings = embedder.encode(format_texts(texts))
    return classifier.predict(embeddings)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Expect a JSON array of strings as the first CLI argument; fall back
    # to built-in samples when the argument is missing or not valid JSON.
    try:
        batch_text = json.loads(sys.argv[1])
        print("Using provided input texts")
    except (json.JSONDecodeError, IndexError) as e:
        print(f"Error parsing input data: {e}")
        print("Falling back to default sample texts")
        batch_text = ["Eh you damn stupid lah!", "Have a nice day :)"]

    results = infer(batch_text)
    # results maps category -> per-text score sequence; print every
    # category's score for each input text.
    for i, text in enumerate(batch_text):
        print(f"Text: '{text}'")
        for category, scores in results.items():
            print(f"[Text {i + 1}] {category} score: {scores[i]:.4f}")
        print("---------------------------------------------")
|
|
|