# Hugging Face Space: Chaplin — live visual speech recognition.
# NOTE(review): the Space status page showed "Runtime error" at capture time;
# see the gr.Image(...) call below for the likely cause (Gradio 4.x API change).
# Standard library
import os
import time

# Third-party
import cv2
import gradio as gr
import torch
from huggingface_hub import hf_hub_download

# Local
from pipelines.pipeline import InferencePipeline
class ChaplinGradio:
    """Live visual speech recognition (lipreading) backend for the Gradio app.

    On construction this downloads the LRS3 VSR model and subword language
    model from the HuggingFace Hub, then loads them through the project's
    ``InferencePipeline``.  ``process_frame`` is the streaming callback:
    it rate-limits incoming webcam frames, JPEG-compresses them to reduce
    bandwidth/compute, and runs lipreading inference on the result.
    """

    def __init__(self):
        # Prefer GPU when available; the pipeline is device-agnostic.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.vsr_model = None
        self.download_models()
        self.load_models()

        # Video params
        self.fps = 16
        self.frame_interval = 1 / self.fps  # minimum seconds between processed frames
        self.frame_compression = 25  # JPEG quality (0-100); low value = heavy compression
        self.last_frame_time = time.time()

    def download_models(self):
        """Download required model files from HuggingFace."""
        vsr_dir = "benchmarks/LRS3/models/LRS3_V_WER19.1"
        lm_dir = "benchmarks/LRS3/language_models/lm_en_subword"

        # Each repo ships a weights file and a config file; fetch both into
        # the directory layout the LRS3 config expects.
        for repo_id, local_dir in (
            ("willwade/LRS3_V_WER19.1", vsr_dir),
            ("willwade/lm_en_subword", lm_dir),
        ):
            os.makedirs(local_dir, exist_ok=True)
            for filename in ("model.pth", "model.json"):
                hf_hub_download(repo_id=repo_id,
                                filename=filename,
                                local_dir=local_dir)
        print("Models downloaded successfully!")

    def load_models(self):
        """Load models using the InferencePipeline with LRS3 config"""
        config_path = "configs/LRS3_V_WER19.1.ini"
        self.vsr_model = InferencePipeline(
            config_path,
            device=self.device,
            detector="mediapipe",
            face_track=True
        )
        print("Model loaded successfully!")

    def process_frame(self, frame):
        """Process a single frame with rate limiting and compression.

        Returns the predicted text, ``None`` when the frame is dropped by the
        rate limiter, or a short status string on missing/invalid input.
        """
        # BUGFIX: check for missing input BEFORE touching the rate limiter;
        # previously an empty frame still advanced last_frame_time and could
        # starve real frames of their processing slot.
        if frame is None:
            return "No video input detected"

        current_time = time.time()
        if current_time - self.last_frame_time < self.frame_interval:
            return None  # dropped: arrived faster than self.fps allows
        self.last_frame_time = current_time

        # Compress frame (lossy JPEG round-trip, decoded to grayscale).
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), self.frame_compression]
        success, buffer = cv2.imencode('.jpg', frame, encode_param)
        if not success:
            # BUGFIX: previously the success flag was ignored and a failed
            # encode would hand an invalid buffer to imdecode.
            return "Frame encoding failed"
        compressed_frame = cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE)

        # Run inference using the VSR model
        predicted_text = self.vsr_model.process_frame(compressed_frame)
        return predicted_text
# Create Gradio interface (module level so `gradio app.py` / Spaces can find it)
chaplin = ChaplinGradio()

iface = gr.Interface(
    fn=chaplin.process_frame,
    # BUGFIX: Gradio 4.x selects the webcam via the `sources` list; the old
    # `source="webcam"` keyword was removed and raises TypeError at startup —
    # the most likely cause of the Space's "Runtime error".
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs=gr.Textbox(label="Predicted Text"),
    title="Chaplin - Live Visual Speech Recognition",
    description="Use your webcam to perform real-time visual speech recognition.",
    live=True,
)

if __name__ == "__main__":
    iface.launch()