import argparse
import codecs as cs
import json
import os
import os.path as osp
import random
import re
import textwrap
from typing import List, Optional, Tuple, Union
import gradio as gr
from hymotion.utils.gradio_runtime import ModelInference
from hymotion.utils.gradio_utils import try_to_download_model, try_to_download_text_encoder
from hymotion.utils.gradio_css import get_placeholder_html, APP_CSS, HEADER_BASE_MD, FOOTER_MD, WITHOUT_PROMPT_ENGINEERING_WARNING
# Import spaces for Hugging Face Zero GPU support
import spaces
# Files that example prompts are loaded from, keyed by data-source name.
DATA_SOURCES = {
    "example_prompts": "examples/example_prompts/example_subset.json",
}

# Examples shown in the gallery; each one is generated on first startup.
# Add or remove entries to control how many examples are displayed.
EXAMPLE_GALLERY_LIST = [
    {
        "prompt": "A person jumps upward with both legs twice.",
        "duration": 4.5,
        "seeds": "792",
        "cfg_scale": 5.0,
        "filename": "jump_twice",
    },
    # Add more examples here as needed:
    {
        "prompt": "A person jumps on their right leg.",
        "duration": 4.5,
        "seeds": "941",
        "cfg_scale": 5.0,
        "filename": "jump_right_leg",
    },
]

# Directory holding the pre-generated gallery examples.
EXAMPLE_GALLERY_OUTPUT_DIR = "examples/pregenerated"
def ensure_examples_generated(model_inference_obj) -> List[str]:
    """Generate every gallery example that does not exist on disk yet.

    For each entry of ``EXAMPLE_GALLERY_LIST`` the function looks for
    ``<filename>_meta.json`` inside ``EXAMPLE_GALLERY_OUTPUT_DIR``. Entries
    whose meta file exists are kept as-is; the others are generated through
    ``model_inference_obj.run_inference``.

    Args:
        model_inference_obj: Object exposing ``run_inference`` with the
            keyword arguments used below.

    Returns:
        The ``filename`` values of the examples that already existed or were
        generated successfully. Examples that fail are logged and left out.
    """
    example_dir = EXAMPLE_GALLERY_OUTPUT_DIR
    os.makedirs(example_dir, exist_ok=True)

    generated_examples = []
    for example in EXAMPLE_GALLERY_LIST:
        example_filename = example["filename"]
        meta_path = os.path.join(example_dir, f"{example_filename}_meta.json")

        # Check if already generated
        if os.path.exists(meta_path):
            print(f">>> Example already exists: {meta_path}")
            generated_examples.append(example_filename)
            continue

        # Generate the example
        print(f">>> Generating example motion: {example['prompt']}")
        try:
            # Force CPU device for example generation at startup.
            # This is necessary for Hugging Face Zero GPU environment where GPU
            # is only available inside @spaces.GPU decorated functions.
            # The results are not used here; the unpacking is kept so that an
            # unexpected return shape is still reported as a failure.
            _html_content, _fbx_files = model_inference_obj.run_inference(
                text=example["prompt"],
                seeds_csv=example["seeds"],
                motion_duration=example["duration"],
                cfg_scale=example["cfg_scale"],
                output_format="dict",  # Don't generate FBX for example
                original_text=example["prompt"],
                output_dir=example_dir,
                output_filename=example_filename,
                device="cpu",  # Force CPU for startup example generation
            )
        except Exception as e:
            # Best effort: one failing example must not prevent startup.
            print(f">>> Failed to generate example '{example_filename}': {e}")
        else:
            print(f">>> Example '{example_filename}' generated successfully!")
            generated_examples.append(example_filename)
    return generated_examples
def load_example_gallery_html(example_index: int = 0) -> str:
    """Return iframe HTML that displays one pre-generated gallery example.

    Args:
        example_index: Index of the example in ``EXAMPLE_GALLERY_LIST``.

    Returns:
        An ``<iframe>`` snippet embedding the example viewer. A short notice
        is returned when the example's meta file does not exist yet, and an
        empty string when the index is out of range or rendering fails.
    """
    import html

    from hymotion.utils.visualize_mesh_web import generate_static_html_content

    if example_index < 0 or example_index >= len(EXAMPLE_GALLERY_LIST):
        return ""

    example_dir = EXAMPLE_GALLERY_OUTPUT_DIR
    example_filename = EXAMPLE_GALLERY_LIST[example_index]["filename"]
    meta_path = os.path.join(example_dir, f"{example_filename}_meta.json")
    if not os.path.exists(meta_path):
        return """
Example not generated yet. Please restart the app.
"""

    try:
        html_content = generate_static_html_content(
            folder_name=example_dir,
            file_name=example_filename,
            hide_captions=False,
        )
        # The document is embedded in an attribute value, so it must be
        # entity-escaped; the browser decodes it back before parsing srcdoc.
        escaped_html = html.escape(html_content, quote=True)
        # NOTE(review): the iframe markup was missing from this copy of the
        # file. This is a minimal reconstruction - confirm the sizing and
        # styling attributes against the intended layout.
        iframe_html = f"""
<iframe srcdoc="{escaped_html}" width="100%" height="400" style="border: none;"></iframe>
"""
        return iframe_html
    except Exception as e:
        print(f">>> Failed to load example gallery: {e}")
        return ""
def get_example_gallery_grid_html() -> str:
    """Build the HTML grid that shows every example of the gallery.

    Each cell holds the (possibly shortened) prompt and the iframe returned
    by ``load_example_gallery_html`` for that example.

    Returns:
        The grid HTML, or a short notice when ``EXAMPLE_GALLERY_LIST`` is
        empty.
    """
    if not EXAMPLE_GALLERY_LIST:
        return "<p>No examples configured.</p>"

    # Calculate grid columns based on number of examples:
    # 1 -> 1 column, 2..4 -> 2 columns, 5 or more -> 3 columns.
    num_examples = len(EXAMPLE_GALLERY_LIST)
    if num_examples == 1:
        columns = 1
    elif num_examples <= 4:
        columns = 2
    else:
        columns = 3

    # NOTE(review): the HTML wrappers below were missing from this copy of
    # the file (only the interpolated values survived). They are a minimal
    # reconstruction that puts ``columns`` to use - confirm the styling.
    grid_items = []
    for idx, example in enumerate(EXAMPLE_GALLERY_LIST):
        iframe_html = load_example_gallery_html(idx)
        prompt = example["prompt"]
        prompt_short = prompt[:60] + "..." if len(prompt) > 60 else prompt
        grid_items.append(f"""
<div class="example-gallery-item">
<p>{prompt_short}</p>
{iframe_html}
</div>
""")

    items_html = "".join(grid_items)
    grid_html = f"""
<div style="display: grid; grid-template-columns: repeat({columns}, 1fr); gap: 16px;">
{items_html}
</div>
"""
    return grid_html
def load_examples_from_txt(txt_path: str, example_record_fps=20, max_duration=12) -> List[Tuple[str, float]]:
    """Load ``(text, duration_in_seconds)`` examples from a file.

    Two file types are recognised by extension:

    * ``.txt``: one example per line.
    * ``.json``: an object mapping keys to lists of example lines. Keys
      containing ``_raw_chn`` or ``GENERATE_PROMPT_FORMAT`` are ignored.

    An example line is either ``<text>`` (duration defaults to 5.0 seconds)
    or ``<text>#<frame_count>``. Blank lines and lines starting with ``#``
    are ignored. A line whose frame count is not an integer is skipped with
    a log message instead of aborting the rest of the file.

    Args:
        txt_path: Path of the examples file.
        example_record_fps: Frame rate used to convert a frame count to
            seconds.
        max_duration: Upper bound, in seconds, for durations derived from a
            frame count.

    Returns:
        The examples that could be loaded. The list is empty when the file
        is missing and may be partial when reading fails midway; failures
        are logged, never raised.
    """

    def _parse_line(line: str) -> Optional[Tuple[str, float]]:
        line = line.strip()
        if not line or line.startswith("#"):
            return None
        parts = line.split("#")
        if len(parts) < 2:
            return line, 5.0
        try:
            frame_count = int(parts[1])
        except ValueError:
            print(f">>> Skipping example with invalid frame count: {line!r}")
            return None
        return parts[0].strip(), min(frame_count / example_record_fps, max_duration)

    examples: List[Tuple[str, float]] = []
    if not os.path.exists(txt_path):
        print(f">>> Examples file not found: {txt_path}")
        return examples

    try:
        if txt_path.endswith(".txt"):
            with open(txt_path, "r", encoding="utf-8") as f:
                for line in f:
                    parsed = _parse_line(line)
                    if parsed is not None:
                        examples.append(parsed)
        elif txt_path.endswith(".json"):
            with open(txt_path, "r", encoding="utf-8") as f:
                groups = json.load(f)
            for key, value in groups.items():
                if "_raw_chn" in key or "GENERATE_PROMPT_FORMAT" in key:
                    continue
                for line in value:
                    parsed = _parse_line(line)
                    if parsed is not None:
                        examples.append(parsed)
        print(f">>> Loaded {len(examples)} examples from {txt_path}")
    except Exception as e:
        # Best effort: keep whatever was parsed before the failure.
        print(f">>> Failed to load examples from {txt_path}: {e}")
    return examples
@spaces.GPU(duration=120)  # Request GPU for up to 120 seconds per inference
def generate_motion_func(
    # text input
    original_text: str,
    rewritten_text: str,
    # model input
    seed_input: str,
    motion_duration: float,
    cfg_scale: float,
) -> Tuple[str, List[str]]:
    """Run motion generation for the UI and return the result as HTML.

    The rewritten text is used when the module-level
    ``USE_PROMPT_ENGINEERING`` flag is set and the rewritten text is not
    blank; otherwise the original text is used.

    Args:
        original_text: Text typed by the user.
        rewritten_text: Text produced by the rewrite step (may be empty).
        seed_input: Seeds string forwarded as ``seeds_csv``.
        motion_duration: Forwarded as ``motion_duration``.
        cfg_scale: Forwarded as ``cfg_scale``.

    Returns:
        ``(html, fbx_files)``. On success ``html`` is an iframe embedding the
        generated viewer page. On failure ``html`` is an error message and
        ``fbx_files`` is empty; errors are reported, never raised.
    """
    import html

    use_prompt_engineering = USE_PROMPT_ENGINEERING
    output_dir = "output/gradio"

    # Guard against a None value so a missing textbox value is treated as empty.
    original_stripped = (original_text or "").strip()
    rewritten_stripped = (rewritten_text or "").strip()

    # Determine which text to use: prefer rewritten_text, fallback to original_text
    if use_prompt_engineering and rewritten_stripped:
        text_to_use = rewritten_stripped
    elif original_stripped:
        text_to_use = original_stripped
    else:
        # Both are empty
        return "Error: Input text is empty, please enter text first", []

    try:
        # FBX export is only requested when the module-level inference object
        # reports it as available.
        fbx_ok = model_inference.fbx_available
        req_format = "fbx" if fbx_ok else "dict"
        html_content, fbx_files = model_inference.run_inference(
            text=text_to_use,
            seeds_csv=seed_input,
            motion_duration=motion_duration,
            cfg_scale=cfg_scale,
            output_format=req_format,
            original_text=original_text,
            output_dir=output_dir,
        )
        print("Running inference...after gpu_inference_wrapper")
        # The page is embedded in the srcdoc attribute, so it must be
        # entity-escaped; the browser decodes it back before parsing it.
        escaped_html = html.escape(html_content, quote=True)
        # NOTE(review): the iframe markup was missing from this copy of the
        # file. This is a minimal reconstruction - confirm the sizing and
        # styling attributes against the intended layout.
        iframe_html = f"""
<iframe srcdoc="{escaped_html}" width="100%" height="750" style="border: none;"></iframe>
"""
        return iframe_html, fbx_files
    except Exception as e:
        print(f"\t>>> Motion generation failed: {e}")
        return (
            f"❌ Motion generation failed: {str(e)}\n\nPlease check the input parameters or try again later",
            [],
        )
class T2MGradioUI:
    """Gradio front end of the text-to-motion demo.

    The class builds the widgets (``build_ui``) and wires their events
    (``_bind_events``). Generation itself is delegated to the module-level
    ``generate_motion_func``; optional text rewriting is delegated to a
    ``PromptEngineeringClient``.
    """

    def __init__(self, args):
        """Set up the optional rewrite client and load the example prompts.

        Args:
            args: Object with ``output_dir`` and ``use_prompt_engineering``
                attributes.
        """
        # Declared up front: the flag is reset below if the client fails.
        global USE_PROMPT_ENGINEERING

        self.output_dir = args.output_dir
        print(f"[{self.__class__.__name__}] output_dir: {self.output_dir}")

        # Always defined, so the attribute exists even when rewriting is off.
        self.prompt_engineering_client = None
        self.prompt_engineering_available = args.use_prompt_engineering
        if self.prompt_engineering_available:
            try:
                from hymotion.prompt_engineering.client import PromptEngineeringClient

                self.prompt_engineering_client = PromptEngineeringClient()
                # Test the client with a simple prompt to verify it works
                self.prompt_engineering_client.rewrite_prompt_and_infer_time("A person walks forward.", max_timeout=30)
                print(f"[{self.__class__.__name__}] Prompt engineering client initialized successfully.")
            except Exception as e:
                print(f"[{self.__class__.__name__}] Prompt engineering client initialization failed: {e}")
                self.prompt_engineering_client = None
                self.prompt_engineering_available = False
                # IMPORTANT: Update global variable so generate_motion_func uses correct behavior
                USE_PROMPT_ENGINEERING = False
                print(f"[{self.__class__.__name__}] USE_PROMPT_ENGINEERING set to False due to initialization failure")

        self.all_example_data = {}
        self._init_example_data()

    def _init_example_data(self):
        """Fill ``all_example_data`` from ``DATA_SOURCES``, with a built-in fallback."""
        for source_name, file_path in DATA_SOURCES.items():
            examples = load_examples_from_txt(file_path)
            if examples:
                self.all_example_data[source_name] = examples
            else:
                # provide default examples as fallback
                self.all_example_data[source_name] = [
                    ("Twist at the waist and punch across the body.", 3.0),
                    ("A person is running then takes big leap.", 3.0),
                    ("A person holds a railing and walks down a set of stairs.", 5.0),
                    (
                        "A man performs a fluid and rhythmic hip-hop style dance, incorporating body waves, arm gestures, and side steps.",
                        5.0,
                    ),
                ]
        print(f">>> Loaded data sources: {list(self.all_example_data.keys())}")

    def _get_header_text(self):
        """Return the Markdown shown in the page header."""
        return HEADER_BASE_MD

    def _generate_random_seeds(self):
        """Return four random seeds in ``[0, 999]`` as a comma-separated string."""
        seeds = [random.randint(0, 999) for _ in range(4)]
        return ",".join(map(str, seeds))

    def _prompt_engineering(self, text: str, duration: float):
        """Rewrite ``text`` with the prompt-engineering client.

        Args:
            text: Text typed by the user.
            duration: Current slider value (not used by the rewrite).

        Returns:
            Updates for (rewritten text, generate button, duration slider,
            status). When rewriting fails the original text is used and the
            generate button is enabled so the user can still proceed.
        """
        if not text.strip():
            return "", gr.update(interactive=False), gr.update(), "⚠️ Please enter text first"

        print("\t>>> Using LLM to estimate duration/rewrite text...")
        try:
            predicted_duration, rewritten_text = self.prompt_engineering_client.rewrite_prompt_and_infer_time(text=text)
        except Exception as e:
            print(f"\t>>> Text rewriting/duration prediction failed: {e}")
            # On failure, use original text and enable generate button
            return (
                text,  # Use original text as fallback
                gr.update(interactive=True),  # Enable generate button
                gr.update(),
                f"⚠️ Text rewriting failed: {str(e)}\n💡 Using your original input directly. You can click [🚀 Generate Motion] to continue.",
            )
        return (
            rewritten_text,
            gr.update(interactive=True),
            gr.update(value=predicted_duration),
            "✅ Text rewriting completed! Please check and edit the rewritten text, then click [🚀 Generate Motion]",
        )

    @staticmethod
    def _example_display_text(text):
        """Return the dropdown label of an example: at most 50 characters plus ``...``."""
        return f"{text[:50]}..." if len(text) > 50 else text

    def _get_example_choices(self):
        """Get all example choices from all data sources"""
        choices = ["Custom Input"]
        for example_data in self.all_example_data.values():
            choices.extend(self._example_display_text(text) for text, _ in example_data)
        return choices

    def _cleared_selection_outputs(self, seeds, message):
        """Return the six output values used when no example ends up selected.

        Order: text input, seeds, duration slider, rewritten text, generate
        button, status. Without prompt engineering the last three are left
        untouched.
        """
        if self.prompt_engineering_available:
            return (
                "",
                seeds,
                gr.update(),
                gr.update(value="", visible=False),
                gr.update(interactive=False),
                message,
            )
        return ("", seeds, gr.update(), gr.update(), gr.update(), gr.update())

    def _filled_selection_outputs(self, text, seeds, duration):
        """Return the six output values used when an example is selected.

        Same order as ``_cleared_selection_outputs``. With prompt engineering
        the example text is also put into the rewritten-text box, so the
        generate button can be enabled without a rewrite.
        """
        if self.prompt_engineering_available:
            # Set text directly to rewritten_text and enable generate button
            return (
                text,
                seeds,
                gr.update(value=duration),
                gr.update(value=text, visible=True),
                gr.update(interactive=True),
                "✅ Example selected! Click [🚀 Generate Motion] to start.",
            )
        return (text, seeds, gr.update(value=duration), gr.update(), gr.update(), gr.update())

    def _on_example_select(self, selected_example):
        """Callback of the example dropdown: fill the inputs from the chosen example."""
        if selected_example != "Custom Input":
            # find the corresponding example from all data sources
            for example_data in self.all_example_data.values():
                for text, duration in example_data:
                    if self._example_display_text(text) == selected_example:
                        return self._filled_selection_outputs(text, self._generate_random_seeds(), duration)
        # "Custom Input", or a label that matches no example.
        return self._cleared_selection_outputs(
            self._generate_random_seeds(), "Please enter text or select an example"
        )

    def build_ui(self):
        """Create all widgets, bind their events and return the ``gr.Blocks`` app."""
        # NOTE(review): the nesting of the layout containers below was
        # reconstructed from the statement order - confirm the placement of
        # the example dropdown and of the gallery section.
        with gr.Blocks(css=APP_CSS) as demo:
            # Create State components for non-UI values that need to be passed to event handlers
            self.use_prompt_engineering_state = gr.State(self.prompt_engineering_available)
            self.output_dir_state = gr.State(self.output_dir)

            self.header_md = gr.Markdown(HEADER_BASE_MD, elem_classes=["main-header"])

            with gr.Row():
                # Left control panel
                with gr.Column(scale=2, elem_classes=["left-panel"]):
                    # Input textbox
                    if self.prompt_engineering_available:
                        input_place_holder = "Enter text to generate motion, support Chinese and English text input. Non-humanoid Characters, Multi-person Interactions and Environment & Camera are not supported. Click [ 📚 Example Prompts ] to see more examples."
                    else:
                        input_place_holder = "Enter English text to generate motion, please use `A person ...` format to describe the motion, better less than 50 words. Non-humanoid Characters, Multi-person Interactions and Environment & Camera are not supported. Click [ 📚 Example Prompts ] to see more examples."
                    self.text_input = gr.Textbox(
                        label="📝 Input Text",
                        placeholder=input_place_holder,
                        lines=3,
                        max_lines=10,
                        autoscroll=False,
                    )

                    # Rewritten textbox
                    self.rewritten_text = gr.Textbox(
                        label="✏️ Rewritten Text",
                        placeholder="Rewritten text will be displayed here, you can further edit",
                        interactive=True,
                        visible=False,
                    )

                    # Duration slider
                    self.duration_slider = gr.Slider(
                        minimum=0.5,
                        maximum=12,
                        value=5.0,
                        step=0.1,
                        label="⏱️ Action Duration (seconds)",
                        info="Feel free to adjust the action duration",
                    )

                    # Execute buttons
                    with gr.Row():
                        if self.prompt_engineering_available:
                            self.rewrite_btn = gr.Button(
                                "🔄 Rewrite Text",
                                variant="secondary",
                                size="lg",
                                elem_classes=["rewrite-button"],
                            )
                        else:
                            # Create a hidden/disabled placeholder button
                            self.rewrite_btn = gr.Button(
                                "🔄 Rewrite Text (Unavailable)",
                                variant="secondary",
                                size="lg",
                                elem_classes=["rewrite-button"],
                                interactive=False,
                                visible=False,
                            )
                        self.generate_btn = gr.Button(
                            "🚀 Generate Motion",
                            variant="primary",
                            size="lg",
                            elem_classes=["generate-button"],
                            interactive=not self.prompt_engineering_available,  # Enable directly if rewrite not available
                        )

                    # Example selection dropdown
                    self.example_dropdown = gr.Dropdown(
                        choices=self._get_example_choices(),
                        value="Custom Input",
                        label="📚 Example Prompts",
                        interactive=True,
                    )

                    # Advanced settings
                    with gr.Accordion("🔧 Advanced Settings", open=False):
                        self._build_advanced_settings()

                    # Status message depends on whether rewrite is available
                    if self.prompt_engineering_available:
                        status_msg = "Please click the [🔄 Rewrite Text] button to rewrite the text first"
                    else:
                        status_msg = "Enter your text and click [🚀 Generate Motion] directly."
                    self.status_output = gr.Textbox(
                        label="📊 Status Information",
                        value=status_msg,
                        lines=1,
                        max_lines=10,
                        elem_classes=["status-textbox"],
                    )

                    # FBX Download section
                    with gr.Row(visible=False) as self.fbx_download_row:
                        if model_inference.fbx_available:
                            self.fbx_files = gr.File(
                                label="📦 Download FBX Files",
                                file_count="multiple",
                                interactive=False,
                            )
                        else:
                            # Placeholder so the event wiring has an output component.
                            self.fbx_files = gr.State([])

                # Right display area
                with gr.Column(scale=3):
                    self.output_display = gr.HTML(
                        value=get_placeholder_html(), show_label=False, elem_classes=["flask-display"]
                    )

            # Example Gallery Section
            with gr.Accordion("🎬 Example Gallery", open=True):
                self.example_gallery_display = gr.HTML(
                    value=get_example_gallery_grid_html(),
                    show_label=False,
                    elem_classes=["example-gallery-display"],
                )
                # Create use example buttons for each example
                with gr.Row():
                    self.use_example_btns = []
                    for idx, _example in enumerate(EXAMPLE_GALLERY_LIST):
                        btn = gr.Button(
                            f"📋 Use Example {idx + 1}",
                            variant="secondary",
                            size="sm",
                        )
                        self.use_example_btns.append((btn, idx))

            # Footer
            gr.Markdown(FOOTER_MD, elem_classes=["footer"])

            self._bind_events()
            demo.load(fn=self._get_header_text, outputs=[self.header_md])
        return demo

    def _build_advanced_settings(self):
        """Create the seed textbox, the random-seed button and the CFG slider."""
        with gr.Row():
            self.seed_input = gr.Textbox(
                label="🎯 Random Seeds",
                value="0,1,2,3",
                placeholder="e.g.: 0,1,2,3",
                scale=3,
            )
            self.dice_btn = gr.Button(
                "🎲",
                variant="secondary",
                size="sm",
                scale=1,
                min_width=50,
            )
        self.cfg_slider = gr.Slider(
            minimum=1,
            maximum=10,
            value=5.0,
            step=0.1,
            label="⚙️ CFG Strength",
        )

    def _on_use_example(self, example_idx: int):
        """When clicking 'Use This Example' button, fill in the example prompt"""
        if example_idx < 0 or example_idx >= len(EXAMPLE_GALLERY_LIST):
            return self._cleared_selection_outputs("0,1,2,3", "Please select a valid example")
        example = EXAMPLE_GALLERY_LIST[example_idx]
        return self._filled_selection_outputs(example["prompt"], example["seeds"], example["duration"])

    @staticmethod
    def _generation_status(display_html, fbx_list):
        """Return the status line to show after a generation attempt.

        ``generate_motion_func`` reports failures by returning a message that
        starts with ``Error:`` or ``❌`` instead of viewer HTML; such a
        message is passed through so a failure is not reported as a success.
        """
        if isinstance(display_html, str) and display_html.startswith(("Error:", "❌")):
            return display_html
        if fbx_list:
            return "🎉 Motion generation completed! You can view the motion visualization result on the right. FBX files are ready for download."
        return "🎉 Motion generation completed! You can view the motion visualization result on the right"

    def _bind_events(self):
        """Wire every widget event to its handler."""
        selection_outputs = [
            self.text_input,
            self.seed_input,
            self.duration_slider,
            self.rewritten_text,
            self.generate_btn,
            self.status_output,
        ]

        # Generate random seeds
        self.dice_btn.click(self._generate_random_seeds, outputs=[self.seed_input])

        # Use example buttons - bind each button to its example.
        # ``i=idx`` freezes the index per button (avoids late binding).
        for btn, idx in self.use_example_btns:
            btn.click(
                fn=lambda i=idx: self._on_use_example(i),
                outputs=list(selection_outputs),
            )

        # Bind example selection event
        self.example_dropdown.change(
            fn=self._on_example_select,
            inputs=[self.example_dropdown],
            outputs=list(selection_outputs),
        )

        # Rewrite text logic (only bind when rewrite is available)
        if self.prompt_engineering_available:
            self.rewrite_btn.click(fn=lambda: "Rewriting text, please wait...", outputs=[self.status_output]).then(
                self._prompt_engineering,
                inputs=[
                    self.text_input,
                    self.duration_slider,
                ],
                outputs=[self.rewritten_text, self.generate_btn, self.duration_slider, self.status_output],
            ).then(
                fn=lambda: gr.update(visible=True),
                outputs=[self.rewritten_text],
            )

        # Generate motion logic: status notice -> generation -> final status.
        self.generate_btn.click(
            fn=lambda: "Generating motion, please wait... (It takes some extra time for the first generation)",
            outputs=[self.status_output],
        ).then(
            generate_motion_func,
            inputs=[self.text_input, self.rewritten_text, self.seed_input, self.duration_slider, self.cfg_slider],
            outputs=[self.output_display, self.fbx_files],
        ).then(
            fn=lambda display_html, fbx_list: (
                self._generation_status(display_html, fbx_list),
                gr.update(visible=bool(fbx_list)),
            ),
            inputs=[self.output_display, self.fbx_files],
            outputs=[self.status_output, self.fbx_download_row],
        )

        # Reset logic - different behavior based on rewrite availability
        if self.prompt_engineering_available:
            # When text_input changes:
            # - If text_input == rewritten_text, it means the change was triggered by example selection,
            #   so we should NOT hide the rewritten_text (keep it visible and generate button enabled)
            # - If text_input != rewritten_text, it means user manually edited the input,
            #   so we should hide the rewritten_text and require a new rewrite
            self.text_input.change(
                fn=lambda text, rewritten: (
                    gr.update() if text.strip() == rewritten.strip() else gr.update(visible=False),
                    gr.update() if text.strip() == rewritten.strip() else gr.update(interactive=False),
                    (
                        "✅ Example selected! Click [🚀 Generate Motion] to start."
                        if text.strip() == rewritten.strip() and text.strip()
                        else "Please click the [🔄 Rewrite Text] button to rewrite the text first"
                    ),
                ),
                inputs=[self.text_input, self.rewritten_text],
                outputs=[self.rewritten_text, self.generate_btn, self.status_output],
            )
        else:
            # When rewrite is not available, enable generate button directly when text is entered
            self.text_input.change(
                fn=lambda text: (
                    gr.update(visible=False),
                    gr.update(interactive=bool(text.strip())),
                    (
                        "Ready to generate! Click [🚀 Generate Motion] to start."
                        if text.strip()
                        else "Enter your text and click [🚀 Generate Motion] directly."
                    ),
                ),
                inputs=[self.text_input],
                outputs=[self.rewritten_text, self.generate_btn, self.status_output],
            )

        # Only bind rewritten_text change when rewrite is available
        if self.prompt_engineering_available:
            self.rewritten_text.change(
                fn=lambda text: (
                    gr.update(interactive=bool(text.strip())),
                    (
                        "Rewritten text has been modified, you can click [🚀 Generate Motion]"
                        if text.strip()
                        else "Rewritten text cannot be empty, please enter valid text"
                    ),
                ),
                inputs=[self.rewritten_text],
                outputs=[self.generate_btn, self.status_output],
            )
def create_demo(final_model_path):
    """Create the Gradio demo with Zero GPU support.

    Args:
        final_model_path: Directory of the model; it must contain
            ``config.yml``.

    Returns:
        The ``gr.Blocks`` application built by ``T2MGradioUI``.

    Raises:
        FileNotFoundError: If ``config.yml`` is missing from
            ``final_model_path``.
    """
    # Check the required configuration first so a wrong path fails fast,
    # before anything else is created. Only the configuration is validated
    # here; the checkpoint file is not checked by this function.
    cfg = osp.join(final_model_path, "config.yml")
    if not osp.exists(cfg):
        raise FileNotFoundError(f">>> Configuration file not found: {cfg}")

    args = argparse.Namespace(
        model_path=final_model_path,
        output_dir="output/gradio",
        use_prompt_engineering=USE_PROMPT_ENGINEERING,
        use_text_encoder=True,
    )

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # The UI does not load the model itself; inference goes through the
    # module-level objects used by the event handlers.
    ui = T2MGradioUI(args=args)
    return ui.build_ui()
# Script entry point: download the models, pre-generate the gallery examples
# and launch the demo.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="HY-Motion-1.0 Gradio App")
    parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
    args = parser.parse_args()

    # Module-level flag read by create_demo and generate_motion_func.
    # T2MGradioUI.__init__ resets it to False if the rewrite client fails.
    USE_PROMPT_ENGINEERING = True

    # Pre-download text encoder models first (without loading)
    try_to_download_text_encoder()
    # Then download the main model
    final_model_path = try_to_download_model()

    # Module-level inference object used by generate_motion_func and build_ui.
    model_inference = ModelInference(
        final_model_path,
        use_prompt_engineering=False,
        use_text_encoder=True,
    )
    model_inference.initialize_model(device="cpu")

    # Generate examples on first startup (if not exists)
    ensure_examples_generated(model_inference)

    demo = create_demo(final_model_path)
    demo.launch(server_name="0.0.0.0", server_port=args.port)