| | """
|
| | T5-Gemma-2 Fine-Tuning Space for Custom AI Specialists
|
| | ========================================================
|
| | This Space fine-tunes google/t5gemma-2-270m-270m on custom training data.
|
| | Uses HuggingFace's free T4 GPU for training.
|
| |
|
| | Specialist Models we can train:
|
| | 1. UE5 Code Expert - Unreal Engine Python scripting
|
| | 2. 3D Asset Expert - How to use 3D generation tools
|
| | 3. Texture Expert - PBR materials, UV mapping guidance
|
| | 4. Animation Expert - Rigging, animation workflows
|
| | """
|
| |
|
| | import gradio as gr
|
| | import json
|
| | import os
|
| | from pathlib import Path
|
| |
|
| |
|
def load_training_libs():
    """Import the heavy ML dependencies on first use.

    Deferring these imports keeps Gradio UI startup fast; the imported
    names are published as module globals so the training and inference
    functions can reference them directly.
    """
    global torch, AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, Dataset

    import torch
    from datasets import Dataset
    from transformers import (
        AutoTokenizer,
        AutoModelForSeq2SeqLM,
        Trainer,
        TrainingArguments,
    )
    return True
|
| |
|
| |
|
# Base checkpoint to fine-tune; 270M params is small enough for a free T4 GPU.
MODEL_ID = "google/t5gemma-2-270m-270m"
# Local directory where the trained model and tokenizer are saved.
OUTPUT_DIR = "./fine-tuned-model"
|
| |
|
| |
|
# Bundled example training pairs (UE5 Python Q&A) loaded by the
# "Load Demo Data" button; each dict matches the {"input", "output"}
# JSONL schema that prepare_dataset() expects.
DEMO_DATA = [
    {
        "input": "How do I get the current level name in UE5 Python?",
        "output": """import unreal
world = unreal.EditorLevelLibrary.get_editor_world()
level_name = world.get_name()
print(f"Current level: {level_name}")"""
    },
    {
        "input": "How do I spawn an actor at a location in UE5?",
        "output": """import unreal
# Define spawn location
location = unreal.Vector(0, 0, 100)
rotation = unreal.Rotator(0, 0, 0)

# Get the actor class (e.g., StaticMeshActor)
actor_class = unreal.StaticMeshActor

# Spawn the actor
actor = unreal.EditorLevelLibrary.spawn_actor_from_class(
    actor_class, location, rotation
)"""
    }
]
|
| |
|
| |
|
def prepare_dataset(training_data_text: str) -> dict:
    """Parse JSONL text into a list of training examples.

    Each non-blank line must be a JSON object with both an 'input' and
    an 'output' key; malformed or incomplete lines are skipped silently.

    Returns a dict with the example count, the first example (or None
    when nothing parsed), and the full list of examples.
    """
    examples = []

    for raw_line in training_data_text.strip().split('\n'):
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if 'input' in record and 'output' in record:
            examples.append(record)

    first = examples[0] if examples else None
    return {"count": len(examples), "sample": first, "data": examples}
|
| |
|
| |
|
def validate_data(training_data: str) -> str:
    """Check the pasted JSONL and return a Markdown validation report."""
    parsed = prepare_dataset(training_data)
    count = parsed["count"]

    # Nothing parsed at all -> tell the user how the format should look.
    if not count:
        return ("โ No valid training examples found. Ensure each line is "
                "valid JSON with 'input' and 'output' fields.")

    sample_json = json.dumps(parsed["sample"], indent=2)
    return f"""โ
**Validation Successful!**

**Examples found:** {count}

**Sample Entry:**
```json
{sample_json}
```

Ready to start training!"""
|
| |
|
| |
|
def start_training(
    training_data: str,
    model_name: str,
    num_epochs: int,
    batch_size: int,
    learning_rate: float,
    progress=gr.Progress()
) -> str:
    """Fine-tune T5-Gemma-2 on the provided training data.

    Args:
        training_data: JSONL text, one {"input": ..., "output": ...}
            object per line (same format validate_data() checks).
        model_name: Label used only in the completion report.
        num_epochs: Number of training epochs.
        batch_size: Per-device train batch size.
        learning_rate: Optimizer learning rate.
        progress: Gradio progress callback (injected by the UI).

    Returns:
        A Markdown status string — a success report, or an error message
        when loading/parsing/training fails.

    NOTE(review): the status-prefix glyphs in the user-facing strings look
    mojibake-garbled (likely originally emoji such as check/cross marks);
    they are kept byte-identical here — confirm file encoding upstream.
    """

    progress(0, desc="Loading training libraries...")
    try:
        load_training_libs()
    except Exception as e:
        return f"โ Failed to load training libraries: {e}"

    progress(0.1, desc="Parsing training data...")
    dataset_info = prepare_dataset(training_data)

    # Too few examples to produce anything meaningful.
    if dataset_info["count"] < 5:
        return "โ Need at least 5 training examples to fine-tune."

    progress(0.2, desc=f"Loading {MODEL_ID}...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
    except Exception as e:
        return f"โ Failed to load model: {e}"

    progress(0.3, desc="Tokenizing dataset...")

    def tokenize_function(examples):
        # Prefix inputs the same way test_model() does at inference time,
        # so training and inference prompts match.
        inputs = [f"Question: {q}" for q in examples["input"]]
        targets = examples["output"]

        model_inputs = tokenizer(
            inputs,
            max_length=512,
            truncation=True,
            padding="max_length"
        )

        labels = tokenizer(
            targets,
            max_length=512,
            truncation=True,
            padding="max_length"
        )

        # BUG FIX: mask padding positions in the labels with -100 so the
        # cross-entropy loss ignores them; otherwise the model is trained
        # to emit pad tokens for most of every 512-token target.
        pad_id = tokenizer.pad_token_id
        model_inputs["labels"] = [
            [(tok if tok != pad_id else -100) for tok in seq]
            for seq in labels["input_ids"]
        ]
        return model_inputs

    dataset = Dataset.from_dict({
        "input": [d["input"] for d in dataset_info["data"]],
        "output": [d["output"] for d in dataset_info["data"]]
    })

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    progress(0.4, desc="Setting up training...")

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=batch_size,
        learning_rate=learning_rate,
        weight_decay=0.01,
        logging_steps=10,
        save_strategy="epoch",
        # BUG FIX: fp16=True crashes on CPU-only runtimes; enable mixed
        # precision only when a CUDA device is actually available.
        fp16=torch.cuda.is_available(),
        push_to_hub=False,
        report_to="none",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
    )

    progress(0.5, desc="Training in progress...")

    try:
        trainer.train()
        progress(0.9, desc="Saving model...")

        # Persist both model weights and tokenizer so test_model() can
        # reload the pair from OUTPUT_DIR.
        trainer.save_model(OUTPUT_DIR)
        tokenizer.save_pretrained(OUTPUT_DIR)

        progress(1.0, desc="Complete!")

        return f"""โ
**Training Complete!**

**Model:** {model_name}
**Base:** {MODEL_ID}
**Training Examples:** {dataset_info["count"]}
**Epochs:** {num_epochs}

Model saved to: `{OUTPUT_DIR}`

To push to HuggingFace Hub:
```python
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(folder_path="{OUTPUT_DIR}", repo_id="YOUR_USERNAME/{model_name}")
```
"""
    except Exception as e:
        return f"โ Training failed: {e}"
|
| |
|
| |
|
def test_model(prompt: str) -> str:
    """Run the fine-tuned model saved in OUTPUT_DIR on a single prompt.

    Args:
        prompt: The user's question; prefixed with "Question: " to match
            the format used during training.

    Returns:
        The generated text, or an error message string when no trained
        model exists or loading/generation fails.
    """
    model_path = Path(OUTPUT_DIR)

    if not model_path.exists():
        return "โ No trained model found. Please train a model first."

    try:
        # load_training_libs() publishes torch / AutoTokenizer /
        # AutoModelForSeq2SeqLM as module globals.
        load_training_libs()
        tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
        model = AutoModelForSeq2SeqLM.from_pretrained(OUTPUT_DIR)
        # FIX: switch to eval mode so dropout is disabled during inference.
        model.eval()

        input_text = f"Question: {prompt}"
        inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

        # FIX: disable autograd during generation — no gradients are
        # needed and tracking them wastes memory.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=512,
                num_beams=4,
                early_stopping=True
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response
    except Exception as e:
        return f"โ Error: {e}"
|
| |
|
| |
|
def load_demo_data() -> str:
    """Serialize the bundled DEMO_DATA examples as JSONL text."""
    lines = [json.dumps(example) for example in DEMO_DATA]
    return "\n".join(lines)
|
| |
|
| |
|
| |
|
# Gradio UI: three tabs (data entry/validation, training, testing) wired to
# the functions above. Built at import time; launched from the main guard.
with gr.Blocks(title="Custom AI Specialist Trainer") as demo:
    # Header / overview copy shown above the tabs.
    gr.Markdown("""
# ๐ง  Custom AI Specialist Trainer

Fine-tune **T5-Gemma-2-270m** on your custom training data using HuggingFace's free T4 GPU!

**Use Cases:**
- ๐ฎ **UE5 Code Expert** - Unreal Engine Python scripting
- ๐๏ธ **3D Asset Expert** - 3D generation tool guidance
- ๐จ **Texture Expert** - PBR materials, UV mapping
- ๐ฌ **Animation Expert** - Rigging, motion workflows
""")

    # --- Tab 1: paste training data, load the demo set, validate format ---
    with gr.Tab("๐ Training Data"):
        gr.Markdown("""
### Training Data Format (JSONL)
Each line should be a JSON object with `input` and `output` fields:
```json
{"input": "How do I do X?", "output": "Here's how to do X..."}
```
""")

        # Shared with the Train tab below via the click() inputs list.
        training_data = gr.Textbox(
            label="Training Data (JSONL)",
            placeholder='{"input": "How do I...", "output": "You can..."}',
            lines=15
        )

        with gr.Row():
            load_demo_btn = gr.Button("๐ฅ Load Demo Data", variant="secondary")
            # NOTE(review): button label glyph appears mojibake-garbled
            # (embedded newline); kept byte-identical — confirm encoding.
            validate_btn = gr.Button("โ
Validate Data", variant="primary")

        validation_output = gr.Markdown()

        # Wire buttons: demo loader fills the textbox; validator reports on it.
        load_demo_btn.click(load_demo_data, outputs=training_data)
        validate_btn.click(validate_data, inputs=training_data, outputs=validation_output)

    # --- Tab 2: hyperparameters and the training trigger ---
    with gr.Tab("๐ Train Model"):
        gr.Markdown("### Training Configuration")

        with gr.Row():
            model_name = gr.Textbox(
                label="Model Name",
                value="ue5-code-expert",
                info="Name for your fine-tuned model"
            )
            num_epochs = gr.Slider(
                minimum=1, maximum=10, value=3, step=1,
                label="Training Epochs"
            )

        with gr.Row():
            batch_size = gr.Slider(
                minimum=1, maximum=8, value=4, step=1,
                label="Batch Size",
                info="Lower if out of memory"
            )
            learning_rate = gr.Number(
                value=5e-5,
                label="Learning Rate"
            )

        train_btn = gr.Button("๐๏ธ Start Training", variant="primary", size="lg")
        training_output = gr.Markdown()

        # Note: training_data comes from the first tab's textbox.
        train_btn.click(
            start_training,
            inputs=[training_data, model_name, num_epochs, batch_size, learning_rate],
            outputs=training_output
        )

    # --- Tab 3: query the fine-tuned model saved in OUTPUT_DIR ---
    with gr.Tab("๐งช Test Model"):
        gr.Markdown("### Test Your Fine-Tuned Model")

        test_prompt = gr.Textbox(
            label="Your Question",
            placeholder="How do I spawn an actor in UE5?",
            lines=3
        )

        test_btn = gr.Button("๐ฎ Generate Response", variant="primary")
        # Responses are rendered as Python since the specialists emit code.
        test_output = gr.Code(label="Model Response", language="python")

        test_btn.click(test_model, inputs=test_prompt, outputs=test_output)

    # Footer: links to related HF Spaces for the game-dev pipeline.
    gr.Markdown("""
---
### ๐ Related HF Spaces for Game Dev

| Tool | Purpose | Link |
|------|---------|------|
| TRELLIS.2 | Image โ 3D Mesh | [microsoft/TRELLIS.2](https://hf.co/spaces/microsoft/TRELLIS.2) |
| Hunyuan3D | Text/Image โ 3D | [tencent/Hunyuan3D-2.1](https://hf.co/spaces/tencent/Hunyuan3D-2.1) |
| Stable Fast 3D | Quick 3D from Image | [stabilityai/stable-fast-3d](https://hf.co/spaces/stabilityai/stable-fast-3d) |
| Chord PBR | PBR Material Gen | [ksangk/chord-demo](https://hf.co/spaces/ksangk/chord-demo) |
| FLUX.2 | Image Generation | [black-forest-labs/FLUX.2-dev](https://hf.co/spaces/black-forest-labs/FLUX.2-dev) |
""")
|
| |
|
if __name__ == "__main__":
    # Launch the Gradio app when executed as a script (how Spaces runs it).
    demo.launch()
|
| |
|