"""
T5-Gemma-2 Fine-Tuning Space for Custom AI Specialists
========================================================
This Space fine-tunes google/t5gemma-2-270m-270m on custom training data.
Uses HuggingFace's free T4 GPU for training.

Specialist Models we can train:
1. UE5 Code Expert - Unreal Engine Python scripting
2. 3D Asset Expert - How to use 3D generation tools
3. Texture Expert - PBR materials, UV mapping guidance
4. Animation Expert - Rigging, animation workflows
"""

import gradio as gr
import json
import os
from pathlib import Path


# Training imports (loaded on demand to save memory)
def load_training_libs() -> bool:
    """Import the heavy training libraries and publish them as module globals.

    The imports are deferred until a training or inference request arrives so
    that the UI can start without loading torch/transformers/datasets.

    Returns:
        True once the imports have succeeded. Import errors propagate to the
        caller.
    """
    global torch, AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, Dataset
    import torch
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
    from datasets import Dataset
    return True


# Constants
MODEL_ID = "google/t5gemma-2-270m-270m"
OUTPUT_DIR = "./fine-tuned-model"
# Minimum number of examples start_training() accepts; validate_data() uses the
# same value so the two steps never disagree.
MIN_TRAINING_EXAMPLES = 5

# Sample training data for demo.
# NOTE: this holds fewer than MIN_TRAINING_EXAMPLES entries, so it demonstrates
# the format but is not enough to start a training run on its own.
DEMO_DATA = [
    {
        "input": "How do I get the current level name in UE5 Python?",
        "output": """import unreal

world = unreal.EditorLevelLibrary.get_editor_world()
level_name = world.get_name()
print(f"Current level: {level_name}")"""
    },
    {
        "input": "How do I spawn an actor at a location in UE5?",
        "output": """import unreal

# Define spawn location
location = unreal.Vector(0, 0, 100)
rotation = unreal.Rotator(0, 0, 0)

# Get the actor class (e.g., StaticMeshActor)
actor_class = unreal.StaticMeshActor

# Spawn the actor
actor = unreal.EditorLevelLibrary.spawn_actor_from_class(
    actor_class,
    location,
    rotation
)"""
    }
]


def prepare_dataset(training_data_text: str) -> dict:
    """Parse JSONL training data and prepare for training.

    Each non-blank line is parsed as JSON. A line is kept only when it decodes
    to a JSON object that has both an ``input`` and an ``output`` key; anything
    else (malformed JSON, arrays, bare numbers/strings) is skipped.

    Args:
        training_data_text: JSONL text, one example per line. ``None`` or an
            empty string yields an empty result.

    Returns:
        A dict with ``count`` (number of accepted examples), ``sample`` (the
        first accepted example or ``None``) and ``data`` (all accepted
        examples, in input order).
    """
    data = []
    for line in (training_data_text or "").strip().split('\n'):
        if not line.strip():
            continue
        try:
            item = json.loads(line)
        except json.JSONDecodeError:
            continue
        # json.loads can return non-dict values (e.g. 123 or [..]); a membership
        # test on those would raise TypeError or match substrings of a string.
        if isinstance(item, dict) and 'input' in item and 'output' in item:
            data.append(item)

    return {
        "count": len(data),
        "sample": data[0] if data else None,
        "data": data
    }


def validate_data(training_data: str) -> str:
    """Validate training data format.

    Args:
        training_data: JSONL text from the training-data textbox.

    Returns:
        A Markdown message: an error when no usable example is found,
        otherwise a summary with the example count, the first example, and a
        note on whether there are enough examples to start training.
    """
    result = prepare_dataset(training_data)
    count = result["count"]

    if count == 0:
        return "❌ No valid training examples found. Ensure each line is valid JSON with 'input' and 'output' fields."

    # Keep this in step with the minimum enforced by start_training().
    if count < MIN_TRAINING_EXAMPLES:
        readiness = (
            f"⚠️ Training needs at least {MIN_TRAINING_EXAMPLES} examples; "
            f"add {MIN_TRAINING_EXAMPLES - count} more before training."
        )
    else:
        readiness = "Ready to start training!"

    return f"""✅ **Validation Successful!**

**Examples found:** {count}

**Sample Entry:**
```json
{json.dumps(result["sample"], indent=2)}
```

{readiness}"""


def start_training(
    training_data: str,
    model_name: str,
    num_epochs: int,
    batch_size: int,
    learning_rate: float,
    progress=gr.Progress()
) -> str:
    """Fine-tune T5-Gemma-2 on the provided training data.

    Args:
        training_data: JSONL text with ``input``/``output`` examples.
        model_name: Display name used in the result message and the suggested
            Hub repo id.
        num_epochs: Number of training epochs.
        batch_size: Per-device training batch size.
        learning_rate: Optimizer learning rate; must be a positive number.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        A Markdown message describing success or the failure reason. Failures
        are reported through the return value rather than raised, so the UI
        can display them.
    """
    progress(0, desc="Loading training libraries...")

    try:
        load_training_libs()
    except Exception as e:
        return f"❌ Failed to load training libraries: {e}"

    # UI widgets may deliver floats (sliders) or None (a cleared number box);
    # normalise before handing the values to TrainingArguments.
    try:
        num_epochs = int(num_epochs)
        batch_size = int(batch_size)
        learning_rate = float(learning_rate)
    except (TypeError, ValueError):
        return "❌ Epochs, batch size and learning rate must be numeric."
    if num_epochs < 1 or batch_size < 1 or learning_rate <= 0:
        return "❌ Epochs and batch size must be at least 1 and learning rate must be positive."

    progress(0.1, desc="Parsing training data...")
    dataset_info = prepare_dataset(training_data)

    if dataset_info["count"] < MIN_TRAINING_EXAMPLES:
        return f"❌ Need at least {MIN_TRAINING_EXAMPLES} training examples to fine-tune."

    progress(0.2, desc=f"Loading {MODEL_ID}...")

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
    except Exception as e:
        return f"❌ Failed to load model: {e}"

    def tokenize_function(examples):
        # T5-style input format
        inputs = [f"Question: {q}" for q in examples["input"]]
        targets = examples["output"]

        model_inputs = tokenizer(
            inputs,
            max_length=512,
            truncation=True,
            padding="max_length"
        )

        labels = tokenizer(
            targets,
            max_length=512,
            truncation=True,
            padding="max_length"
        )

        # Mask padded label positions with -100 so the loss ignores them;
        # otherwise the model is trained to predict padding tokens.
        pad_id = tokenizer.pad_token_id
        model_inputs["labels"] = [
            [(token if token != pad_id else -100) for token in sequence]
            for sequence in labels["input_ids"]
        ]
        return model_inputs

    # Everything from tokenization to saving is a UI-boundary operation: any
    # failure is turned into a message instead of an unhandled exception.
    try:
        progress(0.3, desc="Tokenizing dataset...")

        # Create HF Dataset
        dataset = Dataset.from_dict({
            "input": [d["input"] for d in dataset_info["data"]],
            "output": [d["output"] for d in dataset_info["data"]]
        })

        tokenized_dataset = dataset.map(tokenize_function, batched=True)

        progress(0.4, desc="Setting up training...")

        training_args = TrainingArguments(
            output_dir=OUTPUT_DIR,
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            learning_rate=learning_rate,
            weight_decay=0.01,
            logging_steps=10,
            save_strategy="epoch",
            # Mixed precision on the T4; fp16 is only enabled when CUDA is
            # present so the Space still trains (slowly) on CPU hardware.
            fp16=torch.cuda.is_available(),
            push_to_hub=False,
            report_to="none",
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_dataset,
        )

        progress(0.5, desc="Training in progress...")

        trainer.train()

        progress(0.9, desc="Saving model...")

        # Save locally
        trainer.save_model(OUTPUT_DIR)
        tokenizer.save_pretrained(OUTPUT_DIR)

        progress(1.0, desc="Complete!")

        return f"""✅ **Training Complete!**

**Model:** {model_name}
**Base:** {MODEL_ID}
**Training Examples:** {dataset_info["count"]}
**Epochs:** {num_epochs}

Model saved to: `{OUTPUT_DIR}`

To push to HuggingFace Hub:
```python
from huggingface_hub import HfApi
api = HfApi()
api.upload_folder(folder_path="{OUTPUT_DIR}", repo_id="YOUR_USERNAME/{model_name}")
```
"""
    except Exception as e:
        return f"❌ Training failed: {e}"


def test_model(prompt: str) -> str:
    """Test the fine-tuned model.

    Loads the tokenizer and model saved under ``OUTPUT_DIR`` and generates a
    response for ``prompt`` using the same ``Question: ...`` prefix that was
    applied during training.

    Args:
        prompt: The user's question.

    Returns:
        The decoded model output, or a "❌ ..." message when the prompt is
        empty, no model has been saved, or loading/generation fails.
    """
    if not prompt or not prompt.strip():
        return "❌ Please enter a question to test the model."

    model_path = Path(OUTPUT_DIR)

    if not model_path.exists():
        return "❌ No trained model found. Please train a model first."

    try:
        load_training_libs()

        tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
        model = AutoModelForSeq2SeqLM.from_pretrained(OUTPUT_DIR)

        # Format input like training data
        input_text = f"Question: {prompt}"
        inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            early_stopping=True
        )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response

    except Exception as e:
        return f"❌ Error: {e}"


def load_demo_data() -> str:
    """Load demo training data.

    Returns:
        ``DEMO_DATA`` serialised as JSONL, one JSON object per line.
    """
    return "\n".join(json.dumps(d) for d in DEMO_DATA)


# Build the Gradio interface
with gr.Blocks(title="Custom AI Specialist Trainer") as demo:
    gr.Markdown("""
    # 🧠 Custom AI Specialist Trainer

    Fine-tune **T5-Gemma-2-270m** on your custom training data using HuggingFace's free T4 GPU!

    **Use Cases:**
    - 🎮 **UE5 Code Expert** - Unreal Engine Python scripting
    - 🏗️ **3D Asset Expert** - 3D generation tool guidance
    - 🎨 **Texture Expert** - PBR materials, UV mapping
    - 🎬 **Animation Expert** - Rigging, motion workflows
    """)

    with gr.Tab("📝 Training Data"):
        gr.Markdown("""
        ### Training Data Format (JSONL)

        Each line should be a JSON object with `input` and `output` fields:
        ```json
        {"input": "How do I do X?", "output": "Here's how to do X..."}
        ```
        """)

        training_data = gr.Textbox(
            label="Training Data (JSONL)",
            placeholder='{"input": "How do I...", "output": "You can..."}',
            lines=15
        )

        with gr.Row():
            load_demo_btn = gr.Button("📥 Load Demo Data", variant="secondary")
            validate_btn = gr.Button("✅ Validate Data", variant="primary")

        validation_output = gr.Markdown()

        load_demo_btn.click(load_demo_data, outputs=training_data)
        validate_btn.click(validate_data, inputs=training_data, outputs=validation_output)

    with gr.Tab("🚀 Train Model"):
        gr.Markdown("### Training Configuration")

        with gr.Row():
            model_name = gr.Textbox(
                label="Model Name",
                value="ue5-code-expert",
                info="Name for your fine-tuned model"
            )
            num_epochs = gr.Slider(
                minimum=1,
                maximum=10,
                value=3,
                step=1,
                label="Training Epochs"
            )

        with gr.Row():
            batch_size = gr.Slider(
                minimum=1,
                maximum=8,
                value=4,
                step=1,
                label="Batch Size",
                info="Lower if out of memory"
            )
            learning_rate = gr.Number(
                value=5e-5,
                label="Learning Rate"
            )

        train_btn = gr.Button("🏋️ Start Training", variant="primary", size="lg")
        training_output = gr.Markdown()

        train_btn.click(
            start_training,
            inputs=[training_data, model_name, num_epochs, batch_size, learning_rate],
            outputs=training_output
        )

    with gr.Tab("🧪 Test Model"):
        gr.Markdown("### Test Your Fine-Tuned Model")

        test_prompt = gr.Textbox(
            label="Your Question",
            placeholder="How do I spawn an actor in UE5?",
            lines=3
        )
        test_btn = gr.Button("🔮 Generate Response", variant="primary")
        test_output = gr.Code(label="Model Response", language="python")

        test_btn.click(test_model, inputs=test_prompt, outputs=test_output)

    gr.Markdown("""
    ---
    ### 🔗 Related HF Spaces for Game Dev

    | Tool | Purpose | Link |
    |------|---------|------|
    | TRELLIS.2 | Image → 3D Mesh | [microsoft/TRELLIS.2](https://hf.co/spaces/microsoft/TRELLIS.2) |
    | Hunyuan3D | Text/Image → 3D | [tencent/Hunyuan3D-2.1](https://hf.co/spaces/tencent/Hunyuan3D-2.1) |
    | Stable Fast 3D | Quick 3D from Image | [stabilityai/stable-fast-3d](https://hf.co/spaces/stabilityai/stable-fast-3d) |
    | Chord PBR | PBR Material Gen | [ksangk/chord-demo](https://hf.co/spaces/ksangk/chord-demo) |
    | FLUX.2 | Image Generation | [black-forest-labs/FLUX.2-dev](https://hf.co/spaces/black-forest-labs/FLUX.2-dev) |
    """)


if __name__ == "__main__":
    demo.launch()