Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,8 @@ import random
|
|
| 6 |
from datetime import datetime
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
import hashlib
|
|
|
|
|
|
|
| 9 |
|
| 10 |
from collections.abc import Iterable
|
| 11 |
|
|
@@ -212,39 +214,73 @@ TODO
|
|
| 212 |
**Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
|
| 213 |
"""
|
| 214 |
|
| 215 |
-
#
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
"
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
}
|
| 247 |
-
|
|
|
|
|
|
|
| 248 |
|
| 249 |
class AnnotationManager:
|
| 250 |
def __init__(self):
|
|
@@ -253,15 +289,52 @@ class AnnotationManager:
|
|
| 253 |
|
| 254 |
def get_user_seed(self, user_id: str) -> int:
|
| 255 |
"""Generate consistent seed for user"""
|
| 256 |
-
return int(hashlib.md5(user_id.encode()).hexdigest(), 16) %
|
| 257 |
-
|
| 258 |
def get_user_samples(self, user_id: str) -> List[Dict]:
|
| 259 |
"""Get shuffled samples for user based on their ID"""
|
| 260 |
seed = self.get_user_seed(user_id)
|
| 261 |
-
samples =
|
| 262 |
random.Random(seed).shuffle(samples)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
return samples
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
|
| 266 |
"""Get next unannotated sample for user"""
|
| 267 |
if user_id not in self.user_states:
|
|
@@ -288,36 +361,7 @@ class AnnotationManager:
|
|
| 288 |
if user_id not in self.annotations:
|
| 289 |
return False
|
| 290 |
return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
|
| 291 |
-
|
| 292 |
-
def save_annotation(self, user_id: str, sample_id: str, choice: str):
|
| 293 |
-
"""Save user's annotation"""
|
| 294 |
-
if user_id not in self.annotations:
|
| 295 |
-
self.annotations[user_id] = []
|
| 296 |
-
|
| 297 |
-
annotation = {
|
| 298 |
-
"user_id": user_id,
|
| 299 |
-
"sample_id": sample_id,
|
| 300 |
-
"choice": choice,
|
| 301 |
-
"timestamp": datetime.now().isoformat()
|
| 302 |
-
}
|
| 303 |
-
|
| 304 |
-
self.annotations[user_id].append(annotation)
|
| 305 |
-
|
| 306 |
-
# Update user state
|
| 307 |
-
if user_id in self.user_states:
|
| 308 |
-
self.user_states[user_id]["annotations"].append(sample_id)
|
| 309 |
-
self.user_states[user_id]["current_index"] += 1
|
| 310 |
-
|
| 311 |
-
# In production, save to HuggingFace dataset here
|
| 312 |
-
print(f"Saved annotation: {annotation}")
|
| 313 |
-
|
| 314 |
-
def get_user_progress(self, user_id: str) -> Dict:
|
| 315 |
-
"""Get user's annotation progress"""
|
| 316 |
-
if user_id not in self.annotations:
|
| 317 |
-
return {"completed": 0, "total": len(DUMMY_DATASET)}
|
| 318 |
-
|
| 319 |
-
completed = len(self.annotations[user_id])
|
| 320 |
-
return {"completed": completed, "total": len(DUMMY_DATASET)}
|
| 321 |
|
| 322 |
# Initialize manager
|
| 323 |
manager = AnnotationManager()
|
|
@@ -382,7 +426,14 @@ def annotate(choice: str, user_id: str) -> Tuple:
|
|
| 382 |
"b_better": "B is more fluent",
|
| 383 |
"equal": "Equally fluent"
|
| 384 |
}
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
# Get next sample
|
| 388 |
next_sample, current, total = manager.get_next_sample(user_id)
|
|
@@ -400,7 +451,8 @@ def annotate(choice: str, user_id: str) -> Tuple:
|
|
| 400 |
gr.update(value=next_sample["prompt"]), # prompt
|
| 401 |
gr.update(value=next_sample["response_a"]), # response_a
|
| 402 |
gr.update(value=next_sample["response_b"]), # response_b
|
| 403 |
-
gr.update(value=f"Progress: {current}/{total}")
|
|
|
|
| 404 |
gr.update(value="Annotation saved!", visible=True) # status
|
| 405 |
)
|
| 406 |
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
import hashlib
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
import itertools
|
| 11 |
|
| 12 |
from collections.abc import Iterable
|
| 13 |
|
|
|
|
| 214 |
**Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
|
| 215 |
"""
|
| 216 |
|
| 217 |
+
# Model names for the three responses compared by this annotation app
MODEL_NAMES = ["mistral-Nemo", "translated-SFT", "on-policy-RL"]

# Create all pairwise comparisons (3 models -> 3 unordered pairs)
MODEL_PAIRS = list(itertools.combinations(MODEL_NAMES, 2))
|
| 222 |
+
|
| 223 |
+
def load_dataset_samples():
    """Load and prepare dataset samples with pairwise comparisons.

    Loads the private ``ltg/fluency-generations`` dataset and expands each
    item into one record per model pair in ``MODEL_PAIRS``. If loading fails
    for any reason (missing auth token, offline environment, ...), a single
    dummy comparison is returned so the app stays usable for development.

    Returns:
        list[dict]: records with keys ``id``, ``original_id``, ``prompt``,
            ``response_a``, ``response_b``, ``model_a``, ``model_b``,
            ``dataset``.
    """
    try:
        # Load the private dataset (requires authentication).
        # NOTE: `use_auth_token` is deprecated (and removed in datasets>=3.0);
        # `token=True` is the supported spelling.
        dataset = load_dataset("ltg/fluency-generations", split="train", token=True)

        # Transform dataset into pairwise comparison format
        pairwise_samples = []

        for item in dataset:
            sample_id = item["sample_id"]
            prompt = item["prompt"]
            responses = item["responses"]

            # Create pairwise comparisons for this sample
            for model_a, model_b in MODEL_PAIRS:
                pairwise_samples.append({
                    "id": f"{sample_id}_{model_a}_vs_{model_b}",
                    "original_id": sample_id,
                    "prompt": prompt,
                    "response_a": responses[model_a],
                    "response_b": responses[model_b],
                    "model_a": model_a,
                    "model_b": model_b,
                    "dataset": item.get("dataset", "unknown"),
                })

        return pairwise_samples

    except Exception as e:
        print(f"Error loading dataset: {e}")
        print("Using dummy data for testing...")
        # Fallback to dummy data for testing
        return [
            {
                "id": "dummy_001_modelA_vs_modelB",
                "original_id": "dummy_001",
                "prompt": "Test prompt for development",
                "response_a": "This is response A for testing.",
                "response_b": "This is response B for testing.",
                "model_a": "modelA",
                "model_b": "modelB",
                "dataset": "test"
            }
        ]
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def swap_sample(sample):
    """Return a copy of *sample* with responses A and B swapped.

    Used to randomize presentation order so annotators cannot learn a fixed
    model-to-position mapping. The ``id`` is rebuilt to reflect the swapped
    comparison direction.

    Note: the original version was missing a comma after the "id" entry,
    which made the dict literal a syntax error — fixed here.
    """
    return {
        "id": f"{sample['original_id']}_{sample['model_b']}_vs_{sample['model_a']}",
        "original_id": sample["original_id"],
        "prompt": sample["prompt"],
        "response_a": sample["response_b"],
        "response_b": sample["response_a"],
        "model_a": sample["model_b"],
        "model_b": sample["model_a"],
        "dataset": sample["dataset"],
    }
|
| 281 |
+
|
| 282 |
+
# Load dataset on startup
|
| 283 |
+
DATASET_SAMPLES = load_dataset_samples()
|
| 284 |
|
| 285 |
class AnnotationManager:
|
| 286 |
def __init__(self):
|
|
|
|
| 289 |
|
| 290 |
def get_user_seed(self, user_id: str) -> int:
    """Derive a stable per-user seed in [0, 100000) from the user id."""
    digest = hashlib.md5(user_id.encode()).hexdigest()
    return int(digest, 16) % 100000
|
| 293 |
+
|
| 294 |
def get_user_samples(self, user_id: str) -> List[Dict]:
    """Return the dataset shuffled deterministically for this user.

    The order is fixed by the user's seed, and each sample's A/B
    presentation is independently flipped (via swap_sample) by a
    position-dependent coin toss, also derived from the seed.
    """
    seed = self.get_user_seed(user_id)
    shuffled = list(DATASET_SAMPLES)
    random.Random(seed).shuffle(shuffled)
    result = []
    for position, sample in enumerate(shuffled):
        # Deterministic per-position coin toss: keep order or swap A/B.
        if random.Random(seed + position).randint(0, 1) == 0:
            result.append(sample)
        else:
            result.append(swap_sample(sample))
    return result
|
| 304 |
|
| 305 |
+
def save_annotation(self, user_id: str, sample_id: str, choice: str,
                    model_a: str = None, model_b: str = None):
    """Record one annotation, tagging which models were compared.

    Appends the annotation to this user's list and, when the user has an
    active state, advances their progress counters.
    """
    record = {
        "user_id": user_id,
        "sample_id": sample_id,
        "choice": choice,
        "model_a": model_a,
        "model_b": model_b,
        "timestamp": datetime.now().isoformat(),
    }
    # Create the per-user list on first use, then append.
    self.annotations.setdefault(user_id, []).append(record)

    # Update user state
    state = self.user_states.get(user_id)
    if state is not None:
        state["annotations"].append(sample_id)
        state["current_index"] += 1

    # In production, save to HuggingFace dataset here
    print(f"Saved annotation: {record}")
|
| 329 |
+
|
| 330 |
+
def get_user_progress(self, user_id: str) -> Dict:
    """Return how many pairwise samples this user has annotated so far."""
    completed = len(self.annotations.get(user_id, []))
    return {"completed": completed, "total": len(DATASET_SAMPLES)}
|
| 337 |
+
|
| 338 |
def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
|
| 339 |
"""Get next unannotated sample for user"""
|
| 340 |
if user_id not in self.user_states:
|
|
|
|
| 361 |
if user_id not in self.annotations:
|
| 362 |
return False
|
| 363 |
return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
|
| 364 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
# Initialize manager
|
| 367 |
manager = AnnotationManager()
|
|
|
|
| 426 |
"b_better": "B is more fluent",
|
| 427 |
"equal": "Equally fluent"
|
| 428 |
}
|
| 429 |
+
# Save with model information
|
| 430 |
+
manager.save_annotation(
|
| 431 |
+
user_id,
|
| 432 |
+
sample["id"],
|
| 433 |
+
choice_map[choice],
|
| 434 |
+
model_a=sample.get("model_a"),
|
| 435 |
+
model_b=sample.get("model_b")
|
| 436 |
+
)
|
| 437 |
|
| 438 |
# Get next sample
|
| 439 |
next_sample, current, total = manager.get_next_sample(user_id)
|
|
|
|
| 451 |
gr.update(value=next_sample["prompt"]), # prompt
|
| 452 |
gr.update(value=next_sample["response_a"]), # response_a
|
| 453 |
gr.update(value=next_sample["response_b"]), # response_b
|
| 454 |
+
gr.update(value=f"Progress: {current}/{total} | Comparing: {sample.get('model_a', 'A')} vs {sample.get('model_b', 'B')}") # progress
|
| 455 |
+
# gr.update(value=f"Progress: {current}/{total}"), # progress
|
| 456 |
gr.update(value="Annotation saved!", visible=True) # status
|
| 457 |
)
|
| 458 |
|