davda54 committed on
Commit
8bdf0b7
·
verified ·
1 Parent(s): eded521

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -67
app.py CHANGED
@@ -6,6 +6,8 @@ import random
6
  from datetime import datetime
7
  from typing import Dict, List, Tuple
8
  import hashlib
 
 
9
 
10
  from collections.abc import Iterable
11
 
@@ -212,39 +214,73 @@ TODO
212
  **Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
213
  """
214
 
215
- # Dummy dataset - replace with actual HuggingFace dataset loading
216
- DUMMY_DATASET = [
217
- {
218
- "id": "sample_001",
219
- "prompt": "Write a short story about a robot learning to paint.",
220
- "response_a": "The robot, Model X-7, stood before the canvas with mechanical precision. Its metallic fingers gripped the brush awkwardly at first, calculating angles and pressure. Day by day, its movements became more fluid, less calculated. What started as geometric patterns evolved into landscapes. The robot discovered that art wasn't about perfection—it was about expression.",
221
- "response_b": "X-7 was a robot who wanted to paint. It picked up a brush and started making strokes on the canvas. At first the paintings were very mechanical and rigid. But over time the robot got better at painting. It learned that painting was not just about following rules but about creativity and expressing feelings, even though it was just a machine."
222
- },
223
- {
224
- "id": "sample_002",
225
- "prompt": "Explain quantum computing in simple terms.",
226
- "response_a": "Quantum computing uses quantum bits or 'qubits' instead of regular bits. While normal bits are either 0 or 1, qubits can be both at the same time through superposition. This allows quantum computers to process many calculations simultaneously, making them potentially much faster for certain problems like cryptography or drug discovery.",
227
- "response_b": "Think of regular computers like a light switch—it's either on or off. Quantum computers are like a dimmer switch that can be at any level of brightness. This special property lets them explore many solutions at once, similar to trying all paths in a maze simultaneously instead of one at a time."
228
- },
229
- {
230
- "id": "sample_003",
231
- "prompt": "What are the benefits of meditation?",
232
- "response_a": "Meditation offers numerous benefits for both mental and physical health. Regular practice can reduce stress and anxiety by lowering cortisol levels. It improves focus and concentration, enhances emotional well-being, and can even boost immune function. Many practitioners report better sleep quality and increased self-awareness.",
233
- "response_b": "The benefits of meditation include stress reduction, improved concentration, better emotional health, and enhanced self-awareness. Studies show it can lower blood pressure, improve sleep, and reduce symptoms of anxiety and depression. Even just 10 minutes daily can make a significant difference in overall well-being and mental clarity."
234
- },
235
- {
236
- "id": "sample_004",
237
- "prompt": "Describe the process of photosynthesis.",
238
- "response_a": "Photosynthesis is how plants make their own food using sunlight. In the chloroplasts, chlorophyll captures light energy. This energy splits water molecules into hydrogen and oxygen. The hydrogen combines with carbon dioxide from the air to create glucose (sugar), which feeds the plant. Oxygen is released as a byproduct.",
239
- "response_b": "Plants perform photosynthesis to convert light into chemical energy. The process occurs in two stages: light reactions and dark reactions. During light reactions, sunlight energizes electrons in chlorophyll. These electrons help produce ATP and NADPH. In dark reactions (Calvin cycle), these molecules help convert CO2 into glucose for the plant's energy needs."
240
- },
241
- {
242
- "id": "sample_005",
243
- "prompt": "How do vaccines work?",
244
- "response_a": "Vaccines train your immune system to recognize and fight specific diseases. They contain weakened or inactive parts of a pathogen (like a virus or bacteria). When injected, your body produces antibodies against these harmless versions. If you later encounter the real pathogen, your immune system remembers it and quickly produces antibodies to fight it off.",
245
- "response_b": "Vaccines work by introducing a safe version of a disease-causing organism to your body. This might be a killed virus, a weakened bacteria, or just a piece of the pathogen's protein. Your immune system responds by creating antibodies and memory cells. These memory cells remember the threat, so if you're exposed to the actual disease later, your body can mount a rapid defense."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  }
247
- ]
 
 
248
 
249
  class AnnotationManager:
250
  def __init__(self):
@@ -253,15 +289,52 @@ class AnnotationManager:
253
 
254
  def get_user_seed(self, user_id: str) -> int:
255
  """Generate consistent seed for user"""
256
- return int(hashlib.md5(user_id.encode()).hexdigest(), 16) % 10000
257
-
258
  def get_user_samples(self, user_id: str) -> List[Dict]:
259
  """Get shuffled samples for user based on their ID"""
260
  seed = self.get_user_seed(user_id)
261
- samples = DUMMY_DATASET.copy()
262
  random.Random(seed).shuffle(samples)
 
 
 
 
263
  return samples
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
266
  """Get next unannotated sample for user"""
267
  if user_id not in self.user_states:
@@ -288,36 +361,7 @@ class AnnotationManager:
288
  if user_id not in self.annotations:
289
  return False
290
  return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
291
-
292
- def save_annotation(self, user_id: str, sample_id: str, choice: str):
293
- """Save user's annotation"""
294
- if user_id not in self.annotations:
295
- self.annotations[user_id] = []
296
-
297
- annotation = {
298
- "user_id": user_id,
299
- "sample_id": sample_id,
300
- "choice": choice,
301
- "timestamp": datetime.now().isoformat()
302
- }
303
-
304
- self.annotations[user_id].append(annotation)
305
-
306
- # Update user state
307
- if user_id in self.user_states:
308
- self.user_states[user_id]["annotations"].append(sample_id)
309
- self.user_states[user_id]["current_index"] += 1
310
-
311
- # In production, save to HuggingFace dataset here
312
- print(f"Saved annotation: {annotation}")
313
-
314
- def get_user_progress(self, user_id: str) -> Dict:
315
- """Get user's annotation progress"""
316
- if user_id not in self.annotations:
317
- return {"completed": 0, "total": len(DUMMY_DATASET)}
318
-
319
- completed = len(self.annotations[user_id])
320
- return {"completed": completed, "total": len(DUMMY_DATASET)}
321
 
322
  # Initialize manager
323
  manager = AnnotationManager()
@@ -382,7 +426,14 @@ def annotate(choice: str, user_id: str) -> Tuple:
382
  "b_better": "B is more fluent",
383
  "equal": "Equally fluent"
384
  }
385
- manager.save_annotation(user_id, sample["id"], choice_map[choice])
 
 
 
 
 
 
 
386
 
387
  # Get next sample
388
  next_sample, current, total = manager.get_next_sample(user_id)
@@ -400,7 +451,8 @@ def annotate(choice: str, user_id: str) -> Tuple:
400
  gr.update(value=next_sample["prompt"]), # prompt
401
  gr.update(value=next_sample["response_a"]), # response_a
402
  gr.update(value=next_sample["response_b"]), # response_b
403
- gr.update(value=f"Progress: {current}/{total}"), # progress
 
404
  gr.update(value="Annotation saved!", visible=True) # status
405
  )
406
 
 
6
  from datetime import datetime
7
  from typing import Dict, List, Tuple
8
  import hashlib
9
+ from datasets import load_dataset
10
+ import itertools
11
 
12
  from collections.abc import Iterable
13
 
 
214
  **Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
215
  """
216
 
217
# Identifiers of the three systems whose responses are compared.
MODEL_NAMES = ["mistral-Nemo", "translated-SFT", "on-policy-RL"]

# Every unordered pair of systems, in the order itertools.combinations
# yields them (first name always precedes the second in MODEL_NAMES).
MODEL_PAIRS = list(itertools.combinations(MODEL_NAMES, 2))
222
+
223
def _build_pairwise_samples(dataset):
    """Expand each dataset item into one comparison record per model pair."""
    pairwise_samples = []
    for item in dataset:
        sample_id = item["sample_id"]
        prompt = item["prompt"]
        # Indexed by model name below, so this must be a mapping that has an
        # entry for every name in MODEL_NAMES.
        responses = item["responses"]

        for model_a, model_b in MODEL_PAIRS:
            pairwise_samples.append({
                "id": f"{sample_id}_{model_a}_vs_{model_b}",
                "original_id": sample_id,
                "prompt": prompt,
                "response_a": responses[model_a],
                "response_b": responses[model_b],
                "model_a": model_a,
                "model_b": model_b,
                "dataset": item.get("dataset", "unknown"),
            })
    return pairwise_samples


def load_dataset_samples():
    """Load the generation dataset and expand it into pairwise comparisons.

    The ``train`` split of ``ltg/fluency-generations`` is loaded with Hub
    authentication, and every item is turned into one record per pair in
    ``MODEL_PAIRS``.

    Returns:
        A list of dicts with the keys ``id``, ``original_id``, ``prompt``,
        ``response_a``, ``response_b``, ``model_a``, ``model_b`` and
        ``dataset``. If loading or transforming fails for any reason, the
        error is printed and a single placeholder record is returned instead,
        so the app can still start for local testing.
    """
    try:
        # Load the private dataset (requires authentication).
        # `token` is the current name of the auth argument; `use_auth_token`
        # is its deprecated predecessor. Try the new spelling first and fall
        # back so older `datasets` installs keep working.
        try:
            dataset = load_dataset(
                "ltg/fluency-generations", split="train", token=True
            )
        except (TypeError, ValueError):
            dataset = load_dataset(
                "ltg/fluency-generations", split="train", use_auth_token=True
            )

        return _build_pairwise_samples(dataset)

    except Exception as e:
        # Deliberate best-effort boundary: never crash at startup.
        print(f"Error loading dataset: {e}")
        print("Using dummy data for testing...")
        # Fallback to dummy data for testing
        return [
            {
                "id": "dummy_001_modelA_vs_modelB",
                "original_id": "dummy_001",
                "prompt": "Test prompt for development",
                "response_a": "This is response A for testing.",
                "response_b": "This is response B for testing.",
                "model_a": "modelA",
                "model_b": "modelB",
                "dataset": "test",
            }
        ]
268
+
269
+
270
def swap_sample(sample):
    """Return a new comparison record with the A and B sides exchanged.

    Responses and model names trade places, and ``id`` is rebuilt as
    ``<original_id>_<model_b>_vs_<model_a>`` so it reflects the new order.
    ``original_id``, ``prompt`` and ``dataset`` are carried over unchanged.
    The input dict is not modified.

    Args:
        sample: A record with the keys ``original_id``, ``prompt``,
            ``response_a``, ``response_b``, ``model_a``, ``model_b`` and
            ``dataset``.

    Returns:
        A fresh dict with the same keys as the records built by
        ``load_dataset_samples``.
    """
    return {
        # f-string (rather than "+") mirrors how the id is first built and
        # also works when original_id is not a str.
        "id": f'{sample["original_id"]}_{sample["model_b"]}_vs_{sample["model_a"]}',
        "original_id": sample["original_id"],
        "prompt": sample["prompt"],
        "response_a": sample["response_b"],
        "response_b": sample["response_a"],
        "model_a": sample["model_b"],
        "model_b": sample["model_a"],
        "dataset": sample["dataset"],
    }
281
+
282
# Build the pool of pairwise comparisons once, when the module is imported.
DATASET_SAMPLES = load_dataset_samples()
284
 
285
  class AnnotationManager:
286
  def __init__(self):
 
289
 
290
def get_user_seed(self, user_id: str) -> int:
    """Derive a stable integer seed in ``[0, 100000)`` from the user ID.

    The seed is the MD5 digest of the encoded ID, read as a hexadecimal
    integer and reduced modulo 100000, so the same ID always yields the
    same seed.
    """
    digest_hex = hashlib.md5(user_id.encode()).hexdigest()
    return int(digest_hex, 16) % 100000
293
+
294
def get_user_samples(self, user_id: str) -> List[Dict]:
    """Return the user's personal ordering of the comparison pool.

    The pool is shuffled with an RNG seeded from the user ID. Each sample
    is then either kept as-is or passed through ``swap_sample``, decided by
    a separate RNG seeded with ``seed + position``. Both steps are
    deterministic for a given user ID.
    """
    seed = self.get_user_seed(user_id)

    # Shallow copy so the shared pool itself is never reordered.
    ordered = list(DATASET_SAMPLES)  # Use loaded dataset
    random.Random(seed).shuffle(ordered)

    result = []
    for position, sample in enumerate(ordered):
        keep_order = random.Random(seed + position).randint(0, 1) == 0
        result.append(sample if keep_order else swap_sample(sample))
    return result
304
 
305
def save_annotation(self, user_id: str, sample_id: str, choice: str,
                    model_a: str = None, model_b: str = None):
    """Record one annotation, including which models were compared.

    The annotation is appended to ``self.annotations[user_id]`` (the list
    is created on first use). If the user has an entry in
    ``self.user_states``, the sample ID is appended to that state's
    ``"annotations"`` list and its ``"current_index"`` is incremented.
    The stored record is also printed.
    """
    record = {
        "user_id": user_id,
        "sample_id": sample_id,
        "choice": choice,
        "model_a": model_a,
        "model_b": model_b,
        "timestamp": datetime.now().isoformat()
    }
    self.annotations.setdefault(user_id, []).append(record)

    # Advance the user's position only when a session state exists.
    if user_id in self.user_states:
        state = self.user_states[user_id]
        state["annotations"].append(sample_id)
        state["current_index"] += 1

    # In production, save to HuggingFace dataset here
    print(f"Saved annotation: {record}")
329
+
330
def get_user_progress(self, user_id: str) -> Dict:
    """Report how many annotations the user has saved out of the pool size.

    Returns a dict with ``"completed"`` (number of annotations stored for
    the user, 0 if none) and ``"total"`` (length of ``DATASET_SAMPLES``).
    """
    total = len(DATASET_SAMPLES)
    done = len(self.annotations[user_id]) if user_id in self.annotations else 0
    return {"completed": done, "total": total}
337
+
338
  def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
339
  """Get next unannotated sample for user"""
340
  if user_id not in self.user_states:
 
361
  if user_id not in self.annotations:
362
  return False
363
  return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
364
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
  # Initialize manager
367
  manager = AnnotationManager()
 
426
  "b_better": "B is more fluent",
427
  "equal": "Equally fluent"
428
  }
429
+ # Save with model information
430
+ manager.save_annotation(
431
+ user_id,
432
+ sample["id"],
433
+ choice_map[choice],
434
+ model_a=sample.get("model_a"),
435
+ model_b=sample.get("model_b")
436
+ )
437
 
438
  # Get next sample
439
  next_sample, current, total = manager.get_next_sample(user_id)
 
451
  gr.update(value=next_sample["prompt"]), # prompt
452
  gr.update(value=next_sample["response_a"]), # response_a
453
  gr.update(value=next_sample["response_b"]), # response_b
454
+ gr.update(value=f"Progress: {current}/{total} | Comparing: {sample.get('model_a', 'A')} vs {sample.get('model_b', 'B')}") # progress
455
+ # gr.update(value=f"Progress: {current}/{total}"), # progress
456
  gr.update(value="Annotation saved!", visible=True) # status
457
  )
458