Spaces:

CadenShokat
/

sentiment-eval

Running

App Files Files Community

CadenShokat committed on Sep 9

Commit

bef81d1

verified ·

1 Parent(s): 87a2111

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -11

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from fastapi import FastAPI, Query
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import math
@@ -18,17 +19,35 @@ def health():
     return {"status": "healthy", "model": loaded_model_id}
 def load_pipeline():
     global clf, loaded_model_id
     if clf is not None:
         return clf
     for model_id in (PRIMARY_MODEL, FALLBACK_MODEL):
         try:
-            tok = AutoTokenizer.from_pretrained(model_id)
-            mdl = AutoModelForSequenceClassification.from_pretrained(model_id)
             loaded_model_id = model_id
-            return pipeline("text-classification", model=mdl, tokenizer=tok, return_all_scores=True, truncation=True)
         except Exception as e:
             print(f"Failed to load {model_id}: {e}")
     raise RuntimeError("No sentiment model could be loaded")
 def compute_score(pos: float, neg: float, neu: float, mode: str) -> float:
@@ -39,9 +58,10 @@ def compute_score(pos: float, neg: float, neu: float, mode: str) -> float:
         return pos - neg
     elif mode == "logit": # optional slightly squashed
         # difference of logits, then tanh to clamp to [-1,1]
-        lp = math.log(max(1e-9, pos)) - math.log(max(1e-9, 1-pos))
-        ln = math.log(max(1e-9, neg)) - math.log(max(1e-9, 1-neg))
-        return math.tanh((lp - ln) / 4.0)
     else:
         return pos - neg
@@ -70,7 +90,7 @@ def scores_to_label(scores, mode: str, binary_hint: bool | None, min_conf: float
         detected_binary = (neu == 0.0)
     is_binary = detected_binary if binary_hint is None else bool(binary_hint)
-    if is_binary:
         neu = 0.0
     score = compute_score(pos, neg, neu, mode)
@@ -94,13 +114,29 @@ def scores_to_label(scores, mode: str, binary_hint: bool | None, min_conf: float
 @app.post("/predict")
 def predict(
     payload: Payload,
-    mode: str = Query("raw", pattern="^(raw|debias|logit)$"),
-    min_conf: float = Query(0.60, ge=0.0, le=1.0),
-    neutral_zone: float = Query(0.20, ge=0.0, le=1.0)
 ):
     clf = load_pipeline()
     texts = payload.sentences or []
-    outs = clf(texts, top_k=None)
     binary_hint = (loaded_model_id == FALLBACK_MODEL)
     results = [scores_to_label(s, mode, binary_hint, min_conf, neutral_zone) for s in outs]
     return {"model": loaded_model_id, "results": results}

 from fastapi import FastAPI, Query
 from pydantic import BaseModel
+from typing import Literal
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import math
     return {"status": "healthy", "model": loaded_model_id}
 def load_pipeline():
+    """
+    Robust loader:
+    - Avoid meta-tensor issue by forcing low_cpu_mem_usage=False
+    - No device_map; keep on CPU
+    - Cache the pipeline in the global `clf`
+    """
     global clf, loaded_model_id
     if clf is not None:
         return clf
     for model_id in (PRIMARY_MODEL, FALLBACK_MODEL):
         try:
+            tok = AutoTokenizer.from_pretrained(model_id)  # use_fast default is fine
+            mdl = AutoModelForSequenceClassification.from_pretrained(
+                model_id,
+                low_cpu_mem_usage=False,     # <-- important to avoid meta tensors
+                trust_remote_code=False
+            )
             loaded_model_id = model_id
+            clf = pipeline(
+                "text-classification",
+                model=mdl,
+                tokenizer=tok,
+                device=-1  # CPU
+            )
+            return clf
         except Exception as e:
             print(f"Failed to load {model_id}: {e}")
     raise RuntimeError("No sentiment model could be loaded")
 def compute_score(pos: float, neg: float, neu: float, mode: str) -> float:
         return pos - neg
     elif mode == "logit": # optional slightly squashed
         # difference of logits, then tanh to clamp to [-1,1]
+        import math as _m
+        lp = _m.log(max(1e-9, pos)) - _m.log(max(1e-9, 1-pos))
+        ln = _m.log(max(1e-9, neg)) - _m.log(max(1e-9, 1-neg))
+        return _m.tanh((lp - ln) / 4.0)
     else:
         return pos - neg
         detected_binary = (neu == 0.0)
     is_binary = detected_binary if binary_hint is None else bool(binary_hint)
+    if is_binary:
         neu = 0.0
     score = compute_score(pos, neg, neu, mode)
 @app.post("/predict")
 def predict(
     payload: Payload,
+    mode: Literal["raw","debias","logit"] = Query("raw"),
+    min_conf: float = Query(0.60, ge=0.0, le=1.0),
+    neutral_zone: float = Query(0.20, ge=0.0, le=1.0)
 ):
+    """
+    - Use top_k=None (replacement for deprecated return_all_scores=True)
+    - Force truncation/padding/max_length to avoid 631>514 crashes
+    """
     clf = load_pipeline()
     texts = payload.sentences or []
+    outs = clf(
+        texts,
+        top_k=None,           # replaces return_all_scores=True
+        truncation=True,      # <-- important for long inputs
+        padding=True,
+        max_length=512
+    )
+    # If a single string was passed, HF may return a single item; normalize to list
+    if isinstance(outs, dict) or (outs and isinstance(outs[0], dict)):
+        outs = [outs]  # ensure list[list[dict]]
     binary_hint = (loaded_model_id == FALLBACK_MODEL)
     results = [scores_to_label(s, mode, binary_hint, min_conf, neutral_zone) for s in outs]
     return {"model": loaded_model_id, "results": results}