Spaces:

IndraDThor
/

ingredient-analyzer

Sleeping

App Files Files Community

IndraDThor committed on Oct 4

Commit

c2b0be4

verified ·

1 Parent(s): 4fda722

Upload 2 files

Browse files

Files changed (2) hide show

app.py +104 -0
requirments.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import gradio as gr
+from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, AutoModelForSeq2SeqLM
+import json  # For JSON output
+import re  # For flexible parsing
# --- Entity extraction -------------------------------------------------------
# Token-classification checkpoint used to pull nutrition entities out of the
# input text. The tokenizer and model are loaded once at import time and wrapped
# in a single pipeline object that the request handler calls.
ner_model_name = "sgarbi/bert-fda-nutrition-ner"
ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)
ner_pipeline = pipeline(
    task="ner",
    model=ner_model,
    tokenizer=ner_tokenizer,
    aggregation_strategy="simple",
)

# --- Summary generation ------------------------------------------------------
# Seq2seq checkpoint (Flan-T5: fast, instruction-tuned) used to turn the
# extracted entities into a short benefits / warnings summary.
summary_model_name = "google/flan-t5-base"
summary_tokenizer = AutoTokenizer.from_pretrained(summary_model_name)
summary_model = AutoModelForSeq2SeqLM.from_pretrained(summary_model_name)
# Keyword -> (benefit, warnings) rules used when the model output is unusable.
_FALLBACK_RULES = (
    (
        "beef",
        "High-quality protein for muscle repair",
        (
            "Vegans/vegetarians (animal product)",
            "Children under 5 (choking risk)",
            "Gout sufferers (high purines)",
        ),
    ),
    (
        "milk",
        "Calcium for bone health",
        (
            "Lactose-intolerant (dairy)",
            "Vegans (animal product)",
            "Infants under 1 (potential allergy)",
        ),
    ),
    (
        "sugar",
        "Quick energy source",
        ("Diabetics (high carbs)",),
    ),
)

# Placeholder bullets so the fallback never emits an empty "- " line.
_NO_BENEFITS = "No specific benefits identified"
_NO_WARNINGS = "No specific warnings identified"


def _is_unusable_summary(summary):
    """Return True when the generated text is too short or looks like garbage."""
    flattened = summary.replace('\n', '')
    return (
        len(summary) < 20
        or "[1-3" in summary  # the prompt's own placeholder was echoed back
        or re.match(r'^\[.*\]$', summary) is not None
        or '---' in summary
        # Only digits, dashes, brackets and spaces, e.g. "[1-1-1]" patterns.
        or all(c in ' -0123456789[]' for c in flattened)
    )


def _rule_based_summary(entities_text):
    """Build the keyword-driven fallback summary for *entities_text*."""
    lowered = entities_text.lower()
    benefits = []
    warnings = []
    for keyword, benefit, avoid in _FALLBACK_RULES:
        if keyword in lowered:
            benefits.append(benefit)
            warnings.extend(avoid)
    benefits_line = '; '.join(benefits) if benefits else _NO_BENEFITS
    warnings_line = '; '.join(warnings) if warnings else _NO_WARNINGS
    return f"Benefits:\n- {benefits_line}\nAvoid if:\n- {warnings_line}"


def generate_summary(entities_text):
    """Generate a structured summary using Flan-T5 based on extracted entities.

    Args:
        entities_text: Comma-separated ingredient names to summarise.

    Returns:
        A stripped string containing a "Benefits:" section and an "Avoid if:"
        section. When the input is empty, or the model output is too short or
        looks like placeholder/garbage text, a rule-based summary built from
        known keywords (beef, milk, sugar) is returned instead; sections with
        no matching rule carry an explicit placeholder bullet rather than an
        empty one.
    """
    # Nothing to analyse: skip the model entirely.
    if not entities_text or not entities_text.strip():
        return _rule_based_summary("")

    # Enhanced prompt: More examples for better zero-shot performance
    prompt = f"""
    Analyze these food ingredients. Output concise bullet points ONLY in this format:
    Benefits:
    - [1-2 benefits, e.g., High protein for muscle building and energy]
    Avoid if:
    - [1-3 warnings for age/health/cultural/allergies, e.g., Infants (choking risk), vegans (animal products), nut allergy]
    Ingredients: {entities_text}
    """
    inputs = summary_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    # do_sample=True means repeated calls may yield different summaries.
    outputs = summary_model.generate(**inputs, max_length=250, num_beams=4, temperature=0.8, do_sample=True,
                                     early_stopping=True)
    summary = summary_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Fallback if summary is too short/placeholder/garbage (demo-proof).
    if _is_unusable_summary(summary):
        summary = _rule_based_summary(entities_text)
    return summary.strip()
# Matches either section header of the summary, case-insensitively.
_SECTION_HEADER_RE = re.compile(
    r'(?:(?P<benefits>Benefits)|(?P<avoidances>Avoid\s+if))\s*:',
    re.IGNORECASE,
)


def _parse_summary_sections(summary):
    """Split *summary* into its "benefits" and "avoidances" bullet lists."""
    sections = {"benefits": [], "avoidances": []}
    headers = list(_SECTION_HEADER_RE.finditer(summary))
    for index, header in enumerate(headers):
        # A section body runs until the next header (or the end of the text),
        # so one section can never swallow the other, even on a single line.
        is_last = index + 1 == len(headers)
        body_end = len(summary) if is_last else headers[index + 1].start()
        for line in summary[header.end():body_end].splitlines():
            item = line.strip().lstrip("-*\u2022").strip()
            if item:  # skip empty bullets such as a bare "- "
                sections[header.lastgroup].append(item)
    return sections


def analyze_ingredients(text):
    """Analyze text for nutrition NER + generate summary.

    Args:
        text: Free-form ingredient text (e.g. from a food label).

    Returns:
        ``{"error": "No text provided."}`` when *text* is empty, blank or
        ``None``. Otherwise a dict with the keys ``input_text``,
        ``extracted_entities`` (word / entity_type / confidence for every
        entity scoring above 0.7), ``summary`` (the raw summary string),
        ``benefits`` and ``avoidances`` (lists of bullet strings parsed from
        the summary; empty when a section has no content).
    """
    if not text or not text.strip():
        return {"error": "No text provided."}

    # Step 1: Extract entities with NER; a higher threshold gives cleaner tags.
    ner_results = ner_pipeline(text)
    confident = [entity for entity in ner_results if entity["score"] > 0.7]
    words = [entity["word"].strip() for entity in confident]
    entities_text = ", ".join(words) if words else "No key ingredients found"

    # Step 2: Generate summary
    summary = generate_summary(entities_text)

    # Step 3: Parse summary into arrays, one entry per non-empty bullet.
    sections = _parse_summary_sections(summary)

    # Format full output as JSON for Flutter
    return {
        "input_text": text,
        "extracted_entities": [
            {
                "word": entity["word"].strip(),
                "entity_type": entity["entity_group"],
                # Cast to a built-in float so the value is always JSON-serializable.
                "confidence": round(float(entity["score"]), 3),
            }
            for entity in confident
        ],
        "summary": summary,
        "benefits": sections["benefits"],
        "avoidances": sections["avoidances"],
    }
# Gradio front end: one text box in, one JSON panel out. The same object backs
# both the web UI and the API.
iface = gr.Interface(
    title="Ingredient NER & Health Analyzer",
    description="Extracts nutrition entities from food labels and generates a summary of benefits, avoidance warnings (age, health, cultural, allergies).",
    fn=analyze_ingredients,
    inputs=gr.Textbox(
        placeholder="e.g., Wheat, milk 1kg, sugar, nuts",
        label="Enter Ingredients Text",
    ),
    outputs=gr.JSON(label="Full Analysis (Entities + Summary)"),
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860 (for HF compatibility).
    iface.launch(server_port=7860, server_name="0.0.0.0")

requirments.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==4.44.0
+transformers==4.44.2
+torch==2.4.1
+numpy==1.26.4