Spaces:

ai-assist-sh
/

PhishingMail-Lab

Sleeping

App Files Community

ai-assist-sh committed on Aug 21

Commit

c4c41cc

verified ·

1 Parent(s): eb515b5

Upload main.py

Browse files

Files changed (1) hide show

main.py +124 -42

main.py CHANGED Viewed

@@ -1,6 +1,6 @@
-import os, re, json, time
 from dataclasses import dataclass
-from typing import List, Dict, Tuple
 import gradio as gr
@@ -36,12 +36,16 @@ class UrlResult:
     url: str
     risk: float
     reasons: List[str]
 @dataclass
 class EmailResult:
-    p_email: float             # final probability after boosts
     kw_hits: List[str]
-    strong_hits: List[str]     # subset of kw_hits considered strong
 # =========================
 # URL extraction & heuristics (swap with your real URL model when ready)
@@ -69,24 +73,39 @@ def url_host(url: str) -> str:
     return host
 def score_url_heuristic(url: str) -> UrlResult:
     host = url_host(url)
-    score = 0.05
     reasons = []
     if len(url) > 140:
-        score += 0.15; reasons.append("very_long_url")
     if "@" in url or "%" in url:
-        score += 0.2; reasons.append("special_chars")
     if any(host.endswith(t) for t in SUSPICIOUS_TLDS):
-        score += 0.35; reasons.append("suspicious_tld")
     if any(s in host for s in SHORTENERS):
-        score += 0.5; reasons.append("shortener")
     if host.count(".") >= 3:
-        score += 0.2; reasons.append("deep_subdomain")
     if len(re.findall(r"[A-Z]", url)) > 16:
-        score += 0.1; reasons.append("mixed_case")
-    return UrlResult(url=url, risk=min(score, 1.0), reasons=reasons)
 def score_urls(urls: List[str]) -> List[UrlResult]:
     return [score_url_heuristic(u) for u in urls]
@@ -97,6 +116,8 @@ def score_urls(urls: List[str]) -> List[UrlResult]:
 _tokenizer = None
 _model = None
 _model_loaded_from = None  # "classifier", "backbone", or None
 # Strong vs normal cues (lowercase)
 STRONG_CUES = [
@@ -119,12 +140,14 @@ LEXICAL_CUES = sorted(set(STRONG_CUES + NORMAL_CUES))
 def load_email_model() -> Tuple[object, object, str]:
     """Try to load EMAIL_CLASSIFIER_ID; on failure, fall back to backbone with small head.
        Apply dynamic int8 quantization for CPU if available."""
-    global _tokenizer, _model, _model_loaded_from
     if _tokenizer is not None and _model is not None:
         return _tokenizer, _model, _model_loaded_from
     if AutoTokenizer is None or AutoModelForSequenceClassification is None or torch is None:
         _model_loaded_from = None
         return None, None, _model_loaded_from  # environment without torch/transformers
     # Preferred classifier
@@ -142,18 +165,22 @@ def load_email_model() -> Tuple[object, object, str]:
             _model_loaded_from = "backbone"
         except Exception:
             _tokenizer, _model, _model_loaded_from = None, None, None
             return None, None, _model_loaded_from
     # Dynamic quantization (CPU)
     try:
         _model.eval()
         _model.to("cpu")
         if hasattr(torch, "quantization"):
             from torch.quantization import quantize_dynamic
             _model = quantize_dynamic(_model, {torch.nn.Linear}, dtype=torch.qint8)  # type: ignore
     except Exception:
         pass
     return _tokenizer, _model, _model_loaded_from
 def _truncate_for_budget(tokens_subject: List[int], tokens_body: List[int], max_len: int, subj_budget: int):
@@ -163,9 +190,9 @@ def _truncate_for_budget(tokens_subject: List[int], tokens_body: List[int], max_
     return subj + body
 def score_email(subject: str, body: str) -> Tuple[EmailResult, Dict]:
-    """Return EmailResult + debug dict with probability, hits, boosts, timings, and model info.
-       Strong cues push higher risk even without a model (email-only scams)."""
-    dbg = {"path": None, "p_raw": None, "boost_from_strong": 0.0, "boost_from_normal": 0.0, "timing_ms": {}}
     t0 = time.perf_counter()
     text = (subject or "") + "\n" + (body or "")
@@ -177,6 +204,13 @@ def score_email(subject: str, body: str) -> Tuple[EmailResult, Dict]:
     tok, mdl, path = load_email_model()
     dbg["path"] = path
     if tok is None or mdl is None:
         # Pure lexical fallback (no model available):
@@ -187,7 +221,11 @@ def score_email(subject: str, body: str) -> Tuple[EmailResult, Dict]:
         dbg["boost_from_strong"] = 0.18 * len(strong_hits)
         dbg["boost_from_normal"] = 0.07 * len(normal_hits)
         dbg["timing_ms"]["email_infer"] = round((time.perf_counter() - t0) * 1000, 2)
-        return EmailResult(p_email=p_email, kw_hits=all_hits, strong_hits=strong_hits), dbg
     # Model path (MiniLM or your classifier)
     enc_t0 = time.perf_counter()
@@ -202,12 +240,10 @@ def score_email(subject: str, body: str) -> Tuple[EmailResult, Dict]:
     with torch.no_grad():
         out = mdl(input_ids=ids, attention_mask=mask)
-    import math
     if hasattr(out, "logits"):
         logits = out.logits[0].detach().cpu().numpy().tolist()
         exps = [math.exp(x) for x in logits]
-        p1 = exps[1] / (exps[0] + exps[1])  # assume label 1 = phishing
-        p_raw = float(p1)
     else:
         p_raw = 0.5
@@ -220,8 +256,16 @@ def score_email(subject: str, body: str) -> Tuple[EmailResult, Dict]:
     dbg["boost_from_strong"] = round(boost_s, 3)
     dbg["boost_from_normal"] = round(boost_n, 3)
     dbg["timing_ms"]["email_infer"] = round((time.perf_counter() - enc_t0) * 1000, 2)
-    return EmailResult(p_email=p_email, kw_hits=all_hits, strong_hits=strong_hits), dbg
 # =========================
 # Fusion
@@ -278,11 +322,13 @@ def fuse(email_res: EmailResult, url_results: List[UrlResult], allowlist_domains
     fused = {
         "P_email": round(email_res.p_email, 3),
         "R_url_max": round(r_url_max, 3),
         "R_total": round(r_after, 3),
         "R_total_before_overrides": round(r_before, 3),
         "kw_hits": email_res.kw_hits,
         "strong_hits": email_res.strong_hits,
         "no_urls": no_urls,
         "allowlist_hit": allowlist_hit,
         "verdict": verdict
@@ -300,7 +346,7 @@ def fuse(email_res: EmailResult, url_results: List[UrlResult], allowlist_domains
 # Gradio UI
 # =========================
 with gr.Blocks(title="PhishingMail-Lab") as demo:
-    gr.Markdown("# 🧪 PhishingMail‑Lab\n**POC** — Free‑tier friendly hybrid (email + URL) with explainable cues.")
     with gr.Row():
         with gr.Column(scale=3):
@@ -333,7 +379,8 @@ with gr.Blocks(title="PhishingMail-Lab") as demo:
         # URL pipeline
         t0 = time.perf_counter()
-        urls = list(dict.fromkeys(extract_urls((subject_text or "") + "\n" + (body_text or ""))))  # uniq & ordered
         t1 = time.perf_counter()
         url_results = score_urls(urls)
         t2 = time.perf_counter()
@@ -354,33 +401,63 @@ with gr.Blocks(title="PhishingMail-Lab") as demo:
         banner_text = "<br>".join(banners) if banners else ""
         banner_visible = bool(banners)
-        # Forensics JSON
-        per_url = [{"url": u.url, "risk": round(u.risk,3), "reasons": u.reasons} for u in url_results]
         fx = {
             "config": {
                 "weights": {"email": FUSION_EMAIL_W, "url": FUSION_URL_W},
                 "threshold_tau": THRESHOLD_TAU,
-                "overrides": {"url_high": URL_OVERRIDE_HIGH, "url_kw": URL_OVERRIDE_KW, "allowlist_safe_cap": ALLOWLIST_SAFE_CAP},
                 "model_ids": {"classifier": EMAIL_CLASSIFIER_ID, "backbone": EMAIL_BACKBONE_ID}
             },
             "email": {
-                "p_email": fused["P_email"],
                 "p_email_raw": email_dbg["p_raw"],
                 "boost_from_strong": email_dbg["boost_from_strong"],
                 "boost_from_normal": email_dbg["boost_from_normal"],
-                "path": email_dbg["path"],
                 "kw_hits": email_res.kw_hits,
-                "strong_hits": email_res.strong_hits
             },
             "urls": per_url,
             "fusion": {
-                "r_total_before_overrides": fused["R_total_before_overrides"],
-                "r_total_final": fused["R_total"],
-                "applied_overrides": fuse_dbg["applied_overrides"],
-                "no_urls": fused["no_urls"],
-                "allowlist_hit": fused["allowlist_hit"]
             },
             "timings_ms": {
                 "url_extract": round((t1 - t0) * 1000, 2),
                 "url_score": round((t2 - t1) * 1000, 2),
                 "email_infer": email_dbg["timing_ms"].get("email_infer"),
@@ -388,32 +465,37 @@ with gr.Blocks(title="PhishingMail-Lab") as demo:
             }
         }
-        # Forensics Markdown (human‑readable)
         lines = []
-        lines.append(f"**Verdict:** `{fused['verdict']}`  |  **R_total:** `{fused['R_total']}` (before overrides: `{fused['R_total_before_overrides']}`)")
-        lines.append(f"**Components:**  P_email=`{fused['P_email']}`,  R_url_max=`{fused['R_url_max']}`  |  Weights: email={FUSION_EMAIL_W}, url={FUSION_URL_W}")
         if fuse_dbg["applied_overrides"]:
-            lines.append(f"**Overrides applied:** {', '.join(fuse_dbg['applied_overrides'])}")
         else:
-            lines.append("**Overrides applied:** (none)")
         if fused["no_urls"]:
             lines.append("• No URLs found → email‑only decision path.")
         if fused["allowlist_hit"]:
             lines.append("• Allowlist matched → risk capped.")
         lines.append("")
-        lines.append(f"**Email path:** `{email_dbg['path'] or 'lexical-fallback'}`  |  p_raw={email_dbg['p_raw']}, +strong={email_dbg['boost_from_strong']}, +normal={email_dbg['boost_from_normal']}")
         if email_res.strong_hits:
             lines.append(f"• Strong cues: {', '.join(email_res.strong_hits)}")
         if email_res.kw_hits:
             lines.append(f"• All cues: {', '.join(email_res.kw_hits)}")
         lines.append("")
         if per_url:
-            lines.append("**URLs:**")
             for u in per_url:
-                lines.append(f"• {u['url']}  → risk={u['risk']}  ({', '.join(u['reasons']) or 'no reasons'})")
         else:
             lines.append("**URLs:** (none)")
         lines.append("")
         lines.append("**Timings (ms):** " + json.dumps(fx["timings_ms"]))
         forensic_markdown = "\n".join(lines)

+import os, re, json, time, math
 from dataclasses import dataclass
+from typing import List, Dict, Tuple, Optional
 import gradio as gr
     url: str
     risk: float
     reasons: List[str]
+    contrib: Dict[str, float]     # per‑reason contribution for transparency
 @dataclass
 class EmailResult:
+    p_email: float                # final probability after boosts
     kw_hits: List[str]
+    strong_hits: List[str]        # subset of kw_hits considered strong
+    token_counts: Dict[str, int]  # {"subject_tokens":..,"body_tokens":..,"sequence_len":..}
+    p_raw: Optional[float]        # raw model probability (before boosts); None in lexical fallback
+    path: Optional[str]           # "classifier" | "backbone" | None (lexical)
 # =========================
 # URL extraction & heuristics (swap with your real URL model when ready)
     return host
 def score_url_heuristic(url: str) -> UrlResult:
+    """
+    Heuristic scoring with a transparent per‑reason contribution map.
+    This keeps the POC explainable and makes the Forensics panel richer.
+    """
     host = url_host(url)
+    score = 0.0
     reasons = []
+    contrib = {}
+    def add(amount: float, tag: str):
+        nonlocal score
+        score += amount
+        reasons.append(tag)
+        contrib[tag] = round(contrib.get(tag, 0.0) + amount, 3)
+    base = 0.05
+    add(base, "base")
     if len(url) > 140:
+        add(0.15, "very_long_url")
     if "@" in url or "%" in url:
+        add(0.20, "special_chars")
     if any(host.endswith(t) for t in SUSPICIOUS_TLDS):
+        add(0.35, "suspicious_tld")
     if any(s in host for s in SHORTENERS):
+        add(0.50, "shortener")
     if host.count(".") >= 3:
+        add(0.20, "deep_subdomain")
     if len(re.findall(r"[A-Z]", url)) > 16:
+        add(0.10, "mixed_case")
+    score = min(score, 1.0)
+    return UrlResult(url=url, risk=score, reasons=reasons, contrib=contrib)
 def score_urls(urls: List[str]) -> List[UrlResult]:
     return [score_url_heuristic(u) for u in urls]
 _tokenizer = None
 _model = None
 _model_loaded_from = None  # "classifier", "backbone", or None
+_model_load_ms = None
+_model_quantized = False
 # Strong vs normal cues (lowercase)
 STRONG_CUES = [
 def load_email_model() -> Tuple[object, object, str]:
     """Try to load EMAIL_CLASSIFIER_ID; on failure, fall back to backbone with small head.
        Apply dynamic int8 quantization for CPU if available."""
+    global _tokenizer, _model, _model_loaded_from, _model_load_ms, _model_quantized
     if _tokenizer is not None and _model is not None:
         return _tokenizer, _model, _model_loaded_from
+    start = time.perf_counter()
     if AutoTokenizer is None or AutoModelForSequenceClassification is None or torch is None:
         _model_loaded_from = None
+        _model_load_ms = round((time.perf_counter() - start) * 1000, 2)
         return None, None, _model_loaded_from  # environment without torch/transformers
     # Preferred classifier
             _model_loaded_from = "backbone"
         except Exception:
             _tokenizer, _model, _model_loaded_from = None, None, None
+            _model_load_ms = round((time.perf_counter() - start) * 1000, 2)
             return None, None, _model_loaded_from
     # Dynamic quantization (CPU)
+    _model_quantized = False
     try:
         _model.eval()
         _model.to("cpu")
         if hasattr(torch, "quantization"):
             from torch.quantization import quantize_dynamic
             _model = quantize_dynamic(_model, {torch.nn.Linear}, dtype=torch.qint8)  # type: ignore
+            _model_quantized = True
     except Exception:
         pass
+    _model_load_ms = round((time.perf_counter() - start) * 1000, 2)
     return _tokenizer, _model, _model_loaded_from
 def _truncate_for_budget(tokens_subject: List[int], tokens_body: List[int], max_len: int, subj_budget: int):
     return subj + body
 def score_email(subject: str, body: str) -> Tuple[EmailResult, Dict]:
+    """Return EmailResult + debug dict with probability, hits, boosts, timings, token counts, and model info."""
+    dbg = {"path": None, "p_raw": None, "boost_from_strong": 0.0, "boost_from_normal": 0.0,
+           "timing_ms": {}, "token_counts": {}, "model_info": {}}
     t0 = time.perf_counter()
     text = (subject or "") + "\n" + (body or "")
     tok, mdl, path = load_email_model()
     dbg["path"] = path
+    dbg["model_info"] = {
+        "loaded_from": path,
+        "classifier_id": EMAIL_CLASSIFIER_ID,
+        "backbone_id": EMAIL_BACKBONE_ID,
+        "quantized": _model_quantized,
+        "model_load_ms": _model_load_ms
+    }
     if tok is None or mdl is None:
         # Pure lexical fallback (no model available):
         dbg["boost_from_strong"] = 0.18 * len(strong_hits)
         dbg["boost_from_normal"] = 0.07 * len(normal_hits)
         dbg["timing_ms"]["email_infer"] = round((time.perf_counter() - t0) * 1000, 2)
+        dbg["token_counts"] = {"subject_tokens": 0, "body_tokens": 0, "sequence_len": 0}
+        return EmailResult(
+            p_email=p_email, kw_hits=all_hits, strong_hits=strong_hits,
+            token_counts=dbg["token_counts"], p_raw=None, path=path
+        ), dbg
     # Model path (MiniLM or your classifier)
     enc_t0 = time.perf_counter()
     with torch.no_grad():
         out = mdl(input_ids=ids, attention_mask=mask)
     if hasattr(out, "logits"):
         logits = out.logits[0].detach().cpu().numpy().tolist()
         exps = [math.exp(x) for x in logits]
+        p_raw = float(exps[1] / (exps[0] + exps[1]))  # assume label 1 = phishing
     else:
         p_raw = 0.5
     dbg["boost_from_strong"] = round(boost_s, 3)
     dbg["boost_from_normal"] = round(boost_n, 3)
     dbg["timing_ms"]["email_infer"] = round((time.perf_counter() - enc_t0) * 1000, 2)
+    dbg["token_counts"] = {
+        "subject_tokens": len(encoded_subj),
+        "body_tokens": len(encoded_body),
+        "sequence_len": len(input_ids)
+    }
+    return EmailResult(
+        p_email=p_email, kw_hits=all_hits, strong_hits=strong_hits,
+        token_counts=dbg["token_counts"], p_raw=p_raw, path=path
+    ), dbg
 # =========================
 # Fusion
     fused = {
         "P_email": round(email_res.p_email, 3),
+        "P_email_raw": round(email_res.p_raw, 3) if email_res.p_raw is not None else None,
         "R_url_max": round(r_url_max, 3),
         "R_total": round(r_after, 3),
         "R_total_before_overrides": round(r_before, 3),
         "kw_hits": email_res.kw_hits,
         "strong_hits": email_res.strong_hits,
+        "token_counts": email_res.token_counts,
         "no_urls": no_urls,
         "allowlist_hit": allowlist_hit,
         "verdict": verdict
 # Gradio UI
 # =========================
 with gr.Blocks(title="PhishingMail-Lab") as demo:
+    gr.Markdown("# 🧪 PhishingMail‑Lab\n**POC** — Free‑tier friendly hybrid (email + URL) with explainable cues and rich forensics.")
     with gr.Row():
         with gr.Column(scale=3):
         # URL pipeline
         t0 = time.perf_counter()
+        raw_text = (subject_text or "") + "\n" + (body_text or "")
+        urls = list(dict.fromkeys(extract_urls(raw_text)))  # uniq & ordered
         t1 = time.perf_counter()
         url_results = score_urls(urls)
         t2 = time.perf_counter()
         banner_text = "<br>".join(banners) if banners else ""
         banner_visible = bool(banners)
+        # Forensics JSON (deeper detail)
+        per_url = [{
+            "url": u.url,
+            "risk": round(u.risk,3),
+            "reasons": u.reasons,
+            "contrib": u.contrib
+        } for u in url_results]
         fx = {
             "config": {
                 "weights": {"email": FUSION_EMAIL_W, "url": FUSION_URL_W},
                 "threshold_tau": THRESHOLD_TAU,
+                "overrides": {
+                    "url_high": URL_OVERRIDE_HIGH,
+                    "url_kw": URL_OVERRIDE_KW,
+                    "allowlist_safe_cap": ALLOWLIST_SAFE_CAP
+                },
                 "model_ids": {"classifier": EMAIL_CLASSIFIER_ID, "backbone": EMAIL_BACKBONE_ID}
             },
+            "input_summary": {
+                "chars_subject": len(subject_text or ""),
+                "chars_body": len(body_text or ""),
+                "num_urls": len(urls),
+                "allowlist_domains": allow_domains
+            },
             "email": {
+                "path": email_dbg["path"] or "lexical-fallback",
+                "p_email_final": fused["P_email"],
                 "p_email_raw": email_dbg["p_raw"],
                 "boost_from_strong": email_dbg["boost_from_strong"],
                 "boost_from_normal": email_dbg["boost_from_normal"],
+                "token_counts": email_dbg["token_counts"],
                 "kw_hits": email_res.kw_hits,
+                "strong_hits": email_res.strong_hits,
+                "model_info": email_dbg["model_info"]
             },
             "urls": per_url,
             "fusion": {
+                "equation": f"R_total = {FUSION_EMAIL_W} * P_email + {FUSION_URL_W} * R_url_max",
+                "values": {
+                    "P_email": fused["P_email"],
+                    "R_url_max": fused["R_url_max"],
+                    "R_total_before_overrides": fused["R_total_before_overrides"],
+                    "R_total_final": fused["R_total"],
+                    "overrides_applied": fuse_dbg["applied_overrides"]
+                },
+                "decision": {
+                    "threshold_tau": THRESHOLD_TAU,
+                    "verdict": fused["verdict"]
+                },
+                "flags": {
+                    "no_urls": fused["no_urls"],
+                    "allowlist_hit": fused["allowlist_hit"]
+                }
             },
             "timings_ms": {
+                "model_load": email_dbg["model_info"]["model_load_ms"],
                 "url_extract": round((t1 - t0) * 1000, 2),
                 "url_score": round((t2 - t1) * 1000, 2),
                 "email_infer": email_dbg["timing_ms"].get("email_infer"),
             }
         }
+        # Forensics Markdown (human‑readable, denser detail)
         lines = []
+        lines.append(f"**Verdict:** `{fused['verdict']}`  |  **R_total:** `{fused['R_total']}`  (before: `{fused['R_total_before_overrides']}`)  |  **τ:** `{THRESHOLD_TAU}`")
+        lines.append(f"**Fusion:**  R = {FUSION_EMAIL_W}×P_email + {FUSION_URL_W}×R_url_max  →  {FUSION_EMAIL_W}×{fused['P_email']} + {FUSION_URL_W}×{fused['R_url_max']}")
         if fuse_dbg["applied_overrides"]:
+            lines.append(f"**Overrides:** {', '.join(fuse_dbg['applied_overrides'])}")
         else:
+            lines.append("**Overrides:** (none)")
         if fused["no_urls"]:
             lines.append("• No URLs found → email‑only decision path.")
         if fused["allowlist_hit"]:
             lines.append("• Allowlist matched → risk capped.")
         lines.append("")
+        lines.append(f"**Email path:** `{email_dbg['path'] or 'lexical-fallback'}`  |  p_raw={email_dbg['p_raw']}  |  +strong={email_dbg['boost_from_strong']}  |  +normal={email_dbg['boost_from_normal']}")
+        tc = email_dbg["token_counts"]
+        lines.append(f"• Tokens: subject={tc.get('subject_tokens',0)}, body={tc.get('body_tokens',0)}, sequence_len={tc.get('sequence_len',0)} (max={MAX_SEQ_LEN})  |  subject_budget={SUBJECT_TOKEN_BUDGET}")
         if email_res.strong_hits:
             lines.append(f"• Strong cues: {', '.join(email_res.strong_hits)}")
         if email_res.kw_hits:
             lines.append(f"• All cues: {', '.join(email_res.kw_hits)}")
         lines.append("")
         if per_url:
+            lines.append("**URLs & contributions:**")
             for u in per_url:
+                contrib_str = ", ".join([f"{k}:{v}" for k,v in u["contrib"].items()])
+                lines.append(f"• {u['url']}  → risk={u['risk']}  |  reasons=({', '.join(u['reasons']) or 'none'})  |  contrib=({contrib_str or 'n/a'})")
         else:
             lines.append("**URLs:** (none)")
         lines.append("")
+        lines.append(f"**Model info:** loaded_from={email_dbg['model_info']['loaded_from']}, quantized={email_dbg['model_info']['quantized']}, load_ms={email_dbg['model_info']['model_load_ms']}")
+        lines.append("")
         lines.append("**Timings (ms):** " + json.dumps(fx["timings_ms"]))
         forensic_markdown = "\n".join(lines)