Spaces:

apersonnaz
/

so_tag_api

Sleeping

App Files Files Community

apersonnaz commited on Mar 28, 2023

Commit

5ed6105

1 Parent(s): d757a86

FastAPI app with models and endpoint

Browse files

Files changed (2) hide show

__pycache__/main.cpython-310.pyc +0 -0
main.py +77 -0

__pycache__/main.cpython-310.pyc ADDED Viewed

Binary file (2.66 kB). View file

main.py ADDED Viewed

	@@ -0,0 +1,77 @@

+from fastapi import FastAPI
+import spacy
+import json
+from typing import List
+from bs4 import BeautifulSoup
+from markdown import markdown
+TAG_PROBABILITY_THRESHOLD = 0.5
+app = FastAPI()
+nlp_text = spacy.load("./spacy_text_model")
+nlp_code = spacy.load("./spacy_code_model")
+with open('selected_tags.json', 'r') as openfile:
+    selected_tags = json.load(openfile)
+def preprocess(texts):
+    tokens = []
+    removal = ['PUNCT', 'SPACE', 'NUM', 'SYM']
+    cleaned_texts = []
+    for summary in nlp_text.pipe(texts, disable=["transformer", "tagger", "parser", "attribute_ruler", "lemmatizer", "ner"]):
+        question_tokens = []
+        for token in summary:
+            if token.pos_ not in removal and token.is_alpha and len(question_tokens) < 512:
+                question_tokens.append(token.lower_)
+        cleaned_texts.append(" ".join(question_tokens))
+    return cleaned_texts
+def get_text_and_code(body):
+    html = markdown(body)
+    bs = BeautifulSoup(html)
+    codes = bs.findAll('code')
+    code = '\n'.join([x.text for x in codes])
+    for x in codes:
+        x.decompose()
+    text = '\n'.join(bs.findAll(text=True))
+    return text, code
+@app.post("/infer_tags")
+async def infer_tags(questions: List[str]):
+    results = []
+    texts = []
+    codes = []
+    for question in questions:
+        text, code = get_text_and_code(question)
+        texts.append(text)
+        codes.append(code)
+    texts_preprocessed = preprocess(texts)
+    codes_preprocessed = preprocess(codes)
+    pred_text = []
+    pred_code = []
+    for summary in nlp_text.pipe(texts_preprocessed):
+        if summary.text != '':
+            pred_text.append(summary.cats)
+        else:
+            pred_text.append(dict.fromkeys(selected_tags, 0))
+    for summary in nlp_code.pipe(codes_preprocessed):
+        if summary.text != '':
+            pred_code.append(summary.cats)
+        else:
+            pred_code.append(dict.fromkeys(selected_tags, 0))
+    text_tags = [[x for x in selected_tags if y[x] > TAG_PROBABILITY_THRESHOLD]
+                 for y in pred_text]
+    code_tags = [[x for x in selected_tags if y[x] > TAG_PROBABILITY_THRESHOLD]
+                 for y in pred_code]
+    union_tags = []
+    for i in range(len(text_tags)):
+        union_tags.append(list(set(text_tags[i]) | set(code_tags[i])))
+    return union_tags