apersonnaz committed on
Commit
5ed6105
·
1 Parent(s): d757a86

FastAPI app with models and endpoint

Browse files
Files changed (2) hide show
  1. __pycache__/main.cpython-310.pyc +0 -0
  2. main.py +77 -0
__pycache__/main.cpython-310.pyc ADDED
Binary file (2.66 kB). View file
 
main.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ import spacy
3
+ import json
4
+ from typing import List
5
+ from bs4 import BeautifulSoup
6
+ from markdown import markdown
7
+
8
# A tag is reported only when its predicted score is strictly greater than this.
TAG_PROBABILITY_THRESHOLD = 0.5

app = FastAPI()

# Both spaCy pipelines are loaded once, at import time, from directories
# resolved relative to the current working directory.
nlp_text = spacy.load("./spacy_text_model")
nlp_code = spacy.load("./spacy_code_model")

# Tags the service is allowed to return. JSON is read explicitly as UTF-8 so the
# result does not depend on the platform's default locale encoding.
with open('selected_tags.json', 'r', encoding='utf-8') as openfile:
    selected_tags = json.load(openfile)
17
+
18
+
19
def preprocess(texts, max_tokens=512):
    """Reduce each text to a space-joined string of lower-cased alphabetic tokens.

    Every text is run through the module-level ``nlp_text`` pipeline with the
    listed components disabled. A token is kept when its ``pos_`` is not in the
    removal set and it is purely alphabetic; at most ``max_tokens`` tokens are
    kept per text.

    Args:
        texts: Iterable of raw strings.
        max_tokens: Upper bound on the number of tokens kept per text.
            Defaults to 512 (presumably a model input limit -- TODO confirm).

    Returns:
        A list containing one cleaned string per input text, in input order.
        A text with no surviving tokens yields ``""``.
    """
    removal = {'PUNCT', 'SPACE', 'NUM', 'SYM'}
    disabled = ["transformer", "tagger", "parser",
                "attribute_ruler", "lemmatizer", "ner"]
    cleaned_texts = []
    for doc in nlp_text.pipe(texts, disable=disabled):
        kept = []
        for token in doc:
            # Stop scanning as soon as the cap is reached instead of walking
            # the remainder of a potentially very long document.
            if len(kept) >= max_tokens:
                break
            if token.pos_ not in removal and token.is_alpha:
                kept.append(token.lower_)
        cleaned_texts.append(" ".join(kept))
    return cleaned_texts
30
+
31
+
32
def get_text_and_code(body):
    """Split a Markdown document into its prose and its code.

    The Markdown is rendered to HTML; the text of every ``<code>`` element is
    collected, those elements are then removed from the tree, and the
    remaining text nodes form the prose.

    Args:
        body: Markdown source string.

    Returns:
        A ``(text, code)`` tuple of newline-joined strings: ``text`` is the
        content outside ``<code>`` elements, ``code`` is the content inside
        them.
    """
    html = markdown(body)
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the output could differ
    # between environments.
    soup = BeautifulSoup(html, "html.parser")
    code_nodes = soup.find_all('code')
    # Read the code text before the nodes are removed from the tree.
    code = '\n'.join(node.text for node in code_nodes)
    for node in code_nodes:
        node.decompose()
    text = '\n'.join(soup.find_all(string=True))
    return text, code
41
+
42
+
43
def _predict_scores(nlp, cleaned_docs):
    """Return one ``{tag: score}`` mapping per document, read from ``doc.cats``.

    Documents whose cleaned text is empty are not scored by the pipeline's
    output; they get a score of 0 for every selected tag.
    """
    scores = []
    for doc in nlp.pipe(cleaned_docs):
        if doc.text != '':
            scores.append(doc.cats)
        else:
            scores.append(dict.fromkeys(selected_tags, 0))
    return scores


@app.post("/infer_tags")
async def infer_tags(questions: List[str]):
    """Infer tags for a batch of Markdown questions.

    Each question is split into prose and code; both parts are cleaned with
    ``preprocess`` and scored by ``nlp_text`` and ``nlp_code`` respectively.
    A tag is returned for a question when either score is strictly greater
    than ``TAG_PROBABILITY_THRESHOLD``.

    Args:
        questions: Markdown bodies, one per question.

    Returns:
        A list with one list of tags per question, in input order. Tags are
        unique and follow the order of ``selected_tags`` so the response is
        deterministic.
    """
    texts = []
    codes = []
    for question in questions:
        text, code = get_text_and_code(question)
        texts.append(text)
        codes.append(code)

    texts_preprocessed = preprocess(texts)
    codes_preprocessed = preprocess(codes)

    pred_text = _predict_scores(nlp_text, texts_preprocessed)
    pred_code = _predict_scores(nlp_code, codes_preprocessed)

    union_tags = []
    for text_scores, code_scores in zip(pred_text, pred_code):
        # dict.fromkeys de-duplicates while preserving selected_tags order.
        matched = dict.fromkeys(
            tag for tag in selected_tags
            if text_scores[tag] > TAG_PROBABILITY_THRESHOLD
            or code_scores[tag] > TAG_PROBABILITY_THRESHOLD
        )
        union_tags.append(list(matched))
    return union_tags