Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -34,6 +34,25 @@ def caption(input_image):
 import openai
 import os
 openai.api_key= os.getenv('openai_appkey')
+def gpt3_short(question,vqa_answer,caption):
+    vqa_answer,vqa_score=vqa_answer
+    prompt="prompt: This is a picture of Caption: "+caption+". Question: "+question+" VQA model predicts:"+"A: "+vqa_answer[0]+"socre:"+str(vqa_score[0])+\
+    " B: "+vqa_answer[1]+" score:"+str(vqa_score[1])+" C: "+vqa_answer[2]+" score:"+str(vqa_score[2])+\
+    " D: "+vqa_answer[3]+'score:'+str(vqa_score[3])+\
+    ". Choose A if it is not in conflict with the description of the picture and A's score is bigger than 0.8; otherwise choose the B, C or D based on the description."
+
+    # prompt=caption+"\n"+question+"\n"+vqa_answer+"\n Tell me the right answer."
+    response = openai.Completion.create(
+        engine="text-davinci-003",
+        prompt=prompt,
+        max_tokens=30,
+        n=1,
+        stop=None,
+        temperature=0.7,
+    )
+    answer = response.choices[0].text.strip()
+
+    return answer
 def gpt3(question,vqa_answer,caption):
     prompt=caption+"\n"+question+"\n"+vqa_answer+"\n Tell me the right answer."
     response = openai.Completion.create(
@@ -51,7 +70,7 @@ def gpt3(question,vqa_answer,caption):
 def vle(input_image,input_text):
     vqa_answers = vqa_pipeline({"image":input_image, "question":input_text}, top_k=4)
     # return [" ".join([str(value) for key,value in vqa.items()] )for vqa in vqa_answers]
-    return [vqa['answer'] for vqa in vqa_answers]
+    return [vqa['answer'] for vqa in vqa_answers],[vqa['score'] for vqa in vqa_answers]
 def inference_chat(input_image,input_text):
     cap=caption(input_image)
     # inputs = processor(images=input_image, text=input_text,return_tensors="pt")
@@ -62,10 +81,10 @@ def inference_chat(input_image,input_text):
     # out=processor.batch_decode(out, skip_special_tokens=True)

     out=vle(input_image,input_text)
-    vqa="\n".join(out)
+    vqa="\n".join(out[0])
     gpt3_out=gpt3(input_text,vqa,cap)
-    gpt3_out1=
-    return out[0], gpt3_out,gpt3_out1
+    gpt3_out1=gpt3_short(input_text,out,cap)
+    return out[0][0], gpt3_out,gpt3_out1
 title = """<h1 align="center">VQA</h1>"""
 with gr.Blocks(
     css="""
@@ -105,12 +124,12 @@ with gr.Blocks(
     gpt3_output_v1 = gr.Textbox(lines=0, label="VQA+LLM (long answer)")


-    image_input.change(
-        lambda: ("", [],"","",""),
-        [],
-        [ caption_output, state,caption_output,gpt3_output_v1,caption_output_v1],
-        queue=False,
-    )
+    # image_input.change(
+    #     lambda: ("", [],"","",""),
+    #     [],
+    #     [ caption_output, state,caption_output,gpt3_output_v1,caption_output_v1],
+    #     queue=False,
+    # )
     chat_input.submit(
         inference_chat,
         [
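
For context, the new gpt3_short path turns the top-4 VQA candidates and their confidence scores into a multiple-choice prompt (A-D) and asks the LLM to rerank them. Below is a minimal, self-contained sketch of that prompt construction; build_short_prompt and the sample answers/scores are hypothetical stand-ins for the pipeline output, the string layout is tidied relative to the committed inline concatenation (including its "socre" typo), and the API call is omitted.

# Sketch of the multiple-choice rerank prompt built by gpt3_short.
# build_short_prompt and the sample data are hypothetical; only the
# prompt wording mirrors the commit.

def build_short_prompt(question, answers, scores, caption):
    # label the four candidates A-D, each with its confidence score
    body = " ".join(f"{c}: {a} score: {s}"
                    for c, a, s in zip("ABCD", answers, scores))
    return (
        "This is a picture of Caption: " + caption
        + ". Question: " + question
        + " VQA model predicts: " + body
        + ". Choose A if it is not in conflict with the description of the"
          " picture and A's score is bigger than 0.8; otherwise choose"
          " B, C or D based on the description."
    )

if __name__ == "__main__":
    # hypothetical top_k=4 output from the VQA pipeline
    answers = ["dog", "cat", "puppy", "wolf"]
    scores = [0.91, 0.05, 0.03, 0.01]
    print(build_short_prompt("What animal is this?", answers, scores,
                             "a dog lying on a sofa"))

The commit then sends this prompt through openai.Completion.create(engine="text-davinci-003", ...), the legacy pre-1.0 completions API, with max_tokens=30 to keep the "short answer" short.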
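The indexing changes in inference_chat follow from vle's new return shape: it now returns an (answers, scores) pair instead of a flat answer list, which is also what gpt3_short unpacks (the previous revision's bare gpt3_out1= assignment was a syntax error). A small stubbed illustration; vle_stub and its sample results are hypothetical:

# vle used to return [answer, ...]; after this commit it returns
# ([answer, ...], [score, ...]), hence out[0] -> out[0][0] below.

def vle_stub(input_image, input_text):
    # stand-in for vqa_pipeline({"image": ..., "question": ...}, top_k=4)
    results = [{"answer": "dog", "score": 0.91},
               {"answer": "cat", "score": 0.05},
               {"answer": "puppy", "score": 0.03},
               {"answer": "wolf", "score": 0.01}]
    return [r["answer"] for r in results], [r["score"] for r in results]

out = vle_stub(None, "What animal is this?")
vqa = "\n".join(out[0])  # join the answers only; joining the tuple itself raises TypeError
top_answer = out[0][0]   # best candidate, the first value returned to the UI
print(top_answer)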