Update app.py
app.py CHANGED
@@ -46,6 +46,7 @@ def llm_response(history,text,img):
     return history

 # Function that takes User Inputs and displays it on ChatUI
+text_box_01 = "what is in the image"
 def output_query_message(txt,img):
     if not img:
         return txt
@@ -71,33 +72,26 @@ def output_llm_response(text,img):
 def sentence_builder(animal, place):
     return f"""how many {animal}s from the {place} are shown in the picture?"""

-
 # gradio block

 with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
-    title =
+    title = 'line clearance'
     with gr.Column():
-        outputbox = gr.Textbox(label="
-        text_box = gr.Dropdown(
-            ["what is in the image",
-             "provide alternative title for the image",
-             "how many birds can be seen in the picture?"],
-            label="Prompts", info="Will add more animals later!"
-        )
+        outputbox = gr.Textbox(label="result here...")
         image_box = gr.Image(type="filepath")

-        btn = gr.Button("
+        btn = gr.Button("Check This")
         clicked = btn.click(output_query_message,
-                            [text_box,image_box],
+                            [text_box_01,image_box],
                             outputbox
                             ).then(output_llm_response,
-                                   [text_box,image_box],
+                                   [text_box_01,image_box],
                                    outputbox
                                    )
         gr.Markdown("""
-
+        ## SOP-302: Line Clearance ##

-        <h5 align="center"><i>"
+        <h5 align="center"><i>"XXXX here here."</i></h5>

         Multimodal-CoT incorporates vision features in a decoupled training framework. The framework consists of two training stages: (i) rationale generation and (ii) answer inference. Both stages share the same model architecture but differ in the input and output.
         """)
@@ -128,8 +122,8 @@ with gr.Blocks(theme='snehilsanyal/scikit-learn') as app2:
              chatbot
             )
 with gr.Blocks(theme='snehilsanyal/scikit-learn') as demo:
-    gr.Markdown("
-    gr.TabbedInterface([app1, app2], ["
+    gr.Markdown("## SOP Camera ##")
+    gr.TabbedInterface([app1, app2], ["Check #1", "Check #2"])

 demo.queue()
 demo.launch()
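For context, below is a minimal, self-contained sketch of the wiring this commit ends up with for the first tab. The handler bodies are stubs (the real `output_query_message` and `output_llm_response` live elsewhere in app.py), and the hidden `prompt_box` Textbox is an assumption, not something in the diff: Blocks event listeners in recent Gradio versions expect component instances as `inputs`, so passing the raw Python string `text_box_01` as the diff does would likely fail at runtime.

```python
import gradio as gr

# Stub handlers standing in for the Space's real functions; the actual
# bodies live elsewhere in app.py and are assumed here for the sketch.
def output_query_message(txt, img):
    if not img:
        return txt
    return f"Query: {txt}"

def output_llm_response(txt, img):
    return f"LLM response for: {txt!r}"

text_box_01 = "what is in the image"

with gr.Blocks() as app1:
    with gr.Column():
        outputbox = gr.Textbox(label="result here...")
        # Event inputs must be components, so the fixed prompt is held
        # in a hidden Textbox rather than passed as a raw string
        # (an assumption; the commit passes the string directly).
        prompt_box = gr.Textbox(value=text_box_01, visible=False)
        image_box = gr.Image(type="filepath")
        btn = gr.Button("Check This")
        # btn.click() returns an event dependency; .then() chains a
        # second callback that runs after the first one finishes.
        btn.click(output_query_message,
                  [prompt_box, image_box],
                  outputbox
                  ).then(output_llm_response,
                         [prompt_box, image_box],
                         outputbox)

if __name__ == "__main__":
    app1.queue()
    app1.launch()
```

The `.click(...).then(...)` chain lets the UI echo the query immediately and then overwrite it with the model's answer, and `queue()` keeps long-running LLM calls from blocking other requests; the same pattern carries over unchanged when `app1` and `app2` are composed into the tabbed `demo` at the bottom of the diff.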