Spaces:

LukeJacob2023
/

moondream-webui

Runtime error

App Files Files Community

LukeJacob2023 committed on Aug 30, 2024

Commit

9328f5f

verified ·

1 Parent(s): 2fb11d3

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -65

app.py CHANGED Viewed

@@ -1,65 +1,65 @@
-import spaces
-import torch
-import re
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from PIL import Image
-if torch.cuda.is_available():
-    device, dtype = "cuda", torch.float16
-else:
-    device, dtype = "cpu", torch.float32
-model_id = "vikhyatk/moondream2"
-revision = "2024-08-26"
-tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
-moondream = AutoModelForCausalLM.from_pretrained(
-    model_id, trust_remote_code=True, revision=revision, torch_dtype=dtype
-).to(device=device)
-moondream.eval()
-@spaces.GPU
-def answer_questions(image_tuples, prompt_text):
-    result = ""
-    Q_and_A = ""
-    prompts = [p.strip() for p in prompt_text.split(',')]
-    image_embeds = [img[0] for img in image_tuples if img[0] is not None]
-    #print(f"\nprompts: {prompts}\n\n")
-    answers = []
-    for prompt in prompts:
-        image_answers = moondream.batch_answer(
-            images=[img.convert("RGB") for img in image_embeds],
-            prompts=[prompt] * len(image_embeds),
-            tokenizer=tokenizer,
-        )
-        answers.append(image_answers)
-    for i, prompt in enumerate(prompts):
-        Q_and_A += f"### Q: {prompt}\n"
-        for j, image_tuple in enumerate(image_tuples):
-            image_name = f"image{j+1}"
-            answer_text = answers[i][j]
-            Q_and_A += f"**{image_name} A:** \n {answer_text} \n\n"
-    result = {'headers': prompts, 'data': answers}
-    #print(f"result\n{result}\n\nQ_and_A\n{Q_and_A}\n\n")
-    return Q_and_A, result
-with gr.Blocks() as demo:
-    gr.Markdown("# MoonDream WebUI")
-    gr.Markdown("## 🌔 Modify by https://huggingface.co/spaces/Csplk/moondream2-batch-processing")
-    gr.Markdown("## 🌔 moondream2\nA tiny vision language model. [GitHub](https://github.com/vikhyatk/moondream)")
-    with gr.Row():
-        img = gr.Gallery(label="Upload Images", type="pil", preview=True, columns=4)
-    with gr.Row():
-        prompt = gr.Textbox(label="Input Prompts", placeholder="Enter prompts (one prompt for each image provided) separated by commas. Ex: Describe this image, What is in this image?", lines=8)
-    with gr.Row():
-        submit = gr.Button("Submit")
-    with gr.Row():
-        output = gr.Markdown(label="Questions and Answers", line_breaks=True)
-    with gr.Row():
-        output2 = gr.Dataframe(label="Structured Dataframe", type="array", wrap=True)
-    submit.click(answer_questions, [img, prompt], [output, output2])
-demo.queue().launch()

+import spaces
+import torch
+import re
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from PIL import Image
+if torch.cuda.is_available():
+    device, dtype = "cuda", torch.float16
+else:
+    device, dtype = "cpu", torch.float32
+model_id = "vikhyatk/moondream2"
+revision = "2024-08-26"
+tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
+moondream = AutoModelForCausalLM.from_pretrained(
+    model_id, trust_remote_code=True, revision=revision, torch_dtype=dtype
+).to(device=device)
+moondream.eval()
+@spaces.GPU
+def answer_questions(image_tuples, prompt_text):
+    result = ""
+    Q_and_A = ""
+    prompts = [p.strip() for p in prompt_text.split(',')]
+    image_embeds = [img[0] for img in image_tuples if img[0] is not None]
+    #print(f"\nprompts: {prompts}\n\n")
+    answers = []
+    for prompt in prompts:
+        image_answers = moondream.batch_answer(
+            images=[img.convert("RGB") for img in image_embeds],
+            prompts=[prompt] * len(image_embeds),
+            tokenizer=tokenizer,
+        )
+        answers.append(image_answers)
+    for i, prompt in enumerate(prompts):
+        Q_and_A += f"### Q: {prompt}\n"
+        for j, image_tuple in enumerate(image_tuples):
+            image_name = f"image{j+1}"
+            answer_text = answers[i][j]
+            Q_and_A += f"**{image_name} A:** \n {answer_text} \n\n"
+    result = {'headers': prompts, 'data': answers}
+    #print(f"result\n{result}\n\nQ_and_A\n{Q_and_A}\n\n")
+    return Q_and_A, result
+with gr.Blocks() as demo:
+    gr.Markdown("# MoonDream WebUI")
+    gr.Markdown("## 🌔 WebUI is modify by https://huggingface.co/spaces/Csplk/moondream2-batch-processing")
+    gr.Markdown("## 🌔 moondream2 - A tiny vision language model. [GitHub](https://github.com/vikhyatk/moondream)")
+    with gr.Row():
+        img = gr.Gallery(label="Upload Images", type="pil", preview=True, columns=4)
+    with gr.Row():
+        prompt = gr.Textbox(label="Input Prompts", placeholder="Enter prompts (one prompt for each image provided) separated by commas. Ex: Describe this image, What is in this image?", lines=8)
+    with gr.Row():
+        submit = gr.Button("Submit")
+    with gr.Row():
+        output = gr.Markdown(label="Questions and Answers", line_breaks=True)
+    with gr.Row():
+        output2 = gr.Dataframe(label="Structured Dataframe", type="array", wrap=True)
+    submit.click(answer_questions, [img, prompt], [output, output2])
+demo.queue().launch()