prithivMLmods committed
Commit 08ebc2c · verified · 1 Parent(s): 8b08643

Update app.py

Files changed (1): app.py (+9, -4)
app.py CHANGED
@@ -129,6 +129,7 @@ MODEL_ID_V = "nanonets/Nanonets-OCR2-3B"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 model_v = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -138,6 +139,7 @@ MODEL_ID_X = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
 model_x = Qwen2VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -147,6 +149,7 @@ MODEL_ID_A = "CohereForAI/aya-vision-8b"
 processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
 model_a = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID_A,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -156,6 +159,7 @@ MODEL_ID_W = "allenai/olmOCR-7B-0725"
 processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
 model_w = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_W,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -165,6 +169,7 @@ MODEL_ID_M = "reducto/RolmOCR"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -245,8 +250,8 @@ image_examples = [
 ]
 
 # Create the Gradio Interface
-with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
-    gr.Markdown("# **Multimodal [OCR](https://huggingface.co/collections/prithivMLmods/multimodal-implementations)**", elem_id="main-title")
+with gr.Blocks() as demo:
+    gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
             image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
@@ -267,7 +272,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
 
         with gr.Column(scale=3):
             gr.Markdown("## Output", elem_id="output-title")
-            output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=11, show_copy_button=True)
+            output = gr.Textbox(label="Raw Output Stream", interactive=True, lines=11)
             with gr.Accordion("(Result.md)", open=False):
                 markdown_output = gr.Markdown(label="(Result.Md)")
 
@@ -285,4 +290,4 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     )
 
 if __name__ == "__main__":
-    demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
+    demo.queue(max_size=50).launch(css=css, theme=steel_blue_theme, mcp_server=True, ssr_mode=False, show_error=True)
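For context, the `attn_implementation="flash_attention_2"` argument added to each `from_pretrained` call in this commit requires the flash-attn package and a supported CUDA GPU; without them, model loading will raise an error. Below is a minimal sketch, not part of this commit, of how one of these checkpoints could be loaded with an optional fallback to the default attention backend. The guard logic and the `extra_kwargs` name are illustrative assumptions, not code from the Space.

```python
# Sketch only: load one checkpoint with FlashAttention-2 when available,
# otherwise let transformers pick its default attention implementation.
import importlib.util

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_ID_V = "nanonets/Nanonets-OCR2-3B"

# Only request flash_attention_2 if the flash-attn package is installed
# and a CUDA device is present (assumed fallback; the commit hard-codes it).
extra_kwargs = {}
if device == "cuda" and importlib.util.find_spec("flash_attn") is not None:
    extra_kwargs["attn_implementation"] = "flash_attention_2"

processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
model_v = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID_V,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    **extra_kwargs,
).to(device).eval()
```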