h-xml committed on
Commit
4fdef1d
·
verified ·
1 Parent(s): 19a8d7d

Update app.py: bind server to 0.0.0.0 and disable SSR mode

Browse files
Files changed (1) hide show
  1. app.py +320 -319
app.py CHANGED
@@ -1,319 +1,320 @@
1
- """
2
- BabelDocs x Agentic AI MCP - Gradio Application
3
-
4
- PDF Translation with Google Drive Integration.
5
- Accepts public GDrive links or local file uploads.
6
-
7
- For Anthropic Hackathon - Track 1: Building MCP
8
-
9
- Usage:
10
- python app.py
11
- """
12
-
13
- import os
14
- import re
15
- import base64
16
- import tempfile
17
- import httpx
18
- import gradio as gr
19
- from pathlib import Path
20
- from datetime import datetime
21
- from dotenv import load_dotenv
22
-
23
- load_dotenv()
24
-
25
- # Modal endpoint configuration
26
- # Set BABELDOCS_MODAL_URL as HuggingFace Space secret for production
27
- MODAL_BASE_URL = os.getenv("BABELDOCS_MODAL_URL")
28
- if not MODAL_BASE_URL:
29
- raise ValueError("BABELDOCS_MODAL_URL environment variable required. Set it as a HuggingFace Space secret.")
30
- MODAL_TRANSLATE_URL = f"{MODAL_BASE_URL}-babeldocstranslator-api.modal.run"
31
- MODAL_HEALTH_URL = f"{MODAL_BASE_URL}-babeldocstranslator-health.modal.run"
32
-
33
- # Max pages limit (test phase)
34
- MAX_PAGES = 20
35
-
36
- # Supported languages
37
- LANGUAGES = {
38
- "fr": "French",
39
- "en": "English",
40
- "es": "Spanish",
41
- "de": "German",
42
- "it": "Italian",
43
- "pt": "Portuguese",
44
- "zh": "Chinese",
45
- "ja": "Japanese",
46
- "ko": "Korean",
47
- "ru": "Russian",
48
- "ar": "Arabic",
49
- }
50
-
51
-
52
- def log_message(logs: list, message: str) -> list:
53
- """Add timestamped message to logs."""
54
- timestamp = datetime.now().strftime("%H:%M:%S")
55
- logs.append(f"[{timestamp}] {message}")
56
- return logs
57
-
58
-
59
- def extract_gdrive_file_id(url: str) -> str | None:
60
- """Extract file ID from Google Drive URL."""
61
- patterns = [
62
- r"/file/d/([a-zA-Z0-9_-]+)",
63
- r"id=([a-zA-Z0-9_-]+)",
64
- r"/d/([a-zA-Z0-9_-]+)",
65
- ]
66
- for pattern in patterns:
67
- match = re.search(pattern, url)
68
- if match:
69
- return match.group(1)
70
- return None
71
-
72
-
73
- async def download_gdrive_public(url: str) -> tuple[bytes, str]:
74
- """Download file from public Google Drive link.
75
-
76
- Returns (file_bytes, filename).
77
- """
78
- file_id = extract_gdrive_file_id(url)
79
- if not file_id:
80
- raise ValueError("Invalid Google Drive URL")
81
-
82
- # Direct download URL
83
- download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
84
-
85
- async with httpx.AsyncClient(timeout=120.0, follow_redirects=True) as client:
86
- response = await client.get(download_url)
87
- response.raise_for_status()
88
-
89
- # Try to get filename from Content-Disposition header
90
- content_disp = response.headers.get("Content-Disposition", "")
91
- filename_match = re.search(r'filename="?([^";\n]+)"?', content_disp)
92
- if filename_match:
93
- filename = filename_match.group(1)
94
- else:
95
- filename = f"gdrive_{file_id}.pdf"
96
-
97
- return response.content, filename
98
-
99
-
100
- async def translate_pdf_modal(
101
- pdf_file,
102
- gdrive_url: str,
103
- target_lang: str,
104
- progress=gr.Progress()
105
- ) -> tuple:
106
- """Translate PDF using Modal cloud."""
107
- logs = []
108
-
109
- # Validate input
110
- if not pdf_file and not gdrive_url:
111
- return None, None, "Please upload a PDF or provide a Google Drive link", "", "\n".join(logs)
112
-
113
- if pdf_file and gdrive_url:
114
- return None, None, "Please use either file upload OR Google Drive link, not both", "", "\n".join(logs)
115
-
116
- try:
117
- logs = log_message(logs, "Starting translation...")
118
-
119
- # Get PDF bytes and filename
120
- if gdrive_url:
121
- logs = log_message(logs, f"Downloading from Google Drive...")
122
- progress(0.05, desc="Downloading from Google Drive...")
123
- pdf_bytes, source_filename = await download_gdrive_public(gdrive_url.strip())
124
- logs = log_message(logs, f"Downloaded: {source_filename}")
125
- else:
126
- pdf_path = Path(pdf_file)
127
- pdf_bytes = pdf_path.read_bytes()
128
- source_filename = pdf_path.name
129
-
130
- pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8")
131
-
132
- logs = log_message(logs, f"Input: {source_filename}")
133
- logs = log_message(logs, f"Size: {len(pdf_bytes) / 1024:.1f} KB")
134
- logs = log_message(logs, f"Target: {LANGUAGES.get(target_lang, target_lang)}")
135
-
136
- progress(0.1, desc="Uploading to Modal...")
137
-
138
- payload = {
139
- "pdf_base64": pdf_base64,
140
- "target_lang": target_lang,
141
- }
142
-
143
- logs = log_message(logs, "Translating on Modal cloud...")
144
- logs = log_message(logs, "(This may take several minutes)")
145
-
146
- progress(0.2, desc="Translating...")
147
- start_time = datetime.now()
148
-
149
- async with httpx.AsyncClient(timeout=900.0, follow_redirects=True) as client:
150
- response = await client.post(MODAL_TRANSLATE_URL, json=payload)
151
- response.raise_for_status()
152
- result = response.json()
153
-
154
- duration = (datetime.now() - start_time).total_seconds()
155
- progress(0.8, desc="Processing result...")
156
-
157
- if not result.get("success"):
158
- error_msg = result.get("message", "Unknown error")
159
- logs = log_message(logs, f"ERROR: {error_msg}")
160
- return None, None, "Translation failed", "", "\n".join(logs)
161
-
162
- # Process mono_img PDF
163
- mono_img_path = None
164
- mono_img_base64 = result.get("mono_img_pdf_base64")
165
- if mono_img_base64:
166
- mono_img_bytes = base64.b64decode(mono_img_base64)
167
- stem = Path(source_filename).stem
168
- mono_img_filename = f"{stem}_translated.{target_lang}.pdf"
169
- mono_img_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
170
- mono_img_file.write(mono_img_bytes)
171
- mono_img_file.close()
172
- mono_img_path = mono_img_file.name
173
- logs = log_message(logs, f"Mono: {mono_img_filename} ({len(mono_img_bytes) / 1024:.1f} KB)")
174
-
175
- # Process dual_img PDF
176
- dual_img_path = None
177
- dual_img_base64 = result.get("dual_img_pdf_base64")
178
- if dual_img_base64:
179
- dual_img_bytes = base64.b64decode(dual_img_base64)
180
- stem = Path(source_filename).stem
181
- dual_img_filename = f"{stem}_translated.{target_lang}.dual.pdf"
182
- dual_img_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
183
- dual_img_file.write(dual_img_bytes)
184
- dual_img_file.close()
185
- dual_img_path = dual_img_file.name
186
- logs = log_message(logs, f"Dual: {dual_img_filename} ({len(dual_img_bytes) / 1024:.1f} KB)")
187
-
188
- if not mono_img_path and not dual_img_path:
189
- logs = log_message(logs, "ERROR: No output PDF in response")
190
- return None, None, "Translation failed", "", "\n".join(logs)
191
-
192
- logs = log_message(logs, f"Duration: {duration:.1f} seconds")
193
-
194
- stats_msg = f"""**Translation completed!**
195
-
196
- - **Duration:** {duration:.1f} seconds
197
- - **Target:** {LANGUAGES.get(target_lang, target_lang)}"""
198
-
199
- progress(1.0, desc="Done!")
200
-
201
- return mono_img_path, dual_img_path, "Translation successful!", stats_msg, "\n".join(logs)
202
-
203
- except httpx.TimeoutException:
204
- logs = log_message(logs, "ERROR: Translation timed out")
205
- return None, None, "Translation timed out", "", "\n".join(logs)
206
- except httpx.HTTPStatusError as e:
207
- logs = log_message(logs, f"ERROR: HTTP {e.response.status_code}")
208
- return None, None, f"HTTP error: {e.response.status_code}", "", "\n".join(logs)
209
- except Exception as e:
210
- logs = log_message(logs, f"ERROR: {str(e)}")
211
- return None, None, f"Error: {str(e)}", "", "\n".join(logs)
212
-
213
-
214
- # Gradio Interface
215
- with gr.Blocks(title="BabelDocs x Agentic AI MCP") as demo:
216
-
217
- gr.Markdown("""
218
- # BabelDocs x Agentic AI MCP - PDF Translation with Google Drive Integration
219
-
220
- **Translate PDFs directly from Google Drive and save back automatically**
221
-
222
- ---
223
-
224
- ## Key Feature: Full Google Drive Workflow in CLAUDE Desktop MCP
225
-
226
- ```
227
- "Translate my Q3 report to French and save it to Translations folder"
228
-
229
- Claude searches → downloads → translates → uploads → done!
230
- ```
231
-
232
- ---
233
- """)
234
-
235
- with gr.Row():
236
- with gr.Column(scale=1):
237
- gr.Markdown("### Input")
238
-
239
- gdrive_url = gr.Textbox(
240
- label="Google Drive Link (public)",
241
- placeholder="https://drive.google.com/file/d/... or leave empty",
242
- info="Paste a public GDrive link, OR upload a local file below",
243
- )
244
-
245
- gr.Markdown("**OR**")
246
-
247
- pdf_input = gr.File(
248
- label="Upload PDF",
249
- file_types=[".pdf"],
250
- type="filepath",
251
- )
252
-
253
- target_lang = gr.Dropdown(
254
- choices=list(LANGUAGES.keys()),
255
- value="fr",
256
- label="Target Language",
257
- )
258
-
259
- translate_btn = gr.Button(
260
- "Translate PDF",
261
- variant="primary",
262
- size="lg",
263
- )
264
-
265
- with gr.Column(scale=1):
266
- gr.Markdown("### Result")
267
-
268
- status_output = gr.Textbox(
269
- label="Status",
270
- interactive=False,
271
- )
272
-
273
- stats_output = gr.Markdown(label="Statistics")
274
-
275
- gr.Markdown("**Downloads:**")
276
- with gr.Row():
277
- mono_img_output = gr.File(label="Mono (translated + images)")
278
- dual_img_output = gr.File(label="Dual (bilingual + images)")
279
-
280
- logs_output = gr.Textbox(
281
- label="Logs",
282
- interactive=False,
283
- lines=10,
284
- max_lines=15,
285
- )
286
-
287
- gr.Markdown("""
288
- ---
289
-
290
- ### How it works
291
-
292
- ```
293
- 1. Upload PDF or paste GDrive link
294
-
295
- 2. Send to Modal cloud (serverless)
296
-
297
- 3. BabelDOC with Agentic AI translates text + images, preserves layout
298
-
299
- 4. Download translated PDF
300
- ```
301
-
302
- ---
303
-
304
- **Built with:** BabelDOC, Modal, Nebius AI, Gradio | **Hackathon:** Anthropic MCP Track 1
305
- """)
306
-
307
- translate_btn.click(
308
- fn=translate_pdf_modal,
309
- inputs=[pdf_input, gdrive_url, target_lang],
310
- outputs=[mono_img_output, dual_img_output, status_output, stats_output, logs_output],
311
- )
312
-
313
-
314
- if __name__ == "__main__":
315
- demo.launch(
316
- server_name="127.0.0.1",
317
- server_port=7860,
318
- share=False,
319
- )
 
 
1
+ """
2
+ BabelDocs x Agentic AI MCP - Gradio Application
3
+
4
+ PDF Translation with Google Drive Integration.
5
+ Accepts public GDrive links or local file uploads.
6
+
7
+ For Anthropic Hackathon - Track 1: Building MCP
8
+
9
+ Usage:
10
+ python app.py
11
+ """
12
+
13
+ import os
14
+ import re
15
+ import base64
16
+ import tempfile
17
+ import httpx
18
+ import gradio as gr
19
+ from pathlib import Path
20
+ from datetime import datetime
21
+ from dotenv import load_dotenv
22
+
23
+ load_dotenv()
24
+
25
+ # Modal endpoint configuration
26
+ # Set BABELDOCS_MODAL_URL as HuggingFace Space secret for production
27
+ MODAL_BASE_URL = os.getenv("BABELDOCS_MODAL_URL")
28
+ if not MODAL_BASE_URL:
29
+ raise ValueError("BABELDOCS_MODAL_URL environment variable required. Set it as a HuggingFace Space secret.")
30
+ MODAL_TRANSLATE_URL = f"{MODAL_BASE_URL}-babeldocstranslator-api.modal.run"
31
+ MODAL_HEALTH_URL = f"{MODAL_BASE_URL}-babeldocstranslator-health.modal.run"
32
+
33
+ # Max pages limit (test phase)
34
+ MAX_PAGES = 20
35
+
36
+ # Supported languages
37
+ LANGUAGES = {
38
+ "fr": "French",
39
+ "en": "English",
40
+ "es": "Spanish",
41
+ "de": "German",
42
+ "it": "Italian",
43
+ "pt": "Portuguese",
44
+ "zh": "Chinese",
45
+ "ja": "Japanese",
46
+ "ko": "Korean",
47
+ "ru": "Russian",
48
+ "ar": "Arabic",
49
+ }
50
+
51
+
52
+ def log_message(logs: list, message: str) -> list:
53
+ """Add timestamped message to logs."""
54
+ timestamp = datetime.now().strftime("%H:%M:%S")
55
+ logs.append(f"[{timestamp}] {message}")
56
+ return logs
57
+
58
+
59
+ def extract_gdrive_file_id(url: str) -> str | None:
60
+ """Extract file ID from Google Drive URL."""
61
+ patterns = [
62
+ r"/file/d/([a-zA-Z0-9_-]+)",
63
+ r"id=([a-zA-Z0-9_-]+)",
64
+ r"/d/([a-zA-Z0-9_-]+)",
65
+ ]
66
+ for pattern in patterns:
67
+ match = re.search(pattern, url)
68
+ if match:
69
+ return match.group(1)
70
+ return None
71
+
72
+
73
+ async def download_gdrive_public(url: str) -> tuple[bytes, str]:
74
+ """Download file from public Google Drive link.
75
+
76
+ Returns (file_bytes, filename).
77
+ """
78
+ file_id = extract_gdrive_file_id(url)
79
+ if not file_id:
80
+ raise ValueError("Invalid Google Drive URL")
81
+
82
+ # Direct download URL
83
+ download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
84
+
85
+ async with httpx.AsyncClient(timeout=120.0, follow_redirects=True) as client:
86
+ response = await client.get(download_url)
87
+ response.raise_for_status()
88
+
89
+ # Try to get filename from Content-Disposition header
90
+ content_disp = response.headers.get("Content-Disposition", "")
91
+ filename_match = re.search(r'filename="?([^";\n]+)"?', content_disp)
92
+ if filename_match:
93
+ filename = filename_match.group(1)
94
+ else:
95
+ filename = f"gdrive_{file_id}.pdf"
96
+
97
+ return response.content, filename
98
+
99
+
100
+ async def translate_pdf_modal(
101
+ pdf_file,
102
+ gdrive_url: str,
103
+ target_lang: str,
104
+ progress=gr.Progress()
105
+ ) -> tuple:
106
+ """Translate PDF using Modal cloud."""
107
+ logs = []
108
+
109
+ # Validate input
110
+ if not pdf_file and not gdrive_url:
111
+ return None, None, "Please upload a PDF or provide a Google Drive link", "", "\n".join(logs)
112
+
113
+ if pdf_file and gdrive_url:
114
+ return None, None, "Please use either file upload OR Google Drive link, not both", "", "\n".join(logs)
115
+
116
+ try:
117
+ logs = log_message(logs, "Starting translation...")
118
+
119
+ # Get PDF bytes and filename
120
+ if gdrive_url:
121
+ logs = log_message(logs, f"Downloading from Google Drive...")
122
+ progress(0.05, desc="Downloading from Google Drive...")
123
+ pdf_bytes, source_filename = await download_gdrive_public(gdrive_url.strip())
124
+ logs = log_message(logs, f"Downloaded: {source_filename}")
125
+ else:
126
+ pdf_path = Path(pdf_file)
127
+ pdf_bytes = pdf_path.read_bytes()
128
+ source_filename = pdf_path.name
129
+
130
+ pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8")
131
+
132
+ logs = log_message(logs, f"Input: {source_filename}")
133
+ logs = log_message(logs, f"Size: {len(pdf_bytes) / 1024:.1f} KB")
134
+ logs = log_message(logs, f"Target: {LANGUAGES.get(target_lang, target_lang)}")
135
+
136
+ progress(0.1, desc="Uploading to Modal...")
137
+
138
+ payload = {
139
+ "pdf_base64": pdf_base64,
140
+ "target_lang": target_lang,
141
+ }
142
+
143
+ logs = log_message(logs, "Translating on Modal cloud...")
144
+ logs = log_message(logs, "(This may take several minutes)")
145
+
146
+ progress(0.2, desc="Translating...")
147
+ start_time = datetime.now()
148
+
149
+ async with httpx.AsyncClient(timeout=900.0, follow_redirects=True) as client:
150
+ response = await client.post(MODAL_TRANSLATE_URL, json=payload)
151
+ response.raise_for_status()
152
+ result = response.json()
153
+
154
+ duration = (datetime.now() - start_time).total_seconds()
155
+ progress(0.8, desc="Processing result...")
156
+
157
+ if not result.get("success"):
158
+ error_msg = result.get("message", "Unknown error")
159
+ logs = log_message(logs, f"ERROR: {error_msg}")
160
+ return None, None, "Translation failed", "", "\n".join(logs)
161
+
162
+ # Process mono_img PDF
163
+ mono_img_path = None
164
+ mono_img_base64 = result.get("mono_img_pdf_base64")
165
+ if mono_img_base64:
166
+ mono_img_bytes = base64.b64decode(mono_img_base64)
167
+ stem = Path(source_filename).stem
168
+ mono_img_filename = f"{stem}_translated.{target_lang}.pdf"
169
+ mono_img_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
170
+ mono_img_file.write(mono_img_bytes)
171
+ mono_img_file.close()
172
+ mono_img_path = mono_img_file.name
173
+ logs = log_message(logs, f"Mono: {mono_img_filename} ({len(mono_img_bytes) / 1024:.1f} KB)")
174
+
175
+ # Process dual_img PDF
176
+ dual_img_path = None
177
+ dual_img_base64 = result.get("dual_img_pdf_base64")
178
+ if dual_img_base64:
179
+ dual_img_bytes = base64.b64decode(dual_img_base64)
180
+ stem = Path(source_filename).stem
181
+ dual_img_filename = f"{stem}_translated.{target_lang}.dual.pdf"
182
+ dual_img_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
183
+ dual_img_file.write(dual_img_bytes)
184
+ dual_img_file.close()
185
+ dual_img_path = dual_img_file.name
186
+ logs = log_message(logs, f"Dual: {dual_img_filename} ({len(dual_img_bytes) / 1024:.1f} KB)")
187
+
188
+ if not mono_img_path and not dual_img_path:
189
+ logs = log_message(logs, "ERROR: No output PDF in response")
190
+ return None, None, "Translation failed", "", "\n".join(logs)
191
+
192
+ logs = log_message(logs, f"Duration: {duration:.1f} seconds")
193
+
194
+ stats_msg = f"""**Translation completed!**
195
+
196
+ - **Duration:** {duration:.1f} seconds
197
+ - **Target:** {LANGUAGES.get(target_lang, target_lang)}"""
198
+
199
+ progress(1.0, desc="Done!")
200
+
201
+ return mono_img_path, dual_img_path, "Translation successful!", stats_msg, "\n".join(logs)
202
+
203
+ except httpx.TimeoutException:
204
+ logs = log_message(logs, "ERROR: Translation timed out")
205
+ return None, None, "Translation timed out", "", "\n".join(logs)
206
+ except httpx.HTTPStatusError as e:
207
+ logs = log_message(logs, f"ERROR: HTTP {e.response.status_code}")
208
+ return None, None, f"HTTP error: {e.response.status_code}", "", "\n".join(logs)
209
+ except Exception as e:
210
+ logs = log_message(logs, f"ERROR: {str(e)}")
211
+ return None, None, f"Error: {str(e)}", "", "\n".join(logs)
212
+
213
+
214
+ # Gradio Interface
215
+ with gr.Blocks(title="BabelDocs x Agentic AI MCP") as demo:
216
+
217
+ gr.Markdown("""
218
+ # BabelDocs x Agentic AI MCP - PDF Translation with Google Drive Integration
219
+
220
+ **Translate PDFs directly from Google Drive and save back automatically**
221
+
222
+ ---
223
+
224
+ ## Key Feature: Full Google Drive Workflow in CLAUDE Desktop MCP
225
+
226
+ ```
227
+ "Translate my Q3 report to French and save it to Translations folder"
228
+
229
+ Claude searches → downloads → translates → uploads → done!
230
+ ```
231
+
232
+ ---
233
+ """)
234
+
235
+ with gr.Row():
236
+ with gr.Column(scale=1):
237
+ gr.Markdown("### Input")
238
+
239
+ gdrive_url = gr.Textbox(
240
+ label="Google Drive Link (public)",
241
+ placeholder="https://drive.google.com/file/d/... or leave empty",
242
+ info="Paste a public GDrive link, OR upload a local file below",
243
+ )
244
+
245
+ gr.Markdown("**OR**")
246
+
247
+ pdf_input = gr.File(
248
+ label="Upload PDF",
249
+ file_types=[".pdf"],
250
+ type="filepath",
251
+ )
252
+
253
+ target_lang = gr.Dropdown(
254
+ choices=list(LANGUAGES.keys()),
255
+ value="fr",
256
+ label="Target Language",
257
+ )
258
+
259
+ translate_btn = gr.Button(
260
+ "Translate PDF",
261
+ variant="primary",
262
+ size="lg",
263
+ )
264
+
265
+ with gr.Column(scale=1):
266
+ gr.Markdown("### Result")
267
+
268
+ status_output = gr.Textbox(
269
+ label="Status",
270
+ interactive=False,
271
+ )
272
+
273
+ stats_output = gr.Markdown(label="Statistics")
274
+
275
+ gr.Markdown("**Downloads:**")
276
+ with gr.Row():
277
+ mono_img_output = gr.File(label="Mono (translated + images)")
278
+ dual_img_output = gr.File(label="Dual (bilingual + images)")
279
+
280
+ logs_output = gr.Textbox(
281
+ label="Logs",
282
+ interactive=False,
283
+ lines=10,
284
+ max_lines=15,
285
+ )
286
+
287
+ gr.Markdown("""
288
+ ---
289
+
290
+ ### How it works
291
+
292
+ ```
293
+ 1. Upload PDF or paste GDrive link
294
+
295
+ 2. Send to Modal cloud (serverless)
296
+
297
+ 3. BabelDOC with Agentic AI translates text + images, preserves layout
298
+
299
+ 4. Download translated PDF
300
+ ```
301
+
302
+ ---
303
+
304
+ **Built with:** BabelDOC, Modal, Nebius AI, Gradio | **Hackathon:** Anthropic MCP Track 1
305
+ """)
306
+
307
+ translate_btn.click(
308
+ fn=translate_pdf_modal,
309
+ inputs=[pdf_input, gdrive_url, target_lang],
310
+ outputs=[mono_img_output, dual_img_output, status_output, stats_output, logs_output],
311
+ )
312
+
313
+
314
+ if __name__ == "__main__":
315
+ demo.launch(
316
+ server_name="0.0.0.0",
317
+ server_port=7860,
318
+ share=False,
319
+ ssr_mode=False,
320
+ )