Spaces:
Running
Running
| import gradio as gr | |
| import torch | |
| import re | |
| from transformers import ( | |
| pipeline, | |
| AutoTokenizer, | |
| AutoModelForCausalLM, | |
| AutoModelForSeq2SeqLM, | |
| NllbTokenizer | |
| ) | |
| from functools import lru_cache | |
| # ==================== NEW: PULAR TO FRENCH TRANSLATOR ==================== | |
def load_pular_to_french():
    """Build the Pular→French translation pipeline.

    Pairs the base NLLB-200 tokenizer (configured for ``fuv_Latn`` →
    ``fra_Latn``) with the ``mlamined/pl_fr_104`` seq2seq checkpoint.

    Returns:
        The ``transformers`` translation pipeline, or ``None`` when any
        loading step raises (the error is printed, not re-raised).
    """
    print("Loading Pular→French translator model...")
    checkpoint = "mlamined/pl_fr_104"
    source_code, target_code = "fuv_Latn", "fra_Latn"

    try:
        # The tokenizer comes from the base NLLB repo so the language-code
        # tokens are available.
        nllb_tokenizer = NllbTokenizer.from_pretrained(
            "facebook/nllb-200-distilled-600M",
            src_lang=source_code,
            tgt_lang=target_code,
        )
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        generation_options = {
            "max_length": 256,
            "num_beams": 3,
            "early_stopping": True,
        }
        pular_french_pipeline = pipeline(
            "translation",
            model=seq2seq_model,
            tokenizer=nllb_tokenizer,
            src_lang=source_code,
            tgt_lang=target_code,
            **generation_options,
        )
        print("Pular→French translator model loaded successfully!")
        return pular_french_pipeline
    except Exception as e:
        print(f"Error loading Pular→French translator: {e}")
        return None
| # ==================== EXISTING MODELS ==================== | |
def load_french_to_pular():
    """Build the French→Pular translation pipeline.

    Pairs the base NLLB-200 tokenizer (configured for ``fra_Latn`` →
    ``fuv_Latn``) with the ``mlamined/fr_pl_130`` seq2seq checkpoint.

    Returns:
        The ``transformers`` translation pipeline, or ``None`` when any
        loading step raises (the error is printed, not re-raised).
    """
    print("Loading French→Pular translator model...")
    checkpoint = "mlamined/fr_pl_130"
    source_code, target_code = "fra_Latn", "fuv_Latn"

    try:
        nllb_tokenizer = NllbTokenizer.from_pretrained(
            "facebook/nllb-200-distilled-600M",
            src_lang=source_code,
            tgt_lang=target_code,
        )
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        generation_options = {
            "max_length": 256,
            "num_beams": 3,
            "early_stopping": True,
        }
        french_pular_pipeline = pipeline(
            "translation",
            model=seq2seq_model,
            tokenizer=nllb_tokenizer,
            src_lang=source_code,
            tgt_lang=target_code,
            **generation_options,
        )
        print("French→Pular translator model loaded successfully!")
        return french_pular_pipeline
    except Exception as e:
        print(f"Error loading French→Pular translator: {e}")
        return None
def load_llm():
    """Load the ``google/gemma-2-2b-it`` causal LM and its tokenizer.

    The model is loaded in float32. ``device_map="auto"`` is requested only
    when CUDA is available; otherwise the model is explicitly moved to CPU.

    Returns:
        A ``(model, tokenizer)`` tuple, or ``(None, None)`` when loading
        raises (the error is printed, not re-raised).
    """
    print("Loading LLM model...")
    checkpoint = "google/gemma-2-2b-it"

    try:
        llm_tok = AutoTokenizer.from_pretrained(checkpoint)
        # Reuse EOS as the padding token when the tokenizer defines none.
        if llm_tok.pad_token is None:
            llm_tok.pad_token = llm_tok.eos_token

        has_gpu = torch.cuda.is_available()
        causal_lm = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            torch_dtype=torch.float32,
            device_map="auto" if has_gpu else None,
            low_cpu_mem_usage=True,
        )
        if not has_gpu:
            causal_lm = causal_lm.to("cpu")

        print("LLM model loaded successfully!")
        return causal_lm, llm_tok
    except Exception as e:
        print(f"Error loading LLM: {e}")
        return None, None
| # ==================== LOAD ALL MODELS ==================== | |
# Load every model once at import time; each loader returns None (or a pair
# of None) on failure, so the rest of the module must check before use.
print("\n" + "=" * 60, "🚀 LOADING ALL MODELS", "=" * 60, sep="\n")
translator_pular_to_french = load_pular_to_french()
translator_french_to_pular = load_french_to_pular()
llm_model, llm_tokenizer = load_llm()

# The chat path may use the LLM only when both the model and its tokenizer loaded.
use_llm = not (llm_model is None or llm_tokenizer is None)
| # ==================== TRANSLATION FUNCTIONS ==================== | |
def translate_pular_to_french(pular_text):
    """Translate a Pular string into French.

    Args:
        pular_text: Text to translate. Whitespace is collapsed and the input
            is cut to its first 300 characters before translation.

    Returns:
        The cleaned French translation, ``""`` for empty/blank input, or a
        French error message when the translator is unavailable, returns an
        unrecognised result shape, or raises.
    """
    if not translator_pular_to_french:
        return "Erreur: Modèle Pular→Français non disponible."
    if not pular_text or not pular_text.strip():
        return ""

    try:
        # Normalise whitespace, then cap the input length.
        source = re.sub(r'\s+', ' ', pular_text.strip())[:300]
        print(f"Translating Pular→French: {source[:100]}...")

        raw = translator_pular_to_french(source, max_length=256)

        # Accept either a non-empty list of results or a bare result.
        from_list = isinstance(raw, list) and len(raw) > 0
        payload = raw[0] if from_list else raw
        if isinstance(payload, dict) and "translation_text" in payload:
            translated = payload["translation_text"]
        elif isinstance(payload, str):
            translated = payload
        elif from_list:
            translated = str(payload)
        else:
            return "Erreur de traduction. Veuillez réessayer."

        # Strip *emphasis*, [bracketed] and (parenthesised) spans, then
        # collapse the whitespace left behind.
        for pattern, replacement in (
            (r'\*.*?\*', ''),
            (r'\[.*?\]|\(.*?\)', ''),
            (r'\s+', ' '),
        ):
            translated = re.sub(pattern, replacement, translated)
        translated = translated.strip()

        print(f"Translated to French: {translated[:100]}...")
        return translated
    except Exception as e:
        print(f"Pular→French translation error: {e}")
        return "Erreur technique lors de la traduction."
def translate_french_to_pular(french_text):
    """Translate a French string into Pular.

    Args:
        french_text: Text to translate. Asterisks are removed, whitespace is
            collapsed and the input is cut to its first 300 characters
            before translation.

    Returns:
        The cleaned Pular translation, ``""`` for empty/blank input, or a
        fixed fallback message when the translator is unavailable, returns
        an unrecognised result shape, or raises.
    """
    if not translator_french_to_pular:
        return "Hakkunde ndee, mi wadataa."
    if not french_text or not french_text.strip():
        return ""

    try:
        # Drop markdown asterisks, normalise whitespace, cap the length.
        source = re.sub(r'\*+', '', french_text.strip())
        source = re.sub(r'\s+', ' ', source)[:300]
        print(f"Translating French→Pular: {source[:100]}...")

        raw = translator_french_to_pular(source, max_length=256)

        # Accept either a non-empty list of results or a bare result.
        from_list = isinstance(raw, list) and len(raw) > 0
        payload = raw[0] if from_list else raw
        if isinstance(payload, dict) and "translation_text" in payload:
            translated = payload["translation_text"]
        elif isinstance(payload, str):
            translated = payload
        elif from_list:
            translated = str(payload)
        else:
            return "Hakkunde ndee, mi wadataa."

        # Strip *emphasis*, everything from the word "Finsitaare" to the end
        # of its line, [bracketed] and (parenthesised) spans, then collapse
        # the whitespace left behind.
        for pattern, replacement in (
            (r'\*.*?\*', ''),
            (r'\bFinsitaare\b.*', ''),
            (r'\[.*?\]|\(.*?\)', ''),
            (r'\s+', ' '),
        ):
            translated = re.sub(pattern, replacement, translated)
        translated = translated.strip()

        print(f"Translated to Pular: {translated[:100]}...")
        return translated
    except Exception as e:
        print(f"French→Pular translation error: {e}")
        return "Hakkunde ndee, tontu kadi."
| # ==================== EXISTING FUNCTIONS (UNCHANGED) ==================== | |
# Instruction preamble placed at the start of every prompt sent to the LLM.
system_prompt = (
    "You are a helpful assistant . "
    "Use simple, clear language as if explaining to a young child. "
    "Provide accurate and relevant responses. "
    "Answer in French, and keep responses short and friendly. "
    "Maintenant, réponds aux questions suivantes:"
)
def clean_french_response(text):
    """Reduce raw LLM output to a single clean French line.

    Markdown markers (asterisks, ``#`` headings, inline code spans and
    ``[text](url)`` links) are removed. The first line that is at least three
    characters long and is not made solely of punctuation/symbols is kept;
    when no line qualifies, the first 200 characters of the stripped-down
    text are used instead. A final ``.`` is appended unless the result
    already ends with ``.``, ``!`` or ``?``.

    Args:
        text: Raw model output; may be empty or ``None``.

    Returns:
        The cleaned line, or ``""`` for falsy input.
    """
    if not text:
        return ""

    # Remove markdown formatting, in this fixed order.
    for markdown_pattern in (r'\*+', r'#+\s*', r'`.*?`', r'\[.*?\]\(.*?\)'):
        text = re.sub(markdown_pattern, '', text)

    stripped_lines = (raw_line.strip() for raw_line in text.split('\n'))
    first_meaningful = next(
        (
            candidate
            for candidate in stripped_lines
            if len(candidate) >= 3
            and not re.match(r'^[^a-zA-Z0-9\s]*$', candidate)
        ),
        None,
    )
    response = text[:200] if first_meaningful is None else first_meaningful

    # Make sure the sentence is terminated.
    if response and response[-1] not in '.!?':
        response += '.'
    return response.strip()
def generate_french_response(user_input, history=None):
    """Generate a short French answer to ``user_input`` with the LLM.

    The prompt is ``system_prompt`` followed by up to the two most recent
    history messages (rendered as ``Question:`` / ``Réponse:`` lines) and the
    current question, ending with an open ``Réponse:`` label.

    Only the tokens produced after the prompt are decoded. The previous
    approach decoded prompt + continuation and kept whatever followed the
    last ``Réponse:``, which returned the wrong text whenever the model went
    on to invent further ``Question:`` / ``Réponse:`` turns or the prompt's
    final label was lost to truncation.

    Args:
        user_input: The user's current message.
        history: Optional list of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A French sentence of at most 250 characters. When the LLM is not
        loaded a random canned sentence is returned; when generation raises,
        a fixed apology sentence is returned.
    """
    if not use_llm:
        import random

        fallback_responses = [
            "Je comprends votre question. Pouvez-vous la reformuler?",
            "Je vais chercher cette information pour vous.",
            "C'est une question intéressante. Laissez-moi y réfléchir.",
            "Je peux vous aider avec cela. Un moment s'il vous plaît.",
            "Merci pour votre question. Voici ce que je peux vous dire à ce sujet."
        ]
        return random.choice(fallback_responses)

    try:
        prompt = f"{system_prompt}\n\n"

        # Only the two most recent messages are replayed as context.
        for msg in (history or [])[-2:]:
            if msg["role"] == "user":
                prompt += f"Question: {msg['content']}\n"
            elif msg["role"] == "assistant":
                prompt += f"Réponse: {msg['content']}\n"

        prompt += f"Question: {user_input}\nRéponse:"
        print(f"\nPrompt (first 500 chars): {prompt[:500]}...")

        inputs = llm_tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512
        )

        # Inputs must live on the same device as the model.
        device = llm_model.device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = llm_model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.5,
                top_p=0.9,
                top_k=50,
                pad_token_id=llm_tokenizer.pad_token_id,
                eos_token_id=llm_tokenizer.eos_token_id,
                repetition_penalty=1.2,
                no_repeat_ngram_size=3
            )

        # The output sequence starts with the prompt tokens; skip them so
        # only the model's continuation is decoded.
        prompt_token_count = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][prompt_token_count:]
        french_response = llm_tokenizer.decode(
            generated_ids, skip_special_tokens=True
        )

        # Stop at the first follow-up question the model may have invented.
        french_response = french_response.split("Question:", 1)[0].strip()
        # Drop the label if the model repeated it.
        if french_response.startswith("Réponse:"):
            french_response = french_response[len("Réponse:"):].strip()

        french_response = clean_french_response(french_response)

        if not french_response or len(french_response) < 5:
            french_response = "Je ne peux pas répondre à cette question pour le moment."

        print(f"Generated French response: {french_response[:150]}...")
        return french_response[:250]
    except Exception as e:
        print(f"Error generating French response: {e}")
        return "Je rencontre des difficultés techniques. Pouvez-vous reformuler votre question?"
def chat_function(user_input, chat_history):
    """Answer one chat turn: French LLM reply, then translation to Pular.

    Args:
        user_input: The user's message.
        chat_history: List of ``{"role", "content"}`` dicts; it is extended
            in place with the user message and the assistant reply.

    Returns:
        ``(chat_history, details)`` where ``details`` is a markdown string
        showing the French and Pular versions of the reply. Blank input
        returns the history untouched with ``""``; on an unexpected error a
        fixed Pular error message is appended to the history instead.
    """
    if not user_input.strip():
        return chat_history, ""

    user_turn = {"role": "user", "content": user_input}
    rule = '=' * 50

    try:
        print(f"\n{rule}")
        print(f"User input: {user_input}")

        french_response = generate_french_response(user_input, chat_history)
        print(f"French response: {french_response}")

        pular_response = translate_french_to_pular(french_response)
        print(f"Pular response: {pular_response}")
        print(f"{rule}\n")

        # The conversation stores the Pular text as the assistant's message.
        chat_history.extend(
            [user_turn, {"role": "assistant", "content": pular_response}]
        )
        details = (
            f"**🇫🇷 Français:** {french_response}"
            f"\n\n**🌍 Pular:** {pular_response}"
        )
        return chat_history, details
    except Exception as e:
        print(f"Chat error: {e}")
        error_msg = "Jaabi hakkunde ndee, mi wadataa. Tontu kadi."
        chat_history.extend(
            [user_turn, {"role": "assistant", "content": error_msg}]
        )
        return chat_history, "**Erreur technique:** Veuillez réessayer."
| # ==================== GRADIO INTERFACE ==================== | |
# Gradio UI: a status panel, a chat tab (French in, Pular out) and a
# two-way translation tab, followed by an "about" section.
with gr.Blocks(
    title="🤖 Chatbot Français-Pular avec IA - BIDIRECTIONNEL",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {max-width: 900px; margin: auto;}
    .chatbot {min-height: 400px;}
    .details-box {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 15px;
    border-radius: 10px;
    margin-top: 15px;
    border: 2px solid #4a5568;
    }
    .warning-box {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    padding: 10px;
    border-radius: 5px;
    margin: 10px 0;
    }
    .example-btn {
    margin: 2px;
    font-size: 12px;
    }
    .translation-card {
    background: #f8f9fa;
    padding: 15px;
    border-radius: 10px;
    border: 1px solid #dee2e6;
    margin: 10px 0;
    }
    """
) as demo:
    gr.Markdown("""
    # 🇫🇷 ↔ 🌍 Chatbot Français-Pular avec IA - BIDIRECTIONNEL
    ### Un assistant intelligent avec traduction dans les deux sens
    """)

    # Status indicators, computed once from the module-level load results.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📊 Statut du système")
            status_html = f"""
            <div style='background: #e8f5e9; padding: 10px; border-radius: 5px; margin: 5px 0;'>
            <strong>🤖 Modèle IA (Gemma-2-2B):</strong> {'<span style="color: green;">✅ Actif</span>' if use_llm else '<span style="color: orange;">⚠️ Basique</span>'}
            </div>
            <div style='background: #e3f2fd; padding: 10px; border-radius: 5px; margin: 5px 0;'>
            <strong>🔤 Traducteur Pular→Français (mlamined/pl_fr_104):</strong> {'<span style="color: green;">✅ Actif</span>' if translator_pular_to_french else '<span style="color: red;">❌ Erreur</span>'}
            </div>
            <div style='background: #e3f2fd; padding: 10px; border-radius: 5px; margin: 5px 0;'>
            <strong>🔤 Traducteur Français→Pular (mlamined/fr_pl_130):</strong> {'<span style="color: green;">✅ Actif</span>' if translator_french_to_pular else '<span style="color: red;">❌ Erreur</span>'}
            </div>
            <div style='background: #fff3e0; padding: 10px; border-radius: 5px; margin: 5px 0;'>
            <strong>⚡ Performance:</strong> {'<span style="color: orange;">CPU</span>' if not torch.cuda.is_available() else '<span style="color: green;">GPU</span>'}
            </div>
            """
            gr.HTML(status_html)

    with gr.Tabs():
        with gr.TabItem("💬 Chat Intelligent", id="chat"):
            chatbot = gr.Chatbot(
                label="Conversation",
                height=400,
                type="messages",
                avatar_images=("👤", "🤖"),
                show_label=True
            )
            # Per-session conversation history (list of role/content dicts).
            state = gr.State([])

            with gr.Row():
                msg = gr.Textbox(
                    label="Votre message en français",
                    placeholder="Posez n'importe quelle question ou dites quelque chose...",
                    scale=4,
                    max_lines=3,
                    elem_id="user_input"
                )
                submit_btn = gr.Button("Envoyer ➤", variant="primary", scale=1, elem_id="send_button")

            with gr.Row():
                clear_btn = gr.Button("🗑️ Effacer", variant="secondary", size="sm")
                show_details = gr.Checkbox(label="📋 Afficher les détails", value=True)
                gr.Column(scale=4, min_width=0)

            details_output = gr.Markdown(
                label="Détails de la réponse",
                elem_classes="details-box",
                visible=True
            )

            # Example conversation starters
            gr.Markdown("### 💡 Exemples pour commencer:")
            with gr.Row():
                example_buttons = []
                examples = [
                    "Donne moi cinq leçons de vie?",
                    "Redige-moi",
                    "Explique-moi l'importance de l'éducation",
                    "Raconte-moi une courte histoire",
                    "Ecris-moi une lettre pour demander de l'aide à un ami?"
                ]
                for example in examples:
                    btn = gr.Button(example, size="sm", variant="secondary", elem_classes="example-btn")
                    example_buttons.append(btn)

            def respond(message, history, show_details_flag):
                """Handle one chat submission.

                Returns, in order: the cleared textbox value, the history for
                the Chatbot, the same history for the session state, and the
                update for the details panel.
                """
                if not message or not message.strip():
                    return "", history, history, gr.update(value="", visible=False)
                history, details = chat_function(message, history)
                return "", history, history, gr.update(value=details, visible=show_details_flag)

            def clear_chat():
                """Reset the Chatbot, the session history and the details panel."""
                return [], [], gr.update(value="", visible=False)

            # The history is written back to `state` on every event. Before,
            # `state` was only an input, so clearing the Chatbot left the old
            # history in the session and the next message brought it back.
            chat_inputs = [msg, state, show_details]
            chat_outputs = [msg, chatbot, state, details_output]

            msg.submit(respond, chat_inputs, chat_outputs)
            submit_btn.click(respond, chat_inputs, chat_outputs)
            clear_btn.click(
                clear_chat,
                None,
                [chatbot, state, details_output]
            )

            # Each example button first fills the textbox, then submits it.
            for example_text, btn in zip(examples, example_buttons):
                btn.click(
                    # Default argument binds the text of *this* button.
                    fn=lambda ex=example_text: ex,
                    inputs=None,
                    outputs=msg
                ).then(
                    fn=respond,
                    inputs=chat_inputs,
                    outputs=chat_outputs
                )

        with gr.TabItem("🔤 Traducteur Bidirectionnel", id="translate"):
            gr.Markdown("""
            ### Traduction dans les deux sens
            **🇫🇷 Français → 🌍 Pular** et **🌍 Pular → 🇫🇷 Français**
            """)

            with gr.Row():
                # French to Pular translation
                with gr.Column():
                    gr.Markdown("#### 🇫🇷 → 🌍 Français vers Pular")
                    french_input_ftop = gr.Textbox(
                        label="Texte français",
                        placeholder="Entrez du texte français à traduire en pular...",
                        lines=4
                    )
                    with gr.Row():
                        translate_fr_to_pl = gr.Button("Traduire 🇫🇷→🌍", variant="primary")
                        clear_fr_to_pl = gr.Button("Effacer", variant="secondary")
                    pular_output = gr.Textbox(
                        label="Traduction pular",
                        lines=4,
                        interactive=False
                    )

                # Pular to French translation
                with gr.Column():
                    gr.Markdown("#### 🌍 → 🇫🇷 Pular vers Français")
                    pular_input_ptof = gr.Textbox(
                        label="Texte pular",
                        placeholder="Entrez du texte pular à traduire en français...",
                        lines=4
                    )
                    with gr.Row():
                        translate_pl_to_fr = gr.Button("Traduire 🌍→🇫🇷", variant="primary")
                        clear_pl_to_fr = gr.Button("Effacer", variant="secondary")
                    french_output = gr.Textbox(
                        label="Traduction française",
                        lines=4,
                        interactive=False
                    )

            # French to Pular events
            translate_fr_to_pl.click(
                translate_french_to_pular,
                inputs=french_input_ftop,
                outputs=pular_output
            )
            french_input_ftop.submit(
                translate_french_to_pular,
                inputs=french_input_ftop,
                outputs=pular_output
            )
            clear_fr_to_pl.click(
                lambda: ("", ""),
                None,
                [french_input_ftop, pular_output]
            )

            # Pular to French events
            translate_pl_to_fr.click(
                translate_pular_to_french,
                inputs=pular_input_ptof,
                outputs=french_output
            )
            pular_input_ptof.submit(
                translate_pular_to_french,
                inputs=pular_input_ptof,
                outputs=french_output
            )
            clear_pl_to_fr.click(
                lambda: ("", ""),
                None,
                [pular_input_ptof, french_output]
            )

            gr.Markdown("### 📝 Exemples rapides")
            with gr.Row():
                # French to Pular examples
                with gr.Column():
                    gr.Markdown("**Exemples Français→Pular:**")
                    fr_to_pl_examples = gr.Examples(
                        examples=[
                            ["Bonjour, je m'appelle Mamadou et je suis guinéen."],
                            ["L'éducation est la clé du développement d'un pays."],
                            ["La culture guinéenne est riche et diversifiée."]
                        ],
                        inputs=french_input_ftop,
                        outputs=pular_output,
                        fn=translate_french_to_pular,
                        cache_examples=True,
                        label="Cliquez sur un exemple"
                    )

                # Pular to French examples
                with gr.Column():
                    gr.Markdown("**Exemples Pular→Français:**")
                    pl_to_fr_examples = gr.Examples(
                        examples=[
                            ["On jaaraama musee Alpha."],
                            ["Miɗo weelaa."],
                            ["Jannde ko saabi fii ɓantal leydi."]
                        ],
                        inputs=pular_input_ptof,
                        outputs=french_output,
                        fn=translate_pular_to_french,
                        cache_examples=True,
                        label="Cliquez sur un exemple"
                    )

    gr.Markdown("---")
    gr.Markdown("""
    ### ℹ️ À propos de ce système
    **Nouveautés:**
    - ✅ **Traduction Pular→Français** ajoutée (mlamined/pl_fr_104)
    - 🔄 **Traduction bidirectionnelle** complète
    - 🚀 **Deux modèles de traduction** indépendants
    **Fonctionnement:**
    1. Vous écrivez en français ou en pular
    2. Le système traduit dans la direction choisie
    3. Pour le chat: français → IA → pular
    **Capacités:**
    - Réponses intelligentes et contextuelles
    - Traduction précise dans les deux sens
    - Interface intuitive et facile à utiliser
    **Note:** Les réponses peuvent prendre quelques secondes à générer sur CPU.
    """)
# Script entry point: print a readiness summary, then start the web server.
if __name__ == "__main__":
    separator = "=" * 60
    readiness_report = (
        ("📊 Statut LLM", use_llm),
        ("📊 Statut traducteur Pular→Français", translator_pular_to_french),
        ("📊 Statut traducteur Français→Pular", translator_french_to_pular),
    )

    print(separator)
    print("🚀 DÉMARRAGE DU CHATBOT BIDIRECTIONNEL")
    for label, component in readiness_report:
        print(f"{label}: {'✅ Prêt' if component else '❌ Échec'}")
    print(f"⚡ Matériel: {'GPU' if torch.cuda.is_available() else 'CPU'}")
    print(separator)

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "debug": False,
        "show_error": True,
    }
    demo.launch(**launch_options)