import gradio as gr

import ai  # FIX: `ai.client` / `ai.clean_response` are used below but `ai` was never imported (NameError at runtime).
import game
from app import format_game_result


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    game_mode_selection,
    hf_token: gr.OAuthToken | None = None,
):
    """Handle one chat turn of the 20-questions guessing game.

    For more information on `huggingface_hub` Inference API support, please
    check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Args:
        message: The user's latest message.
        history: Prior chat turns as role/content dicts; emptiness signals a new session.
        system_message: Unused here — the system prompt comes from `game.get_system_message`.
            NOTE(review): confirm whether this slider/box should feed the prompt.
        max_tokens: Generation cap forwarded to the inference client.
        temperature: Sampling temperature forwarded to the inference client.
        top_p: Nucleus-sampling value forwarded to the inference client.
        game_mode_selection: Either `game.MODE_STATES` or `game.MODE_COUNTRIES`.
        hf_token: Optional OAuth token (currently unused in this function).

    Returns:
        The model's (cleaned) reply, formatted via `format_game_result` when the
        game has ended, or an error string if inference failed.
    """
    # If this is the start of a new conversation (empty history), reset the
    # guess counter and pick a fresh country/state via a new system prompt.
    if not history:
        game.guess_number = 0
        if game_mode_selection == game.MODE_STATES:
            game.current_system = game.get_system_message(game.MODE_STATES)
            print(f"🔍 DEBUG - New session started, selected state: {game.selected_country}")
        else:
            game.current_system = game.get_system_message(game.MODE_COUNTRIES)
            print(f"🔍 DEBUG - New session started, selected country: {game.selected_country}")

    game.guess_number += 1

    # Only the system prompt (with the current guess number appended) and the
    # latest user message are sent — prior history is deliberately omitted.
    messages = [{"role": "system", "content": game.current_system + str(game.guess_number)}]
    messages.append({"role": "user", "content": message})

    # Debug: Calculate approximate input token count
    total_input_chars = sum(len(str(msg.get("content", ""))) for msg in messages)
    estimated_input_tokens = total_input_chars // 4  # Rough approximation: 4 chars per token
    print(f"🔍 DEBUG - Estimated input tokens: {estimated_input_tokens}")
    print(f"🔍 DEBUG - Messages count: {len(messages)}")
    print(f"🔍 DEBUG - Max tokens setting: {max_tokens}")

    # Debug: Show each message type and length
    for i, msg in enumerate(messages):
        role = msg.get("role", "unknown")
        content = str(msg.get("content", ""))
        print(f"🔍 DEBUG - Message {i+1} ({role}): {len(content)} chars")
        if role == "system":
            print(f"🔍 DEBUG - System message preview: ...{content[-100:]}")
        elif role == "user":
            print(f"🔍 DEBUG - User message: {content}")
        elif role == "assistant":
            print(f"🔍 DEBUG - Assistant message: {content[:50]}...")

    response = ""
    output_token_count = 0
    try:
        # FIX: forward max_tokens / temperature / top_p to the client — they
        # were accepted (and debug-printed) but previously never used, so the
        # UI sliders had no effect on generation.
        for message_chunk in ai.client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            response_format={"type": "text"},
        ):
            choices = message_chunk.choices
            token = ""
            if len(choices) and choices[0].delta.content:
                token = choices[0].delta.content
                output_token_count += 1
            response += token

        # Debug: Show output token statistics
        estimated_output_tokens = len(response) // 4  # Rough approximation
        print(f"🔍 DEBUG - Output token chunks received: {output_token_count}")
        print(f"🔍 DEBUG - Estimated output tokens (by chars): {estimated_output_tokens}")
        print(f"🔍 DEBUG - Response length: {len(response)} characters")
        print(f"🔍 DEBUG - Raw response: {response}")

        # Clean the response to remove unwanted artifacts
        response = ai.clean_response(response)
        print(f"🔍 DEBUG - Cleaned response: {response}")

        # Check if this is a game end response and format it nicely
        if "The country was" in response or "The state was" in response:
            print(f"🔍 DEBUG - Game end detected! Location extracted: {game.selected_country}")
            return format_game_result(response)
        elif game.guess_number == 20:
            # Hard limit: the 20th guess always ends the game.
            print(f"🔍 DEBUG - Maximum guesses reached: {game.guess_number}")
            return format_game_result(response)
        else:
            print("🔍 DEBUG - Regular response (no game end)")
            return response
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as chat text
        # rather than crashing the Gradio handler.
        return f"Error during inference: {str(e)}"