Jonathan Bejarano
Always keep the dropdown because it still determines the question types
ba5915f
raw
history blame
3.87 kB
import gradio as gr
import game
from app import format_game_result
# Number of guesses after which the game is treated as over.
# NOTE(review): presumably mirrors the limit stated in the game's system
# prompt — confirm against the `game` module.
_MAX_GUESSES = 20


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    game_mode_selection,
    hf_token: gr.OAuthToken | None = None,
):
    """Produce the assistant's reply for one turn of the guessing game.

    When ``history`` is empty the guess counter on ``game`` is reset and a
    fresh system message is generated for the selected mode. Every call then
    increments the counter, sends the system message (with the guess number
    appended) together with the current user message to
    ``ai.client.chat_completion``, collects the streamed reply and cleans it
    with ``ai.clean_response``.

    Only the system message and the current ``message`` are sent to the
    model; ``history`` is used solely to detect the start of a conversation.

    For more information on `huggingface_hub` Inference API support, please
    check the docs:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Args:
        message: The user's latest message.
        history: Previous chat turns; an empty list marks a new game.
        system_message: Accepted for interface compatibility; not used here.
        max_tokens: Only logged for debugging; not forwarded to the model.
        temperature: Accepted for interface compatibility; not used here.
        top_p: Accepted for interface compatibility; not used here.
        game_mode_selection: Compared against ``game.MODE_STATES``; any other
            value selects ``game.MODE_COUNTRIES``.
        hf_token: Accepted for interface compatibility; not used here.

    Returns:
        The cleaned model response. It is passed through
        ``format_game_result`` when it contains "The country was" /
        "The state was" or when the guess limit has been reached. If any
        exception is raised during inference or post-processing, a string of
        the form ``"Error during inference: <details>"`` is returned instead.
    """
    # If this is the start of a new conversation (empty history), generate a
    # new country/state and restart the guess counter.
    if not history:
        game.guess_number = 0
        if game_mode_selection == game.MODE_STATES:
            game.current_system = game.get_system_message(game.MODE_STATES)
            print(f"πŸ” DEBUG - New session started, selected state: {game.selected_country}")
        else:
            game.current_system = game.get_system_message(game.MODE_COUNTRIES)
            print(f"πŸ” DEBUG - New session started, selected country: {game.selected_country}")

    game.guess_number += 1

    # The current guess number is appended to the system prompt so the model
    # sees how far along the game is.
    messages = [
        {"role": "system", "content": game.current_system + str(game.guess_number)},
        {"role": "user", "content": message},
    ]

    # Debug: Calculate approximate input token count
    total_input_chars = sum(len(str(msg.get("content", ""))) for msg in messages)
    estimated_input_tokens = total_input_chars // 4  # Rough approximation: 4 chars per token
    print(f"πŸ” DEBUG - Estimated input tokens: {estimated_input_tokens}")
    print(f"πŸ” DEBUG - Messages count: {len(messages)}")
    print(f"πŸ” DEBUG - Max tokens setting: {max_tokens}")

    # Debug: Show each message type and length
    for i, msg in enumerate(messages):
        role = msg.get("role", "unknown")
        content = str(msg.get("content", ""))
        print(f"πŸ” DEBUG - Message {i+1} ({role}): {len(content)} chars")
        if role == "system":
            print(f"πŸ” DEBUG - System message preview: ...{content[-100:]}")
        elif role == "user":
            print(f"πŸ” DEBUG - User message: {content}")
        elif role == "assistant":
            print(f"πŸ” DEBUG - Assistant message: {content[:50]}...")

    try:
        # NOTE(review): `ai` is not among the imports visible at the top of
        # this file — confirm it is in scope. If it is not, the NameError is
        # caught below and surfaces as an "Error during inference" reply.
        stream = ai.client.chat_completion(
            messages,
            stream=True,
            response_format={"type": "text"},
        )

        # Collect the non-empty content deltas and join once at the end
        # instead of growing a string chunk by chunk.
        pieces = []
        for message_chunk in stream:
            choices = message_chunk.choices
            if choices and choices[0].delta.content:
                pieces.append(choices[0].delta.content)
        response = "".join(pieces)
        output_token_count = len(pieces)  # chunks that actually carried text

        # Debug: Show output token statistics
        estimated_output_tokens = len(response) // 4  # Rough approximation
        print(f"πŸ” DEBUG - Output token chunks received: {output_token_count}")
        print(f"πŸ” DEBUG - Estimated output tokens (by chars): {estimated_output_tokens}")
        print(f"πŸ” DEBUG - Response length: {len(response)} characters")
        print(f"πŸ” DEBUG - Raw response: {response}")

        # Clean the response to remove unwanted artifacts
        response = ai.clean_response(response)
        print(f"πŸ” DEBUG - Cleaned response: {response}")

        # Check if this is a game end response and format it nicely
        if "The country was" in response or "The state was" in response:
            print(f"πŸ” DEBUG - Game end detected! Location extracted: {game.selected_country}")
            return format_game_result(response)

        # `>=` rather than `==`: the counter keeps increasing if the user
        # continues the same conversation past the limit, and those turns
        # must still be treated as game-over.
        if game.guess_number >= _MAX_GUESSES:
            print(f"πŸ” DEBUG - Maximum guesses reached: {game.guess_number}")
            return format_game_result(response)

        print("πŸ” DEBUG - Regular response (no game end)")
        return response
    except Exception as e:
        # Boundary handler: report the failure in the chat window as text
        # rather than letting the exception propagate to the caller.
        return f"Error during inference: {e}"