Spaces:
Sleeping
Sleeping
File size: 3,865 Bytes
ba5915f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr

import ai  # provides `client` and `clean_response` used in respond(); was referenced but never imported
import game
from app import format_game_result
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    game_mode_selection,
    hf_token: gr.OAuthToken | None = None,
):
    """Produce the assistant's reply for one turn of the guessing game.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list[dict[str, str]]
        Prior turns as ``{"role": ..., "content": ...}`` dicts. An empty
        history marks the start of a new game session.
    system_message
        Unused here — the system prompt is taken from the ``game`` module.
        Kept for Gradio ChatInterface compatibility.
    max_tokens, temperature, top_p
        Sampling settings forwarded to the inference client.
    game_mode_selection
        Either ``game.MODE_STATES`` or ``game.MODE_COUNTRIES``.
    hf_token : gr.OAuthToken | None
        Optional OAuth token from Gradio login (currently unused).

    Returns
    -------
    str
        The cleaned model reply; a formatted game-result string when the
        game has ended; or an error message if inference fails.

    For more information on `huggingface_hub` Inference API support, please
    check the docs:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    # An empty history means a brand-new conversation: reset the guess
    # counter and have the game module select a fresh target.
    if not history:
        game.guess_number = 0
        if game_mode_selection == game.MODE_STATES:
            game.current_system = game.get_system_message(game.MODE_STATES)
            print(f"π DEBUG - New session started, selected state: {game.selected_country}")
        else:
            game.current_system = game.get_system_message(game.MODE_COUNTRIES)
            print(f"π DEBUG - New session started, selected country: {game.selected_country}")

    game.guess_number += 1

    # The system prompt carries the current guess number appended as text,
    # so the model knows how far into the game this turn is.
    messages = [{"role": "system", "content": game.current_system + str(game.guess_number)}]
    messages.append({"role": "user", "content": message})

    # Debug: rough input-size estimate (~4 chars per token heuristic).
    total_input_chars = sum(len(str(msg.get("content", ""))) for msg in messages)
    estimated_input_tokens = total_input_chars // 4  # Rough approximation: 4 chars per token
    print(f"π DEBUG - Estimated input tokens: {estimated_input_tokens}")
    print(f"π DEBUG - Messages count: {len(messages)}")
    print(f"π DEBUG - Max tokens setting: {max_tokens}")

    # Debug: per-message role and size.
    for i, msg in enumerate(messages):
        role = msg.get("role", "unknown")
        content = str(msg.get("content", ""))
        print(f"π DEBUG - Message {i+1} ({role}): {len(content)} chars")
        if role == "system":
            print(f"π DEBUG - System message preview: ...{content[-100:]}")
        elif role == "user":
            print(f"π DEBUG - User message: {content}")
        elif role == "assistant":
            print(f"π DEBUG - Assistant message: {content[:50]}...")

    response = ""
    output_token_count = 0
    try:
        # FIX: max_tokens / temperature / top_p were accepted (and logged)
        # but never forwarded to the inference call, so the UI sliders had
        # no effect. They are now passed through to chat_completion.
        for message_chunk in ai.client.chat_completion(
            messages,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            response_format={"type": "text"},
        ):
            choices = message_chunk.choices
            token = ""
            if choices and choices[0].delta.content:
                token = choices[0].delta.content
                output_token_count += 1
            response += token

        # Debug: output-size statistics.
        estimated_output_tokens = len(response) // 4  # Rough approximation
        print(f"π DEBUG - Output token chunks received: {output_token_count}")
        print(f"π DEBUG - Estimated output tokens (by chars): {estimated_output_tokens}")
        print(f"π DEBUG - Response length: {len(response)} characters")
        print(f"π DEBUG - Raw response: {response}")

        # Strip model artifacts before showing the reply to the player.
        response = ai.clean_response(response)
        print(f"π DEBUG - Cleaned response: {response}")

        # Game-end detection: either the model revealed the answer, or the
        # player has used exactly the last (20th) guess.
        if "The country was" in response or "The state was" in response:
            print(f"π DEBUG - Game end detected! Location extracted: {game.selected_country}")
            return format_game_result(response)
        elif game.guess_number == 20:
            print(f"π DEBUG - Maximum guesses reached: {game.guess_number}")
            return format_game_result(response)
        else:
            print("π DEBUG - Regular response (no game end)")
            return response
    except Exception as e:
        # Top-level boundary: surface inference failures as a chat message
        # rather than crashing the Gradio app.
        return f"Error during inference: {str(e)}"