Spaces:
Sleeping
Sleeping
File size: 3,865 Bytes
ba5915f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr

import ai  # provides `client` and `clean_response` used in respond(); was referenced but never imported
import game
from app import format_game_result
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    game_mode_selection,
    hf_token: gr.OAuthToken | None = None,
):
    """Produce the assistant's reply for one turn of the guessing game.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list[dict[str, str]]
        Prior turns as ``{"role": ..., "content": ...}`` dicts. An empty
        history marks the start of a new game session.
    system_message
        Unused here — the system prompt is taken from the ``game`` module.
        Kept for Gradio ChatInterface compatibility.
    max_tokens, temperature, top_p
        Sampling settings forwarded to the inference client.
    game_mode_selection
        Either ``game.MODE_STATES`` or ``game.MODE_COUNTRIES``.
    hf_token : gr.OAuthToken | None
        Optional OAuth token from Gradio login (currently unused).

    Returns
    -------
    str
        The cleaned model reply; a formatted game-result string when the
        game has ended; or an error message if inference fails.

    For more information on `huggingface_hub` Inference API support, please
    check the docs:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    # An empty history means a brand-new conversation: reset the guess
    # counter and have the game module select a fresh target.
    if not history:
        game.guess_number = 0
        if game_mode_selection == game.MODE_STATES:
            game.current_system = game.get_system_message(game.MODE_STATES)
            print(f"π DEBUG - New session started, selected state: {game.selected_country}")
        else:
            game.current_system = game.get_system_message(game.MODE_COUNTRIES)
            print(f"π DEBUG - New session started, selected country: {game.selected_country}")

    game.guess_number += 1

    # The system prompt carries the current guess number appended as text,
    # so the model knows how far into the game this turn is.
    messages = [{"role": "system", "content": game.current_system + str(game.guess_number)}]
    messages.append({"role": "user", "content": message})

    # Debug: rough input-size estimate (~4 chars per token heuristic).
    total_input_chars = sum(len(str(msg.get("content", ""))) for msg in messages)
    estimated_input_tokens = total_input_chars // 4  # Rough approximation: 4 chars per token
    print(f"π DEBUG - Estimated input tokens: {estimated_input_tokens}")
    print(f"π DEBUG - Messages count: {len(messages)}")
    print(f"π DEBUG - Max tokens setting: {max_tokens}")

    # Debug: per-message role and size.
    for i, msg in enumerate(messages):
        role = msg.get("role", "unknown")
        content = str(msg.get("content", ""))
        print(f"π DEBUG - Message {i+1} ({role}): {len(content)} chars")
        if role == "system":
            print(f"π DEBUG - System message preview: ...{content[-100:]}")
        elif role == "user":
            print(f"π DEBUG - User message: {content}")
        elif role == "assistant":
            print(f"π DEBUG - Assistant message: {content[:50]}...")

    response = ""
    output_token_count = 0
    try:
        # FIX: max_tokens / temperature / top_p were accepted (and logged)
        # but never forwarded to the inference call, so the UI sliders had
        # no effect. They are now passed through to chat_completion.
        for message_chunk in ai.client.chat_completion(
            messages,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            response_format={"type": "text"},
        ):
            choices = message_chunk.choices
            token = ""
            if choices and choices[0].delta.content:
                token = choices[0].delta.content
                output_token_count += 1
            response += token

        # Debug: output-size statistics.
        estimated_output_tokens = len(response) // 4  # Rough approximation
        print(f"π DEBUG - Output token chunks received: {output_token_count}")
        print(f"π DEBUG - Estimated output tokens (by chars): {estimated_output_tokens}")
        print(f"π DEBUG - Response length: {len(response)} characters")
        print(f"π DEBUG - Raw response: {response}")

        # Strip model artifacts before showing the reply to the player.
        response = ai.clean_response(response)
        print(f"π DEBUG - Cleaned response: {response}")

        # Game-end detection: either the model revealed the answer, or the
        # player has used exactly the last (20th) guess.
        if "The country was" in response or "The state was" in response:
            print(f"π DEBUG - Game end detected! Location extracted: {game.selected_country}")
            return format_game_result(response)
        elif game.guess_number == 20:
            print(f"π DEBUG - Maximum guesses reached: {game.guess_number}")
            return format_game_result(response)
        else:
            print("π DEBUG - Regular response (no game end)")
            return response
    except Exception as e:
        # Top-level boundary: surface inference failures as a chat message
        # rather than crashing the Gradio app.
        return f"Error during inference: {str(e)}"