| """ | |
| SchoolSpiritΒ AI β Graniteβ3.3β2B chatbot (GradioΒ 4.3, messages API) | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| β’ Persistent HF cache: HF_HOME=/data/.huggingface (25Β GB tier) | |
| β’ Persistent request log: /data/requests.log | |
| β’ Detailed system prompt (brand + guardrails) | |
| β’ Traces every request: Received β Prompt β generate() timing | |
| β’ Cleans replies & removes any stray βUser:β / βAI:β echoes | |
| """ | |
# ──────────────────── standard libraries ───────────────────────────────────
from __future__ import annotations

import os, re, time, datetime, traceback

# ───── gradio + hf transformers ────────────────────────────────────────────
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers.utils import logging as hf_logging
# ──────────────────── persistent disk paths ────────────────────────────────
os.environ["HF_HOME"] = "/data/.huggingface"  # model / tokenizer cache
LOG_FILE = "/data/requests.log"               # simple persistent log

def log(msg: str) -> None:
    """Print + append to /data/requests.log with a UTC timestamp."""
    ts = datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M:%S.%f")[:-3]
    line = f"[{ts}] {msg}"
    print(line, flush=True)
    try:  # ignore first-run errors (e.g. /data not mounted yet)
        with open(LOG_FILE, "a") as f:
            f.write(line + "\n")
    except OSError:
        pass
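# log() output format, e.g. (timestamp illustrative):
#   [14:03:22.517] Loading tokenizer & model …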

# ──────────────────── chatbot configuration ────────────────────────────────
MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"  # 2 B params, Apache-2.0
MAX_TURNS = 6       # keep last N user/assistant pairs
MAX_TOKENS = 128    # reply length (raise if you have patience)
MAX_INPUT_CH = 400  # user message length guard
SYSTEM_MSG = (
    "You are **SchoolSpirit AI**, the friendly digital mascot for a company "
    "that provides on-prem AI chat mascots, fine-tuning services, and turnkey "
    "GPU hardware for schools.\n\n"
    "• Keep answers concise, upbeat, and age-appropriate (K-12).\n"
    "• If you are unsure, say so and suggest contacting a human staff member.\n"
    "• Never request personal data beyond an email if the user volunteers it.\n"
    "• Do **not** provide medical, legal, or financial advice.\n"
    "• No politics, mature content, or profanity.\n"
    "Respond in a friendly, encouraging tone, as a helpful school mascot!"
)

# ──────────────────── load model & pipeline ────────────────────────────────
hf_logging.set_verbosity_error()
try:
    log("Loading tokenizer & model …")
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, device_map="auto", torch_dtype="auto"
    )
    gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        max_new_tokens=MAX_TOKENS,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,  # return only the completion, not the echoed prompt
    )
    MODEL_ERR = None
    log("Model loaded ✔")
except Exception as exc:  # noqa: BLE001
    MODEL_ERR, gen = f"Model load error: {exc}", None
    log(MODEL_ERR)

# ──────────────────── small helpers ────────────────────────────────────────
def clean(txt: str) -> str:
    """Collapse whitespace & guarantee a non-empty string."""
    return re.sub(r"\s+", " ", txt.strip()) or "…"
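# e.g. clean("  hello \n world ") -> "hello world";  clean("") -> "…"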


def trim_history(msgs: list[dict]) -> list[dict]:
    """Keep the system message plus the last MAX_TURNS user/assistant pairs."""
    if len(msgs) <= 1 + MAX_TURNS * 2:
        return msgs
    return [msgs[0]] + msgs[-MAX_TURNS * 2 :]
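# With MAX_TURNS = 6, the prompt carries at most the system message plus the
# 12 most recent user/assistant messages, so prompt length stays bounded.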

# ──────────────────── core chat function ───────────────────────────────────
def chat_fn(user_msg: str, history: list[dict] | None):
    log(f"User sent {len(user_msg or '')} chars")

    # ensure the history list exists & begins with the system prompt
    # (Gradio's messages-format history never includes the system role,
    # so prepend it without discarding earlier turns)
    if not history or history[0]["role"] != "system":
        history = [{"role": "system", "content": SYSTEM_MSG}] + (history or [])

    # fatal model-load failure
    if MODEL_ERR:
        return MODEL_ERR

    # basic user-input checks
    user_msg = clean(user_msg or "")
    if not user_msg:
        return "Please type something."
    if len(user_msg) > MAX_INPUT_CH:
        return f"Message too long (>{MAX_INPUT_CH} chars)."

    # add the user message & trim
    history.append({"role": "user", "content": user_msg})
    history = trim_history(history)

    # build the plain-text prompt string
    prompt_lines: list[str] = []
    for m in history:
        if m["role"] == "system":
            prompt_lines.append(m["content"])
        elif m["role"] == "user":
            prompt_lines.append(f"User: {m['content']}")
        else:
            prompt_lines.append(f"AI: {m['content']}")
    prompt_lines.append("AI:")
    prompt = "\n".join(prompt_lines)
| log(f"Prompt {len(prompt)} chars β’ generatingβ¦") | |
| # call generator | |
| t0 = time.time() | |
| try: | |
| raw = gen(prompt)[0]["generated_text"] | |
| reply = clean(raw.split("AI:", 1)[-1]) | |
| # β remove any echoed tags | |
| reply = re.split(r"\b(?:User:|AI:)", reply, 1)[0].strip() | |
| log(f"generate() {time.time() - t0:.2f}s β’ reply {len(reply)} chars") | |
| except Exception: # noqa: BLE001 | |
| log("β Inference exception:\n" + traceback.format_exc()) | |
| reply = "SorryβAI backend crashed. Please try again later." | |
| return reply | |

# ──────────────────── Gradio UI ────────────────────────────────────────────
gr.ChatInterface(
    fn=chat_fn,
    chatbot=gr.Chatbot(height=480, type="messages"),
    title="SchoolSpirit AI Chat",
    theme=gr.themes.Soft(primary_hue="blue"),  # light-blue accent
    type="messages",  # modern message dicts (role/content)
).launch()
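# To run outside the Space (a sketch; exact package pins are assumptions):
#   pip install gradio transformers torch accelerate
#   python app.py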