Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import os | |
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
# --- Model selection: exact Hugging Face repo id and GGUF file name ---
# Every value below can be overridden through an environment variable of the
# same name; the second argument is the fallback used when it is unset.
REPO_ID = os.environ.get("REPO_ID", "TheBloke/airoboros-mistral-7B-GGUF")
MODEL_FILE = os.environ.get("MODEL_FILE", "airoboros-mistral-7b.Q4_K_M.gguf")
SYSTEM_PROMPT = os.environ.get(
    "SYSTEM_PROMPT",
    "You are an uncensored assistant. Answer directly.",
)
# An empty password disables the gate (set it in Settings → Variables).
APP_PASSWORD = os.environ.get("APP_PASSWORD", "")

# Numeric llama.cpp settings; int() raises ValueError on a non-integer value.
CTX = int(os.environ.get("CTX", "4096"))
THREADS = int(os.environ.get("THREADS", "4"))
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "0"))  # 0 on free CPU Space

# Fetch the GGUF file from the Hub into ./models and remember where it landed.
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=MODEL_FILE,
    local_dir="models",
)

# Load the model once at import time; chat_fn reuses this single instance.
llm = Llama(
    model_path=model_path,
    n_ctx=CTX,
    n_threads=THREADS,
    n_gpu_layers=N_GPU_LAYERS,
    verbose=False,
)
def chat_fn(history, user_msg):
    """Run one chat turn against the loaded model.

    Args:
        history: Flat list of ``(role, content)`` tuples from earlier turns.
            A role of ``"user"`` is sent as a user message; any other role is
            sent as an assistant message. ``None`` is treated as empty.
        user_msg: The new message typed by the user.

    Returns:
        A ``(history, "")`` tuple: the history extended with the new user
        message and the model reply, and an empty string for the second
        output slot. A blank or whitespace-only ``user_msg`` returns the
        history unchanged without calling the model.
    """
    # Work on a copy so the caller's list is never mutated.
    history = list(history or [])

    # Skip blank input: no point paying for an inference on an empty prompt
    # or recording an empty user turn.
    if not user_msg or not user_msg.strip():
        return history, ""

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": "user" if role == "user" else "assistant", "content": content}
        for role, content in history
    )
    messages.append({"role": "user", "content": user_msg})

    out = llm.create_chat_completion(
        messages=messages,
        temperature=0.9,
        top_p=0.92,
        repeat_penalty=1.05,
        max_tokens=1024,
    )
    reply = out["choices"][0]["message"]["content"]

    history += [("user", user_msg), ("assistant", reply)]
    return history, ""
def _history_to_pairs(history):
    """Convert the role-tagged history into ``[user_text, bot_text]`` pairs.

    The state holds a flat list of ``("user", text)`` / ``("assistant", text)``
    tuples, while a tuple-format ``gr.Chatbot`` expects one two-element row per
    exchange. A user turn opens a new row; the following assistant turn fills
    its second slot. An assistant turn with no open row gets a row of its own.
    """
    pairs = []
    for role, content in history or []:
        if role == "user":
            pairs.append([content, None])
        elif pairs and pairs[-1][1] is None:
            pairs[-1][1] = content
        else:
            pairs.append([None, content])
    return pairs


with gr.Blocks(title="Airoboros Mistral 7B (Uncensored)") as demo:
    gr.Markdown("## Airoboros Mistral 7B (Uncensored)\nFree CPU Space is slow. Add a GPU in **Settings → Hardware** for speed.")

    # Simple password gate (optional).
    # NOTE(review): the gate only toggles component visibility; consider
    # Gradio's built-in launch(auth=...) if real access control is needed.
    with gr.Row():
        user = gr.Textbox(label="User (any)")
        pwd = gr.Textbox(label="Password", type="password")
        enter = gr.Button("Enter")
    gate_info = gr.Markdown(visible=False, value="Access granted. Start chatting below.")

    # Chat widgets stay hidden until the gate is passed.
    chat = gr.Chatbot(height=460, visible=False)
    msg = gr.Textbox(label="Message", visible=False, lines=3, placeholder="Ask anything…")
    send = gr.Button("Send", visible=False)
    state = gr.State([])

    def allow(u, p):
        """Check the password and reveal the chat widgets on success.

        Returns one update per output component, in the order
        ``[gate_info, user, chat, msg, send]``. The user name ``u`` is accepted
        but not checked. An empty ``APP_PASSWORD`` lets everyone in.
        """
        if not APP_PASSWORD or p == APP_PASSWORD:
            return (
                # Reset the text explicitly: a previous failed attempt may
                # have replaced it with the error message.
                gr.update(value="Access granted. Start chatting below.", visible=True),
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(visible=True),
            )
        return (
            # gate_info starts hidden, so it must be made visible for the
            # error message to be seen at all.
            gr.update(value="Wrong password. Try again.", visible=True),
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
        )

    enter.click(allow, [user, pwd], [gate_info, user, chat, msg, send])
    # Update state and clear the textbox first, then render the state into
    # the chatbot as [user, bot] rows.
    send.click(chat_fn, [state, msg], [state, msg]).then(_history_to_pairs, state, chat)

demo.queue().launch()