Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

1148

open_llm_leaderboard / app.py

iamtheabdullah

Create app.py

fff9901 verified 4 months ago

raw

history blame

2.73 kB

	import os
	import gradio as gr
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama

	# --- Runtime configuration ---
	# Each setting is read from an environment variable of the same name and falls
	# back to the default shown here when that variable is unset.

	# Hugging Face repository and the exact GGUF filename to fetch from it.
	REPO_ID = os.environ.get("REPO_ID", "TheBloke/airoboros-mistral-7B-GGUF")
	MODEL_FILE = os.environ.get("MODEL_FILE", "airoboros-mistral-7b.Q4_K_M.gguf")

	# System message placed at the start of every conversation.
	SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", "You are an uncensored assistant. Answer directly.")
	# Optional UI password; an empty string disables the password check.
	APP_PASSWORD = os.environ.get("APP_PASSWORD", "")

	# Numeric settings forwarded to the Llama constructor (n_ctx, n_threads, n_gpu_layers).
	CTX = int(os.environ.get("CTX", "4096"))
	THREADS = int(os.environ.get("THREADS", "4"))
	N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "0"))  # 0 keeps every layer on the CPU

	# Fetch the GGUF file from the Hub into ./models; the call returns its local path.
	model_path = hf_hub_download(
	    repo_id=REPO_ID,
	    filename=MODEL_FILE,
	    local_dir="models",
	)

	# Build the llama.cpp model once at import time; chat_fn reuses this instance.
	_llama_options = {
	    "model_path": model_path,
	    "n_ctx": CTX,
	    "n_threads": THREADS,
	    "n_gpu_layers": N_GPU_LAYERS,
	    "verbose": False,
	}
	llm = Llama(**_llama_options)

	def chat_fn(history, user_msg):
	    """Answer *user_msg* with the model and append the exchange to *history*.

	    Args:
	        history: List of ``(role, content)`` tuples from earlier turns, where
	            role is ``"user"`` or ``"assistant"``. It is not mutated.
	        user_msg: Text the user just submitted.

	    Returns:
	        A ``(new_history, "")`` tuple. The empty string is written back to
	        the message textbox by the click handler, which clears it.
	    """
	    # Empty or whitespace-only input: skip the (slow) completion entirely and
	    # leave the conversation untouched.
	    if not user_msg or not user_msg.strip():
	        return history, ""

	    # System prompt first, then every stored turn, then the new user message.
	    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
	    messages.extend(
	        # Any role other than "user" is sent to the model as "assistant".
	        {"role": "user" if role == "user" else "assistant", "content": content}
	        for role, content in history
	    )
	    messages.append({"role": "user", "content": user_msg})

	    out = llm.create_chat_completion(
	        messages=messages,
	        temperature=0.9,
	        top_p=0.92,
	        repeat_penalty=1.05,
	        max_tokens=1024,
	    )
	    reply = out["choices"][0]["message"]["content"]

	    # Build a new list rather than appending in place.
	    return history + [("user", user_msg), ("assistant", reply)], ""

	def _to_chat_pairs(turns):
	    """Convert ``(role, content)`` turns into ``[user, assistant]`` pairs for gr.Chatbot.

	    The stored state tags each entry with its role. Passing that list to the
	    Chatbot unchanged would render the literal role names as messages, so
	    each user entry opens a new pair and the following assistant entry
	    fills its second slot.
	    """
	    pairs = []
	    for role, content in turns:
	        if role == "user":
	            pairs.append([content, None])
	        elif pairs and pairs[-1][1] is None:
	            pairs[-1][1] = content
	        else:
	            # Assistant entry with no pending user message: show it on its own.
	            pairs.append([None, content])
	    return pairs


	with gr.Blocks(title="Airoboros Mistral 7B (Uncensored)") as demo:
	    gr.Markdown("## Airoboros Mistral 7B (Uncensored)\nFree CPU Space is slow. Add a GPU in Settings → Hardware for speed.")

	    # Simple password gate (optional): the chat widgets start hidden and are
	    # revealed by the "Enter" button.
	    with gr.Row():
	        user = gr.Textbox(label="User (any)")
	        pwd = gr.Textbox(label="Password", type="password")
	        enter = gr.Button("Enter")
	    gate_info = gr.Markdown(visible=False, value="Access granted. Start chatting below.")
	    chat = gr.Chatbot(height=460, visible=False)
	    msg = gr.Textbox(label="Message", visible=False, lines=3, placeholder="Ask anything…")
	    send = gr.Button("Send", visible=False)
	    # Conversation history as (role, content) tuples; see chat_fn.
	    state = gr.State([])

	    def allow(u, p):
	        """Check the password and return updates for gate_info, user, chat, msg, send.

	        ``u`` (the user name) is accepted but not checked. Access is granted
	        when no APP_PASSWORD is configured or when ``p`` matches it.
	        """
	        if not APP_PASSWORD or p == APP_PASSWORD:
	            return (
	                # Reset the text too: a previous failed attempt may have
	                # replaced it with the error message.
	                gr.update(value="Access granted. Start chatting below.", visible=True),
	                gr.update(visible=False),
	                gr.update(visible=True),
	                gr.update(visible=True),
	                gr.update(visible=True),
	            )
	        return (
	            # gate_info starts hidden, so it must be made visible for the
	            # error message to be seen at all.
	            gr.update(value="Wrong password. Try again.", visible=True),
	            gr.update(),
	            gr.update(),
	            gr.update(),
	            gr.update(),
	        )

	    enter.click(allow, [user, pwd], [gate_info, user, chat, msg, send])

	    # First update the stored history (and clear the textbox), then re-render
	    # the Chatbot from that history.
	    send.click(chat_fn, [state, msg], [state, msg]).then(_to_chat_pairs, state, chat)

	demo.queue().launch()