Spaces:

Pyasma
/

Querybot

Runtime error

App Files Community

Pyasma committed on Sep 17, 2023

Commit

bfe739b

1 Parent(s): d955ac2

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -32

app.py CHANGED Viewed

@@ -1,21 +1,16 @@
-from transformers import AutoTokenizer, pipeline
-from auto_gptq import AutoGPTQForCausalLM
 import gradio as gr
-model_name = "TheBloke/Llama-2-7B-Chat-GGML"
 use_triton = False
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-model = AutoModelForCausalLM.from_quantized(model_name,
-        use_safetensors=True,
-        trust_remote_code=True,
-        device="cuda:0",
-        use_triton=use_triton,
-        quantize_config=None)
 model.to('cuda')
-#creating pipeline
 pipe = pipeline(
     "text-generation",
     model=model,
@@ -25,29 +20,23 @@ pipe = pipeline(
     top_p=0.95,
     repetition_penalty=1.15
 )
-from langchain.llms import HuggingFacePipeline
-from langchain.chains import LLMChain
-from langchain import PromptTemplate
-from langchain.memory import ConversationBufferMemory
-llm = HuggingFacePipeline(pipeline=pipe)
-prompt_template='''[INST] <<SYS>>
-You are an AI Query bot that will answer all of the user questions to the best of your ability. You are a helpful, respectful and honest. Your job is to answer the user query to the best of your ability. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
-You will be asked questions randomly on any topic just answer them to the best of your ability. Answer the question to the best of your ability without a single lie and harmful information.
-<</SYS>>'''
-memory = ConversationBufferMemory(memory_key="chat_history", ai_prefix="Query Bot", human_prefix="User")
-chat = LLMChain(
-    llm=llm,
-    prompt=PromptTemplate.from_template(prompt_template),
-    verbose=True,
-    memory=memory
-)
 def run(input, history=memory):
-  return chat.predict(prompt=input)
-output = gr.ChatInterface(run)
-ouput.launch(share=True)

+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import gradio as gr
+model_name = "TheBloke/Llama-2-7B-Chat-GGML"
 use_triton = False
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+model = AutoModelForCausalLM.from_pretrained(model_name)
 model.to('cuda')
+# Creating pipeline
 pipe = pipeline(
     "text-generation",
     model=model,
     top_p=0.95,
     repetition_penalty=1.15
 )
+# Define your system message
+system_message = """
+You are an AI Query bot that will answer all of the user questions to the best of your ability. You are helpful, respectful, and honest. Your job is to answer the user's query to the best of your ability. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
+You will be asked questions randomly on any topic. Just answer them to the best of your ability without a single lie and harmful information.
+"""
 def run(input, history=memory):
+    return chat.predict(prompt=input)
+output = gr.ChatInterface(
+    run,
+    inputs="text",
+    outputs="text",
+    title="AI Query Bot",
+    show_system_message=True,  # Display the system message
+    system_message=system_message,  # Set the system message
+)
+output.launch(share=True)