Pyasma committed on
Commit
bfe739b
·
1 Parent(s): d955ac2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -32
app.py CHANGED
@@ -1,21 +1,16 @@
1
- from transformers import AutoTokenizer, pipeline
2
- from auto_gptq import AutoGPTQForCausalLM
3
  import gradio as gr
4
 
5
- model_name = "TheBloke/Llama-2-7B-Chat-GGML"
6
 
7
  use_triton = False
8
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
10
 
11
- model = AutoModelForCausalLM.from_quantized(model_name,
12
- use_safetensors=True,
13
- trust_remote_code=True,
14
- device="cuda:0",
15
- use_triton=use_triton,
16
- quantize_config=None)
17
  model.to('cuda')
18
- #creating pipeline
 
19
  pipe = pipeline(
20
  "text-generation",
21
  model=model,
@@ -25,29 +20,23 @@ pipe = pipeline(
25
  top_p=0.95,
26
  repetition_penalty=1.15
27
  )
28
- from langchain.llms import HuggingFacePipeline
29
- from langchain.chains import LLMChain
30
- from langchain import PromptTemplate
31
- from langchain.memory import ConversationBufferMemory
32
- llm = HuggingFacePipeline(pipeline=pipe)
33
-
34
- prompt_template='''[INST] <<SYS>>
35
- You are an AI Query bot that will answer all of the user questions to the best of your ability. You are a helpful, respectful and honest. Your job is to answer the user query to the best of your ability. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
36
- You will be asked questions randomly on any topic just answer them to the best of your ability. Answer the question to the best of your ability without a single lie and harmful information.
37
- <</SYS>>'''
38
-
39
- memory = ConversationBufferMemory(memory_key="chat_history", ai_prefix="Query Bot", human_prefix="User")
40
 
 
 
 
 
 
41
 
42
- chat = LLMChain(
43
- llm=llm,
44
- prompt=PromptTemplate.from_template(prompt_template),
45
- verbose=True,
46
- memory=memory
47
- )
48
 
49
  def run(input, history=memory):
50
- return chat.predict(prompt=input)
51
-
52
- output = gr.ChatInterface(run)
53
- ouput.launch(share=True)
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
2
  import gradio as gr
3
 
4
+ model_name = "TheBloke/Llama-2-7B-Chat-GGML"
5
 
6
  use_triton = False
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
9
 
10
+ model = AutoModelForCausalLM.from_pretrained(model_name)
 
 
 
 
 
11
  model.to('cuda')
12
+
13
+ # Creating pipeline
14
  pipe = pipeline(
15
  "text-generation",
16
  model=model,
 
20
  top_p=0.95,
21
  repetition_penalty=1.15
22
  )
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Define your system message
25
+ system_message = """
26
+ You are an AI Query bot that will answer all of the user questions to the best of your ability. You are helpful, respectful, and honest. Your job is to answer the user's query to the best of your ability. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
27
+ You will be asked questions randomly on any topic. Just answer them to the best of your ability without a single lie and harmful information.
28
+ """
29
 
 
 
 
 
 
 
30
 
31
  def run(input, history=memory):
32
+ return chat.predict(prompt=input)
33
+
34
+ output = gr.ChatInterface(
35
+ run,
36
+ inputs="text",
37
+ outputs="text",
38
+ title="AI Query Bot",
39
+ show_system_message=True, # Display the system message
40
+ system_message=system_message, # Set the system message
41
+ )
42
+ output.launch(share=True)