Spaces:

diarizers-community
/

DiarizationLM-GGUF

Running

App Files Files Community

wq2012 committed on Jun 27, 2024

Commit

066ff91

verified ·

1 Parent(s): 700a4e4

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -18

app.py CHANGED Viewed

@@ -1,36 +1,49 @@
 import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
 title = "DiarizationLM GGUF inference on CPU"
 description = """
-DiarizationLM GGUF inference on CPU
 """
 model_path = "models"
 model_name = "q4_k_m.gguf"
 hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 print("Start the model init process")
-model = GPT4All(model_name=model_name, model_path=model_path, allow_download = False, device="cpu")
 print("Finish the model init process")
-model.config["promptTemplate"] = "{0} --> "
-model.config["systemPrompt"] = ""
-model._is_chat_session_activated = False
-print("Finish the model config process")
-def generater(message, history, temperature, top_p, top_k):
-    prompt = model.config["promptTemplate"].format(message)
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
     outputs = []
-    for token in model.generate(prompt=prompt, temp=0.0, top_k = 50, top_p = 0.9, max_tokens = max_new_tokens, streaming=True):
         outputs.append(token)
-        yield "".join(outputs)
 def vote(data: gr.LikeData):
     if data.liked:
         return
@@ -48,17 +61,14 @@ iface = gr.ChatInterface(
     chatbot=chatbot,
     additional_inputs=[],
     examples=[
-        ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
    ]
 )
-print("Added iface")
 with gr.Blocks() as demo:
     chatbot.like(vote, None, None)
     iface.render()
-print("Rendered iface")
 if __name__ == "__main__":
     demo.queue(max_size=3).launch()

 import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
+from diarizationlm import utils
 title = "DiarizationLM GGUF inference on CPU"
 description = """
+A demo of the DiarizationLM model finetuned from Llama 2. In this demo, we run a 4-bit quantized GGUF model on CPU.
+To learn more about DiarizationLM, check our paper: https://arxiv.org/abs/2401.03506
 """
 model_path = "models"
 model_name = "q4_k_m.gguf"
+prompt_suffix = " --> "
+completion_suffix = " [eod]"
 hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 print("Start the model init process")
+model = GPT4All(model_name=model_name,
+                model_path=model_path,
+                allow_download = False,
+                device="cpu")
 print("Finish the model init process")
+def generater(message, history):
+    prompt = message + prompt_suffix
     max_new_tokens = round(len(prompt) / 3.0 * 1.2)
     outputs = []
+    for token in model.generate(prompt=prompt,
+                                temp=0.0,
+                                top_k=50,
+                                top_p=0.9,
+                                max_tokens=max_new_tokens,
+                                streaming=True):
         outputs.append(token)
+        completion = "".join(outputs)
+        if completion.endswith(" [eod]"):
+            transferred_completion = utils.transfer_llm_completion(completion, message)
+            yield transferred_completion
+            return
+        else:
+            yield completion
 def vote(data: gr.LikeData):
     if data.liked:
         return
     chatbot=chatbot,
     additional_inputs=[],
     examples=[
+        ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well. What about <speaker:1> you? I'm doing well, too. Thank you."],
    ]
 )
 with gr.Blocks() as demo:
     chatbot.like(vote, None, None)
     iface.render()
 if __name__ == "__main__":
     demo.queue(max_size=3).launch()