| import gradio as gr | |
| from gpt4all import GPT4All | |
| from huggingface_hub import hf_hub_download | |
| from diarizationlm import utils | |
# --- Text shown in the Gradio page header -----------------------------------
title = "💬DiarizationLM GGUF inference on CPU💬"
description = """
A demo of the DiarizationLM model finetuned from Llama 3. In this demo, we run a 4-bit quantized GGUF model on CPU.
To learn more about DiarizationLM, check our paper: https://arxiv.org/abs/2401.03506
"""

# --- Model location and prompt/completion markers ---------------------------
model_path = "models"          # local directory the GGUF file is stored in
model_name = "q4_k_m.gguf"     # file name inside the Hugging Face repo
prompt_suffix = " --> "        # appended to the user text before generation
completion_suffix = " [eod]"   # marker expected at the end of a completion

# Download the quantized weights into `model_path` before loading them.
hf_hub_download(
    repo_id="google/DiarizationLM-8b-Fisher-v2",
    filename=model_name,
    local_dir=model_path,
)

# Load the model from the local file only (allow_download=False), on CPU.
print("Start the model init process")
model = GPT4All(
    model_name=model_name,
    model_path=model_path,
    allow_download=False,
    device="cpu",
)
print("Finish the model init process")
def generater(prompt: str):
    """Stream a model completion for ``prompt``, then yield the final result.

    The prompt is extended with ``prompt_suffix`` and passed to
    ``model.generate`` in streaming mode. After every received token the
    text accumulated so far is yielded, so the UI can show progress.
    Generation stops early once the text ends with ``completion_suffix``.
    The last value yielded is the return value of
    ``utils.transfer_llm_completion(completion, prompt)``.
    NOTE(review): by its name that helper maps the completion's speaker
    labels back onto the original prompt text -- confirm against the
    diarizationlm documentation.

    Args:
        prompt: Text entered by the user.

    Yields:
        The growing completion after each token, followed by the
        transferred completion. For a blank prompt a single empty string
        is yielded. If the model produces no tokens, nothing is yielded.
    """
    # A blank prompt has nothing to process and would give a token budget
    # of zero; answer with an empty result instead of calling the model.
    if not prompt or not prompt.strip():
        yield ""
        return

    llm_prompt = prompt + prompt_suffix
    # Token budget derived from the prompt length in characters
    # (presumably ~3 characters per token plus 20% headroom -- TODO confirm).
    max_new_tokens = round(len(prompt) / 3.0 * 1.2)

    # Initialised up front so the name is bound even when the model
    # yields no tokens (e.g. a very short prompt rounds the budget to 0).
    completion = ""
    token_stream = model.generate(
        prompt=llm_prompt,
        temp=0.1,
        top_k=50,
        top_p=0.5,
        max_tokens=max_new_tokens,
        streaming=True,
    )
    for token in token_stream:
        completion += token
        yield completion
        # Use the shared module constant rather than a duplicated literal.
        if completion.endswith(completion_suffix):
            break

    # Nothing was generated, so there is no completion to transfer.
    if not completion:
        return

    yield utils.transfer_llm_completion(completion, prompt)
# Gradio UI: a single text box in, a single text box out, backed by the
# streaming `generater` function. Examples are not pre-computed
# (cache_examples=False).
demo = gr.Interface(
    fn=generater,
    inputs=["text"],
    outputs=["text"],
    title=title,
    description=description,
    cache_examples=False,
    examples=[
        ["<speaker:1> Hello, my name is Tom. May I speak to Laura <speaker:2> please? Hello, this is Laura. <speaker:1> Hi Laura, how are you? This is <speaker:2> Tom. Hi Tom, I haven't seen you for a <speaker:1> while."],
        ["<speaker:1> This demo looks really <speaker:2> good! Thanks, I am glad to hear that."],
    ],
)
if __name__ == "__main__":
    # Enable the request queue with at most 3 queued requests, then start
    # the app on the object that queue() returns.
    queued_demo = demo.queue(max_size=3)
    queued_demo.launch()