kirubel1738 committed on
Commit f021bfb · verified · 1 Parent(s): 19a6024

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +23 -36
src/streamlit_app.py CHANGED
@@ -2,9 +2,8 @@
 import os
 import streamlit as st
 import torch
-from transformers import AutoTokenizer, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from peft import PeftModel
-from unsloth import FastLanguageModel
 
 # -----------------------------
 # Ensure cache dirs are writable in Spaces
@@ -15,44 +14,25 @@ os.environ.setdefault("HF_DATASETS_CACHE", "/tmp/huggingface/datasets")
 os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
 os.environ.setdefault("XDG_CACHE_HOME", "/tmp/huggingface")
 
-# -----------------------------
-# Model IDs
-# -----------------------------
-BASE_MODEL = "unsloth/llama-3-8b-bnb-4bit"
-ADAPTER_MODEL = "kirubel1738/llama3-biology-qa"
+# Base and adapter model IDs
+BASE_MODEL = "microsoft/BioGPT-Large-PubMedQA"
+ADAPTER_MODEL = "kirubel1738/biogpt-pubmedqa-finetuned"
 
-# -----------------------------
-# Load model once
-# -----------------------------
 @st.cache_resource
 def load_model():
-    """Load LLaMA-3 8B with PEFT adapter entirely on CPU."""
-    st.info("Loading LLaMA-3 model on CPU... This may take a while.")
+    """Load BioGPT with your Biology-QA adapter."""
+    # Pick device automatically
+    device = 0 if torch.cuda.is_available() else -1
 
-    # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-
-    # Load base model in 4-bit on CPU
-    base_model, _ = FastLanguageModel.from_pretrained(
-        model_name=BASE_MODEL,
-        max_seq_length=2048,
-        dtype=None,
-        load_in_4bit=True,
-        device_map={"": "cpu"}  # force CPU
-    )
-
-    # Apply adapter
-    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
-
-    # Text-generation pipeline on CPU
+    base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
+    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)  # apply adapter
+
     generator = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        device=-1,  # CPU
-        max_new_tokens=256,
-        do_sample=True,
-        temperature=0.7
+        device=device
     )
     return generator
 
@@ -62,9 +42,10 @@ generator = load_model()
 # -----------------------------
 # Streamlit UI
 # -----------------------------
-st.set_page_config(page_title="LLaMA-3 Biology QA", layout="centered")
-st.title("🧬 LLaMA-3 Biology QA Demo")
-st.write("Ask a biology question and get an answer generated by LLaMA-3 fine-tuned on the Biology QA dataset.")
+st.set_page_config(page_title="BioGPT Biology QA Demo", layout="centered")
+st.title("🧬 BioGPT PubMed QA Demo")
+
+st.write("Ask a biology-related question and get an answer generated by BioGPT-Large-PubMedQA fine-tuned with your Biology-QA adapter.")
 
 user_input = st.text_area("Enter your biology question:", height=150)
 
@@ -72,7 +53,13 @@ if st.button("Get Answer"):
    if user_input.strip():
        with st.spinner("Generating answer..."):
            try:
-                result = generator(user_input)
+                result = generator(
+                    user_input,
+                    max_new_tokens=128,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9
+                )
                output_text = result[0]["generated_text"]
                st.success("Answer:")
                st.write(output_text)
@@ -82,4 +69,4 @@ if st.button("Get Answer"):
        st.warning("Please enter a question.")
 
 st.markdown("---")
-st.caption(f"Model: {BASE_MODEL} + adapter {ADAPTER_MODEL} | Runs on CPU")
+st.caption(f"Model: {BASE_MODEL} + adapter {ADAPTER_MODEL} | Runs on CPU/GPU automatically")
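For quick review, the updated loading path can be exercised outside the Space as a standalone script. This is a minimal sketch assuming transformers, peft, and torch are installed and the adapter repo named in this commit is accessible; the sample question is illustrative only:

# Smoke test for the loading path introduced in this commit.
# Mirrors src/streamlit_app.py; run locally, not inside the Space.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel

BASE_MODEL = "microsoft/BioGPT-Large-PubMedQA"
ADAPTER_MODEL = "kirubel1738/biogpt-pubmedqa-finetuned"  # adapter from this commit

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)  # apply adapter

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # GPU if present, else CPU
)

result = generator(
    "What is the function of mitochondria?",  # illustrative question
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
print(result[0]["generated_text"])

If inference latency matters, the adapter can optionally be folded into the base weights with model.merge_and_unload() before building the pipeline, which removes the per-forward PEFT overhead.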