Update src/utils.py

src/utils.py  (+14, -24)  CHANGED
@@ -1,13 +1,13 @@
 import streamlit as st
 import pickle
 import pandas as pd
-from sentence_transformers import SentenceTransformer, models
-import torch
 import numpy as np
-
+import torch
+from sentence_transformers import SentenceTransformer, models
 from huggingface_hub import hf_hub_download
+from src.search import init_faiss
 
-#
+# Hugging Face repo IDs
 DATASET_REPO = "param2004/Medilingua-dataset"
 MODEL_REPO = "param2004/Medilingua-model"
 
@@ -15,47 +15,36 @@ MODEL_REPO = "param2004/Medilingua-model"
 def load_model():
     """Load SapBERT dynamically from Hugging Face Hub"""
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
     st.info(f"🔬 Loading SapBERT from Hugging Face Hub on {device.upper()}...")
 
-    # Download model files dynamically
     try:
+        # Download model files from Hub
         model_path = hf_hub_download(
             repo_id=MODEL_REPO,
             filename="models/SapBERT-from-PubMedBERT-fulltext/pytorch_model.bin"
         )
 
-        #
+        # Build SentenceTransformer manually
         word_embedding_model = models.Transformer(model_path)
         pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
         model = SentenceTransformer(modules=[word_embedding_model, pooling_model], device=device)
 
-        st.success("✅ SapBERT loaded successfully from Hub.")
+        st.success("✅ SapBERT loaded successfully from Hugging Face Hub.")
     except Exception as e:
-        st.error(f"❌ Failed to load SapBERT from Hub")
+        st.error(f"❌ Failed to load SapBERT from Hub: {e}")
         st.warning("⚠️ Falling back to 'all-MiniLM-L6-v2' model.")
         model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
 
     return model
 
-
 @st.cache_resource
 def load_data():
     """Load embeddings and dataset dynamically from Hugging Face Hub"""
     try:
-        # Download embeddings
-        question_emb_path = hf_hub_download(
-            repo_id=DATASET_REPO,
-            filename="dataset/question_embeddings.pkl"
-        )
-        doctor_emb_path = hf_hub_download(
-            repo_id=DATASET_REPO,
-            filename="dataset/doctor_embeddings.pkl"
-        )
-        dataset_csv_path = hf_hub_download(
-            repo_id=DATASET_REPO,
-            filename="dataset/dataset.csv"
-        )
+        # Download embeddings & CSV from Hub
+        question_emb_path = hf_hub_download(DATASET_REPO, filename="dataset/question_embeddings.pkl")
+        doctor_emb_path = hf_hub_download(DATASET_REPO, filename="dataset/doctor_embeddings.pkl")
+        dataset_csv_path = hf_hub_download(DATASET_REPO, filename="dataset/dataset.csv")
 
         # Load embeddings
         with open(question_emb_path, 'rb') as f:
@@ -66,11 +55,12 @@
         doctor_data = pickle.load(f)
         doctor_embeddings = doctor_data.get('embeddings').astype('float32')
 
-        # Load CSV
+        # Load dataset CSV
        df = pd.read_csv(dataset_csv_path)
         df.dropna(subset=['Description', 'Patient', 'Doctor'], inplace=True)
         df.drop_duplicates(inplace=True)
 
+        # Ensure all arrays align
         num_samples = min(len(df), len(question_embeddings), len(doctor_embeddings))
         df = df.iloc[:num_samples]
         question_embeddings = question_embeddings[:num_samples]