Commit
·
09343ba
1
Parent(s):
aa23397
Instead of pre-loading the dataset, the server now uses lazy loading: the dataset is loaded only when a user sends a request, and it is loaded at most once overall.
Browse files- ASR_Server.py +39 -7
ASR_Server.py
CHANGED
|
@@ -38,16 +38,32 @@ job_status = {
|
|
| 38 |
"error_trace": None
|
| 39 |
}
|
| 40 |
|
|
|
|
| 41 |
csv_path = "test.csv"
|
| 42 |
-
# csv_transcript = f'test_with_{ASR_model.replace("/", "_")}.csv'
|
| 43 |
-
# csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
|
| 44 |
df = pd.read_csv(csv_path)
|
| 45 |
print(f"CSV Loaded with {len(df)} rows")
|
| 46 |
|
| 47 |
-
#
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def generateTranscript(ASR_model):
|
| 53 |
import os
|
|
@@ -76,7 +92,23 @@ def generateTranscript(ASR_model):
|
|
| 76 |
csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
|
| 77 |
|
| 78 |
try:
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# Check if transcript already exists
|
| 81 |
df_transcript = download_csv(csv_transcript)
|
| 82 |
if(df_transcript is None):
|
|
|
|
| 38 |
"error_trace": None
|
| 39 |
}
|
| 40 |
|
| 41 |
+
|
| 42 |
csv_path = "test.csv"
|
|
|
|
|
|
|
| 43 |
df = pd.read_csv(csv_path)
|
| 44 |
print(f"CSV Loaded with {len(df)} rows")
|
| 45 |
|
| 46 |
# lazy loading of dataset code:
_dataset = None
_dataset_lock = threading.Lock()


def get_dataset():
    """Lazily load the ASR fairness audio dataset, ensuring it loads only once.

    Returns:
        The cached dataset with its "audio" column cast to ``Audio(decode=False)``
        (audio stays as raw file references; decoding is deferred to per-sample use).

    Raises:
        Exception: re-raises whatever ``load_dataset``/``cast_column`` raised so
        the caller can surface the failure (e.g. into job_status).
    """
    global _dataset
    # Fast path: once loaded, return without touching the lock. In CPython,
    # reading a module-level reference is atomic, so this double-checked
    # pattern is safe and avoids lock contention on every request.
    if _dataset is not None:
        return _dataset
    # Use a lock to prevent race conditions (two requests loading at once)
    with _dataset_lock:
        # Re-check under the lock: another thread may have finished the load
        # while we were waiting to acquire it.
        if _dataset is None:
            print("Loading dataset for the first time...")
            try:
                ds = load_dataset("satyamr196/asr_fairness_audio", split="train")
                _dataset = ds.cast_column("audio", Audio(decode=False))
                print("Dataset loaded successfully.")
            except Exception as e:
                print(f"FATAL: Failed to load dataset: {e}")
                # Propagate the error
                raise
    return _dataset
# --- END LAZY LOADING ---
|
| 67 |
|
| 68 |
def generateTranscript(ASR_model):
|
| 69 |
import os
|
|
|
|
| 92 |
csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
|
| 93 |
|
| 94 |
try:
|
| 95 |
+
|
| 96 |
+
# --- BLOCK TO LOAD THE DATASET ---
|
| 97 |
+
try:
|
| 98 |
+
print("Attempting to get dataset...")
|
| 99 |
+
dataset = get_dataset()
|
| 100 |
+
except Exception as e:
|
| 101 |
+
tb = traceback.format_exc()
|
| 102 |
+
print(f"❌ Failed to load dataset: {e}")
|
| 103 |
+
job_status.update({
|
| 104 |
+
"running": False,
|
| 105 |
+
"message": f"Critical error: Failed to load dataset.",
|
| 106 |
+
"error": str(e),
|
| 107 |
+
"error_trace": tb[:1000],
|
| 108 |
+
})
|
| 109 |
+
return # Stop the function if dataset fails to load
|
| 110 |
+
# --- END OF dataset load BLOCK ---
|
| 111 |
+
|
| 112 |
# Check if transcript already exists
|
| 113 |
df_transcript = download_csv(csv_transcript)
|
| 114 |
if(df_transcript is None):
|