zeeshan committed on
Commit
aeaa1d3
·
1 Parent(s): 8f8259e

Add timeout to weight download and .dockerignore to prevent repeated slow downloads

Browse files
.dockerignore ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git and version control
2
+ .git
3
+ .gitignore
4
+ .gitattributes
5
+
6
+ # Python cache
7
+ __pycache__
8
+ *.pyc
9
+ *.pyo
10
+ *.egg-info
11
+ .pytest_cache
12
+ .tox
13
+
14
+ # Virtual environments
15
+ venv
16
+ env
17
+ .venv
18
+
19
+ # IDE and editors
20
+ .vscode
21
+ .idea
22
+ *.swp
23
+ *.swo
24
+ *~
25
+
26
+ # OS files
27
+ .DS_Store
28
+ Thumbs.db
29
+ .directory
30
+
31
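+ # Large files that shouldn't be in Docker (note: patterns without a leading `**/` only match at the build-context root, not in subdirectories)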
32
+ *.pth
33
+ *.pt
34
+ *.whl
35
+ *.zip
36
+ *.tar.gz
37
+
38
+ # Test and log files
39
+ *.log
40
+ test_output
41
+ *.tmp
42
+
43
+ # Frontend dev
44
+ node_modules
45
+ npm-debug.log
46
+
47
+ # Model outputs (will be generated at runtime)
48
+ deployment/backend/outputs/
49
+ model/work_dirs/
50
+ model/result/
51
+
52
+ # Documentation
53
+ docs/
54
+ *.md
55
+
56
+ # Training data and models
57
+ data/
58
+ checkpoints/
59
+ weights/
60
+
61
+ # Assets
62
+ assets/
63
+ *.png
64
+ *.jpg
65
+ *.jpeg
Dockerfile CHANGED
@@ -26,14 +26,16 @@ COPY requirements.txt /app/
26
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
27
  pip install --no-cache-dir -r requirements.txt
28
 
29
- # Copy application code
30
  COPY . /app/
31
 
32
  # Create checkpoint directory
33
  RUN mkdir -p /app/finetuning_rodla/finetuning_rodla/checkpoints
34
 
35
- # Download model weights during build
36
- RUN python /app/deployment/backend/download_weights.py || echo "⚠️ Weight download script completed (may need manual download)"
 
 
37
 
38
  # Run backend.py on port 7860 (HuggingFace standard)
39
  CMD ["python", "deployment/backend/backend.py"]
 
26
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
27
  pip install --no-cache-dir -r requirements.txt
28
 
29
+ # Copy application code (excluding large files via .dockerignore)
30
  COPY . /app/
31
 
32
  # Create checkpoint directory
33
  RUN mkdir -p /app/finetuning_rodla/finetuning_rodla/checkpoints
34
 
35
+ # Optional: Attempt to download weights at build time (non-blocking)
36
+ # If download fails or times out, the backend will fall back to heuristic detection
37
+ # Weights can be manually placed in: /app/finetuning_rodla/finetuning_rodla/checkpoints/rodla_internimage_xl_publaynet.pth
38
+ RUN timeout 300 python /app/deployment/backend/download_weights.py 2>&1 || echo "⚠️ Weight download timed out or failed - backend will use heuristic detection. To enable full detection, manually place weights file in checkpoints directory."
39
 
40
  # Run backend.py on port 7860 (HuggingFace standard)
41
  CMD ["python", "deployment/backend/backend.py"]
deployment/backend/download_weights.py CHANGED
@@ -2,56 +2,95 @@
2
  """
3
  Download model weights from Google Drive
4
  This script is used during Docker build or startup to download required weights
 
5
  """
6
  import os
7
- import urllib.request
8
  import sys
 
9
  from pathlib import Path
10
  import gdown
11
 
12
- def download_weights():
13
- """Download weights from Google Drive"""
14
-
15
- # Google Drive file ID from the shared link
16
- # https://drive.google.com/file/d/1BHyz2jH52Irt6izCeTRb4g2J5lXsA9cz/view?usp=sharing
17
- DRIVE_FILE_ID = "1BHyz2jH52Irt6izCeTRb4g2J5lXsA9cz"
18
-
19
- # Destination path
20
- REPO_ROOT = Path("/home/admin/CV/rodla-academic")
21
- WEIGHTS_DIR = REPO_ROOT / "finetuning_rodla/finetuning_rodla/checkpoints"
22
- WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
23
-
24
- WEIGHTS_PATH = WEIGHTS_DIR / "rodla_internimage_xl_publaynet.pth"
25
-
26
- # Check if weights already exist and are not empty
27
- if WEIGHTS_PATH.exists() and WEIGHTS_PATH.stat().st_size > 1000000: # > 1MB
28
- print(f"βœ… Weights already exist at {WEIGHTS_PATH}")
29
- print(f" Size: {WEIGHTS_PATH.stat().st_size / (1024**3):.2f} GB")
30
- return True
31
 
32
- print(f"πŸ“₯ Downloading model weights from Google Drive...")
33
- print(f" File ID: {DRIVE_FILE_ID}")
34
- print(f" Destination: {WEIGHTS_PATH}")
35
 
36
  try:
37
- # Download using gdown
38
- url = f"https://drive.google.com/uc?id={DRIVE_FILE_ID}"
39
- gdown.download(url, str(WEIGHTS_PATH), quiet=False)
 
 
 
 
 
 
 
 
 
40
 
41
- # Verify download
42
- if WEIGHTS_PATH.exists() and WEIGHTS_PATH.stat().st_size > 1000000:
43
- print(f"βœ… Download successful!")
 
 
44
  print(f" Size: {WEIGHTS_PATH.stat().st_size / (1024**3):.2f} GB")
 
45
  return True
46
- else:
47
- print(f"❌ Download failed or file is too small")
48
- return False
 
 
 
 
 
 
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  except Exception as e:
51
- print(f"❌ Error downloading weights: {e}")
52
  return False
 
 
53
 
54
 
55
  if __name__ == "__main__":
56
- success = download_weights()
 
 
57
  sys.exit(0 if success else 1)
 
2
  """
3
  Download model weights from Google Drive
4
  This script is used during Docker build or startup to download required weights
5
+ Includes timeout to prevent hanging on HuggingFace Spaces
6
  """
7
  import os
 
8
  import sys
9
+ import signal
10
  from pathlib import Path
11
  import gdown
12
 
13
+ # Timeout handler
14
class TimeoutError(Exception):
    """Signals that the weight download exceeded its time budget.

    Raised by ``timeout_handler`` when the SIGALRM alarm fires.

    NOTE: the name shadows the built-in ``TimeoutError`` inside this module;
    unlike the built-in, this class derives directly from ``Exception`` and
    is not an ``OSError``.
    """
16
+
17
def timeout_handler(signum, frame):
    """Abort the running download by raising ``TimeoutError``.

    Intended to be registered with ``signal.signal``; ``signum`` and
    ``frame`` are the arguments the signal machinery passes to a handler
    and are both ignored here.

    Raises:
        TimeoutError: always, with the message ``"Download timed out"``.
    """
    message = "Download timed out"
    raise TimeoutError(message)
19
+
20
def download_weights(timeout_seconds=300):
    """Download the model weights from Google Drive, bounded by a timeout.

    The weights are written to
    ``<repo root>/finetuning_rodla/finetuning_rodla/checkpoints/rodla_internimage_xl_publaynet.pth``.
    The repo root is ``/app`` when that directory exists (Docker image);
    otherwise it is derived from this file's own location
    (``<repo root>/deployment/backend/download_weights.py``).

    Args:
        timeout_seconds: Wall-clock budget in seconds, enforced with
            ``SIGALRM``. A value of ``0`` (or any non-positive value)
            disables the timeout. The timeout is also skipped, with a
            warning, where ``SIGALRM`` cannot be used (platforms without
            it, or when called outside the main thread).

    Returns:
        ``True`` if a weights file larger than 1 MB is present when the
        function finishes (already there, or freshly downloaded);
        ``False`` on timeout, download failure or any other error.
        Errors are reported on stdout instead of being raised.
    """
    # Anything at or below this size is treated as a failed/placeholder download.
    min_valid_bytes = 1000000

    handler_installed = False
    previous_handler = None

    try:
        if timeout_seconds and timeout_seconds > 0:
            try:
                # AttributeError: SIGALRM does not exist on this platform.
                # ValueError: handlers can only be installed from the main thread.
                previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
                handler_installed = True
                signal.alarm(timeout_seconds)
            except (AttributeError, ValueError) as e:
                print(f"⚠️ Timeout unavailable ({e}); continuing without a time limit")

        # Google Drive file ID from the shared link
        # https://drive.google.com/file/d/1BHyz2jH52Irt6izCeTRb4g2J5lXsA9cz/view?usp=sharing
        drive_file_id = "1BHyz2jH52Irt6izCeTRb4g2J5lXsA9cz"

        # Destination path - works in Docker (/app) and local dev
        if os.path.exists("/app"):
            repo_root = Path("/app")
        else:
            # This file lives at <repo>/deployment/backend/download_weights.py:
            # parents[0] = backend, parents[1] = deployment, parents[2] = <repo>.
            # resolve() first so a relative __file__ does not collapse to ".".
            repo_root = Path(__file__).resolve().parents[2]

        weights_dir = repo_root / "finetuning_rodla/finetuning_rodla/checkpoints"
        weights_dir.mkdir(parents=True, exist_ok=True)
        weights_path = weights_dir / "rodla_internimage_xl_publaynet.pth"

        # Skip the download when a plausible weights file is already present.
        if weights_path.exists() and weights_path.stat().st_size > min_valid_bytes:
            print(f"✅ Weights already exist at {weights_path}")
            print(f" Size: {weights_path.stat().st_size / (1024**3):.2f} GB")
            return True

        print("📥 Downloading model weights from Google Drive...")
        print(f" File ID: {drive_file_id}")
        print(f" Destination: {weights_path}")
        print(f" Timeout: {timeout_seconds} seconds")

        try:
            # Download using gdown
            url = f"https://drive.google.com/uc?id={drive_file_id}"
            gdown.download(url, str(weights_path), quiet=False)

            # Verify download
            if weights_path.exists() and weights_path.stat().st_size > min_valid_bytes:
                print("✅ Download successful!")
                print(f" Size: {weights_path.stat().st_size / (1024**3):.2f} GB")
                return True

            print("❌ Download failed or file is too small")
            return False
        except TimeoutError:
            # Let the single timeout handler below report it.
            raise
        except Exception as e:
            print(f"❌ Error downloading weights: {e}")
            return False

    except TimeoutError:
        print(f"⏱️ Download timed out after {timeout_seconds} seconds")
        print(" This is expected on HuggingFace Spaces. Backend will use heuristic detection.")
        return False
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False
    finally:
        # Single cleanup point: cancel any pending alarm and put back whatever
        # SIGALRM handler was installed before this call.
        if handler_installed:
            signal.alarm(0)
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)
90
 
91
 
92
if __name__ == "__main__":
    # Optional first CLI argument: timeout in seconds (defaults to 5 minutes).
    if len(sys.argv) > 1:
        try:
            timeout = int(sys.argv[1])
        except ValueError:
            # Report a bad argument clearly instead of dying with a traceback.
            print(
                f"❌ Invalid timeout {sys.argv[1]!r}: expected an integer number of seconds",
                file=sys.stderr,
            )
            sys.exit(2)
    else:
        timeout = 300

    success = download_weights(timeout_seconds=timeout)
    # Exit status mirrors the result so shell callers can chain `|| fallback`.
    sys.exit(0 if success else 1)