shawnpi committed on
Commit
c7b3ab7
·
verified ·
1 Parent(s): 88bbfa2

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +40 -36
gradio_app.py CHANGED
@@ -7,25 +7,47 @@ import soundfile as sf
7
  import tempfile
8
  import hashlib
9
  import requests
 
10
  from huggingface_hub import snapshot_download
11
 
12
- # ================= 1. 环境与自动同步逻辑 =================
13
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
14
 
15
  def sync_model_files():
 
16
  repo_id = "shawnpi/HQ-SVC"
17
- print(f">>> 正在同步模型权重 ({repo_id})...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  try:
19
  snapshot_download(
20
  repo_id=repo_id,
21
- allow_patterns=["utils/pretrain/*", "config.json"],
22
  local_dir=".",
23
- local_dir_use_symlinks=False
 
 
24
  )
25
- print(">>> 权重同步完成")
26
  except Exception as e:
27
- print(f">>> 同步失败: {e}")
 
 
 
28
 
 
29
  sync_model_files()
30
 
31
  # ================= 2. 路径与模型加载逻辑 =================
@@ -52,9 +74,12 @@ def initialize_models(config_path):
52
  ARGS = load_config(config_path)
53
  ARGS.config = config_path
54
  device = ARGS.device
 
 
55
  VOCODER = Vocoder(vocoder_type='nsf-hifigan', vocoder_ckpt='utils/pretrain/nsf_hifigan/model', device=device)
56
  NET_G = load_hq_svc(mode='infer', device=device, model_path=ARGS.model_path, args=ARGS)
57
  NET_G.eval()
 
58
  fa_encoder, fa_decoder = load_facodec(device)
59
  PREPROCESSORS = {
60
  "fa_encoder": fa_encoder, "fa_decoder": fa_decoder,
@@ -63,22 +88,14 @@ def initialize_models(config_path):
63
  "content_encoder": None, "spk_encoder": None
64
  }
65
 
66
- # ================= 3. 推理逻辑 (增强鲁棒性) =================
67
  def predict(source_audio, target_files, shift_key, adjust_f0):
68
  global TARGET_CACHE
69
-
70
- # --- 鲁棒性检查 1: 检查源音频是否上传完毕 ---
71
  if source_audio is None:
72
- return "⚠️ 系统提示:未检测到源音频。请确认已选择文件,并等待上传进度条走完后再重新转换。", None
73
 
74
- # --- 鲁棒性检查 2: 检查文件路径有效性 ---
75
  if not os.path.exists(source_audio):
76
- return "❌ 系统错误:音频文件传输中断,请刷新页面重新上传音频。", None
77
-
78
- # --- 鲁棒性检查 3: 检查音频格式 (防止上传了奇怪的文件) ---
79
- valid_exts = ['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.opus']
80
- if not any(source_audio.lower().endswith(ext) for ext in valid_exts):
81
- return f"❌ 系统错误:不支持该文件格式。请上传 {', '.join(valid_exts)} 格式的音频。", None
82
 
83
  sr, encoder_sr, device = ARGS.sample_rate, ARGS.encoder_sr, ARGS.device
84
 
@@ -98,18 +115,14 @@ def predict(source_audio, target_files, shift_key, adjust_f0):
98
  else:
99
  spk_list, f0_list = [], []
100
  for f in (target_files[:20] if target_files else []):
101
- # 再次校验目标参考音频是否有效
102
  f_path = f.name if hasattr(f, 'name') else f
103
  if not f_path or not os.path.exists(f_path): continue
104
-
105
  t_data = get_processed_file(f_path, sr, encoder_sr, VOCODER, PREPROCESSORS["volume_extractor"], PREPROCESSORS["f0_extractor"], PREPROCESSORS["fa_encoder"], PREPROCESSORS["fa_decoder"], None, None, device=device)
106
  if t_data:
107
  spk_list.append(t_data['spk'])
108
  f0_list.append(t_data['f0_origin'])
109
 
110
- if not spk_list:
111
- return "❌ 终端提示:目标参考音频上传失败或格式不正确,请重新上传。", None
112
-
113
  spk_ave = torch.stack(spk_list).mean(dim=0).squeeze().to(device)
114
  all_tar_f0 = np.concatenate(f0_list)
115
  TARGET_CACHE.update({"file_hash": current_hash, "spk_ave": spk_ave, "all_tar_f0": all_tar_f0})
@@ -132,9 +145,8 @@ def predict(source_audio, target_files, shift_key, adjust_f0):
132
  sf.write(out_p, wav_g.squeeze().cpu().numpy(), 44100)
133
  return f"{status} | Pitch Shifted: {shift_key}", out_p
134
  except Exception as e:
135
- return f"❌ 推理运行出错:{str(e)}。请尝试刷新页面并重新上传音频。", None
136
 
137
- # ================= 4. UI 界面 =================
138
  custom_css = """
139
  @import url('https://fonts.googleapis.com/css2?family=Press+Start+2P&display=swap');
140
  :root { --font: 'Press Start 2P', cursive !important; }
@@ -151,15 +163,10 @@ button.primary { background-color: #ff69b4 !important; color: #fff !important; }
151
  footer { display: none !important; }
152
  """
153
 
 
154
  def build_ui():
155
  with gr.Blocks(css=custom_css, title="HQ-SVC Pixel Pro") as demo:
156
- gr.HTML("""
157
- <div style="display: flex; justify-content: center; margin: 20px 0;">
158
- <div style="border: 4px solid #000; box-shadow: 8px 8px 0px #000;">
159
- <img src="file/images/kon-new.gif" style="max-width: 400px; width: 100%;">
160
- </div>
161
- </div>
162
- """)
163
  gr.Markdown("# 🎸HQ-SVC: SINGING VOICE CONVERSION AND SUPER-RESOLUTION🍰")
164
 
165
  with gr.Row():
@@ -176,19 +183,16 @@ def build_ui():
176
  result_audio = gr.Audio(label="OUTPUT (44.1kHz HQ)")
177
 
178
  run_btn.click(predict, [src_audio, tar_files, key_shift, auto_f0], [status_box, result_audio])
179
-
180
  return demo
181
 
182
  if __name__ == "__main__":
183
  config_p = "configs/hq_svc_infer.yaml"
184
  if os.path.exists(config_p):
185
  initialize_models(config_p)
186
- else:
187
- print(f"警告: 找不到配置文件 {config_p}。")
188
-
189
  demo = build_ui()
190
  temp_dir = tempfile.gettempdir()
191
  demo.launch(
192
  share=True,
193
- allowed_paths=[os.path.join(now_dir, "images"), now_dir, temp_dir]
194
  )
 
7
  import tempfile
8
  import hashlib
9
  import requests
10
+ import socket
11
  from huggingface_hub import snapshot_download
12
 
13
+ # ================= 1. 环境与智能同步逻辑 (支持纯离线) =================
14
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
15
 
16
  def sync_model_files():
17
+ """智能同步:优先保证离线可用,仅在在线且文件缺失时强制同步"""
18
  repo_id = "shawnpi/HQ-SVC"
19
+
20
+ # 定义核心权重路径(根据你的 YAML 配置对齐)
21
+ model_pth = "utils/pretrain/250000_step_val_loss_0.50.pth"
22
+ vocoder_dir = "utils/pretrain/nsf_hifigan/model"
23
+ rmvpe_path = "utils/pretrain/rmvpe/model.pt"
24
+ # 检查本地核心文件是否已存在
25
+ local_exists = os.path.exists(model_pth) and os.path.exists(vocoder_dir)
26
+
27
+ if local_exists:
28
+ print(">>> [离线模式] 检测到本地权重已完整")
29
+ return
30
+
31
+ # 如果本地文件缺失,则尝试网络同步
32
+ print(">>> [同步模式] 本地权重不完整,正在检测网络以获取权重...")
33
+
34
  try:
35
  snapshot_download(
36
  repo_id=repo_id,
37
+ allow_patterns=["utils/pretrain/*", "config.json"],
38
  local_dir=".",
39
+ local_dir_use_symlinks=False,
40
+ # 如果依然失败(如镜像站也连不上),则尝试仅使用本地缓存
41
+ resume_download=True
42
  )
43
+ print(">>> 权重同步完成。")
44
  except Exception as e:
45
+ if local_exists:
46
+ print(f">>> 同步失败但本地已有文件,将尝试继续运行。错误: {e}")
47
+ else:
48
+ print(f">>> [严重错误] 同步失败且本地缺少权重,程序可能无法运行: {e}")
49
 
50
+ # 在一切开始前执行智能同步
51
  sync_model_files()
52
 
53
  # ================= 2. 路径与模型加载逻辑 =================
 
74
  ARGS = load_config(config_path)
75
  ARGS.config = config_path
76
  device = ARGS.device
77
+
78
+ # 实例化模型
79
  VOCODER = Vocoder(vocoder_type='nsf-hifigan', vocoder_ckpt='utils/pretrain/nsf_hifigan/model', device=device)
80
  NET_G = load_hq_svc(mode='infer', device=device, model_path=ARGS.model_path, args=ARGS)
81
  NET_G.eval()
82
+
83
  fa_encoder, fa_decoder = load_facodec(device)
84
  PREPROCESSORS = {
85
  "fa_encoder": fa_encoder, "fa_decoder": fa_decoder,
 
88
  "content_encoder": None, "spk_encoder": None
89
  }
90
 
91
+ # ================= 3. 推理逻辑 (保持鲁棒性) =================
92
  def predict(source_audio, target_files, shift_key, adjust_f0):
93
  global TARGET_CACHE
 
 
94
  if source_audio is None:
95
+ return "⚠️ 系统提示:未检测到源音频。请确保文件已上传完毕。", None
96
 
 
97
  if not os.path.exists(source_audio):
98
+ return "❌ 系统错误:找不到音频文件,请重新上传。", None
 
 
 
 
 
99
 
100
  sr, encoder_sr, device = ARGS.sample_rate, ARGS.encoder_sr, ARGS.device
101
 
 
115
  else:
116
  spk_list, f0_list = [], []
117
  for f in (target_files[:20] if target_files else []):
 
118
  f_path = f.name if hasattr(f, 'name') else f
119
  if not f_path or not os.path.exists(f_path): continue
 
120
  t_data = get_processed_file(f_path, sr, encoder_sr, VOCODER, PREPROCESSORS["volume_extractor"], PREPROCESSORS["f0_extractor"], PREPROCESSORS["fa_encoder"], PREPROCESSORS["fa_decoder"], None, None, device=device)
121
  if t_data:
122
  spk_list.append(t_data['spk'])
123
  f0_list.append(t_data['f0_origin'])
124
 
125
+ if not spk_list: return "❌ 终端提示:参考音频处理失败。", None
 
 
126
  spk_ave = torch.stack(spk_list).mean(dim=0).squeeze().to(device)
127
  all_tar_f0 = np.concatenate(f0_list)
128
  TARGET_CACHE.update({"file_hash": current_hash, "spk_ave": spk_ave, "all_tar_f0": all_tar_f0})
 
145
  sf.write(out_p, wav_g.squeeze().cpu().numpy(), 44100)
146
  return f"{status} | Pitch Shifted: {shift_key}", out_p
147
  except Exception as e:
148
+ return f"❌ 推理运行出错:{str(e)}", None
149
 
 
150
  custom_css = """
151
  @import url('https://fonts.googleapis.com/css2?family=Press+Start+2P&display=swap');
152
  :root { --font: 'Press Start 2P', cursive !important; }
 
163
  footer { display: none !important; }
164
  """
165
 
166
+ # ================= 4. UI 界面 =================
167
  def build_ui():
168
  with gr.Blocks(css=custom_css, title="HQ-SVC Pixel Pro") as demo:
169
+ gr.HTML('<div style="text-align:center; margin:20px 0;"><img src="file/images/kon-new.gif" style="max-width:400px; border:4px solid #000; box-shadow:8px 8px 0px #000;"></div>')
 
 
 
 
 
 
170
  gr.Markdown("# 🎸HQ-SVC: SINGING VOICE CONVERSION AND SUPER-RESOLUTION🍰")
171
 
172
  with gr.Row():
 
183
  result_audio = gr.Audio(label="OUTPUT (44.1kHz HQ)")
184
 
185
  run_btn.click(predict, [src_audio, tar_files, key_shift, auto_f0], [status_box, result_audio])
 
186
  return demo
187
 
188
  if __name__ == "__main__":
189
  config_p = "configs/hq_svc_infer.yaml"
190
  if os.path.exists(config_p):
191
  initialize_models(config_p)
192
+
 
 
193
  demo = build_ui()
194
  temp_dir = tempfile.gettempdir()
195
  demo.launch(
196
  share=True,
197
+ allowed_paths=[os.path.join(os.path.dirname(__file__), "images"), os.path.dirname(__file__), temp_dir]
198
  )