WingsingFung committed on
Commit
1a5f8a2
·
verified ·
1 Parent(s): 0cbd0de

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. RESULTS.md +27 -0
  2. config.yaml +229 -0
  3. run.sh +1 -0
  4. run_enhance.sh +1 -0
  5. valid.loss.ave_5best.pth +3 -0
RESULTS.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Sun Sep 14 08:18:21 UTC 2025`
5
+ - python version: `3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202503`
7
+ - pytorch version: `pytorch 2.6.0+cu124`
8
+ - Git hash: `fc4802e363704a402473efb32c1397b46db1e521`
9
+ - Commit date: `Tue Sep 9 07:53:06 2025 +0000`
10
+
11
+
12
+ ## enh_train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6_raw
13
+
14
+ config: conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
17
+ |---|---|---|---|---|---|
18
+ |en_2_re_1_tt_min_8k|98.38|22.23|21.83|33.56|21.51|
19
+ |en_2_re_2_tt_min_8k|98.87|24.12|23.77|35.94|23.48|
20
+ |en_2_re_3_tt_min_8k|99.06|24.99|24.67|37.05|24.39|
21
+ |en_2_re_4_tt_min_8k|99.15|25.40|25.09|37.58|24.82|
22
+ |en_2_re_5_tt_min_8k|99.18|25.60|25.30|37.85|25.03|
23
+ |en_2_re_6_tt_min_8k|99.20|25.69|25.40|38.00|25.13|
24
+ |en_2_re_7_tt_min_8k|99.21|25.74|25.45|38.09|25.19|
25
+ |en_2_re_8_tt_min_8k|99.21|25.75|25.47|38.13|25.20|
26
+ |en_2_re_9_tt_min_8k|99.21|25.75|25.47|38.15|25.20|
27
+ |en_2_re_10_tt_min_8k|99.21|25.74|25.46|38.16|25.19|
config.yaml ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/enh_train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6_raw
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 16
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 150
35
+ patience: 10
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ check_early_stopping_begin_epoch: 0
44
+ best_model_criterion:
45
+ - - valid
46
+ - si_snr
47
+ - max
48
+ - - valid
49
+ - loss
50
+ - min
51
+ keep_nbest_models: 5
52
+ nbest_averaging_interval: 0
53
+ grad_clip: 5.0
54
+ grad_clip_type: 2.0
55
+ grad_noise: false
56
+ accum_grad: 1
57
+ no_forward_run: false
58
+ resume: true
59
+ train_dtype: float32
60
+ use_amp: false
61
+ log_interval: null
62
+ use_matplotlib: true
63
+ use_tensorboard: true
64
+ create_graph_in_tensorboard: false
65
+ use_wandb: false
66
+ wandb_project: null
67
+ wandb_id: null
68
+ wandb_entity: null
69
+ wandb_name: null
70
+ wandb_model_log_interval: -1
71
+ detect_anomaly: false
72
+ use_adapter: false
73
+ adapter: lora
74
+ save_strategy: all
75
+ adapter_conf: {}
76
+ pretrain_path: null
77
+ init_param: []
78
+ ignore_init_mismatch: false
79
+ freeze_param: []
80
+ num_iters_per_epoch: null
81
+ batch_size: 6
82
+ valid_batch_size: 16
83
+ batch_bins: 1000000
84
+ valid_batch_bins: null
85
+ category_sample_size: 10
86
+ train_shape_file:
87
+ - exp/enh_stats_8k/train/speech_mix_shape
88
+ - exp/enh_stats_8k/train/speech_ref1_shape
89
+ - exp/enh_stats_8k/train/speech_ref2_shape
90
+ valid_shape_file:
91
+ - exp/enh_stats_8k/valid/speech_mix_shape
92
+ - exp/enh_stats_8k/valid/speech_ref1_shape
93
+ - exp/enh_stats_8k/valid/speech_ref2_shape
94
+ batch_type: folded
95
+ valid_batch_type: null
96
+ fold_length:
97
+ - 80000
98
+ - 80000
99
+ - 80000
100
+ sort_in_batch: descending
101
+ shuffle_within_batch: true
102
+ sort_batch: descending
103
+ multiple_iterator: false
104
+ chunk_length: 500
105
+ chunk_shift_ratio: 0.5
106
+ num_cache_chunks: 1024
107
+ chunk_excluded_key_prefixes: []
108
+ chunk_default_fs: null
109
+ chunk_max_abs_length: null
110
+ chunk_discard_short_samples: true
111
+ train_data_path_and_name_and_type:
112
+ - - dump/raw/tr_min_8k/wav.scp
113
+ - speech_mix
114
+ - sound
115
+ - - dump/raw/tr_min_8k/spk1.scp
116
+ - speech_ref1
117
+ - sound
118
+ - - dump/raw/tr_min_8k/spk2.scp
119
+ - speech_ref2
120
+ - sound
121
+ valid_data_path_and_name_and_type:
122
+ - - dump/raw/cv_min_8k/wav.scp
123
+ - speech_mix
124
+ - sound
125
+ - - dump/raw/cv_min_8k/spk1.scp
126
+ - speech_ref1
127
+ - sound
128
+ - - dump/raw/cv_min_8k/spk2.scp
129
+ - speech_ref2
130
+ - sound
131
+ multi_task_dataset: false
132
+ allow_variable_data_keys: false
133
+ max_cache_size: 0.0
134
+ max_cache_fd: 32
135
+ allow_multi_rates: false
136
+ valid_max_cache_size: null
137
+ exclude_weight_decay: false
138
+ exclude_weight_decay_conf: {}
139
+ optim: adamw
140
+ optim_conf:
141
+ lr: 0.001
142
+ eps: 1.0e-08
143
+ weight_decay: 0.01
144
+ scheduler: warmupreducelronplateau
145
+ scheduler_conf:
146
+ warmup_steps: 2000
147
+ mode: min
148
+ factor: 0.5
149
+ patience: 3
150
+ init: xavier_uniform
151
+ model_conf:
152
+ normalize_variance: true
153
+ criterions:
154
+ - name: si_snr
155
+ conf:
156
+ eps: 1.0e-07
157
+ wrapper: pit
158
+ wrapper_conf:
159
+ weight: 1.0
160
+ independent_perm: true
161
+ speech_volume_normalize: null
162
+ rir_scp: null
163
+ rir_apply_prob: 1.0
164
+ noise_scp: null
165
+ noise_apply_prob: 1.0
166
+ noise_db_range: '13_15'
167
+ short_noise_thres: 0.5
168
+ use_reverberant_ref: false
169
+ num_spk: 2
170
+ num_noise_type: 1
171
+ sample_rate: 8000
172
+ force_single_channel: false
173
+ channel_reordering: false
174
+ categories: []
175
+ speech_segment: 32000
176
+ avoid_allzero_segment: true
177
+ flexible_numspk: false
178
+ dynamic_mixing: false
179
+ utt2spk: null
180
+ dynamic_mixing_gain_db: 0.0
181
+ encoder: stft
182
+ encoder_conf:
183
+ n_fft: 128
184
+ hop_length: 64
185
+ separator: tisdiss
186
+ separator_conf:
187
+ num_spk: 2
188
+ emb_dim: 128
189
+ norm_type: rmsgroupnorm
190
+ num_groups: 4
191
+ tf_order: ft
192
+ n_heads: 4
193
+ flash_attention: false
194
+ ffn_type:
195
+ - swiglu_conv1d
196
+ - swiglu_conv1d
197
+ ffn_hidden_dim:
198
+ - 384
199
+ - 384
200
+ conv1d_kernel: 4
201
+ conv1d_shift: 1
202
+ dropout: 0.0
203
+ eps: 1.0e-05
204
+ encoder_repeat_times: 2
205
+ encoder_n_layers: 1
206
+ reconstructor_repeat_times: 6
207
+ reconstructor_n_layers: 1
208
+ repeat_residual_module: true
209
+ reconstructor_repeat_residual_module: false
210
+ encoder_decoder: false
211
+ encoder_multi_decoder: false
212
+ encoder_n_layers_multi_decoder: false
213
+ reconstructor_multi_decoder: true
214
+ reconstructor_n_layers_multi_decoder: true
215
+ spliter_loss: true
216
+ decoder: stft
217
+ decoder_conf:
218
+ n_fft: 128
219
+ hop_length: 64
220
+ mask_module: multi_mask
221
+ mask_module_conf: {}
222
+ preprocessor: enh
223
+ preprocessor_conf: {}
224
+ diffusion_model: null
225
+ diffusion_model_conf: {}
226
+ required:
227
+ - output_dir
228
+ version: '202503'
229
+ distributed: false
run.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ ./enh.sh --train_set tr_min_8k --valid_set cv_min_8k --test_sets tt_min_8k --fs 8k --lang en --ngpu 1 --local_data_opts '--sample_rate 8k --min_or_max min' --enh_config conf/tuning/train_enh_dprnn_tasnet.yaml --stage 6 --stop_stage 6 --enh_config conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml --ngpu 1 --stage 6 "$@"; exit $?
run_enhance.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ ./enh.sh --train_set tr_min_8k --valid_set cv_min_8k --test_sets tt_min_8k --fs 8k --lang en --ngpu 1 --local_data_opts '--sample_rate 8k --min_or_max min' --enh_config conf/tuning/train_enh_dprnn_tasnet.yaml --stage 7 --stop_stage 8 --enh_config conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml --ngpu 1 --gpu_inference true --inference_model valid.loss.ave_5best.pth --inference_enh_config conf/efficient_infer/en_re/en_2_re_6.yaml --inference_nj 8 --stage 7 "$@"; exit $?
valid.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8aa155488f726e33e6f208c9bd831f13a39f1d18de64b34168fdd6037293a34
3
+ size 32083222