Upload folder using huggingface_hub

Browse files

Files changed (5)

RESULTS.md +27 -0
config.yaml +229 -0
run.sh +1 -0
run_enhance.sh +1 -0
valid.loss.ave_5best.pth +3 -0

RESULTS.md ADDED Viewed

	@@ -0,0 +1,27 @@

+<!-- Generated by ./scripts/utils/show_enh_score.sh -->
+# RESULTS
+## Environments
+- date: `Sun Sep 14 08:18:21 UTC 2025`
+- python version: `3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]`
+- espnet version: `espnet 202503`
+- pytorch version: `pytorch 2.6.0+cu124`
+- Git hash: `fc4802e363704a402473efb32c1397b46db1e521`
+  - Commit date: `Tue Sep 9 07:53:06 2025 +0000`
+## enh_train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6_raw
+config: conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml
+|dataset|STOI|SAR|SDR|SIR|SI_SNR|
+|---|---|---|---|---|---|
+|en_2_re_1_tt_min_8k|98.38|22.23|21.83|33.56|21.51|
+|en_2_re_2_tt_min_8k|98.87|24.12|23.77|35.94|23.48|
+|en_2_re_3_tt_min_8k|99.06|24.99|24.67|37.05|24.39|
+|en_2_re_4_tt_min_8k|99.15|25.40|25.09|37.58|24.82|
+|en_2_re_5_tt_min_8k|99.18|25.60|25.30|37.85|25.03|
+|en_2_re_6_tt_min_8k|99.20|25.69|25.40|38.00|25.13|
+|en_2_re_7_tt_min_8k|99.21|25.74|25.45|38.09|25.19|
+|en_2_re_8_tt_min_8k|99.21|25.75|25.47|38.13|25.20|
+|en_2_re_9_tt_min_8k|99.21|25.75|25.47|38.15|25.20|
+|en_2_re_10_tt_min_8k|99.21|25.74|25.46|38.16|25.19|

config.yaml ADDED Viewed

	@@ -0,0 +1,229 @@

+config: conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp/enh_train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6_raw
+ngpu: 1
+seed: 0
+num_workers: 16
+num_att_plot: 0
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+gradient_as_bucket_view: true
+ddp_comm_hook: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 150
+patience: 10
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+check_early_stopping_begin_epoch: 0
+best_model_criterion:
+-   - valid
+    - si_snr
+    - max
+-   - valid
+    - loss
+    - min
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 6
+valid_batch_size: 16
+batch_bins: 1000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp/enh_stats_8k/train/speech_mix_shape
+- exp/enh_stats_8k/train/speech_ref1_shape
+- exp/enh_stats_8k/train/speech_ref2_shape
+valid_shape_file:
+- exp/enh_stats_8k/valid/speech_mix_shape
+- exp/enh_stats_8k/valid/speech_ref1_shape
+- exp/enh_stats_8k/valid/speech_ref2_shape
+batch_type: folded
+valid_batch_type: null
+fold_length:
+- 80000
+- 80000
+- 80000
+sort_in_batch: descending
+shuffle_within_batch: true
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+-   - dump/raw/tr_min_8k/wav.scp
+    - speech_mix
+    - sound
+-   - dump/raw/tr_min_8k/spk1.scp
+    - speech_ref1
+    - sound
+-   - dump/raw/tr_min_8k/spk2.scp
+    - speech_ref2
+    - sound
+valid_data_path_and_name_and_type:
+-   - dump/raw/cv_min_8k/wav.scp
+    - speech_mix
+    - sound
+-   - dump/raw/cv_min_8k/spk1.scp
+    - speech_ref1
+    - sound
+-   - dump/raw/cv_min_8k/spk2.scp
+    - speech_ref2
+    - sound
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adamw
+optim_conf:
+    lr: 0.001
+    eps: 1.0e-08
+    weight_decay: 0.01
+scheduler: warmupreducelronplateau
+scheduler_conf:
+    warmup_steps: 2000
+    mode: min
+    factor: 0.5
+    patience: 3
+init: xavier_uniform
+model_conf:
+    normalize_variance: true
+criterions:
+-   name: si_snr
+    conf:
+        eps: 1.0e-07
+    wrapper: pit
+    wrapper_conf:
+        weight: 1.0
+        independent_perm: true
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+use_reverberant_ref: false
+num_spk: 2
+num_noise_type: 1
+sample_rate: 8000
+force_single_channel: false
+channel_reordering: false
+categories: []
+speech_segment: 32000
+avoid_allzero_segment: true
+flexible_numspk: false
+dynamic_mixing: false
+utt2spk: null
+dynamic_mixing_gain_db: 0.0
+encoder: stft
+encoder_conf:
+    n_fft: 128
+    hop_length: 64
+separator: tisdiss
+separator_conf:
+    num_spk: 2
+    emb_dim: 128
+    norm_type: rmsgroupnorm
+    num_groups: 4
+    tf_order: ft
+    n_heads: 4
+    flash_attention: false
+    ffn_type:
+    - swiglu_conv1d
+    - swiglu_conv1d
+    ffn_hidden_dim:
+    - 384
+    - 384
+    conv1d_kernel: 4
+    conv1d_shift: 1
+    dropout: 0.0
+    eps: 1.0e-05
+    encoder_repeat_times: 2
+    encoder_n_layers: 1
+    reconstructor_repeat_times: 6
+    reconstructor_n_layers: 1
+    repeat_residual_module: true
+    reconstructor_repeat_residual_module: false
+    encoder_decoder: false
+    encoder_multi_decoder: false
+    encoder_n_layers_multi_decoder: false
+    reconstructor_multi_decoder: true
+    reconstructor_n_layers_multi_decoder: true
+    spliter_loss: true
+decoder: stft
+decoder_conf:
+    n_fft: 128
+    hop_length: 64
+mask_module: multi_mask
+mask_module_conf: {}
+preprocessor: enh
+preprocessor_conf: {}
+diffusion_model: null
+diffusion_model_conf: {}
+required:
+- output_dir
+version: '202503'
+distributed: false

run.sh ADDED Viewed

	@@ -0,0 +1 @@


+ ./enh.sh --train_set tr_min_8k --valid_set cv_min_8k --test_sets tt_min_8k --fs 8k --lang en --ngpu 1 --local_data_opts '--sample_rate 8k --min_or_max min' --enh_config conf/tuning/train_enh_dprnn_tasnet.yaml --stage 6 --stop_stage 6 --enh_config conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml --ngpu 1 --stage 6 "$@"; exit $?

run_enhance.sh ADDED Viewed

	@@ -0,0 +1 @@

+ ./enh.sh --train_set tr_min_8k --valid_set cv_min_8k --test_sets tt_min_8k --fs 8k --lang en --ngpu 1 --local_data_opts '--sample_rate 8k --min_or_max min' --enh_config conf/tuning/train_enh_dprnn_tasnet.yaml --stage 7 --stop_stage 8 --enh_config conf/efficient_train/tisdiss/train_enh_tisdiss_tflocoformer_en-residual_en1x2_re1x6_l1+1x6.yaml --ngpu 1 --gpu_inference true --inference_model valid.loss.ave_5best.pth --inference_enh_config conf/efficient_infer/en_re/en_2_re_6.yaml --inference_nj 8 --stage 7 "$@"; exit $?

valid.loss.ave_5best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8aa155488f726e33e6f208c9bd831f13a39f1d18de64b34168fdd6037293a34
+size 32083222