| { |
| "saving_path": "/home/ubuntu/experiments/a2s_mls", |
| "resume_checkpoint": null, |
| "vocoder_type": "SPEECHTOKENIZER", |
| "vocoder_config_path": null, |
| "vocoder_ckpt_path": null, |
| "metapath": [ |
| "/var/data_mls/train.json" |
| ], |
| "val_metapath": [ |
| "/var/data_mls/test.json" |
| ], |
| "pretrained_path": null, |
| "speaker_embedding_dir": null, |
| "sampledir": "/home/ubuntu/experiments/a2s_mls", |
| "lr": 0.0005, |
| "batch_size": 100.0, |
| "train_bucket_size": 8192, |
| "training_step": 800000, |
| "optim_flat_percent": 0.0, |
| "warmup_step": 10000, |
| "adam_beta1": 0.9, |
| "adam_beta2": 0.98, |
| "ffd_size": 1024, |
| "hidden_size": 1024, |
| "enc_nlayers": 8, |
| "dec_nlayers": 6, |
| "nheads": 8, |
| "dropout": 0.1, |
| "depthwise_conv_kernel_size": 5, |
| "aligner_softmax_temp": 1.0, |
| "layer_norm_eps": 1e-05, |
| "use_sem_tokens": true, |
| "use_spkr_emb": false, |
| "use_text_emb": false, |
| "fairseq": false, |
| "only_inference": false, |
| "speaker_embed_dropout": 0.05, |
| "label_smoothing": 0.0, |
| "val_check_interval": 1, |
| "max_dataset_samples": -1, |
| "check_val_every_n_epoch": 1, |
| "precision": "bf16", |
| "nworkers": 12, |
| "distributed": true, |
| "accelerator": "gpu", |
| "version": null, |
| "accumulate_grad_batches": 1, |
| "sagemaker": false, |
| "use_repetition_token": false, |
| "use_repetition_gating": false, |
| "repetition_penalty": 1.0, |
| "sampling_temperature": 1.0, |
| "top_k": -1, |
| "min_top_k": 3, |
| "top_p": 0.8, |
| "sample_num": 4, |
| "length_penalty_max_length": 150, |
| "length_penalty_max_prob": 0.95, |
| "max_input_length": 2048, |
| "max_output_length": 2000, |
| "phone_context_window": 3, |
| "sample_rate": 16000, |
| "n_codes": 1024, |
| "n_cluster_groups": 7, |
| "first_n_lvls": 7, |
| "use_pretrained_ckpt_cfg": false, |
| "n_semantic_codes": 1024 |
| } |
|
|