CNChTu · CNChTu · Oct 6, 2024 · Oct 6, 2024 · Oct 6, 2024 · Oct 6, 2024
diff --git a/configs/config_naivev2.yaml b/configs/config_naivev2.yaml
@@ -36,7 +36,16 @@ model:
     conv_dropout: 0.0
     atten_dropout: 0.1
     use_weight_norm: false
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_naivev2diff.yaml b/configs/config_naivev2diff.yaml
@@ -41,8 +41,20 @@ model:
     conv_model_type: 'mode1'
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_naivev2diff_comb.yaml b/configs/config_naivev2diff_comb.yaml
@@ -42,6 +42,9 @@ model:
     conv_model_type: 'mode1' # dont change
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
   naive_fn:
     type: 'LYNXNet' # LYNXNet is thr other name of ConformerNaiveEncoder(NaiveNet)
@@ -60,7 +63,16 @@ model:
     use_weight_norm: false
   naive_fn_grad_not_by_diffusion: false # dont change if dont understand; more info:diffusion/unit2mel.py
   naive_out_mel_cond_diff: false # mel condition diffusion is a test function, maybe can make the model learn faster but less quality and pitch range.
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_naivev2diff_shallow.yaml b/configs/config_naivev2diff_shallow.yaml
@@ -42,8 +42,20 @@ model:
     conv_model_type: 'mode1'
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_naivev2diff_vae.yaml b/configs/config_naivev2diff_vae.yaml
@@ -41,8 +41,20 @@ model:
     conv_model_type: 'mode1'
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'hifivaegan'
   ckpt: 'pretrain/hifivaegan/G_224800.pth'

diff --git a/configs/config_naivev2reflow.yaml b/configs/config_naivev2reflow.yaml
@@ -43,9 +43,24 @@ model:
     atten_dropout: 0.1
     conv_model_activation: 'SiLU'
     GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
   loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
-device: cuda
+  consistency: false
+  consistency_only: true
+  consistency_delta_t: 0.1
+  consistency_lambda_f: 1.0
+  consistency_lambda_v: 0.01 
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_naivev2diff_comb_compile.yaml → configs/config_naivev2reflow_1step_test.yaml b/configs/config_naivev2diff_comb_compile.yaml → configs/config_naivev2reflow_1step_test.yaml
@@ -18,27 +18,17 @@ data:
   extensions: # List of extension included in the data collection
     - wav
 model:
-  torch_compile_args:
-    use_copile: true
-    fullgraph: false
-    dynamic: 'none' # 'none',false or true
-    backend: 'inductor' # 'cudagraphs', 'inductor', 'onnxrt', 'openxla', 'openxla_eval', 'tvm'
-    mode: 'reduce-overhead' # 'default','reduce-overhead','max-autotune' or 'max-autotune-no-cudagraphs'
-    use_options: false # if use options, should be true
-    options:
-      k: 'v'
-  k_step_max: 100
-  type: 'DiffusionV2'
+  t_start: 0.0 # do not change
+  type: 'ReFlow1Step'
   n_hidden: 256
   use_pitch_aug: true  
   n_spk: 2 # max number of different speakers
   z_rate: 0 # dont change
   mean_only: true
-  max_beta: 0.02
   spec_min: -12
   spec_max: 2
-  denoise_fn:
-    type: 'NaiveV2Diff'
+  velocity_fn:
+    type: 'LYNXNetDiff'
     cn_layers: 6
     cn_chans: 512
     use_mlp: false # is use MLP in cond_emb and output_proj
@@ -48,36 +38,32 @@ model:
     conv_only: true # use Transformer block with conv block, if false
     wavenet_like: false # dont change if dont understand; more info:diffusion/naive_v2/naive_v2_diff.py
     use_norm: false # pre-norm for every layers
-    conv_model_type: 'mode1' # dont change
+    conv_model_type: 'mode1'
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
-  naive_fn:
-    type: 'LYNXNet' # LYNXNet is thr other name of ConformerNaiveEncoder(NaiveNet)
-    n_layers: 3
-    n_chans: 256
-    simple_stack: false # use simple stack for unit emb
-    out_put_norm: true # norm and weight_norm in last layer
-    expansion_factor: 2
-    kernel_size: 31
-    conv_model_type: 'mode1' # dont change
-    num_heads: 8
-    use_norm: false # pre-norm for every layers
-    conv_only: true # use Transformer block with conv block, if false
-    conv_dropout: 0.0
-    atten_dropout: 0.1
-    use_weight_norm: false
-  naive_fn_grad_not_by_diffusion: false # dont change if dont understand; more info:diffusion/unit2mel.py
-  naive_out_mel_cond_diff: false # mel condition diffusion is a test function, maybe can make the model learn faster but less quality and pitch range.
-device: cuda
+  loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'
 infer:
-  speedup: 1
-  method: 'pndm' # 'ddim', 'pndm', 'dpm-solver' or 'unipc'
+  infer_step: 10
+  method: 'euler' # 'euler', 'rk4'
 env:
-  expdir: exp/naivev2diffcombocompile
+  expdir: exp/naivev2reflow
   gpu_id: 0
 train:
   ema_decay: 0.999 # <1
@@ -96,4 +82,4 @@ train:
   decay_step: 100000
   gamma: 0.5
   weight_decay: 0
-  save_opt: false
+  save_opt: false
diff --git a/configs/config_naivev2reflow_combo.yaml b/configs/config_naivev2reflow_combo.yaml
@@ -41,6 +41,9 @@ model:
     conv_model_type: 'mode1'
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
   loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
   naive_fn:
@@ -60,7 +63,21 @@ model:
     use_weight_norm: false
   naive_fn_grad_not_by_reflow: false # dont change if dont understand; more info:diffusion/unit2mel.py
   naive_out_mel_cond_reflow: false # mel condition diffusion is a test function, maybe can make the model learn faster but less quality and pitch range.
-device: cuda
+  consistency: false
+  consistency_only: true
+  consistency_delta_t: 0.1
+  consistency_lambda_f: 1.0
+  consistency_lambda_v: 0.01 
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_naivev2reflow_shallow.yaml b/configs/config_naivev2reflow_shallow.yaml
@@ -41,9 +41,26 @@ model:
     conv_model_type: 'mode1'
     conv_dropout: 0.0
     atten_dropout: 0.1
+    conv_model_activation: 'SiLU'
+    GLU_type: 'GLU'
+    channel_norm: false
   mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
   loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
-device: cuda
+  consistency: false
+  consistency_only: true
+  consistency_delta_t: 0.1
+  consistency_lambda_f: 1.0
+  consistency_lambda_v: 0.01  
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_v2.yaml b/configs/config_v2.yaml
@@ -37,7 +37,16 @@ model:
     wn_tf_rf: false # only wn_tf_use is true and here is true will use RoFormer
     wn_tf_n_layers: 2
     wn_tf_n_head: 4
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_v2_comb.yaml b/configs/config_v2_comb.yaml
@@ -56,7 +56,16 @@ model:
     use_weight_norm: false
   naive_fn_grad_not_by_diffusion: false # dont change if dont understand; more info:diffusion/unit2mel.py
   naive_out_mel_cond_diff: false # mel condition diffusion is a test function, maybe can make the model learn faster but less quality and pitch range.
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_v2_reflow.yaml b/configs/config_v2_reflow.yaml
@@ -39,7 +39,21 @@ model:
     wn_tf_n_layers: 2
     wn_tf_n_head: 4
   loss_type: 'l2_lognorm' # 'l1', 'l2' or 'l2_lognorm'
-device: cuda
+  consistency: false
+  consistency_only: true
+  consistency_delta_t: 0.1
+  consistency_lambda_f: 1.0
+  consistency_lambda_v: 0.01 
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_v2_shallow.yaml b/configs/config_v2_shallow.yaml
@@ -38,7 +38,16 @@ model:
     wn_tf_rf: false # only wn_tf_use is true and here is true will use RoFormer
     wn_tf_n_layers: 2
     wn_tf_n_head: 4
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'nsf-hifigan'
   ckpt: 'pretrain/nsf_hifigan/model'

diff --git a/configs/config_v2_vae.yaml b/configs/config_v2_vae.yaml
@@ -37,7 +37,16 @@ model:
     wn_tf_rf: false # only wn_tf_use is true and here is true will use RoFormer
     wn_tf_n_layers: 2
     wn_tf_n_head: 4
-device: cuda
+device: 'cuda'
+ddp:
+  use_ddp: false # if true, ddp_device overrides device and gpu_id
+  port: '13348'
+  ddp_cache_gpu: false
+  ddp_device:
+    - 'cuda:1'
+    - 'cuda:2'
+    - 'cuda:3'
+    - 'cuda:4'
 vocoder:
   type: 'hifivaegan'
   ckpt: 'pretrain/hifivaegan/G_224800.pth'