Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V2.0 dev add ddp #77

Open
wants to merge 24 commits into
base: v2.0_dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion configs/config_naivev2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,16 @@ model:
conv_dropout: 0.0
atten_dropout: 0.1
use_weight_norm: false
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
14 changes: 13 additions & 1 deletion configs/config_naivev2diff.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,20 @@ model:
conv_model_type: 'mode1'
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
14 changes: 13 additions & 1 deletion configs/config_naivev2diff_comb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ model:
conv_model_type: 'mode1' # dont change
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
naive_fn:
type: 'LYNXNet' # LYNXNet is another name for ConformerNaiveEncoder (NaiveNet)
Expand All @@ -60,7 +63,16 @@ model:
use_weight_norm: false
naive_fn_grad_not_by_diffusion: false # don't change unless you understand it; more info: diffusion/unit2mel.py
naive_out_mel_cond_diff: false # mel-conditioned diffusion is an experimental feature; it may help the model learn faster, but with lower quality and a narrower pitch range.
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
14 changes: 13 additions & 1 deletion configs/config_naivev2diff_shallow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,20 @@ model:
conv_model_type: 'mode1'
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
14 changes: 13 additions & 1 deletion configs/config_naivev2diff_vae.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,20 @@ model:
conv_model_type: 'mode1'
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'hifivaegan'
ckpt: 'pretrain/hifivaegan/G_224800.pth'
Expand Down
17 changes: 16 additions & 1 deletion configs/config_naivev2reflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,24 @@ model:
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
device: cuda
consistency: false
consistency_only: true
consistency_delta_t: 0.1
consistency_lambda_f: 1.0
consistency_lambda_v: 0.01
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,17 @@ data:
extensions: # List of extension included in the data collection
- wav
model:
torch_compile_args:
use_copile: true
fullgraph: false
dynamic: 'none' # 'none',false or true
backend: 'inductor' # 'cudagraphs', 'inductor', 'onnxrt', 'openxla', 'openxla_eval', 'tvm'
mode: 'reduce-overhead' # 'default','reduce-overhead','max-autotune' or 'max-autotune-no-cudagraphs'
use_options: false # if use options, should be true
options:
k: 'v'
k_step_max: 100
type: 'DiffusionV2'
t_start: 0.0 # do not change
type: 'ReFlow1Step'
n_hidden: 256
use_pitch_aug: true
n_spk: 2 # max number of different speakers
z_rate: 0 # dont change
mean_only: true
max_beta: 0.02
spec_min: -12
spec_max: 2
denoise_fn:
type: 'NaiveV2Diff'
velocity_fn:
type: 'LYNXNetDiff'
cn_layers: 6
cn_chans: 512
use_mlp: false # whether to use an MLP in cond_emb and output_proj
Expand All @@ -48,36 +38,32 @@ model:
conv_only: true # if false, use a Transformer block together with the conv block
wavenet_like: false # don't change unless you understand it; more info: diffusion/naive_v2/naive_v2_diff.py
use_norm: false # pre-norm for every layers
conv_model_type: 'mode1' # dont change
conv_model_type: 'mode1'
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
naive_fn:
type: 'LYNXNet' # LYNXNet is another name for ConformerNaiveEncoder (NaiveNet)
n_layers: 3
n_chans: 256
simple_stack: false # use simple stack for unit emb
out_put_norm: true # norm and weight_norm in last layer
expansion_factor: 2
kernel_size: 31
conv_model_type: 'mode1' # dont change
num_heads: 8
use_norm: false # pre-norm for every layers
conv_only: true # if false, use a Transformer block together with the conv block
conv_dropout: 0.0
atten_dropout: 0.1
use_weight_norm: false
naive_fn_grad_not_by_diffusion: false # don't change unless you understand it; more info: diffusion/unit2mel.py
naive_out_mel_cond_diff: false # mel-conditioned diffusion is an experimental feature; it may help the model learn faster, but with lower quality and a narrower pitch range.
device: cuda
loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
infer:
speedup: 1
method: 'pndm' # 'ddim', 'pndm', 'dpm-solver' or 'unipc'
infer_step: 10
method: 'euler' # 'euler', 'rk4'
env:
expdir: exp/naivev2diffcombocompile
expdir: exp/naivev2reflow
gpu_id: 0
train:
ema_decay: 0.999 # <1
Expand All @@ -96,4 +82,4 @@ train:
decay_step: 100000
gamma: 0.5
weight_decay: 0
save_opt: false
save_opt: false
19 changes: 18 additions & 1 deletion configs/config_naivev2reflow_combo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ model:
conv_model_type: 'mode1'
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
naive_fn:
Expand All @@ -60,7 +63,21 @@ model:
use_weight_norm: false
naive_fn_grad_not_by_reflow: false # don't change unless you understand it; more info: diffusion/unit2mel.py
naive_out_mel_cond_reflow: false # mel-conditioned diffusion is an experimental feature; it may help the model learn faster, but with lower quality and a narrower pitch range.
device: cuda
consistency: false
consistency_only: true
consistency_delta_t: 0.1
consistency_lambda_f: 1.0
consistency_lambda_v: 0.01
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
19 changes: 18 additions & 1 deletion configs/config_naivev2reflow_shallow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,26 @@ model:
conv_model_type: 'mode1'
conv_dropout: 0.0
atten_dropout: 0.1
conv_model_activation: 'SiLU'
GLU_type: 'GLU'
channel_norm: false
mask_cond_ratio: 'NOTUSE' # input 'NOTUSE' if not use
loss_type: 'l2' # 'l1', 'l2' or 'l2_lognorm'
device: cuda
consistency: false
consistency_only: true
consistency_delta_t: 0.1
consistency_lambda_f: 1.0
consistency_lambda_v: 0.01
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
11 changes: 10 additions & 1 deletion configs/config_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,16 @@ model:
wn_tf_rf: false # RoFormer is used only when both wn_tf_use and this option are true
wn_tf_n_layers: 2
wn_tf_n_head: 4
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
11 changes: 10 additions & 1 deletion configs/config_v2_comb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,16 @@ model:
use_weight_norm: false
naive_fn_grad_not_by_diffusion: false # don't change unless you understand it; more info: diffusion/unit2mel.py
naive_out_mel_cond_diff: false # mel-conditioned diffusion is an experimental feature; it may help the model learn faster, but with lower quality and a narrower pitch range.
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
16 changes: 15 additions & 1 deletion configs/config_v2_reflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,21 @@ model:
wn_tf_n_layers: 2
wn_tf_n_head: 4
loss_type: 'l2_lognorm' # 'l1', 'l2' or 'l2_lognorm'
device: cuda
consistency: false
consistency_only: true
consistency_delta_t: 0.1
consistency_lambda_f: 1.0
consistency_lambda_v: 0.01
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
11 changes: 10 additions & 1 deletion configs/config_v2_shallow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,16 @@ model:
wn_tf_rf: false # RoFormer is used only when both wn_tf_use and this option are true
wn_tf_n_layers: 2
wn_tf_n_head: 4
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
Expand Down
11 changes: 10 additions & 1 deletion configs/config_v2_vae.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,16 @@ model:
wn_tf_rf: false # RoFormer is used only when both wn_tf_use and this option are true
wn_tf_n_layers: 2
wn_tf_n_head: 4
device: cuda
device: 'cuda'
ddp:
use_ddp: false # if true, ddp_device overrides device and gpu_id
port: '13348'
ddp_cache_gpu: false
ddp_device:
- 'cuda:1'
- 'cuda:2'
- 'cuda:3'
- 'cuda:4'
vocoder:
type: 'hifivaegan'
ckpt: 'pretrain/hifivaegan/G_224800.pth'
Expand Down
Loading