| datamodule: | |
| _target_: look2hear.datas.datamodule.DataModule | |
| batch_size: 1 | |
| num_workers: 8 | |
| pin_memory: true | |
| DataClass: | |
| _target_: look2hear.datas.datasets.waveform.WaveformDataClass | |
| train_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tr | |
| val_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/cv | |
| test_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tt | |
| n_src: 1 | |
| task: enhancement | |
| sample_rate: 16000 | |
| segment: 4 | |
| is_drop: false | |
| normalize_audio: false | |
| augmentation: false | |
| audiomodel: | |
| _target_: look2hear.models.ctcnet.CTCNet | |
| encoder_type: ConvolutionalEncoder | |
| decoder_type: ConvolutionalDecoder | |
| audio_channels: 1 | |
| audio_encoder_channels: 512 | |
| audio_encoder_kernels: 21 | |
| audio_encoder_strides: 10 | |
| audio_in_channels: 512 | |
| audio_out_channels: 512 | |
| audio_kernel_size: 5 | |
| audio_depth: 4 | |
| audio_block_type: ConvNormAct | |
| audio_norm_type: gLN | |
| audio_act_type: PReLU | |
| audio_shared: true | |
| visual_encoder_channels: 512 | |
| visual_in_channels: 64 | |
| visual_out_channels: 64 | |
| visual_kernel_size: 3 | |
| visual_depth: 4 | |
| visual_block_type: ConvNormAct | |
| visual_norm_type: BatchNorm1d | |
| visual_act_type: PReLU | |
| visual_shared: false | |
| fusion_type: ConcatFusion | |
| fusion_shared: false | |
| n_repeats: 3 | |
| m_repeats: 13 | |
| mask_types: MaskGenerator | |
| num_speakers: 1 | |
| mask_kernel_size: 1 | |
| mask_act: ReLU | |
| mask_RI_split: false | |
| mask_output_gate: false | |
| mask_dw_gate: false | |
| mask_direct: false | |
| mask_is2d: false | |
| videomodel: | |
| _target_: look2hear.video_models.resnetmodel.ResNetVideoModel | |
| activation_type: PReLU | |
| pretrained: /home/likai/ssd/Look2hear/pretrain_zoo/frcnn_128_512.backbone.pth.tar | |
| audio_optimizer: | |
| _target_: torch.optim.AdamW | |
| lr: 0.001 | |
| weight_decay: 0.1 | |
| audio_scheduler: | |
| _target_: torch.optim.lr_scheduler.ReduceLROnPlateau | |
| mode: min | |
| factor: 0.5 | |
| patience: 10 | |
| audio_loss: | |
| _target_: look2hear.losses.pitwrapper.PITLossWrapper | |
| loss_func: look2hear.losses.snr.neg_sisdr | |
| pit: true | |
| mode: permutation-wise | |
| eval_func: min | |
| system: | |
| _target_: look2hear.systems.single_speaker.SingleSpeaker | |
| freeze_video_model: true | |
| compile: false | |
| exp: | |
| dir: /home/likai/ssd/Look2hear/examples/CTCNet | |
| name: CTCNet-1 | |
| checkpoint: | |
| _target_: lightning.pytorch.callbacks.ModelCheckpoint | |
| dirpath: ${exp.dir}/${exp.name}/checkpoints | |
| monitor: val/neg_sisdr | |
| mode: min | |
| verbose: true | |
| save_top_k: 1 | |
| save_last: true | |
| filename: '{epoch}-{val/neg_sisdr:.4f}' | |
| logger: | |
| _target_: lightning.pytorch.loggers.WandbLogger | |
| name: ${exp.name} | |
| save_dir: ${exp.dir}/${exp.name}/logs | |
| offline: true | |
| project: Look2hear | |
| trainer: | |
| _target_: lightning.pytorch.Trainer | |
| devices: | |
| - 0 | |
| max_epochs: 5 | |
| sync_batchnorm: true | |
| gradient_clip_val: 5.0 | |
| default_root_dir: ${exp.dir}/${exp.name}/ | |
| accelerator: cuda | |
| limit_train_batches: 0.001 | |
| limit_val_batches: 0.001 | |
| fast_dev_run: false | |
| precision: bf16-mixed | |