# Gin configuration for a masking-model training run.
#
# Layout: one `Configurable.parameter = value` binding per line, grouped by
# configurable under a "Parameters for <name>" banner. Values prefixed with `@`
# are references to other configurables rather than literal values.
#
# NOTE(review): the three DiscotubeAudioDataModule path settings below are empty
# strings; presumably they must be filled in for the target environment before
# this config can be used for training -- confirm against the data module.

# Parameters for AudioDataModule:
# ==============================================================================
AudioDataModule.num_workers = 20

# Parameters for AudioDataset:
# ==============================================================================
AudioDataset.half_precision = True
AudioDataset.mono = True
# orig_freq and new_freq are both 16000 in this config.
AudioDataset.new_freq = 16000
AudioDataset.num_frames = 480000
AudioDataset.orig_freq = 16000

# Parameters for build_dev_datamodule:
# ==============================================================================
build_dev_datamodule.datamodule = @discotube

# Parameters for build_module:
# ==============================================================================
# NOTE(review): relative checkpoint path -- presumably resolved against the
# working directory of the launching process; verify.
build_module.ckpt_path = 'model.ckpt'
build_module.module = @modules.maskingmodel.MaskingModel
build_module.net = @nets.conformer.Conformer
build_module.representation = @nets.melspectrogram.MelSpectrogram

# Parameters for Conformer:
# ==============================================================================
Conformer.alpha_deepnorm = 2.6321480259049848
Conformer.beta_deepnorm = 0.022386873579657126
Conformer.conv_kernel_size = 5
Conformer.depth = 24
Conformer.dropout = 0.2
Conformer.embed_dim = 1024
Conformer.input_dropout = 0.0
Conformer.mlp_ratio = 4.0
Conformer.mlp_residual_factor = 4.0
Conformer.num_heads = 8
Conformer.num_patches = 460
Conformer.use_deepnorm = True
Conformer.use_rope = True

# Parameters for CosineAnnealingCallback:
# ==============================================================================
CosineAnnealingCallback.eta_min = 1e-07
CosineAnnealingCallback.warmup_steps = 30000

# Parameters for DiscotubeAudioDataModule:
# ==============================================================================
DiscotubeAudioDataModule.batch_size = 32
# The data directory and both file lists are left as empty strings here.
DiscotubeAudioDataModule.data_dir = ''
DiscotubeAudioDataModule.filelist_train = ''
DiscotubeAudioDataModule.filelist_val = ''

# Parameters for MaskingModel:
# ==============================================================================
MaskingModel.codebook_dim = 16
MaskingModel.codebook_size = 8192
MaskingModel.diff_input = False
MaskingModel.lr = 0.0001
MaskingModel.mask_prob = 0.6
MaskingModel.mask_seconds = 0.4
MaskingModel.num_codebooks = 4
MaskingModel.plot_tokens = False
MaskingModel.seed = 0
MaskingModel.weight_decay = 0.01

# Parameters for MelSpectrogram:
# ==============================================================================
MelSpectrogram.freq_mask_param = 0
MelSpectrogram.hop_len = 256
MelSpectrogram.mel_scale = 'slaney'
MelSpectrogram.n_mel = 96
MelSpectrogram.norm = 'slaney'
MelSpectrogram.norm_mean = 2.06755686098554
MelSpectrogram.norm_std = 1.268292820667291
MelSpectrogram.power = 2
# Same value as AudioDataset.new_freq above (16000).
MelSpectrogram.sr = 16000
MelSpectrogram.stretch_factor = 1
MelSpectrogram.time_mask_param = 0
MelSpectrogram.win_len = 512
# First element equals n_mel (96).
MelSpectrogram.patch_size = (96, 4)

# Parameters for train:
# ==============================================================================
# NOTE(review): these keys look like trainer constructor arguments -- confirm
# how `train` consumes the dict.
train.params = \
    {'accelerator': 'gpu',
     'devices': 4,
     'log_every_n_steps': 50,
     'max_steps': 400000,
     'num_nodes': 1,
     'num_sanity_val_steps': 0,
     'precision': 'bf16-mixed',
     'strategy': 'ddp_find_unused_parameters_true'}
# Experiment-logger settings; 'offline' is True and 'save_dir' is an absolute,
# site-specific path.
train.wandb_params = \
    {'entity': 'mtg-upf',
     'group': 'masking_conformer',
     'name': 'mask_conformer_rope_multi4_large',
     'offline': True,
     'project': 'mtg-ssl',
     'save_dir': '/gpfs/projects/upf97/logs/'}