task_name: train
# Free-form run tags for experiment bookkeeping/filtering.
tags:
- audioset
- best-rq-2
- cluster GPU
# Stage toggles — presumably consumed by the run script; confirm against runner.
train: true
test: true
# Checkpoint to resume from; null starts training from scratch.
ckpt_path: null
# Global RNG seed (looks date-derived: 21.07.2023).
seed: 21072023
# Datamodule configuration (Hydra-instantiated via _target_).
# NOTE(review): nesting restored — the source had lost all indentation.
data:
  _target_: src.data.audioset_datamodule.AudioSetDataModule
  data_dir: ${paths.data_dir}/AudioSet
  batch_size: 256
  # Worker count taken from the SLURM allocation; oc.decode casts the env string to int.
  num_workers: ${oc.decode:${oc.env:SLURM_CPUS_PER_TASK}}
  pin_memory: true
  # HDF5 audio archives plus CSVs listing silent files (presumably excluded — confirm).
  train_h5: full_unbal_bal_train_wav.h5
  train_csv: silent_files_full_unbal_bal_train_wav.csv
  val_h5: eval_soxrhq.h5
  val_csv: silent_files_eval_soxrhq.csv
  max_audio_length_sec: 10.0
  target_sample_rate: 16000
  collate_mode: pad
# LightningModule configuration for BEST-RQ v2 pre-training.
# NOTE(review): nesting restored — the source had lost all indentation; the
# sub-map boundaries below follow the _target_ signatures and should be
# confirmed against BestRQ2Module's constructor.
model:
  _target_: src.models.best_rq2_module.BestRQ2Module
  # Partial optimizer factory; the module supplies the parameters later.
  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true
    lr: 0.0001
    weight_decay: 0.05
  # warmup_pct is not an AdamW argument, so it is placed at module level — confirm.
  warmup_pct: 0.05
  spectrogram_adjustment_mode: truncate
  criterion:
    _target_: torch.nn.CrossEntropyLoss
    _partial_: true
    reduction: mean
  # Quantizer/codebook settings (16-dim projection, 8192-entry vocabulary).
  codebook_dim: 16
  vocab_size: 8192
  net:
    # Mel-spectrogram frontend; sample rate mirrors the datamodule's target rate.
    spectrogram:
      sample_rate: ${data.target_sample_rate}
      n_fft: 2048
      win_length_ms: 128
      hop_length_ms: 39.0625
      n_mels: 128
      f_min: 0
      f_max: 8000
      power: 2.0
    # ViT-style patchifier: 128x256 spectrogram -> 16x16 patches -> 768-d tokens.
    patch_embed:
      img_size:
        - 128
        - 256
      patch_size:
        - 16
        - 16
      in_chans: 1
      embed_dim: 768
    masking:
      input_size:
        - 128
        - 256
      patch_size:
        - 16
        - 16
      # Presumably the masking ratio is sampled from [0.4, 0.6] — confirm.
      mask_ratio:
        - 0.4
        - 0.6
    # Transformer encoder (ViT-Base-like: 12 layers, 12 heads, 768 dims).
    encoder:
      embed_dim: 768
      depth: 12
      num_heads: 12
      mlp_ratio: 4.0
      qkv_bias: true
      drop_rate: 0.0
      attn_drop_rate: 0.0
      drop_path_rate: 0.1
      num_patches: 128
      pos_embed_type: sincos
    # Shallower predictor head (4 layers) over the same token width.
    predictor:
      embed_dim: 768
      depth: 4
      num_heads: 12
      mlp_ratio: 4.0
      qkv_bias: true
      drop_rate: 0.0
      attn_drop_rate: 0.0
      drop_path_rate: 0.0
      num_patches: 128
      pos_embed_type: sincos
# Lightning callbacks; null entries disable defaults inherited from base configs.
# NOTE(review): nesting restored — the source had lost all indentation.
callbacks:
  model_checkpoint: null
  model_summary:
    _target_: lightning.pytorch.callbacks.RichModelSummary
    max_depth: 1
  rich_progress_bar: null
  safetensors:
    _target_: src.callbacks.safetensors_callback.SafetensorsCallback
  device_stats:
    _target_: lightning.pytorch.callbacks.DeviceStatsMonitor
  visualization:
    _target_: src.callbacks.visualization_callback.VisualizationCallback
    num_samples: 4
  wandb_offline_checkpoint:
    _target_: src.callbacks.wandb_callbacks.WandbOfflineCheckpointCallback
# Weights & Biases logger, offline mode (runs are synced after the cluster job).
# NOTE(review): nesting restored — the source had lost all indentation.
logger:
  wandb:
    _target_: lightning.pytorch.loggers.wandb.WandbLogger
    save_dir: ${paths.output_dir}
    offline: true
    id: null
    anonymous: null
    project: audio embeddings
    log_model: false
    prefix: ""
    group: ""
    tags: []
    job_type: ""
    name: best_rq2-audioset-200k-256x1bs
# Lightning Trainer: single-GPU, step-based budget with a wall-clock cap.
# NOTE(review): nesting restored — the source had lost all indentation.
trainer:
  _target_: lightning.pytorch.trainer.Trainer
  default_root_dir: ${paths.output_dir}
  accelerator: gpu
  devices: 1
  check_val_every_n_epoch: 1
  deterministic: false
  # 200k optimizer steps total.
  max_steps: 200000
  strategy: auto
  # Quoted: an unquoted digit:colon value is a YAML 1.1 sexagesimal integer
  # (00:19:50:00 would load as 71400 on 1.1-resolving parsers). Lightning's
  # max_time expects the "DD:HH:MM:SS" string — here 19h50m, just under a
  # presumed 20h job limit.
  max_time: "00:19:50:00"
# Directory layout; resolved by Hydra/OmegaConf interpolation at runtime.
# NOTE(review): nesting restored — the source had lost all indentation.
paths:
  root_dir: ${oc.env:PROJECT_ROOT}
  data_dir: ${paths.root_dir}/data/
  log_dir: ${paths.root_dir}/logs/
  output_dir: ${hydra:runtime.output_dir}
  work_dir: ${hydra:runtime.cwd}
# Misc utility flags (lightning-hydra-template "extras" stanza).
# NOTE(review): nesting restored — the source had lost all indentation.
extras:
  ignore_warnings: false
  enforce_tags: true
  print_config: true