{
  "architectures": [
    "Usad2Model"
  ],
  "attention_dropout_p": 0.0,
  "attention_type": "mhsa",
  "auto_map": {
    "AutoConfig": "configuration_usad2.Usad2Config",
    "AutoModel": "modeling_usad2.Usad2Model"
  },
  "conv_dropout_p": 0.0,
  "conv_expansion_factor": 2,
  "conv_kernel_size": 31,
  "conv_pos": true,
  "conv_pos_depth": 5,
  "conv_pos_groups": 16,
  "conv_pos_width": 95,
  "conv_subsample_channels": 64,
  "conv_subsample_rate": 4,
  "encoder_dim": 1280,
  "feed_forward_dropout_p": 0.0,
  "feed_forward_expansion_factor": 4,
  "half_step_residual": true,
  "input_dim": 128,
  "input_dropout_p": 0.0,
  "layerdrop_p": 0.0,
  "model_type": "usad2",
  "num_attention_heads": 20,
  "num_layers": 32,
  "patch_size_freq": 16,
  "patch_size_time": 16,
  "pre_norm": true,
  "rms_norm": false,
  "sample_rate": 16000,
  "subsample_normalization": true,
  "torch_dtype": "float32",
  "transformer_style": true,
  "transformers_version": "4.49.0",
  "usad_v2": true,
  "use_framewise_subsample": true,
  "use_patchwise_subsample": false
}