mazesmazes
/

tiny-audio

@@ -10,16 +10,10 @@
     ],
     "audio_config": {
       "_name_or_path": "",
-      "add_cross_attention": false,
       "architectures": null,
       "attention_dropout": 0.0,
-      "bos_token_id": null,
       "chunk_size_feed_forward": 0,
-      "cross_attention_hidden_size": null,
-      "decoder_start_token_id": null,
       "dtype": null,
-      "eos_token_id": null,
-      "finetuning_task": null,
       "head_dim": 64,
       "hidden_act": "gelu",
       "hidden_size": 1280,
@@ -29,7 +23,6 @@
       },
       "initializer_range": 0.02,
       "intermediate_size": 5120,
-      "is_decoder": false,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
@@ -43,20 +36,14 @@
       "num_mel_bins": 128,
       "output_attentions": false,
       "output_hidden_states": false,
-      "pad_token_id": null,
       "partial_rotary_factor": 0.5,
-      "prefix": null,
       "problem_type": null,
       "return_dict": true,
       "rope_parameters": {
         "partial_rotary_factor": 0.5,
         "rope_theta": 10000.0,
         "rope_type": "default"
-      },
-      "sep_token_id": null,
-      "task_specific_params": null,
-      "tie_word_embeddings": true,
-      "tokenizer_class": null
     },
     "audio_token_id": 59260,
     "dtype": "bfloat16",
@@ -66,21 +53,17 @@
     "projector_hidden_act": "gelu",
     "text_config": {
       "_name_or_path": "",
-      "add_cross_attention": false,
       "architectures": null,
       "attention_bias": false,
       "attention_dropout": 0.0,
       "bos_token_id": 1,
       "chunk_size_feed_forward": 0,
-      "cross_attention_hidden_size": null,
-      "decoder_start_token_id": null,
       "dtype": null,
       "eos_token_id": [
         59246,
         59253,
         59255
       ],
-      "finetuning_task": null,
       "head_dim": 128,
       "hidden_act": "silu",
       "hidden_size": 2048,
@@ -90,7 +73,6 @@
       },
       "initializer_range": 0.02,
       "intermediate_size": 6144,
-      "is_decoder": false,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
@@ -105,7 +87,6 @@
       "output_attentions": false,
       "output_hidden_states": false,
       "pad_token_id": null,
-      "prefix": null,
       "pretraining_tp": 1,
       "problem_type": null,
       "return_dict": true,
@@ -114,10 +95,7 @@
         "rope_theta": 10000.0,
         "rope_type": "default"
       },
-      "sep_token_id": null,
-      "task_specific_params": null,
       "tie_word_embeddings": false,
-      "tokenizer_class": null,
       "use_cache": true,
       "vocab_size": 59264
     },
@@ -141,6 +119,7 @@
       "type": "audio"
     }
   },
   "downsample_rate": 5,
   "dtype": "bfloat16",
   "encoder": {
@@ -150,16 +129,10 @@
     ],
     "audio_config": {
       "_name_or_path": "",
-      "add_cross_attention": false,
       "architectures": null,
       "attention_dropout": 0.0,
-      "bos_token_id": null,
       "chunk_size_feed_forward": 0,
-      "cross_attention_hidden_size": null,
-      "decoder_start_token_id": null,
       "dtype": null,
-      "eos_token_id": null,
-      "finetuning_task": null,
       "head_dim": 64,
       "hidden_act": "gelu",
       "hidden_size": 1280,
@@ -169,7 +142,6 @@
       },
       "initializer_range": 0.02,
       "intermediate_size": 5120,
-      "is_decoder": false,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
@@ -183,20 +155,14 @@
       "num_mel_bins": 128,
       "output_attentions": false,
       "output_hidden_states": false,
-      "pad_token_id": null,
       "partial_rotary_factor": 0.5,
-      "prefix": null,
       "problem_type": null,
       "return_dict": true,
       "rope_parameters": {
         "partial_rotary_factor": 0.5,
         "rope_theta": 10000.0,
         "rope_type": "default"
-      },
-      "sep_token_id": null,
-      "task_specific_params": null,
-      "tie_word_embeddings": true,
-      "tokenizer_class": null
     },
     "audio_token_id": 59260,
     "dtype": "bfloat16",
@@ -206,21 +172,17 @@
     "projector_hidden_act": "gelu",
     "text_config": {
       "_name_or_path": "",
-      "add_cross_attention": false,
       "architectures": null,
       "attention_bias": false,
       "attention_dropout": 0.0,
       "bos_token_id": 1,
       "chunk_size_feed_forward": 0,
-      "cross_attention_hidden_size": null,
-      "decoder_start_token_id": null,
       "dtype": null,
       "eos_token_id": [
         59246,
         59253,
         59255
       ],
-      "finetuning_task": null,
       "head_dim": 128,
       "hidden_act": "silu",
       "hidden_size": 2048,
@@ -230,7 +192,6 @@
       },
       "initializer_range": 0.02,
       "intermediate_size": 6144,
-      "is_decoder": false,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
@@ -245,7 +206,6 @@
       "output_attentions": false,
       "output_hidden_states": false,
       "pad_token_id": null,
-      "prefix": null,
       "pretraining_tp": 1,
       "problem_type": null,
       "return_dict": true,
@@ -254,10 +214,7 @@
         "rope_theta": 10000.0,
         "rope_type": "default"
       },
-      "sep_token_id": null,
-      "task_specific_params": null,
       "tie_word_embeddings": false,
-      "tokenizer_class": null,
       "use_cache": true,
       "vocab_size": 59264
     },
@@ -280,7 +237,7 @@
   "freq_mask_length": 27,
   "inference_warmup_tokens": 10,
   "label_smoothing": 0.0,
-  "length_penalty": 0.6,
   "llm_dim": 2048,
   "lora_alpha": 32,
   "lora_dropout": 0.0,
@@ -295,19 +252,19 @@
     "down_proj"
   ],
   "max_new_tokens": 128,
-  "min_new_tokens": 1,
   "model_dtype": "bfloat16",
   "model_type": "asr_model",
-  "no_repeat_ngram_size": 4,
   "num_beams": 1,
   "num_experts": 4,
   "num_experts_per_tok": 2,
-  "num_freq_masks": 1,
-  "num_time_masks": 1,
   "pipeline_tag": "automatic-speech-recognition",
   "pretrained_model_path": "mazesmazes/tiny-audio",
   "projector_dropout": 0.0,
-  "projector_hidden_dim": null,
   "projector_init_std": 0.02,
   "projector_num_layers": 2,
   "projector_pool_stride": 4,
@@ -317,9 +274,10 @@
   "qformer_num_heads": 16,
   "qformer_num_layers": 2,
   "qformer_window_size": 15,
-  "repetition_penalty": 1.1,
   "router_aux_loss_coef": 0.01,
   "system_prompt": "",
   "text_config": {
     "_name_or_path": "Qwen/Qwen3-1.7B",
     "architectures": [
@@ -327,6 +285,7 @@
     ],
     "attention_bias": false,
     "attention_dropout": 0.0,
     "dtype": "bfloat16",
     "eos_token_id": 151645,
     "head_dim": 128,
@@ -384,6 +343,8 @@
   },
   "text_model_id": "Qwen/Qwen3-1.7B",
   "time_mask_length": 100,
   "transformers_version": "5.0.0.dev0",
   "use_cache": false,
   "use_lora": false,

     ],
     "audio_config": {
       "_name_or_path": "",
       "architectures": null,
       "attention_dropout": 0.0,
       "chunk_size_feed_forward": 0,
       "dtype": null,
       "head_dim": 64,
       "hidden_act": "gelu",
       "hidden_size": 1280,
       },
       "initializer_range": 0.02,
       "intermediate_size": 5120,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
       "num_mel_bins": 128,
       "output_attentions": false,
       "output_hidden_states": false,
       "partial_rotary_factor": 0.5,
       "problem_type": null,
       "return_dict": true,
       "rope_parameters": {
         "partial_rotary_factor": 0.5,
         "rope_theta": 10000.0,
         "rope_type": "default"
+      }
     },
     "audio_token_id": 59260,
     "dtype": "bfloat16",
     "projector_hidden_act": "gelu",
     "text_config": {
       "_name_or_path": "",
       "architectures": null,
       "attention_bias": false,
       "attention_dropout": 0.0,
       "bos_token_id": 1,
       "chunk_size_feed_forward": 0,
       "dtype": null,
       "eos_token_id": [
         59246,
         59253,
         59255
       ],
       "head_dim": 128,
       "hidden_act": "silu",
       "hidden_size": 2048,
       },
       "initializer_range": 0.02,
       "intermediate_size": 6144,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
       "output_attentions": false,
       "output_hidden_states": false,
       "pad_token_id": null,
       "pretraining_tp": 1,
       "problem_type": null,
       "return_dict": true,
         "rope_theta": 10000.0,
         "rope_type": "default"
       },
       "tie_word_embeddings": false,
       "use_cache": true,
       "vocab_size": 59264
     },
       "type": "audio"
     }
   },
+  "do_sample": false,
   "downsample_rate": 5,
   "dtype": "bfloat16",
   "encoder": {
     ],
     "audio_config": {
       "_name_or_path": "",
       "architectures": null,
       "attention_dropout": 0.0,
       "chunk_size_feed_forward": 0,
       "dtype": null,
       "head_dim": 64,
       "hidden_act": "gelu",
       "hidden_size": 1280,
       },
       "initializer_range": 0.02,
       "intermediate_size": 5120,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
       "num_mel_bins": 128,
       "output_attentions": false,
       "output_hidden_states": false,
       "partial_rotary_factor": 0.5,
       "problem_type": null,
       "return_dict": true,
       "rope_parameters": {
         "partial_rotary_factor": 0.5,
         "rope_theta": 10000.0,
         "rope_type": "default"
+      }
     },
     "audio_token_id": 59260,
     "dtype": "bfloat16",
     "projector_hidden_act": "gelu",
     "text_config": {
       "_name_or_path": "",
       "architectures": null,
       "attention_bias": false,
       "attention_dropout": 0.0,
       "bos_token_id": 1,
       "chunk_size_feed_forward": 0,
       "dtype": null,
       "eos_token_id": [
         59246,
         59253,
         59255
       ],
       "head_dim": 128,
       "hidden_act": "silu",
       "hidden_size": 2048,
       },
       "initializer_range": 0.02,
       "intermediate_size": 6144,
       "is_encoder_decoder": false,
       "label2id": {
         "LABEL_0": 0,
       "output_attentions": false,
       "output_hidden_states": false,
       "pad_token_id": null,
       "pretraining_tp": 1,
       "problem_type": null,
       "return_dict": true,
         "rope_theta": 10000.0,
         "rope_type": "default"
       },
       "tie_word_embeddings": false,
       "use_cache": true,
       "vocab_size": 59264
     },
   "freq_mask_length": 27,
   "inference_warmup_tokens": 10,
   "label_smoothing": 0.0,
+  "length_penalty": 1.0,
   "llm_dim": 2048,
   "lora_alpha": 32,
   "lora_dropout": 0.0,
     "down_proj"
   ],
   "max_new_tokens": 128,
+  "min_new_tokens": 0,
   "model_dtype": "bfloat16",
   "model_type": "asr_model",
+  "no_repeat_ngram_size": 0,
   "num_beams": 1,
   "num_experts": 4,
   "num_experts_per_tok": 2,
+  "num_freq_masks": 2,
+  "num_time_masks": 2,
   "pipeline_tag": "automatic-speech-recognition",
   "pretrained_model_path": "mazesmazes/tiny-audio",
   "projector_dropout": 0.0,
+  "projector_hidden_dim": 1024,
   "projector_init_std": 0.02,
   "projector_num_layers": 2,
   "projector_pool_stride": 4,
   "qformer_num_heads": 16,
   "qformer_num_layers": 2,
   "qformer_window_size": 15,
+  "repetition_penalty": 1.0,
   "router_aux_loss_coef": 0.01,
   "system_prompt": "",
+  "temperature": null,
   "text_config": {
     "_name_or_path": "Qwen/Qwen3-1.7B",
     "architectures": [
     ],
     "attention_bias": false,
     "attention_dropout": 0.0,
+    "bos_token_id": null,
     "dtype": "bfloat16",
     "eos_token_id": 151645,
     "head_dim": 128,
   },
   "text_model_id": "Qwen/Qwen3-1.7B",
   "time_mask_length": 100,
+  "top_k": null,
+  "top_p": null,
   "transformers_version": "5.0.0.dev0",
   "use_cache": false,
   "use_lora": false,

generation_config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
   "bos_token_id": 151643,
   "eos_token_id": [
     151645,
     151643
@@ -11,5 +12,6 @@
   "num_beams": 1,
   "pad_token_id": 151643,
   "repetition_penalty": 1.0,
-  "transformers_version": "5.0.0.dev0"
 }

 {
   "bos_token_id": 151643,
+  "do_sample": false,
   "eos_token_id": [
     151645,
     151643
   "num_beams": 1,
   "pad_token_id": 151643,
   "repetition_penalty": 1.0,
+  "transformers_version": "5.0.0.dev0",
+  "use_cache": true
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf1b1f99fd75659d1bf8524d117e87980d5b5e1b9bc20e06ec65573d4889a86c
-size 58732960

 version https://git-lfs.github.com/spec/v1
+oid sha256:59b57ea7c026d9a2fe4e3fbe75b731e69e7a1dd8e7d06ef06885f0d314338e0e
+size 14682440

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a56725c6b1d382330f25006e7e6a7061af0a69a1f8ef8c84fed5d2e358219ff8
 size 5265

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c29a7e9ee7106242caad94beb8ad7475df82442f5568b663dd73a29798f9e43
 size 5265