JuIm
/

ProGemma

Text Generation

Generated from Trainer

text-generation-inference

Model card Files Files and versions

Metrics Training metrics Community

JuIm committed on Jul 31, 2024

Commit

b74bb71

·

verified ·

1 Parent(s): 4dec132

Update config.json

Files changed (1) hide show

config.json +3 -3

config.json CHANGED Viewed

@@ -6,9 +6,9 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "attn_logit_softcapping": 50.0,
-  "bos_token_id": 1,
   "cache_implementation": "hybrid",
-  "eos_token_id": 2,
   "final_logit_softcapping": 30.0,
   "head_dim": 256,
   "hidden_activation": "gelu_pytorch_tanh",
@@ -20,7 +20,7 @@
   "num_attention_heads": 16,
   "num_hidden_layers": 14,
   "num_key_value_heads": 16,
-  "pad_token_id": 3,
   "query_pre_attn_scalar": 224,
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,

   "attention_bias": false,
   "attention_dropout": 0.0,
   "attn_logit_softcapping": 50.0,
+  "bos_token_id": 20,
   "cache_implementation": "hybrid",
+  "eos_token_id": 21,
   "final_logit_softcapping": 30.0,
   "head_dim": 256,
   "hidden_activation": "gelu_pytorch_tanh",
   "num_attention_heads": 16,
   "num_hidden_layers": 14,
   "num_key_value_heads": 16,
+  "pad_token_id": 22,
   "query_pre_attn_scalar": 224,
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,