Kalaphant
/

KalaBot

{
  "model_type": "bert",
  "vocabulary_size": 30522,
  "hidden_size": 768,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "intermediate_size": 3072,
  "activation_function": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "tokenizer_type": "WordPiece",
  "special_tokens": {
    "pad_token": "[PAD]",
    "unk_token": "[UNK]",
    "cls_token": "[CLS]",
    "sep_token": "[SEP]",
    "mask_token": "[MASK]"
  },
  "dropout_rate": 0.1,
  "learning_rate": 0.00005,
  "optimizer": "adamw",
  "num_labels": 2,
  "train_batch_size": 16,
  "eval_batch_size": 32,
  "epochs": 3,
  "early_stopping_patience": 3
}