if001
/

sample_phi-2

Text Generation

text-generation-inference

Model card Files and versions

if001 committed on Mar 11, 2024

Commit

cd4d4ec

·

verified ·

1 Parent(s): c520681

update

Files changed (1) hide show

README.md +74 -0

README.md CHANGED Viewed

@@ -1,3 +1,77 @@
 ---
 license: mit
 ---

 ---
 license: mit
 ---
+PhiConfig {
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "gelu_new",
+  "hidden_size": 8,
+  "initializer_range": 0.02,
+  "intermediate_size": 10,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 6,
+  "num_key_value_heads": 2,
+  "partial_rotary_factor": 0.5,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.0,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 20
+}
+PhiForCausalLM(
+  (model): PhiModel(
+    (embed_tokens): Embedding(20, 8)
+    (embed_dropout): Dropout(p=0.0, inplace=False)
+    (layers): ModuleList(
+      (0-5): 6 x PhiDecoderLayer(
+        (self_attn): PhiAttention(
+          (q_proj): Linear(in_features=8, out_features=8, bias=True)
+          (k_proj): Linear(in_features=8, out_features=4, bias=True)
+          (v_proj): Linear(in_features=8, out_features=4, bias=True)
+          (dense): Linear(in_features=8, out_features=8, bias=True)
+          (rotary_emb): PhiRotaryEmbedding()
+        )
+        (mlp): PhiMLP(
+          (activation_fn): NewGELUActivation()
+          (fc1): Linear(in_features=8, out_features=10, bias=True)
+          (fc2): Linear(in_features=10, out_features=8, bias=True)
+        )
+        (input_layernorm): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
+        (resid_dropout): Dropout(p=0.0, inplace=False)
+      )
+    )
+    (final_layernorm): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
+  )
+  (lm_head): Linear(in_features=8, out_features=20, bias=True)
+)
+===========================================================================
+Layer (type:depth-idx)                             Param #
+===========================================================================
+PhiForCausalLM                                     --
+├─PhiModel: 1-1                                    --
+│    └─Embedding: 2-1                              160
+│    └─Dropout: 2-2                                --
+│    └─ModuleList: 2-3                             --
+│    │    └─PhiDecoderLayer: 3-1                   410
+│    │    └─PhiDecoderLayer: 3-2                   410
+│    │    └─PhiDecoderLayer: 3-3                   410
+│    │    └─PhiDecoderLayer: 3-4                   410
+│    │    └─PhiDecoderLayer: 3-5                   410
+│    │    └─PhiDecoderLayer: 3-6                   410
+│    └─LayerNorm: 2-4                              16
+├─Linear: 1-2                                      180
+===========================================================================
+Total params: 2,816
+Trainable params: 2,816
+Non-trainable params: 0
+===========================================================================