Upload LlamaForCausalLM
Files changed:
- config.json (+0 -1)
- generation_config.json (+1 -1)
config.json
CHANGED
@@ -14,7 +14,6 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 32,
-  "pad_token_id": 0,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
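For context (not part of the commit): with "pad_token_id" removed from config.json, code that batches inputs for this checkpoint usually has to set a pad token explicitly at load time. A minimal sketch, assuming the Hugging Face transformers library and a placeholder repo id "user/model":

    from transformers import AutoModelForCausalLM, AutoTokenizer

    repo_id = "user/model"  # placeholder, not the actual repository
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(repo_id)

    # With no pad_token_id in config.json, pick one explicitly before
    # batched generation; reusing the eos token is a common choice.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id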
generation_config.json
CHANGED
@@ -3,7 +3,7 @@
   "eos_token_id": 2,
   "max_length": 4096,
   "max_memory": {
-  "cpu":
+  "cpu": 27833966592
   },
   "no_split_module_classes": [
     "LlamaDecoderLayer"
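For context (not part of the commit): "max_memory" and "no_split_module_classes" mirror the arguments accelerate uses when computing a device map, so the recorded value reads as a CPU budget of 27833966592 bytes (roughly 26 GiB). A minimal sketch of how such values are typically consumed, assuming the transformers and accelerate libraries and the same placeholder repo id "user/model":

    from accelerate import infer_auto_device_map, init_empty_weights
    from transformers import AutoConfig, AutoModelForCausalLM

    repo_id = "user/model"  # placeholder, not the actual repository

    # Build the model skeleton on the meta device to plan placement
    # without allocating real weights.
    config = AutoConfig.from_pretrained(repo_id)
    with init_empty_weights():
        empty_model = AutoModelForCausalLM.from_config(config)

    # Values taken from generation_config.json above: cap CPU memory and
    # keep each LlamaDecoderLayer on a single device.
    device_map = infer_auto_device_map(
        empty_model,
        max_memory={"cpu": 27833966592},
        no_split_module_classes=["LlamaDecoderLayer"],
    )

    model = AutoModelForCausalLM.from_pretrained(repo_id, device_map=device_map)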