Delete llama2.c-stories110M-pruned50

Browse files

Files changed (10) hide show

llama2.c-stories110M-pruned50/.DS_Store +0 -0
llama2.c-stories110M-pruned50/.gitattributes +0 -35
llama2.c-stories110M-pruned50/README.md +0 -74
llama2.c-stories110M-pruned50/config.json +0 -33
llama2.c-stories110M-pruned50/model.safetensors +0 -3
llama2.c-stories110M-pruned50/recipe.yaml +0 -6
llama2.c-stories110M-pruned50/special_tokens_map.json +0 -24
llama2.c-stories110M-pruned50/tokenizer.json +0 -0
llama2.c-stories110M-pruned50/tokenizer.model +0 -3
llama2.c-stories110M-pruned50/tokenizer_config.json +0 -38

llama2.c-stories110M-pruned50/.DS_Store DELETED Viewed

Binary file (6.15 kB)

llama2.c-stories110M-pruned50/.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

llama2.c-stories110M-pruned50/README.md DELETED Viewed

@@ -1,74 +0,0 @@
----
-base_model: Xenova/llama2.c-stories110M
-inference: true
-model_type: llama
-quantized_by: mgoin
-tags:
-- nm-vllm
-- sparse
----
-## llama2.c-stories110M-pruned50
-This repo contains model files for [llama2.c 110M tinystories](https://huggingface.co/Xenova/llama2.c-stories110M) optimized for [NM-vLLM](https://github.com/neuralmagic/nm-vllm), a high-throughput serving engine for compressed LLMs.
-This model was pruned with [SparseGPT](https://arxiv.org/abs/2301.00774), using [SparseML](https://github.com/neuralmagic/sparseml).
-## Inference
-Install [NM-vLLM](https://github.com/neuralmagic/nm-vllm) for fast inference and low memory usage:
-```bash
-pip install nm-vllm[sparse]
-```
-Run in a Python pipeline for local inference:
-```python
-from vllm import LLM, SamplingParams
-model = LLM("nm-testing/llama2.c-stories110M-pruned50", sparsity="sparse_w16a16")
-prompt = "Hello my name is"
-sampling_params = SamplingParams(max_tokens=100, temperature=0)
-outputs = model.generate(prompt, sampling_params=sampling_params)
-print(outputs[0].outputs[0].text)
-```
-## Prompt template
-N/A
-## Sparsification
-For details on how this model was sparsified, see the `recipe.yaml` in this repo and follow the instructions below.
-Install [SparseML](https://github.com/neuralmagic/sparseml):
-```bash
-git clone https://github.com/neuralmagic/sparseml
-pip install -e "sparseml[transformers]"
-```
-Edit the recipe as needed, then run this one-shot compression script to apply SparseGPT:
-```python
-import sparseml.transformers
-original_model_name = "Xenova/llama2.c-stories110M"
-calibration_dataset = "open_platypus"
-output_directory = "output/"
-recipe = """
-test_stage:
-  obcq_modifiers:
-    SparseGPTModifier:
-      sparsity: 0.5
-      sequential_update: true
-      targets: ['re:model.layers.\d*$']
-"""
-# Apply SparseGPT to the model
-sparseml.transformers.oneshot(
-    model=original_model_name,
-    dataset=calibration_dataset,
-    recipe=recipe,
-    output_dir=output_directory,
-)
-```
-## Slack
-For further support and discussion of these models and AI in general, join [Neural Magic's Slack Community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ).

llama2.c-stories110M-pruned50/config.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "_name_or_path": "/home/damian/.cache/huggingface/hub/models--neuralmagic--llama2.c-stories110M-pruned50/snapshots/ae99c0a865dc99a60311e3fd3fc145eb339cff77",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 1,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 2048,
-  "max_position_embeddings": 1024,
-  "model_type": "llama",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "num_key_value_heads": 12,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "sparsity_config": {
-    "format": "sparse_bitmask",
-    "global_sparsity": 38.77294698534069,
-    "sparsity_structure": "0:0"
-  },
-  "tie_word_embeddings": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.39.3",
-  "use_cache": true,
-  "vocab_size": 32000
-}

llama2.c-stories110M-pruned50/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eeefac9ec1d09c9d64dc87cc943a67c1ac2fbba888c7f7c94d12714e28e9cde3
-size 384641204

llama2.c-stories110M-pruned50/recipe.yaml DELETED Viewed

@@ -1,6 +0,0 @@
-test_stage:
-  obcq_modifiers:
-    SparseGPTModifier:
-      sparsity: 0.5
-      sequential_update: true
-      targets: ['re:model.layers.\d*$']

llama2.c-stories110M-pruned50/special_tokens_map.json DELETED Viewed

@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "</s>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}

llama2.c-stories110M-pruned50/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

llama2.c-stories110M-pruned50/tokenizer.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
-size 499723

llama2.c-stories110M-pruned50/tokenizer_config.json DELETED Viewed

@@ -1,38 +0,0 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [],
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "model_max_length": 2048,
-  "pad_token": "</s>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": true
-}