royleibov
/

granite-3.0-8b-instruct-ZipNN-Compressed

@@ -12,8 +12,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: instruction-following
-        name: IFEval
     metrics:
     - name: pass@1
       type: pass@1
@@ -22,8 +22,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: instruction-following
-        name: MT-Bench
     metrics:
     - name: pass@1
       type: pass@1
@@ -32,8 +32,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: human-exams
-        name: AGI-Eval
     metrics:
     - name: pass@1
       type: pass@1
@@ -42,8 +42,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: human-exams
-        name: MMLU
     metrics:
     - name: pass@1
       type: pass@1
@@ -52,8 +52,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: human-exams
-        name: MMLU-Pro
     metrics:
     - name: pass@1
       type: pass@1
@@ -62,18 +62,18 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: commonsense
-        name: OBQA
     metrics:
     - name: pass@1
       type: pass@1
-      value: 46.60
       veriefied: false
   - task:
       type: text-generation
     dataset:
-        type: commonsense
-        name: SIQA
     metrics:
     - name: pass@1
       type: pass@1
@@ -82,8 +82,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: commonsense
-        name: Hellaswag
     metrics:
     - name: pass@1
       type: pass@1
@@ -92,8 +92,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: commonsense
-        name: WinoGrande
     metrics:
     - name: pass@1
       type: pass@1
@@ -102,8 +102,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: commonsense
-        name: TruthfulQA
     metrics:
     - name: pass@1
       type: pass@1
@@ -112,8 +112,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: reading-comprehension
-        name: BoolQ
     metrics:
     - name: pass@1
       type: pass@1
@@ -122,8 +122,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: reading-comprehension
-        name: SQuAD 2.0
     metrics:
     - name: pass@1
       type: pass@1
@@ -132,8 +132,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: reasoning
-        name: ARC-C
     metrics:
     - name: pass@1
       type: pass@1
@@ -142,8 +142,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: reasoning
-        name: GPQA
     metrics:
     - name: pass@1
       type: pass@1
@@ -152,8 +152,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: reasoning
-        name: BBH
     metrics:
     - name: pass@1
       type: pass@1
@@ -162,8 +162,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: code
-        name: HumanEvalSynthesis
     metrics:
     - name: pass@1
       type: pass@1
@@ -172,8 +172,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: code
-        name: HumanEvalExplain
     metrics:
     - name: pass@1
       type: pass@1
@@ -182,8 +182,8 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: code
-        name: HumanEvalFix
     metrics:
     - name: pass@1
       type: pass@1
@@ -192,55 +192,110 @@ model-index:
   - task:
       type: text-generation
     dataset:
-        type: code
-        name: MBPP
     metrics:
     - name: pass@1
       type: pass@1
-      value: 49.60
-      veriefied: false
   - task:
       type: text-generation
     dataset:
-        type: math
-        name: GSM8K
     metrics:
     - name: pass@1
       type: pass@1
       value: 68.99
-      veriefied: false
   - task:
       type: text-generation
     dataset:
-        type: math
-        name: MATH
     metrics:
     - name: pass@1
       type: pass@1
       value: 30.94
-      veriefied: false
   - task:
       type: text-generation
     dataset:
-        type: multilingual
-        name: PAWS-X (7 langs)
     metrics:
     - name: pass@1
       type: pass@1
       value: 64.94
-      veriefied: false
   - task:
       type: text-generation
     dataset:
-        type: multilingual
-        name: MGSM (6 langs)
     metrics:
     - name: pass@1
       type: pass@1
-      value: 48.20
-      veriefied: false
 ---
 <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png) -->
 <!-- ![image/png](granite-3_0-language-models_Group_1.png) -->
@@ -281,15 +336,19 @@ Install the following libraries:
 pip install torch torchvision torchaudio
 pip install accelerate
 pip install transformers
 ```
 Then, copy the snippet from the section that is relevant for your use case.
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "auto"
-model_path = "ibm-granite/granite-3.0-8b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 # drop device_map if running on CPU
 model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)

   - task:
       type: text-generation
     dataset:
+      type: instruction-following
+      name: IFEval
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: instruction-following
+      name: MT-Bench
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: human-exams
+      name: AGI-Eval
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: human-exams
+      name: MMLU
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: human-exams
+      name: MMLU-Pro
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: commonsense
+      name: OBQA
     metrics:
     - name: pass@1
       type: pass@1
+      value: 46.6
       verified: false
   - task:
       type: text-generation
     dataset:
+      type: commonsense
+      name: SIQA
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: commonsense
+      name: Hellaswag
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: commonsense
+      name: WinoGrande
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: commonsense
+      name: TruthfulQA
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: reading-comprehension
+      name: BoolQ
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: reading-comprehension
+      name: SQuAD 2.0
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: reasoning
+      name: ARC-C
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: reasoning
+      name: GPQA
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: reasoning
+      name: BBH
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: code
+      name: HumanEvalSynthesis
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: code
+      name: HumanEvalExplain
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: code
+      name: HumanEvalFix
     metrics:
     - name: pass@1
       type: pass@1
   - task:
       type: text-generation
     dataset:
+      type: code
+      name: MBPP
     metrics:
     - name: pass@1
       type: pass@1
+      value: 49.6
+      verified: false
   - task:
       type: text-generation
     dataset:
+      type: math
+      name: GSM8K
     metrics:
     - name: pass@1
       type: pass@1
       value: 68.99
+      verified: false
   - task:
       type: text-generation
     dataset:
+      type: math
+      name: MATH
     metrics:
     - name: pass@1
       type: pass@1
       value: 30.94
+      verified: false
   - task:
       type: text-generation
     dataset:
+      type: multilingual
+      name: PAWS-X (7 langs)
     metrics:
     - name: pass@1
       type: pass@1
       value: 64.94
+      verified: false
   - task:
       type: text-generation
     dataset:
+      type: multilingual
+      name: MGSM (6 langs)
     metrics:
     - name: pass@1
       type: pass@1
+      value: 48.2
+      verified: false
+base_model:
+- ibm-granite/granite-3.0-8b-instruct
 ---
+# Disclaimer and Requirements
+This model is a clone of [**ibm-granite/granite-3.0-8b-instruct**](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct) compressed using ZipNN. The model is losslessly compressed to 67% of its original size, saving ~6GB in storage and potentially ~9TB in data transfer **monthly**.
+### Requirement
+In order to use the model, ZipNN is necessary:
+```bash
+pip install zipnn
+```
+### Use This Model
+```python
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+from zipnn import zipnn_hf
+zipnn_hf()
+messages = [
+    {"role": "user", "content": "Who are you?"},
+]
+pipe = pipeline("text-generation", model="royleibov/granite-3.0-8b-instruct-ZipNN-Compressed")
+pipe(messages)
+```
+```python
+# Load model directly
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from zipnn import zipnn_hf
+zipnn_hf()
+model = AutoModelForCausalLM.from_pretrained(
+    "royleibov/granite-3.0-8b-instruct-ZipNN-Compressed",
+    device_map="auto",
+)
+tokenizer = AutoTokenizer.from_pretrained("royleibov/granite-3.0-8b-instruct-ZipNN-Compressed")
+```
+### ZipNN
+ZipNN also allows you to seamlessly save local disk space in your cache after the model is downloaded.
+To compress the cached model, simply run:
+```bash
+python zipnn_compress_path.py safetensors --model royleibov/granite-3.0-8b-instruct-ZipNN-Compressed --hf_cache
+```
+The model will be decompressed automatically and safely as long as `zipnn_hf()` is called at the top of the file, as in the [example above](#use-this-model).
+To decompress manually, simply run:
+```bash
+python zipnn_decompress_path.py --model royleibov/granite-3.0-8b-instruct-ZipNN-Compressed --hf_cache
+```
 <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png) -->
 <!-- ![image/png](granite-3_0-language-models_Group_1.png) -->
 pip install torch torchvision torchaudio
 pip install accelerate
 pip install transformers
+pip install zipnn
 ```
 Then, copy the snippet from the section that is relevant for your use case.
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from zipnn import zipnn_hf
+zipnn_hf()
 device = "auto"
+model_path = "royleibov/granite-3.0-8b-instruct-ZipNN-Compressed"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 # drop device_map if running on CPU
 model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)