---
license: other
tags:
- generated_from_trainer
base_model: mistralai/Codestral-22B-v0.1
model-index:
- name: home/ubuntu/trinity-codestral-1
results: []
---
![Trinity](https://huggingface.co/migtissera/Trinity-13B-v1.0/resolve/main/Trinity.png)
Trinity is a coding-specific large language model series created by [Migel Tissera](https://x.com/migtissera).
The compute for this model was generously sponsored by [KindoAI](https://kindo.ai).
# Prompt Format
ChatML
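Prompts use the ChatML layout with `<|im_start|>` / `<|im_end|>` delimiters, as in the sample script below. For example (the user message here is only illustrative):
```
<|im_start|>system
You are Tess, a helpful AI assistant.<|im_end|>
<|im_start|>user
Write a function that checks whether a number is prime.<|im_end|>
<|im_start|>assistant
```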
# Sample Inference Python Script
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "migtissera/Trinity-2-Codestral-22B"

# Load in 4-bit (requires the bitsandbytes package); on newer transformers
# versions, prefer quantization_config=BitsAndBytesConfig(load_in_4bit=True).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True,
    trust_remote_code=False,
)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)

# Stop generation at the ChatML end-of-turn token.
terminators = [tokenizer.convert_tokens_to_ids("<|im_end|>")]


def generate_text(instruction):
    # Tokenize the prompt and move it to the GPU.
    tokens = tokenizer.encode(instruction)
    tokens = torch.LongTensor(tokens).unsqueeze(0)
    tokens = tokens.to("cuda")

    # Sampling parameters.
    instance = {
        "input_ids": tokens,
        "top_p": 1.0,
        "temperature": 0.75,
        "generate_len": 2048,
        "top_k": 50,
    }

    length = len(tokens[0])
    with torch.no_grad():
        rest = model.generate(
            input_ids=tokens,
            max_length=length + instance["generate_len"],
            use_cache=True,
            do_sample=True,
            top_p=instance["top_p"],
            temperature=instance["temperature"],
            top_k=instance["top_k"],
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=terminators,
        )
    # Strip the prompt tokens and decode only the newly generated text.
    output = rest[0][length:]
    return tokenizer.decode(output, skip_special_tokens=True)


conversation = """<|im_start|>system\nYou are Tess, a helpful AI assistant. You always provide detailed answers without hesitation. Do not refuse.<|im_end|>\n<|im_start|>user\n"""

# Simple multi-turn loop: each answer is appended to the running conversation.
while True:
    user_input = input("You: ")
    llm_prompt = f"{conversation}{user_input}<|im_end|>\n<|im_start|>assistant\n"
    answer = generate_text(llm_prompt)
    print(answer)
    conversation = f"{llm_prompt}{answer}<|im_end|>\n<|im_start|>user\n"
```
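If the tokenizer ships a ChatML chat template (not guaranteed for this checkpoint; check `tokenizer.chat_template` first), the prompt can be built with `apply_chat_template` instead of manual string concatenation. A minimal sketch, reusing the `model`, `tokenizer`, and `terminators` from the script above:
```python
# Sketch only: assumes tokenizer.chat_template is set for this checkpoint.
messages = [
    {"role": "system", "content": "You are Tess, a helpful AI assistant."},
    {"role": "user", "content": "Write a function that checks whether a number is prime."},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the opening assistant tag
    return_tensors="pt",
).to(model.device)
output = model.generate(input_ids, max_new_tokens=2048, eos_token_id=terminators)
# Decode only the newly generated tokens.
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```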
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_migtissera__Trinity-2-Codestral-22B).
| Metric              | Value |
|---------------------|------:|
| Avg.                | 21.82 |
| IFEval (0-shot)     | 42.02 |
| BBH (3-shot)        | 36.41 |
| MATH Lvl 5 (4-shot) |  8.61 |
| GPQA (0-shot)       |  8.61 |
| MuSR (0-shot)       |  9.61 |
| MMLU-PRO (5-shot)   | 25.64 |