File size: 2,324 Bytes
d0196f6 5f20b9d d0196f6 3ef318c d0196f6 d6a4b1f d0196f6 cd10833 d0196f6 d6a4b1f d0196f6 d6a4b1f d0196f6 3ef318c d0196f6 d6a4b1f d0196f6 d6a4b1f d0196f6 cd10833 d0196f6 d6a4b1f d0196f6 d6a4b1f d0196f6 d6a4b1f d0196f6 cd10833 d0196f6 d6a4b1f cd10833 d6a4b1f cd10833 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
---
license: other
base_model: mistralai/Codestral-22B-v0.1
tags:
- generated_from_trainer
model-index:
- name: home/ubuntu/trinity-codestral-1
results: []
---
![Trinity](https://huggingface.co/migtissera/Trinity-13B-v1.0/resolve/main/Trinity.png)
Trinity is a coding-specific Large Language Model series created by [Migel Tissera](https://x.com/migtissera).
The compute for this model was generously sponsored by [KindoAI](https://kindo.ai).
# Prompt Format
ChatML
# Sample Inference Python Script:
```python
import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Hub repository that both the weights and the tokenizer are loaded from.
model_path = "migtissera/Trinity-2-Codestral-22B"

# 4-bit loading is requested through an explicit quantization config:
# passing `load_in_4bit=True` straight to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    trust_remote_code=False,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)

# Token id(s) passed to `generate` as `eos_token_id`: generation stops at
# the ChatML end-of-turn marker.
terminators = [tokenizer.convert_tokens_to_ids("<|im_end|>")]
def generate_text(instruction):
    """Sample a completion for an already-formatted prompt.

    Args:
        instruction: The full prompt text. It is encoded as-is; this
            function does not add any chat markup of its own.

    Returns:
        The decoded text of the newly generated tokens only. The prompt
        tokens are sliced off and special tokens are skipped on decode.
    """
    # Sampling settings used for every call.
    sampling = {
        "top_p": 1.0,
        "temperature": 0.75,
        "generate_len": 2048,
        "top_k": 50,
    }

    # Build a batch of one and move it to the device the model was placed
    # on (the model is loaded with device_map="auto"), instead of assuming
    # a literal "cuda" device.
    tokens = torch.LongTensor(tokenizer.encode(instruction)).unsqueeze(0)
    tokens = tokens.to(model.device)
    prompt_len = tokens.shape[1]

    with torch.no_grad():
        generated = model.generate(
            input_ids=tokens,
            # Single unpadded sequence: every position is a real token.
            # Passing the mask explicitly avoids ambiguity because the pad
            # id below is the same as the EOS id.
            attention_mask=torch.ones_like(tokens),
            # Same budget as max_length = prompt_len + generate_len.
            max_new_tokens=sampling["generate_len"],
            use_cache=True,
            do_sample=True,
            top_p=sampling["top_p"],
            temperature=sampling["temperature"],
            top_k=sampling["top_k"],
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=terminators,
        )

    # Keep only what was generated after the prompt.
    completion = generated[0][prompt_len:]
    return tokenizer.decode(completion, skip_special_tokens=True)
# Running ChatML transcript: a system turn followed by an opened user turn.
# NOTE(review): the system prompt text is kept verbatim, spelling included;
# confirm against the prompt used in fine-tuning before editing it.
conversation = """<|im_start|>system\nYou are Tess, a helful AI assitant. You always provide detailed answers without hesitation. Do not refuse.<|im_end|>\n<|im_start|>user\n"""

while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the chat cleanly instead of with a traceback.
        print()
        break

    # Close the user turn and open the assistant turn for the model to fill.
    llm_prompt = f"{conversation}{user_input}<|im_end|>\n<|im_start|>assistant\n"
    answer = generate_text(llm_prompt)
    print(answer)

    # Record the finished exchange and open the next user turn.
    conversation = f"{llm_prompt}{answer}<|im_end|>\n<|im_start|>user\n"
```