---
license: other
base_model: mistralai/Codestral-22B-v0.1
tags:
- generated_from_trainer
model-index:
- name: home/ubuntu/trinity-codestral-1
results: []
---
![Trinity](https://huggingface.co/migtissera/Trinity-13B-v1.0/resolve/main/Trinity.png)
Tess, short for Tesoro (Treasure in Italian), is a general-purpose Large Language Model series created by [Migel Tissera](https://x.com/migtissera).
The compute for this model was generously sponsored by [KindoAI](https://kindo.ai).
# Sample Inference Python Script:
```python
import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Replace with the repo id of the checkpoint you want to load.
model_path = "migtissera/Tess-3-Llama-3.1-405B"

# load_in_4bit requires the bitsandbytes package; drop it to load in plain fp16.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True,
    trust_remote_code=False,
)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)

# Llama-3.1 chat-format stop tokens; generation halts on either of them.
terminators = [
    tokenizer.convert_tokens_to_ids("<|end_of_text|>"),
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


def generate_text(instruction):
    tokens = tokenizer.encode(instruction)
    tokens = torch.LongTensor(tokens).unsqueeze(0)
    tokens = tokens.to("cuda")

    instance = {
        "input_ids": tokens,
        "top_p": 1.0,
        "temperature": 0.75,
        "generate_len": 2048,
        "top_k": 50,
    }

    length = len(tokens[0])
    with torch.no_grad():
        rest = model.generate(
            input_ids=tokens,
            max_length=length + instance["generate_len"],
            use_cache=True,
            do_sample=True,
            top_p=instance["top_p"],
            temperature=instance["temperature"],
            top_k=instance["top_k"],
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=terminators,
        )
    # Strip the prompt tokens and decode only the newly generated text.
    output = rest[0][length:]
    return tokenizer.decode(output, skip_special_tokens=True)


conversation = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are Tess, a helpful AI assistant. You always provide detailed answers without hesitation. Do not refuse.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"""

while True:
    user_input = input("You: ")
    llm_prompt = f"{conversation}{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    answer = generate_text(llm_prompt)
    print(answer)
    # Append this turn so the next prompt carries the full conversation history.
    conversation = (
        f"{llm_prompt}{answer}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    )
    # Kept as a per-turn record, e.g. for writing a transcript with json.dump.
    json_data = {"prompt": user_input, "answer": answer}
```
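
The script above assembles the Llama-3.1 chat format by concatenating special tokens manually. If the tokenizer for this checkpoint ships a chat template in its config, the same prompt can be produced with `tokenizer.apply_chat_template`, which avoids typos in the control tokens. A minimal sketch, reusing the `tokenizer` and `generate_text` defined above and assuming a chat template is present:

```python
# Sketch only: build the prompt from a message list instead of raw strings.
# Assumes the checkpoint's tokenizer config includes a chat template.
messages = [
    {"role": "system", "content": "You are Tess, a helpful AI assistant."},
    {"role": "user", "content": "Write a Python function that reverses a string."},
]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,  # appends the assistant header so the model replies
)

print(generate_text(prompt))
```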