Update README.md
README.md
CHANGED @@ -8,4 +8,97 @@

pipeline_tag: text-generation
---

BangorAI/ALMA-Cymraeg-13B-0.1-4.0bpw-exl2

#### Example
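The script below loads the 4.0bpw EXL2 quantisation with the exllamav2 library and runs an interactive loop that translates English input into Welsh, streaming tokens to the console as they are generated. The Welsh strings in the prompt (`### Saesneg:` for the English source, `### Cymraeg:` for the Welsh output) are left as-is because they are the instruction format this fine-tune expects.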
```python
import time
import sys, os

# Only needed when running the script from inside a clone of the exllamav2
# repository; it can be removed if exllamav2 is installed as a package.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from exllamav2 import (
    ExLlamaV2,
    ExLlamaV2Config,
    ExLlamaV2Cache,
    ExLlamaV2Tokenizer,
)

from exllamav2.generator import (
    ExLlamaV2StreamingGenerator,
    ExLlamaV2Sampler,
)


class ModelClass:
    def __init__(self, generator, tokenizer, model):
        self.generator = generator
        self.tokenizer = tokenizer
        self.model = model


DEBUG = bool(os.environ.get("DEBUG"))


# Initialise the model and cache
def load_model(model_directory, max_seq_len=8192):
    """
    Loads a model from a directory and returns the generator and tokenizer.
    """
    config = ExLlamaV2Config()
    config.model_dir = model_directory
    config.max_seq_len = max_seq_len
    config.prepare()

    model = ExLlamaV2(config)
    print("Loading model: " + model_directory)

    cache = ExLlamaV2Cache(model, lazy=True, max_seq_len=max_seq_len)
    model.load_autosplit(cache)

    tokenizer = ExLlamaV2Tokenizer(config)
    generator = ExLlamaV2StreamingGenerator(model, cache, tokenizer)
    generator.warmup()
    return ModelClass(generator=generator, tokenizer=tokenizer, model=model)


def generate_text(prompt, settings, max_new_tokens):
    sys.stdout.flush()
    input_ids = base_model.tokenizer.encode(prompt)
    generated_tokens = 0
    # Translations are emitted as a single line, so stop at the first newline.
    base_model.generator.set_stop_conditions(["\n"])
    base_model.generator.begin_stream(input_ids, settings)
    time_begin = time.time()
    response = ""

    while True:
        chunk, eos, _ = base_model.generator.stream()
        generated_tokens += 1
        response += chunk
        print(chunk, end="")
        sys.stdout.flush()
        if eos or generated_tokens == max_new_tokens:
            break

    time_total = time.time() - time_begin
    print(f"\nFull response in {time_total:.2f} seconds, {generated_tokens} tokens, "
          f"{generated_tokens / time_total:.2f} tokens/second")
    return response


base_model = load_model("./ALMA-Cymraeg-13B-0.1-4.0bpw-exl2")

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.15               # adjust as needed, e.g. 0.75
settings.top_k = 90                       # adjust as needed, e.g. 50
settings.top_p = 1.0                      # etc.
settings.token_repetition_penalty = 1.15  # etc.
max_new_tokens = 2000                     # etc.

# Kept in Welsh to match the model's training format; it means
# "Translate the following English text into Welsh."
system_prompt = "Cyfieithwch y testun Saesneg canlynol i'r Gymraeg."

while True:
    user_input = input("Saesneg: ")  # the English source text

    prompt = f"{system_prompt}\n\n### Saesneg:\n{user_input}\n\n### Cymraeg:\n"
    if DEBUG: print(f"{prompt}\n\n")
    print("Cymraeg:")  # the Welsh translation is streamed below
    response = generate_text(prompt, settings, max_new_tokens)
    print("=" * 132)
```
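The example assumes the quantised weights are already available in `./ALMA-Cymraeg-13B-0.1-4.0bpw-exl2`. A minimal sketch for fetching them with `huggingface_hub` (the `local_dir` value is simply chosen to match the path the script loads from):

```python
from huggingface_hub import snapshot_download

# Download the EXL2 weights into the directory the example script expects.
snapshot_download(
    repo_id="BangorAI/ALMA-Cymraeg-13B-0.1-4.0bpw-exl2",
    local_dir="./ALMA-Cymraeg-13B-0.1-4.0bpw-exl2",
)
```

Cloning the repository with Git LFS works equally well.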