Update README.md
Norod78/distilgpt2-base-pretrained-he

README.md CHANGED
@@ -37,8 +37,8 @@ The Open Super-large Crawled ALMAnaCH coRpus is a huge multilingual corpus obtai
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("Norod78/
-model = AutoModelForCausalLM.from_pretrained("Norod78/
+tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
+model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)
 
 prompt_text = "אני אוהב שוקולד ועוגות"
 max_len = 512
@@ -80,7 +80,10 @@ if input_ids != None:
 print("Updated max_len = " + str(max_len))
 
 stop_token = "<|endoftext|>"
-new_lines = "\
+new_lines = "\
+\
+\
+"
 
 sample_outputs = model.generate(
     input_ids,
@@ -91,7 +94,9 @@ sample_outputs = model.generate(
     num_return_sequences=sample_output_num
 )
 
-print(100 * '-' + "\
+print(100 * '-' + "\
+\t\tOutput\
+" + 100 * '-')
 for i, sample_output in enumerate(sample_outputs):
 
     text = tokenizer.decode(sample_output, skip_special_tokens=True)
@@ -102,7 +107,9 @@ for i, sample_output in enumerate(sample_outputs):
     # Remove all text after 3 newlines
     text = text[: text.find(new_lines) if new_lines else None]
 
-print("\
-
+    print("\
+{}: {}".format(i, text))
+    print("\
+" + 100 * '-')
 
 ```
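
For reference, the updated snippet is easier to read assembled into one piece. Two caveats apply. First, the diff elides the middle of the README, including the code that turns `prompt_text` into `input_ids` and the arguments passed to `model.generate`, so those parts below are generic `transformers` usage, not the author's exact settings, and the value of `sample_output_num` is an assumption (the diff only shows the name). Second, the multi-line string literals this commit adds rely on Python's backslash-newline continuation, which removes both characters, so `new_lines` as committed actually evaluates to the empty string rather than three newlines; the sketch below writes explicit `\n` escapes to match the "Remove all text after 3 newlines" comment. The model names and `pad_token_id=tokenizer.eos_token_id` come directly from the new side of the diff.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
model = AutoModelForCausalLM.from_pretrained(
    "Norod78/distilgpt2-base-pretrained-he",
    pad_token_id=tokenizer.eos_token_id,
)

prompt_text = "אני אוהב שוקולד ועוגות"  # "I love chocolate and cakes"
max_len = 512
sample_output_num = 3  # assumed value; the diff only references the variable

# The diff elides this step; encoding the prompt is the standard way to get input_ids.
input_ids = tokenizer.encode(prompt_text, return_tensors="pt")

stop_token = "<|endoftext|>"
new_lines = "\n\n\n"  # explicit escapes, per the "3 newlines" comment

sample_outputs = model.generate(
    input_ids,
    do_sample=True,        # sampling settings are elided in the diff;
    max_length=max_len,    # these are generic choices, not the author's
    top_k=40,
    top_p=0.92,
    num_return_sequences=sample_output_num,
)

print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    text = tokenizer.decode(sample_output, skip_special_tokens=True)
    # Cut at the stop token, then drop everything after 3 consecutive newlines.
    # Guarding with `in` avoids find() returning -1 and clipping the last char.
    text = text[: text.find(stop_token) if stop_token in text else None]
    text = text[: text.find(new_lines) if new_lines in text else None]
    print("\n{}: {}".format(i, text))
    print("\n" + 100 * '-')
```

Passing `pad_token_id=tokenizer.eos_token_id` at load time, as the commit does, is the usual way to silence the "Setting `pad_token_id` to `eos_token_id`" warning for GPT-2-family models, which ship without a dedicated padding token.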