Norod78 committed on
Commit 187c509 · 1 Parent(s): beca15b

Update README.md

Norod78/distilgpt2-base-pretrained-he

Files changed (1):
  1. README.md +13 -6

README.md CHANGED
@@ -37,8 +37,8 @@ The Open Super-large Crawled ALMAnaCH coRpus is a huge multilingual corpus obtained…
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("Norod78/hebrew-distilgpt2")
-model = AutoModelForCausalLM.from_pretrained("Norod78/hebrew-distilgpt2", pad_token_id=tokenizer.eos_token_id)
+tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
+model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)
 
 prompt_text = "אני אוהב שוקולד ועוגות"
 max_len = 512
@@ -80,7 +80,10 @@ if input_ids != None:
 print("Updated max_len = " + str(max_len))
 
 stop_token = "<|endoftext|>"
-new_lines = "\n\n\n"
+new_lines = "\
+\
+\
+"
 
 sample_outputs = model.generate(
     input_ids,
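Worth flagging in this hunk: the replacement splits what used to be the escape sequence `\n` across physical lines, leaving a lone backslash at the end of each line. In Python source, a backslash immediately before a newline inside a string literal is a line continuation, so the committed four-line literal evaluates to the empty string rather than `"\n\n\n"`. A minimal sketch of the consequence (the `text` value here is hypothetical, not from the commit):

```
# What the file contains after this commit: "\n" became "\" plus a real newline.
new_lines = "\
\
\
"
# Each trailing backslash is a line continuation and contributes nothing,
# so the literal collapses to the empty string:
assert new_lines == ""          # previously "\n\n\n"

# Downstream, the truncation step becomes a no-op: an empty string is
# falsy, so the slice falls back to text[:None] (the whole string).
text = "first paragraph\n\n\nleftover text"
trimmed = text[: text.find(new_lines) if new_lines else None]
assert trimmed == text          # nothing is removed any more
```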
@@ -91,7 +94,9 @@ sample_outputs = model.generate(
     num_return_sequences=sample_output_num
 )
 
-print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
+print(100 * '-' + "\
+\t\tOutput\
+" + 100 * '-')
 for i, sample_output in enumerate(sample_outputs):
 
     text = tokenizer.decode(sample_output, skip_special_tokens=True)
@@ -102,7 +107,9 @@ for i, sample_output in enumerate(sample_outputs):
     # Remove all text after 3 newlines
     text = text[: text.find(new_lines) if new_lines else None]
 
-    print("\n{}: {}".format(i, text))
-    print("\n" + 100 * '-')
+    print("\
+{}: {}".format(i, text))
+    print("\
+" + 100 * '-')
 
 ```
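The same escaping accident shows up in the print statements in the last two hunks: each `\n` became a backslash followed by a real newline, which a Python string literal swallows as a line continuation. The code still runs, but the banners lose their surrounding newlines. A quick standalone check (not from the commit):

```
# The separator as committed, with line continuations instead of "\n":
banner = 100 * '-' + "\
\t\tOutput\
" + 100 * '-'
# The continuations drop the newlines, so everything lands on one line:
assert banner == 100 * '-' + "\t\tOutput" + 100 * '-'
```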
 
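For reference, here is the README example as it stands after this commit, consolidated into one runnable sketch. The model ID, prompt, `stop_token`, and the `new_lines` truncation line are taken from the diff; everything else (the `max_len` update inside the `if` block, the sampling arguments to `generate`, `sample_output_num`, and the guarded stop-token trim) is filled in as a plausible assumption, and `new_lines` is restored to the presumably intended `"\n\n\n"`:

```
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)

prompt_text = "אני אוהב שוקולד ועוגות"  # "I love chocolate and cakes"
max_len = 512
sample_output_num = 3                   # assumed; not visible in this diff

input_ids = tokenizer.encode(prompt_text, return_tensors="pt")
if input_ids is not None:
    max_len += len(input_ids[0])        # assumed: grow max_len by the prompt length
    print("Updated max_len = " + str(max_len))

stop_token = "<|endoftext|>"
new_lines = "\n\n\n"                    # escape sequences restored

sample_outputs = model.generate(
    input_ids,
    do_sample=True,                     # sampling settings are assumed; the diff
    max_length=max_len,                 # only shows input_ids and
    top_k=50,                           # num_return_sequences
    top_p=0.95,
    num_return_sequences=sample_output_num,
)

print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    text = tokenizer.decode(sample_output, skip_special_tokens=True)
    # Trim at the stop token if it survived decoding (guarded so a miss does
    # not chop the final character; the diff defines stop_token but does not
    # show how it is used)
    idx = text.find(stop_token)
    if idx != -1:
        text = text[:idx]
    # Remove all text after 3 newlines (line taken verbatim from the diff;
    # note that if the marker is absent, find() returns -1 and this drops
    # the last character)
    text = text[: text.find(new_lines) if new_lines else None]
    print("\n{}: {}".format(i, text))
    print("\n" + 100 * '-')
```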