Update README.md
Norod78/distilgpt2-base-pretrained-he

README.md CHANGED
@@ -37,8 +37,8 @@ The Open Super-large Crawled ALMAnaCH coRpus is a huge multilingual corpus obtai
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("Norod78/
-model = AutoModelForCausalLM.from_pretrained("Norod78/
+tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
+model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)
 
 prompt_text = "אני אוהב שוקולד ועוגות"
 max_len = 512
@@ -80,7 +80,10 @@ if input_ids != None:
 print("Updated max_len = " + str(max_len))
 
 stop_token = "<|endoftext|>"
-new_lines = "\
+new_lines = "\
+\
+\
+"
 
 sample_outputs = model.generate(
     input_ids,
@@ -91,7 +94,9 @@ sample_outputs = model.generate(
     num_return_sequences=sample_output_num
 )
 
-print(100 * '-' + "\
+print(100 * '-' + "\
+\t\tOutput\
+" + 100 * '-')
 for i, sample_output in enumerate(sample_outputs):
 
     text = tokenizer.decode(sample_output, skip_special_tokens=True)
@@ -102,7 +107,9 @@ for i, sample_output in enumerate(sample_outputs):
     # Remove all text after 3 newlines
     text = text[: text.find(new_lines) if new_lines else None]
 
-print("\
-
+    print("\
+{}: {}".format(i, text))
+    print("\
+" + 100 * '-')
 
 ```
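
For reference, the updated snippet is easier to read assembled into one piece. Two caveats apply. First, the diff elides the middle of the README, including the code that turns `prompt_text` into `input_ids` and the arguments passed to `model.generate`, so those parts below are generic `transformers` usage, not the author's exact settings, and the value of `sample_output_num` is an assumption (the diff only shows the name). Second, the multi-line string literals this commit adds rely on Python's backslash-newline continuation, which removes both characters, so `new_lines` as committed actually evaluates to the empty string rather than three newlines; the sketch below writes explicit `\n` escapes to match the "Remove all text after 3 newlines" comment. The model names and `pad_token_id=tokenizer.eos_token_id` come directly from the new side of the diff.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
model = AutoModelForCausalLM.from_pretrained(
    "Norod78/distilgpt2-base-pretrained-he",
    pad_token_id=tokenizer.eos_token_id,
)

prompt_text = "אני אוהב שוקולד ועוגות"  # "I love chocolate and cakes"
max_len = 512
sample_output_num = 3  # assumed value; the diff only references the variable

# The diff elides this step; encoding the prompt is the standard way to get input_ids.
input_ids = tokenizer.encode(prompt_text, return_tensors="pt")

stop_token = "<|endoftext|>"
new_lines = "\n\n\n"  # explicit escapes, per the "3 newlines" comment

sample_outputs = model.generate(
    input_ids,
    do_sample=True,        # sampling settings are elided in the diff;
    max_length=max_len,    # these are generic choices, not the author's
    top_k=40,
    top_p=0.92,
    num_return_sequences=sample_output_num,
)

print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    text = tokenizer.decode(sample_output, skip_special_tokens=True)
    # Cut at the stop token, then drop everything after 3 consecutive newlines.
    # Guarding with `in` avoids find() returning -1 and clipping the last char.
    text = text[: text.find(stop_token) if stop_token in text else None]
    text = text[: text.find(new_lines) if new_lines in text else None]
    print("\n{}: {}".format(i, text))
    print("\n" + 100 * '-')
```

Passing `pad_token_id=tokenizer.eos_token_id` at load time, as the commit does, is the usual way to silence the "Setting `pad_token_id` to `eos_token_id`" warning for GPT-2-family models, which ship without a dedicated padding token.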