erfan226 committed on
Commit 684d6eb
1 Parent(s): c17fe90

Update README.md

Files changed (1)
  1. README.md +14 -10
README.md CHANGED
@@ -7,22 +7,26 @@ This is a paraphrasing model for the Persian language. It is based on [the monol
  ```python
 
  >>> pip install transformers
- >>> from transformers import (T5ForConditionalGeneration, AutoTokenizer)
+ >>> from transformers import (T5ForConditionalGeneration, AutoTokenizer, pipeline)
  >>> import torch
 
  model_path = 'erfan226/persian-t5-paraphraser'
  model = T5ForConditionalGeneration.from_pretrained(model_path)
  tokenizer = AutoTokenizer.from_pretrained(model_path)
+ pipe = pipeline(task='text2text-generation', model=model, tokenizer=tokenizer)
 
  def paraphrase(text):
-     input = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
-     max_size = int(input.input_ids.shape[1] * 1.5 + 10)
-     out = model.generate(**input, encoder_no_repeat_ngram_size=4, do_sample=False, num_beams=10, max_length=max_size, no_repeat_ngram_size=4,)
-     return tokenizer.decode(out[0], skip_special_tokens=True)
+     for j in range(5):
+         out = pipe(text, encoder_no_repeat_ngram_size=5, do_sample=True, num_beams=5, max_length=128)[0]['generated_text']
+         print("Paraphrase:", out)
 
- for text1, text2 in zip(x, y):
-     print("Original:", text1)
-     print("Paraphrase:", paraphrase(text1))
-     print("Original Paraphrase:", text2)
+ text = "این یک مقالهٔ خرد آلمان است. می‌توانید با گسترش آن به ویکی‌پدیا کمک کنید."
+ print("Original:", text)
+ paraphrase(text)
 
- ```
+ ```
+
+ # Training data
+
+ ```python
+ This model was trained on the Persian subset of the [Tapaco dataset](https://huggingface.co/datasets/tapaco).
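Read together, the added lines amount to the following self-contained script. This is a sketch that reassembles the updated usage example: the model id, generation settings, and the Persian sample sentence come from the added lines above, while the indentation, the commented install command, and the translation comment are editorial.

```python
# pip install transformers torch

from transformers import T5ForConditionalGeneration, AutoTokenizer, pipeline

model_path = 'erfan226/persian-t5-paraphraser'
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# text2text-generation wraps tokenization, generation, and decoding in one call.
pipe = pipeline(task='text2text-generation', model=model, tokenizer=tokenizer)

def paraphrase(text):
    # Print five candidate paraphrases; do_sample=True makes each beam search run stochastic.
    for _ in range(5):
        out = pipe(text, encoder_no_repeat_ngram_size=5, do_sample=True,
                   num_beams=5, max_length=128)[0]['generated_text']
        print("Paraphrase:", out)

# "This is a Germany-related stub article. You can help Wikipedia by expanding it."
text = "این یک مقالهٔ خرد آلمان است. می‌توانید با گسترش آن به ویکی‌پدیا کمک کنید."
print("Original:", text)
paraphrase(text)
```

The new "Training data" note points to the TaPaCo corpus. A minimal loading sketch, assuming the Persian split is exposed as the `fa` configuration of the Hub dataset (recent `datasets` releases may also require `trust_remote_code=True` for script-based datasets):

```python
from datasets import load_dataset

# Assumption: Persian is published as the "fa" configuration; TaPaCo ships only a train split.
tapaco_fa = load_dataset("tapaco", "fa")
print(tapaco_fa["train"][0])
```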