Update README.md
Browse files
README.md
CHANGED
@@ -7,22 +7,26 @@ This is a paraphrasing model for the Persian language. It is based on [the monol
|
|
7 |
```python
|
8 |
|
9 |
>>> # Install the dependency first (run in a shell, not in Python): pip install transformers
|
10 |
-
>>> from transformers import (T5ForConditionalGeneration, AutoTokenizer)
|
11 |
>>> import torch
|
12 |
|
13 |
model_path = 'erfan226/persian-t5-paraphraser'
|
14 |
model = T5ForConditionalGeneration.from_pretrained(model_path)
|
15 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
|
|
16 |
|
17 |
def paraphrase(text):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
return tokenizer.decode(out[0], skip_special_tokens=True)
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
print("Original Paraphrase:", text2)
|
27 |
|
28 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
7 |
```python
|
8 |
|
9 |
>>> # Install the dependency first (run in a shell, not in Python): pip install transformers
|
10 |
+
>>> from transformers import (T5ForConditionalGeneration, AutoTokenizer, pipeline)
|
11 |
>>> import torch
|
12 |
|
13 |
model_path = 'erfan226/persian-t5-paraphraser'
|
14 |
model = T5ForConditionalGeneration.from_pretrained(model_path)
|
15 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
16 |
+
pipe = pipeline(task='text2text-generation', model=model, tokenizer=tokenizer)
|
17 |
|
18 |
def paraphrase(text):
|
19 |
+
for j in range(5):
|
20 |
+
out = pipe(text, encoder_no_repeat_ngram_size=5, do_sample=True, num_beams=5, max_length=128)[0]['generated_text']
|
21 |
+
print("Paraphrase:", out)
|
|
|
22 |
|
23 |
+
text = "این یک مقالهٔ خرد آلمان است. میتوانید با گسترش آن به ویکیپدیا کمک کنید."
|
24 |
+
print("Original:", text)
|
25 |
+
paraphrase(text)
|
|
|
26 |
|
27 |
+
```
|
28 |
+
|
29 |
+
# Training data
|
30 |
+
|
31 |
+
```text
|
32 |
+
This model was trained on the Persian subset of the [Tapaco dataset](https://huggingface.co/datasets/tapaco).
|