G2PTL Update
Browse files
README.md
CHANGED
@@ -4,11 +4,11 @@ license: apache-2.0
|
|
4 |
---
|
5 |
|
6 |
|
7 |
-
# G2PTL
|
8 |
|
9 |
## Introduction
|
10 |
|
11 |
-
G2PTL: A Geography-Graph Pre-trained model for address.
|
12 |
|
13 |
|
14 |
## Model description
|
@@ -47,8 +47,8 @@ You can use this model directly with a pipeline for masked language modeling:
|
|
47 |
|
48 |
```Python
|
49 |
>>> from transformers import pipeline, AutoModel, AutoTokenizer
|
50 |
-
>>> model = AutoModel.from_pretrained('
|
51 |
-
>>> tokenizer = AutoTokenizer.from_pretrained('
|
52 |
|
53 |
>>> mask_filler = pipeline(task= 'fill-mask', model= model,tokenizer = tokenizer)
|
54 |
>>> mask_filler("浙江省杭州市[MASK]杭区五常街道阿里巴巴西溪园区")
|
@@ -80,8 +80,8 @@ You can also use this model for multiple [MASK] filling in PyTorch:
|
|
80 |
```python
|
81 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
82 |
import torch
|
83 |
-
model = AutoModel.from_pretrained('
|
84 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
85 |
model.eval()
|
86 |
text = ['浙江省杭州市[MASK][MASK][MASK]五常街道阿里巴巴西溪园区']
|
87 |
encoded_input = tokenizer(text, return_tensors='pt')
|
@@ -101,8 +101,8 @@ Here is how to use this model to get the HTC output of a given text in PyTorch:
|
|
101 |
|
102 |
```python
|
103 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
104 |
-
model = AutoModel.from_pretrained('
|
105 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
106 |
model.eval()
|
107 |
text = "浙江省杭州市五常街道阿里巴巴西溪园区"
|
108 |
encoded_input = tokenizer(text, return_tensors='pt')
|
@@ -119,8 +119,8 @@ Here is how to use this model to get the features/embeddings of a given text in
|
|
119 |
|
120 |
```python
|
121 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
122 |
-
model = AutoModel.from_pretrained('
|
123 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
124 |
model.eval()
|
125 |
text = "浙江省杭州市余杭区五常街道阿里巴巴西溪园区"
|
126 |
encoded_input = tokenizer(text, return_tensors='pt')
|
@@ -133,8 +133,8 @@ Here is how to use this model to get cosine similarity between two address texts
|
|
133 |
```python
|
134 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
135 |
import torch
|
136 |
-
model = AutoModel.from_pretrained('
|
137 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
138 |
model.eval()
|
139 |
text = ["浙江省杭州市余杭区五常街道阿里巴巴西溪园区", "浙江省杭州市阿里巴巴西溪园区"]
|
140 |
encoded_input = tokenizer(text, return_tensors='pt', padding=True)
|
|
|
4 |
---
|
5 |
|
6 |
|
7 |
+
# G2PTL-1
|
8 |
|
9 |
## Introduction
|
10 |
|
11 |
+
G2PTL-1: A Geography-Graph Pre-trained model for address. This work is the first version of G2PTL (v1.0).
|
12 |
|
13 |
|
14 |
## Model description
|
|
|
47 |
|
48 |
```Python
|
49 |
>>> from transformers import pipeline, AutoModel, AutoTokenizer
|
50 |
+
>>> model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
51 |
+
>>> tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
52 |
|
53 |
>>> mask_filler = pipeline(task= 'fill-mask', model= model,tokenizer = tokenizer)
|
54 |
>>> mask_filler("浙江省杭州市[MASK]杭区五常街道阿里巴巴西溪园区")
|
|
|
80 |
```python
|
81 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
82 |
import torch
|
83 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
84 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
85 |
model.eval()
|
86 |
text = ['浙江省杭州市[MASK][MASK][MASK]五常街道阿里巴巴西溪园区']
|
87 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
101 |
|
102 |
```python
|
103 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
104 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
105 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
106 |
model.eval()
|
107 |
text = "浙江省杭州市五常街道阿里巴巴西溪园区"
|
108 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
119 |
|
120 |
```python
|
121 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
122 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
123 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
124 |
model.eval()
|
125 |
text = "浙江省杭州市余杭区五常街道阿里巴巴西溪园区"
|
126 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
133 |
```python
|
134 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
135 |
import torch
|
136 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
137 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
138 |
model.eval()
|
139 |
text = ["浙江省杭州市余杭区五常街道阿里巴巴西溪园区", "浙江省杭州市阿里巴巴西溪园区"]
|
140 |
encoded_input = tokenizer(text, return_tensors='pt', padding=True)
|