Update README.md
Browse files
README.md
CHANGED
@@ -22,7 +22,6 @@ tags:
|
|
22 |
|
23 |
| | Detect-Acc | Detect-Precision | Detect-Recall | Detect-F1 | Correct-Acc | Correct-Precision | Correct-Recall | Correct-F1 |
|
24 |
|--|--|--|--|--|--|--|--|--|
|
25 |
-
| Character-level | - | - | - | 87.16 | - | - | - | 91.39 |
|
26 |
| Sentence-level | 84.7 | 77.3 | 81.3 | 79.3 | 84.0 | 75.9 | 79.9 | 77.8 |
|
27 |
|
28 |
|
@@ -31,6 +30,15 @@ tags:
|
|
31 |
|
32 |
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/iioSnail/SCOPE/blob/main/ChineseBERT-for-csc_Demo.ipynb)
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
```
|
35 |
from transformers import AutoTokenizer, AutoModel
|
36 |
|
@@ -58,14 +66,40 @@ model = AutoModel.from_pretrained("iioSnail/ReaLiSe-for-csc", trust_remote_code=
|
|
58 |
|
59 |
model.set_tokenizer(tokenizer) # 使用predict方法前,调用该方法
|
60 |
print(model.predict("我是练习时长两念半的鸽仁练习生蔡徐坤"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
```
|
62 |
|
63 |
输出:
|
64 |
|
65 |
```
|
66 |
-
|
67 |
```
|
68 |
|
|
|
69 |
# 常见问题
|
70 |
|
71 |
1. 网络问题,例如:`Connection Error`
|
|
|
22 |
|
23 |
| | Detect-Acc | Detect-Precision | Detect-Recall | Detect-F1 | Correct-Acc | Correct-Precision | Correct-Recall | Correct-F1 |
|
24 |
|--|--|--|--|--|--|--|--|--|
|
|
|
25 |
| Sentence-level | 84.7 | 77.3 | 81.3 | 79.3 | 84.0 | 75.9 | 79.9 | 77.8 |
|
26 |
|
27 |
|
|
|
30 |
|
31 |
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/iioSnail/SCOPE/blob/main/ChineseBERT-for-csc_Demo.ipynb)
|
32 |
|
33 |
+
|
34 |
+
安装依赖:
|
35 |
+
|
36 |
+
```
|
37 |
+
!pip install transformers
|
38 |
+
!pip install pypinyin
|
39 |
+
!pip install boto3
|
40 |
+
```
|
41 |
+
|
42 |
```
|
43 |
from transformers import AutoTokenizer, AutoModel
|
44 |
|
|
|
66 |
|
67 |
model.set_tokenizer(tokenizer) # 使用predict方法前,调用该方法
|
68 |
print(model.predict("我是练习时长两念半的鸽仁练习生蔡徐坤"))
|
69 |
+
print(model.predict(["我是练习时长两念半的鸽仁练习生蔡徐坤", "喜换唱跳、rap 和 蓝球"]))
|
70 |
+
```
|
71 |
+
|
72 |
+
输出:
|
73 |
+
|
74 |
+
```
|
75 |
+
我是练习时长两年半的各仁练习生蔡徐坤
|
76 |
+
['我是练习时长两年半的各仁练习生蔡徐坤', '喜欢唱跳、rap 和 蓝球']
|
77 |
+
```
|
78 |
+
|
79 |
+
# 模型训练
|
80 |
+
|
81 |
+
```
|
82 |
+
from transformers import AutoTokenizer, AutoModel
|
83 |
+
|
84 |
+
tokenizer = AutoTokenizer.from_pretrained("iioSnail/ReaLiSe-for-csc", trust_remote_code=True)
|
85 |
+
model = AutoModel.from_pretrained("iioSnail/ReaLiSe-for-csc", trust_remote_code=True)
|
86 |
+
|
87 |
+
inputs = tokenizer(["我是炼习时长两念半的个人练习生蔡徐坤", "喜换唱跳rap蓝球"],
|
88 |
+
text_target=["我是练习时长两年半的个人练习生蔡徐坤", "喜欢唱跳rap篮球"],
|
89 |
+
padding=True,
|
90 |
+
return_tensors='pt')
|
91 |
+
loss = model(**inputs).loss
|
92 |
+
print("loss:", loss)
|
93 |
+
loss.backward()
|
94 |
```
|
95 |
|
96 |
输出:
|
97 |
|
98 |
```
|
99 |
+
loss: tensor(0.6515, grad_fn=<NllLossBackward0>)
|
100 |
```
|
101 |
|
102 |
+
|
103 |
# 常见问题
|
104 |
|
105 |
1. 网络问题,例如:`Connection Error`
|