iioSnail committed on
Commit
9635637
1 Parent(s): 6083b5f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -2
README.md CHANGED
@@ -22,7 +22,6 @@ tags:
22
 
23
  | | Detect-Acc | Detect-Precision | Detect-Recall | Detect-F1 | Correct-Acc | Correct-Precision | Correct-Recall | Correct-F1 |
24
  |--|--|--|--|--|--|--|--|--|
25
- | Character-level | - | - | - | 87.16 | - | - | - | 91.39 |
26
  | Sentence-level | 84.7 | 77.3 | 81.3 | 79.3 | 84.0 | 75.9 | 79.9 | 77.8 |
27
 
28
 
@@ -31,6 +30,15 @@ tags:
31
 
32
  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/iioSnail/SCOPE/blob/main/ChineseBERT-for-csc_Demo.ipynb)
33
 
 
 
 
 
 
 
 
 
 
34
  ```
35
  from transformers import AutoTokenizer, AutoModel
36
 
@@ -58,14 +66,40 @@ model = AutoModel.from_pretrained("iioSnail/ReaLiSe-for-csc", trust_remote_code=
58
 
59
  model.set_tokenizer(tokenizer) # 使用predict方法前,调用该方法
60
  print(model.predict("我是练习时长两念半的鸽仁练习生蔡徐坤"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  ```
62
 
63
  输出:
64
 
65
  ```
66
- 我是练习时长两年半的鸽人练习生蔡徐坤
67
  ```
68
 
 
69
  # 常见问题
70
 
71
  1. 网络问题,例如:`Connection Error`
 
22
 
23
  | | Detect-Acc | Detect-Precision | Detect-Recall | Detect-F1 | Correct-Acc | Correct-Precision | Correct-Recall | Correct-F1 |
24
  |--|--|--|--|--|--|--|--|--|
 
25
  | Sentence-level | 84.7 | 77.3 | 81.3 | 79.3 | 84.0 | 75.9 | 79.9 | 77.8 |
26
 
27
 
 
30
 
31
  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/iioSnail/SCOPE/blob/main/ChineseBERT-for-csc_Demo.ipynb)
32
 
33
+
34
+ 安装依赖:
35
+
36
+ ```
37
+ !pip install transformers
38
+ !pip install pypinyin
39
+ !pip install boto3
40
+ ```
41
+
42
  ```
43
  from transformers import AutoTokenizer, AutoModel
44
 
 
66
 
67
  model.set_tokenizer(tokenizer) # 使用predict方法前,调用该方法
68
  print(model.predict("我是练习时长两念半的鸽仁练习生蔡徐坤"))
69
+ print(model.predict(["我是练习时长两念半的鸽仁练习生蔡徐坤", "喜换唱跳、rap 和 蓝球"]))
70
+ ```
71
+
72
+ 输出:
73
+
74
+ ```
75
+ 我是练习时长两年半的各仁练习生蔡徐坤
76
+ ['我是练习时长两年半的各仁练习生蔡徐坤', '喜欢唱跳、rap 和 蓝球']
77
+ ```
78
+
79
+ # 模型训练
80
+
81
+ ```
82
+ from transformers import AutoTokenizer, AutoModel
83
+
84
+ tokenizer = AutoTokenizer.from_pretrained("iioSnail/ReaLiSe-for-csc", trust_remote_code=True)
85
+ model = AutoModel.from_pretrained("iioSnail/ReaLiSe-for-csc", trust_remote_code=True)
86
+
87
+ inputs = tokenizer(["我是炼习时长两念半的个人练习生蔡徐坤", "喜换唱跳rap蓝球"],
88
+ text_target=["我是练习时长两年半的个人练习生蔡徐坤", "喜欢唱跳rap篮球"],
89
+ padding=True,
90
+ return_tensors='pt')
91
+ loss = model(**inputs).loss
92
+ print("loss:", loss)
93
+ loss.backward()
94
  ```
95
 
96
  输出:
97
 
98
  ```
99
+ loss: tensor(0.6515, grad_fn=<NllLossBackward0>)
100
  ```
101
 
102
+
103
  # 常见问题
104
 
105
  1. 网络问题,例如:`Connection Error`