pierreguillou
committed on
Commit
•
d3e0af2
1
Parent(s):
b05819d
Update files/functions.py
Browse files
- files/functions.py +5 -3
files/functions.py
CHANGED
@@ -137,13 +137,14 @@ langdetect2Tesseract = {v:k for k,v in Tesseract2langdetect.items()}
|
|
137 |
|
138 |
## model / feature extractor / tokenizer
|
139 |
|
140 |
-
from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
|
141 |
-
|
142 |
import torch
|
143 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
144 |
|
145 |
# model
|
146 |
-
|
|
|
|
|
|
|
147 |
model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
|
148 |
model.to(device);
|
149 |
|
@@ -153,6 +154,7 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
|
|
153 |
|
154 |
# tokenizer
|
155 |
from transformers import AutoTokenizer
|
|
|
156 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
|
157 |
|
158 |
## General
|
|
|
137 |
|
138 |
## model / feature extractor / tokenizer
|
139 |
|
|
|
|
|
140 |
import torch
|
141 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
142 |
|
143 |
# model
|
144 |
+
from transformers import LayoutLMv2ForTokenClassification
|
145 |
+
|
146 |
+
model_id = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-linelevel-ml384"
|
147 |
+
|
148 |
model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
|
149 |
model.to(device);
|
150 |
|
|
|
154 |
|
155 |
# tokenizer
|
156 |
from transformers import AutoTokenizer
|
157 |
+
tokenizer_id = "xlm-roberta-base"
|
158 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
|
159 |
|
160 |
## General
|