pierreguillou committed on
Commit
d3e0af2
1 Parent(s): b05819d

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +5 -3
files/functions.py CHANGED
@@ -137,13 +137,14 @@ langdetect2Tesseract = {v:k for k,v in Tesseract2langdetect.items()}
137
 
138
  ## model / feature extractor / tokenizer
139
 
140
- from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
141
-
142
  import torch
143
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
144
 
145
  # model
146
- # tokenizer = LayoutXLMTokenizerFast.from_pretrained(model_id)
 
 
 
147
  model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
148
  model.to(device);
149
 
@@ -153,6 +154,7 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
153
 
154
  # tokenizer
155
  from transformers import AutoTokenizer
 
156
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
157
 
158
  ## General
 
137
 
138
  ## model / feature extractor / tokenizer
139
 
 
 
140
  import torch
141
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
142
 
143
  # model
144
+ from transformers import LayoutLMv2ForTokenClassification
145
+
146
+ model_id = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-linelevel-ml384"
147
+
148
  model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
149
  model.to(device);
150
 
 
154
 
155
  # tokenizer
156
  from transformers import AutoTokenizer
157
+ tokenizer_id = "xlm-roberta-base"
158
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
159
 
160
  ## General