playmak3r committed on
Commit
5afd42b
1 Parent(s): 421f96d

feat: add model module

Browse files
Files changed (2) hide show
  1. server/model.py +37 -0
  2. server/requirements.txt +2 -1
server/model.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctranslate2
2
+ import sentencepiece as spm
3
+
4
+
5
# Locations of the CTranslate2 model directory and of the SentencePiece
# models used for the source and target sides.
modelDir = "./model"
sp_source_model = "./model/spm.ja.nopretok.model"
sp_target_model = "./model/spm.en.nopretok.model"

# Shared translator, created once when the module is imported.
# inter_threads: how many independent operations can run simultaneously.
# intra_threads: per the CTranslate2 docs, the number of threads used by
# each translation.
translator = ctranslate2.Translator(
    modelDir,
    device="cpu",
    intra_threads=4,
    inter_threads=1,
)
10
+
11
+
12
# Lazily created SentencePiece processor for the source language; kept at
# module level so the model file is read once instead of on every call.
_source_processor = None


def _get_source_processor():
    """Return the shared source-side processor, loading it on first use."""
    global _source_processor
    if _source_processor is None:
        _source_processor = spm.SentencePieceProcessor(sp_source_model)
    return _source_processor


def tokenizeBatch(text):
    """Split source text into SentencePiece pieces, always as a batch.

    Args:
        text: Either a single string or a list of strings.

    Returns:
        A list with one list of string pieces per input sentence. A single
        string is wrapped so the result is still a one-element batch.

    Raises:
        TypeError: If ``text`` is neither ``str`` nor ``list``. The previous
            implementation fell through and returned ``None`` here, which
            only failed later inside the translator.
    """
    # Validate before touching the model so bad input fails fast.
    if not isinstance(text, (str, list)):
        raise TypeError(
            f"text must be a str or a list of str, got {type(text).__name__}"
        )

    # NOTE(review): the processor is now shared between calls; this assumes
    # encode() may be called concurrently on one instance -- confirm if the
    # server handles requests from several threads.
    sp = _get_source_processor()
    if isinstance(text, str):
        return [sp.encode(text, out_type=str)]
    return sp.encode(text, out_type=str)
17
+
18
+
19
# Lazily created SentencePiece processor for the target language; kept at
# module level so the model file is read once instead of on every call.
_target_processor = None


def _get_target_processor():
    """Return the shared target-side processor, loading it on first use."""
    global _target_processor
    if _target_processor is None:
        _target_processor = spm.SentencePieceProcessor(sp_target_model)
    return _target_processor


def detokenizeBatch(text: list):
    """Decode SentencePiece pieces back into plain text.

    Args:
        text: The token pieces to decode. The caller in this module passes
            one hypothesis, i.e. a list of string pieces.

    Returns:
        Whatever ``SentencePieceProcessor.decode`` returns for ``text``;
        for a flat list of pieces that is a single decoded string.
    """
    # NOTE(review): the processor is now shared between calls; this assumes
    # decode() may be called concurrently on one instance -- confirm if the
    # server handles requests from several threads.
    return _get_target_processor().decode(text)
23
+
24
+
25
def translate(text: str):
    """Translate ``text`` and return one decoded string per result.

    ``text`` is forwarded unchanged to ``tokenizeBatch``, the resulting
    batch is run through the module-level ``translator``, and the first
    hypothesis of every result is detokenized.

    NOTE(review): the parameter is annotated ``str``, but ``tokenizeBatch``
    also accepts a list of strings -- confirm the intended contract.

    Returns:
        A list of translated strings, in the order the translator returned
        its results.
    """
    decoding_options = dict(
        num_hypotheses=1,
        return_alternatives=False,
        replace_unknowns=False,
        # Forbids repeating any n-gram of this size in the output (this is
        # a different option from CTranslate2's ``repetition_penalty``).
        no_repeat_ngram_size=3,
        disable_unk=True,
        beam_size=5,
        sampling_temperature=0,
    )
    results = translator.translate_batch(
        source=tokenizeBatch(text),
        **decoding_options,
    )

    outputs = []
    for result in results:
        best_hypothesis = result.hypotheses[0]
        outputs.append(''.join(detokenizeBatch(best_hypothesis)))
    return outputs
server/requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  flask
2
  waitress
 
3
  CTranslate2
4
- #fairseq
 
1
  flask
2
  waitress
3
+ flask_cors
4
  CTranslate2
5
+ sentencepiece