davanstrien HF staff committed on
Commit
15ccfd9
1 Parent(s): 152d64b

Refactor translate function to split input text into smaller chunks

Browse files
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -21,7 +21,7 @@ model, tokenizer = load_model()
21
 
22
 
23
  @spaces.GPU
24
- def translate(text: str, src_lang: str, tgt_lang: str):
25
  source = code_mapping[src_lang]
26
  target = code_mapping[tgt_lang]
27
  translator = pipeline(
@@ -36,6 +36,18 @@ def translate(text: str, src_lang: str, tgt_lang: str):
36
  return output[0]["translation_text"]
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  description = """
40
  No Language Left Behind (NLLB) is a series of open-source models aiming to provide high-quality translations between 200 language."""
41
 
 
21
 
22
 
23
  @spaces.GPU
24
+ def _translate(text: str, src_lang: str, tgt_lang: str):
25
  source = code_mapping[src_lang]
26
  target = code_mapping[tgt_lang]
27
  translator = pipeline(
 
36
  return output[0]["translation_text"]
37
 
38
 
39
+ def translate(text: str, src_lang: str, tgt_lang: str):
40
+ # split the input text into smaller chunks
41
+ # split first on newlines
42
+ outputs = ""
43
+ chunks = text.split("\n")
44
+ for chunk in chunks:
45
+ # run the translation on each chunk
46
+ output = _translate(chunk, src_lang, tgt_lang)
47
+ outputs += output + "\n"
48
+ return outputs
49
+
50
+
51
  description = """
52
  No Language Left Behind (NLLB) is a series of open-source models aiming to provide high-quality translations between 200 language."""
53