Spaces:
Running
on
Zero
Running
on
Zero
Commit
•
15ccfd9
1
Parent(s):
152d64b
Refactor translate function to split input text into smaller chunks
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ model, tokenizer = load_model()
|
|
21 |
|
22 |
|
23 |
@spaces.GPU
|
24 |
-
def
|
25 |
source = code_mapping[src_lang]
|
26 |
target = code_mapping[tgt_lang]
|
27 |
translator = pipeline(
|
@@ -36,6 +36,18 @@ def translate(text: str, src_lang: str, tgt_lang: str):
|
|
36 |
return output[0]["translation_text"]
|
37 |
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
description = """
|
40 |
No Language Left Behind (NLLB) is a series of open-source models aiming to provide high-quality translations between 200 language."""
|
41 |
|
|
|
21 |
|
22 |
|
23 |
@spaces.GPU
|
24 |
+
def _translate(text: str, src_lang: str, tgt_lang: str):
|
25 |
source = code_mapping[src_lang]
|
26 |
target = code_mapping[tgt_lang]
|
27 |
translator = pipeline(
|
|
|
36 |
return output[0]["translation_text"]
|
37 |
|
38 |
|
39 |
+
def translate(text: str, src_lang: str, tgt_lang: str):
    """Translate ``text`` one line at a time and return the re-joined result.

    The input is split on newline characters and every non-blank line is
    passed to ``_translate`` on its own, so each call handles a smaller
    piece of text. The translated lines are joined back together with
    newlines, in their original order.

    Args:
        text: Text to translate; may span several lines.
        src_lang: Source language, forwarded unchanged to ``_translate``.
        tgt_lang: Target language, forwarded unchanged to ``_translate``.

    Returns:
        The translated text with the same line structure as the input.
        No extra trailing newline is added beyond what the input had.
    """
    translated_lines = [
        # Blank or whitespace-only lines carry nothing to translate: keep
        # them verbatim to preserve paragraph breaks and skip a model call.
        _translate(line, src_lang, tgt_lang) if line.strip() else line
        for line in text.split("\n")
    ]
    # Joining (rather than appending "\n" after every chunk) mirrors the
    # input's line breaks exactly and avoids repeated string concatenation.
    return "\n".join(translated_lines)
|
49 |
+
|
50 |
+
|
51 |
description = """
|
52 |
No Language Left Behind (NLLB) is a series of open-source models aiming to provide high-quality translations between 200 language."""
|
53 |
|