eaysu committed on
Commit
b31c836
1 Parent(s): b5d5b5e

Translating sentences separately

Browse files
Files changed (1) hide show
  1. app.py +24 -22
app.py CHANGED
@@ -1,6 +1,12 @@
1
  import gradio as gr
2
  from transformers import MarianMTModel, MarianTokenizer
3
  import torch
 
 
 
 
 
 
4
 
5
  # Cache for storing models and tokenizers
6
  models_cache = {}
@@ -19,7 +25,7 @@ def load_model(model_name):
19
 
20
  def translate_text(model_name, text):
21
  """
22
- Translate input text using the specified model.
23
  """
24
  if not model_name or not text:
25
  return "Please select a model and provide text for translation."
@@ -28,14 +34,23 @@ def translate_text(model_name, text):
28
  # Load the model and tokenizer
29
  model, tokenizer = load_model(model_name)
30
 
31
- # Tokenize the text
32
- tokens = tokenizer(text, return_tensors="pt", padding=True)
33
- if torch.cuda.is_available():
34
- tokens = {k: v.to('cuda') for k, v in tokens.items()}
 
 
 
 
 
 
 
 
 
 
35
 
36
- # Generate translation
37
- translated = model.generate(**tokens)
38
- return tokenizer.decode(translated[0], skip_special_tokens=True)
39
 
40
  except Exception as e:
41
  return f"Error: {str(e)}"
@@ -44,20 +59,7 @@ def translate_text(model_name, text):
44
  model_options = [
45
  ("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
46
  ("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
47
- ("English to French", "Helsinki-NLP/opus-mt-tc-big-en-fr"),
48
- ("French to English", "Helsinki-NLP/opus-mt-tc-big-fr-en"),
49
- ("English to German", "Helsinki-NLP/opus-mt-en-de"),
50
- ("German to English", "Helsinki-NLP/opus-mt-de-en"),
51
- ("English to Spanish", "Helsinki-NLP/opus-mt-tc-big-en-es"),
52
- ("Spanish to English", "Helsinki-NLP/opus-mt-es-en"),
53
- ("English to Arabic", "Helsinki-NLP/opus-mt-tc-big-en-ar"),
54
- ("Arabic to English", "Helsinki-NLP/opus-mt-tc-big-ar-en"),
55
- ("English to Urdu", "Helsinki-NLP/opus-mt-en-ur"),
56
- ("Urdu to English", "Helsinki-NLP/opus-mt-ur-en"),
57
- ("English to Hindi", "Helsinki-NLP/opus-mt-en-hi"),
58
- ("Hindi to English", "Helsinki-NLP/opus-mt-hi-en"),
59
- ("English to Chinese", "Helsinki-NLP/opus-mt-en-zh"),
60
- ("Chinese to English", "Helsinki-NLP/opus-mt-zh-en"),
61
  ]
62
 
63
  # Create Gradio interface
 
1
  import gradio as gr
2
  from transformers import MarianMTModel, MarianTokenizer
3
  import torch
4
+ import nltk
5
+
6
+ # Download punkt for sentence tokenization
7
+ nltk.download('punkt')
8
+
9
+ from nltk.tokenize import sent_tokenize
10
 
11
  # Cache for storing models and tokenizers
12
  models_cache = {}
 
25
 
26
  def translate_text(model_name, text):
27
  """
28
+ Translate input text sentence by sentence using the specified model.
29
  """
30
  if not model_name or not text:
31
  return "Please select a model and provide text for translation."
 
34
  # Load the model and tokenizer
35
  model, tokenizer = load_model(model_name)
36
 
37
+ # Split text into sentences
38
+ sentences = sent_tokenize(text)
39
+ translated_sentences = []
40
+
41
+ for sentence in sentences:
42
+ # Tokenize the sentence
43
+ tokens = tokenizer(sentence, return_tensors="pt", padding=True)
44
+ if torch.cuda.is_available():
45
+ tokens = {k: v.to('cuda') for k, v in tokens.items()}
46
+
47
+ # Generate translation for the sentence
48
+ translated = model.generate(**tokens)
49
+ translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
50
+ translated_sentences.append(translated_text)
51
 
52
+ # Join translated sentences back into a single string
53
+ return " ".join(translated_sentences)
 
54
 
55
  except Exception as e:
56
  return f"Error: {str(e)}"
 
59
  model_options = [
60
  ("English to Turkish", "Helsinki-NLP/opus-mt-tc-big-en-tr"),
61
  ("Turkish to English", "Helsinki-NLP/opus-mt-tc-big-tr-en"),
62
+ # Add other models here...
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  ]
64
 
65
  # Create Gradio interface