Spaces:
Sleeping
Sleeping
huzaifanafees
committed on
Commit
·
46cfa5a
1
Parent(s):
12646ff
Update translator.py
Browse files- translator.py +12 -0
translator.py
CHANGED
@@ -1,7 +1,18 @@
|
|
1 |
import tensorflow as tf
|
2 |
import numpy as np
|
|
|
3 |
from config import config
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
class Translator(tf.Module):
|
6 |
def __init__(self, sp_model_en, sp_model_ur, transformer):
|
7 |
self.sp_model_en = sp_model_en
|
@@ -9,6 +20,7 @@ class Translator(tf.Module):
|
|
9 |
self.transformer = transformer
|
10 |
|
11 |
def __call__(self, sentence, max_length=config.sequence_length):
|
|
|
12 |
sentence = tf.constant(sentence)
|
13 |
if len(sentence.shape) == 0:
|
14 |
sentence = sentence[tf.newaxis]
|
|
|
1 |
import tensorflow as tf
|
2 |
import numpy as np
|
3 |
+
import re
|
4 |
from config import config
|
5 |
|
6 |
+
def clean_text(text):
    """Normalize raw input text before it is tokenized for translation.

    The cleaning steps are, in order: lowercase, drop punctuation at the
    start of the sentence, drop URLs, collapse repeated punctuation, drop
    symbols outside the allowed set, and collapse whitespace.

    Args:
        text: A single string, or a list/tuple of strings (a batch).

    Returns:
        The cleaned string, or a list of cleaned strings when a list/tuple
        was given.
    """
    # The caller accepts either one sentence or a batch of sentences, so a
    # batch is cleaned element-wise instead of failing on ``.lower()``.
    if isinstance(text, (list, tuple)):
        return [clean_text(item) for item in text]

    text = text.lower()

    # Remove punctuation at the start of the sentence. Leading whitespace is
    # consumed as well: it is stripped at the end anyway, and without ``\s*``
    # the anchor never reaches punctuation that follows it. An opening
    # parenthesis is deliberately kept.
    text = re.sub(r'^\s*[^\w\s(]+', '', text)

    # Remove URLs ("http..." also covers "https...").
    text = re.sub(r'http\S+|www\S+', '', text)

    # Reduce runs of the same punctuation mark to one. This pass is the only
    # one that collapses "_" (it is a word character, so the next pass
    # ignores it).
    text = re.sub(r"([*'_.,!?؟،۔()])\1+", r'\1', text)

    # Reduce runs of any other repeated non-word, non-space character to one.
    text = re.sub(r'([^\w\s])\1+', r'\1', text)

    # Remove every character outside the allowed set. The set is written out
    # explicitly with an escaped hyphen; the previous form "'-." was parsed
    # as a range that implicitly allowed ( ) * + as well. The same characters
    # are still allowed here so existing outputs do not change.
    text = re.sub(r"[^\w\s'()*+,\-.?!؟،۔]", '', text)

    # Collapse whitespace runs and trim both ends.
    return re.sub(r'\s+', ' ', text).strip()
|
15 |
+
|
16 |
class Translator(tf.Module):
|
17 |
def __init__(self, sp_model_en, sp_model_ur, transformer):
|
18 |
self.sp_model_en = sp_model_en
|
|
|
20 |
self.transformer = transformer
|
21 |
|
22 |
def __call__(self, sentence, max_length=config.sequence_length):
|
23 |
+
sentence = clean_text(sentence)
|
24 |
sentence = tf.constant(sentence)
|
25 |
if len(sentence.shape) == 0:
|
26 |
sentence = sentence[tf.newaxis]
|