kavyamanohar committed on
Commit
998926e
·
verified ·
1 Parent(s): 969634e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from tensorflow.keras.preprocessing.text import Tokenizer
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
+ import re
6
+ from huggingface_hub import from_pretrained_keras
7
+
8
# Fetch the pretrained Keras transliteration model from the Hugging Face Hub.
model = from_pretrained_keras("vrclc/transliteration")

# Source side: a character-level tokenizer fitted on the lowercase Latin
# letters plus the space character.
source_tokens = list('abcdefghijklmnopqrstuvwxyz ')
source_tokenizer = Tokenizer(filters='', char_level=True)
source_tokenizer.fit_on_texts(source_tokens)

# Target side: the Malayalam characters the tokenizer is fitted on, grouped
# by category. The groups are concatenated in this exact order.
malayalam_tokens = (
    # Independent vowels
    [
        'അ', 'ആ', 'ഇ', 'ഈ', 'ഉ', 'ഊ', 'ഋ', 'ൠ', 'ഌ', 'ൡ', 'എ', 'ഏ', 'ഐ', 'ഒ', 'ഓ', 'ഔ',
    ]
    # Consonants
    + [
        'ക', 'ഖ', 'ഗ', 'ഘ', 'ങ', 'ച', 'ഛ', 'ജ', 'ഝ', 'ഞ',
        'ട', 'ഠ', 'ഡ', 'ഢ', 'ണ', 'ത', 'ഥ', 'ദ', 'ധ', 'ന',
        'പ', 'ഫ', 'ബ', 'ഭ', 'മ', 'യ', 'ര', 'ല', 'വ', 'ശ',
        'ഷ', 'സ', 'ഹ', 'ള', 'ഴ', 'റ',
    ]
    # Chillu letters
    + [
        'ൺ', 'ൻ', 'ർ', 'ൽ', 'ൾ',
    ]
    # Additional characters
    + [
        'ം', 'ഃ', '്',
    ]
    # Vowel modifiers / signs, followed by the space character
    + [
        'ാ', 'ി', 'ീ', 'ു', 'ൂ', 'ൃ', 'ൄ', 'െ', 'േ', 'ൈ', 'ൊ', 'ോ', 'ൌ', 'ൗ', ' ',
    ]
)

# Character-level tokenizer for the Malayalam side.
target_tokenizer = Tokenizer(filters='', char_level=True)
target_tokenizer.fit_on_texts(malayalam_tokens)

# The padded input length is read from the model's "encoder_input" layer.
_encoder_input_layer = model.get_layer("encoder_input")
max_seq_length = _encoder_input_layer.input_shape[0][1]
38
+
39
def _transliterate_word(word, model, source_tokenizer, target_tokenizer, max_seq_length):
    """Encode one lower-cased word, run the model on it and decode the output."""
    encoded = source_tokenizer.texts_to_sequences([word])[0]
    # NOTE(review): `truncating` is not passed, so the pad_sequences default
    # applies to words longer than max_seq_length -- confirm that is intended.
    padded = pad_sequences([encoded], maxlen=max_seq_length, padding='post')
    prediction = model.predict(padded)
    predicted_indices = np.argmax(prediction, axis=-1)[0]
    index_to_char = target_tokenizer.index_word
    # Index 0 is skipped, as in the original code. `.get` drops any index the
    # target tokenizer does not know instead of raising KeyError.
    return ''.join(index_to_char.get(idx, '') for idx in predicted_indices if idx != 0)


def transliterate_with_split_tokens(input_text, model, source_tokenizer, target_tokenizer, max_seq_length):
    """Transliterate the ASCII-letter words of ``input_text``, keeping the rest.

    The text is split into maximal runs of ASCII letters and maximal runs of
    everything else (digits, whitespace, punctuation, ...). Letter runs are
    lower-cased and transliterated through ``model``; all other runs are copied
    to the output unchanged, in their original position.

    Args:
        input_text: Text to transliterate. A falsy value (``""`` or ``None``)
            yields an empty string.
        model: Object whose ``predict`` method is called on the padded,
            integer-encoded word.
        source_tokenizer: Tokenizer providing ``texts_to_sequences`` for the
            input characters.
        target_tokenizer: Tokenizer providing the ``index_word`` mapping used
            to decode the predicted indices.
        max_seq_length: Length every encoded word is padded to.

    Returns:
        The transliterated string.
    """
    # Handle empty input
    if not input_text:
        return ""

    # Each regex match is a (letters, non_letters) pair with exactly one
    # non-empty member.
    segments = re.findall(r"([a-zA-Z]+)|([^a-zA-Z]+)", input_text)

    cache = {}  # lower-cased word -> transliteration, so repeats predict once
    pieces = []
    for word, other in segments:
        if not word:
            pieces.append(other)
            continue
        # Convert to lowercase to handle mixed case
        word = word.lower()
        if word not in cache:
            cache[word] = _transliterate_word(
                word, model, source_tokenizer, target_tokenizer, max_seq_length
            )
        pieces.append(cache[word])

    return ''.join(pieces)
68
+
69
+ # Create Gradio interface with enhanced features
70
def create_transliteration_interface():
    """Build the Gradio interface for the English-to-Malayalam transliterator.

    Gradio calls ``fn`` with one argument per input component. The interface
    has a single textbox, so the five-parameter
    ``transliterate_with_split_tokens`` is wrapped in a one-argument closure
    that supplies the module-level model, tokenizers and sequence length.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    def transliterate(input_text):
        # Bind the module-level resources; only the text comes from the UI.
        return transliterate_with_split_tokens(
            input_text, model, source_tokenizer, target_tokenizer, max_seq_length
        )

    # Define input and output components
    input_textbox = gr.Textbox(
        lines=3,
        placeholder="Enter English text to transliterate to Malayalam...",
        label="Input Text"
    )

    output_textbox = gr.Textbox(
        lines=3,
        label="Transliterated Malayalam Text"
    )

    # Create the Gradio interface, reusing the components defined above
    interface = gr.Interface(
        fn=transliterate,
        inputs=[input_textbox],
        outputs=[output_textbox],
        title="🌟 English to Malayalam Transliterator",
        description="Transliterate English text to Malayalam characters. Simply type or paste your English text, and see the Malayalam transliteration instantly!",
        article="## How to Use\n1. Enter English text in the input box\n2. The transliteration will appear automatically\n3. Works with words, phrases, and sentences",
        examples=[
            ["ente veed"],
            ["malayalam"],
            ["hello world"],
            ["njan pranayam"]
        ],
        theme="huggingface"
    )

    return interface
112
+
113
+ # Launch the Gradio interface
114
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    create_transliteration_interface().launch()