import streamlit as st
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
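# NOTE: each task below builds its Hugging Face pipeline inside the button handler,
# so the model is re-loaded on every click. If startup cost becomes an issue, the
# pipeline constructors could be wrapped in a helper decorated with
# @st.cache_resource so Streamlit keeps them in memory across reruns.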

st.set_page_config(page_title="NLP Super Application", layout="wide")


TASKS = {
    "Home": "Welcome to the NLP Super Application! Select a task from the left to get started.",
    "Sentiment Analysis": "Analyze the sentiment of your text (positive, negative, or neutral).",
    "Named Entity Recognition": "Extract entities like names, dates, and locations from text.",
    "Summarization": "Summarize long pieces of text into concise summaries.",
    "Text Generation": "Generate creative text based on a prompt.",
    "Machine Translation": "Translate text between languages.",
    "Question Answering": "Get answers to questions from a given context.",
    "Text Similarity": "Find semantic similarity between two texts.",
    "Zero-Shot Classification": "Classify text into unseen categories.",
    "Keyword Extraction": "Extract important keywords or phrases from text.",
    "Emotion Detection": "Identify and classify the emotional tone (e.g., joy, sadness, anger, fear, surprise, love) in a given text.",
}


EXAMPLES = {
    "Sentiment Analysis": "I love the simplicity of this app. Itโ€™s amazing!",
    "Named Entity Recognition": "Barack Obama was born on August 4, 1961, in Honolulu, Hawaii.",
    "Summarization": "In the world of technology, artificial intelligence (AI) is a rapidly growing field that has the potential to revolutionize many industries, including healthcare, finance, and education. AI refers to the simulation of human intelligence processes by machines, particularly computer systems. These processes include learning, reasoning, problem-solving, perception, and language understanding. In healthcare, AI can be used to analyze medical data, predict patient outcomes, and assist in diagnosing diseases. In finance, AI algorithms are used for fraud detection, risk management, and personalized financial advice. In education, AI-powered tools are being developed to provide personalized learning experiences for students. Despite its potential, there are challenges and concerns related to the widespread adoption of AI, including data privacy, ethical implications, and the impact on jobs. While AI can bring significant benefits, it is essential to address these concerns to ensure that its integration into society is done in a responsible and ethical manner. As AI continues to evolve, its role in shaping the future of various sectors will only increase, making it one of the most significant technological advancements of the 21st century.",
    "Text Generation": "Once upon a time, in a faraway land, there was a magical forest where",
    "Machine Translation": "Hello, how are you?",
    "Question Answering": {"context": "The Eiffel Tower is located in Paris, France.", "question": "Where is the Eiffel Tower located?"},
    "Text Similarity": ["The quick brown fox jumps over the lazy dog.", "A fast brown fox leaps over a sleeping dog."],
    "Zero-Shot Classification": {"text": "I need to book a flight ticket for my vacation.", "labels": "travel, finance, education"},
    "Keyword Extraction": "Natural Language Processing enables machines to understand human language better.",
    "Emotion Detection": "I feel so excited about this new opportunity!"
}
def count_words(text):
    """Return the number of whitespace-separated words in the given text."""
    words = text.split()
    return len(words)

left_column, right_column = st.columns([2, 12])  # Adjust column width as needed
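# The sidebar widgets below always render in Streamlit's sidebar regardless of which
# column block they are created in; the columns only lay out the main page content.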



# Left column (for task options)
with left_column:
    st.sidebar.title("✨ NLP Tasks ✨")
    st.sidebar.write("Choose a task to get started:")
    task_icons = {
        "Sentiment Analysis": "😃",
        "Named Entity Recognition": "📍",
        "Summarization": "📝",
        "Text Generation": "💬",
        "Machine Translation": "🌍",
        "Question Answering": "❓",
        "Text Similarity": "🔍",
        "Zero-Shot Classification": "⚖️",
        "Keyword Extraction": "🔑",
        "Emotion Detection": "😞😠🙂",
    }

    selected_task = st.sidebar.radio("Select a Task", list(TASKS.keys()), format_func=lambda task: f"{task_icons.get(task, '')} {task}")
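    # format_func only changes how each option is displayed; st.radio still returns
    # the plain task name, which is used as a dictionary key below.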



# Right column (only About Me on Home page)
with right_column:
    if selected_task == "Home":
        st.markdown("# Welcome to Your NLP Application! 👋")
        st.markdown("### About Me 🧑‍💻")
        st.write("**Name:** Shaik Hidaythulla 🤖")
        st.write("**Role:** Data Scientist / NLP Enthusiast 📊 / 🤖")
        st.write("**Email:** shaikhidaythulla07@gmail.com 📧")
        st.write("[GitHub](https://github.com/SHAIK-07) 🔗")
        st.write("[LinkedIn](https://www.linkedin.com/in/shaik-hidaythulla/) 🔗")
        st.markdown("### About Project 📚")
        st.write("This is a comprehensive NLP application that offers a wide range of text processing tasks, including sentiment analysis, summarization, keyword extraction, and more. 🔍📝")

    elif selected_task in TASKS:
        task_emoji = task_icons.get(selected_task, "🔧")  # Default to wrench if no emoji is found
        st.markdown(f"### {task_emoji} {selected_task}")
        st.write(TASKS[selected_task])
    

    try:
        # Sentiment Analysis
        if selected_task == "Sentiment Analysis":
            text = st.text_area("Enter text to analyze sentiment:", value=EXAMPLES["Sentiment Analysis"])
            if st.button("Analyze Sentiment"):
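                # With no model argument, "sentiment-analysis" falls back to transformers'
                # default English sentiment checkpoint, which labels text POSITIVE or
                # NEGATIVE; the NEUTRAL entry below only applies to models that emit it.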
                sentiment_analyzer = pipeline("sentiment-analysis")
                result = sentiment_analyzer(text)
                sentiment_label = result[0]['label']
                confidence = result[0]['score']
                sentiment_to_emoji = {
                    "POSITIVE": "😊",
                    "NEGATIVE": "😞",
                    "NEUTRAL": "😐",
                }
                emoji = sentiment_to_emoji.get(sentiment_label, "🤔")
       
                st.write(f"**Sentiment:** {sentiment_label} {emoji}")
                st.write(f"**Confidence:** {confidence:.2f}")

                
        
        # Named Entity Recognition
        elif selected_task == "Named Entity Recognition":
        
            text = st.text_area("Enter text to extract named entities:", value=EXAMPLES["Named Entity Recognition"])
    
        
            entity_mapping = {
                "PER": "Person",
                "ORG": "Organization",
                "LOC": "Location",
                "MISC": "Miscellaneous",
                "GPE": "Geopolitical Entity",
                "FAC": "Facility",
                "TIME": "Time",
                "DATE": "Date",
                "PERCENT": "Percentage",
                "MONEY": "Money",
                "QUANTITY": "Quantity",
                "ORDINAL": "Ordinal",
                "CARDINAL": "Cardinal",
            }

            entity_to_emoji = {
                "Person": "🧑‍🤝‍🧑",
                "Organization": "🏢",
                "Location": "🌍",
                "Miscellaneous": "🔮",
                "Geopolitical Entity": "🌍",
                "Facility": "🏥",
                "Time": "⏰",
                "Date": "📅",
                "Percentage": "💯",
                "Money": "💰",
                "Quantity": "🔢",
                "Ordinal": "🔢",
                "Cardinal": "🔢",
            }
        
            if st.button("Extract Entities"):
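                # aggregation_strategy="simple" merges word-piece tokens into whole
                # entities; the default NER model reports groups such as PER, ORG,
                # LOC, and MISC, which entity_mapping expands into readable names.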
                ner = pipeline("ner", aggregation_strategy="simple")
                result = ner(text)
                st.write("**Extracted Entities:**")
                for entity in result:
                    entity_type_full = entity_mapping.get(entity['entity_group'], entity['entity_group'])
                    entity_emoji = entity_to_emoji.get(entity_type_full, "❓")
                    st.write(f"{entity['word']} ({entity_type_full} {entity_emoji})")
        
        # Summarization
        elif selected_task == "Summarization":
            text = st.text_area("Enter text to summarize:", value=EXAMPLES["Summarization"])
            word_count = count_words(text)
            st.write(f"**Word Count:** {word_count} words")
            st.write("### Parameters:")
            max_length = st.slider(
                "Maximum Summary Length",
                min_value=20, max_value=200, value=100,
                help="The longest the summary can be. Increase for more detailed summaries."
            )
            min_length = st.slider(
                "Minimum Summary Length",
                min_value=10, max_value=100, value=30,
                help="The shortest the summary can be. Increase for concise summaries."
            )
            if st.button("Summarize"):
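                # With no model argument, transformers picks its default summarization
                # checkpoint (a distilled BART model at the time of writing); pass
                # model=... here to pin a specific one.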
                summarizer = pipeline("summarization")
                result = summarizer(text, max_length=max_length, min_length=min_length)
                st.write(f"**Summary:**\n{result[0]['summary_text']}")
                after_word_count = count_words(result[0]['summary_text'])
                st.write(f"**After Summarization Word Count:** {after_word_count} words")
                
        
        # Text Generation
        elif selected_task == "Text Generation":
            prompt = st.text_area("Enter text to generate from:", value=EXAMPLES["Text Generation"])
            st.write("### Parameters:")
            max_length = st.slider(
                "Maximum Length of Generated Text",
                min_value=20, max_value=200, value=50,
                help="Controls how long the generated text can be. Increase for more content."
            )
            temperature = st.slider(
                "Creativity Level (Temperature)",
                min_value=0.1, max_value=1.5, value=1.0,
                help="Higher values make text more creative but less predictable. Lower values make text more focused."
            )
            top_k = st.slider(
                "Top-k Sampling (Number of Choices)",
                min_value=1, max_value=100, value=50,
                help="Limits the number of words considered at each generation step. Lower values lead to more focused output."
            )
            if st.button("Generate Text"):
                generator = pipeline("text-generation", model="gpt2")
                # do_sample=True is required for temperature and top_k to take effect;
                # greedy decoding would silently ignore both sliders.
                result = generator(prompt, max_length=max_length, temperature=temperature, top_k=top_k, do_sample=True)
                st.write(f"**Generated Text:**\n{result[0]['generated_text']}")

        
        # Machine Translation
        elif selected_task == "Machine Translation":
            
            text = st.text_area("Enter text to translate:", value=EXAMPLES["Machine Translation"])
    
            language_map = {
                "French": "fr",
                "German": "de",
            }

            target_lang = st.selectbox(
                "Select target language (currently French and German only)",
                list(language_map.keys()),
                help="Choose the language you want to translate your text into.",
            )
    
            
            if st.button("Translate Text"):
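                # Task names of the form "translation_en_to_xx" resolve to transformers'
                # default translation checkpoint (a T5 model) for that language pair.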
            
                target_lang_code = language_map[target_lang]
        
        
                pipeline_name = f"translation_en_to_{target_lang_code}"  
                translator = pipeline(pipeline_name)
                result = translator(text)
                st.write(f"**Translated Text:** {result[0]['translation_text']}")
            

        
        # Question Answering
        elif selected_task == "Question Answering":
            context = st.text_area("Enter context:", value=EXAMPLES["Question Answering"]["context"])
            question = st.text_input("Enter question:", value=EXAMPLES["Question Answering"]["question"])
            st.write("### Parameters:")
            top_k = st.slider(
                "Number of Answers to Retrieve",
                min_value=1, max_value=5, value=1,
                help="Retrieve the top-k most relevant answers. Higher values provide more options."
            )
            if st.button("Get Answer"):
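                # The QA pipeline returns a single dict when top_k == 1 and a list of
                # dicts when top_k > 1, which is why both branches below are needed.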
                qa = pipeline("question-answering")
                result = qa(question=question, context=context, top_k=top_k)
                if top_k == 1:
                    st.write(f"**Answer:** {result['answer']}")
                else:
                    for i, ans in enumerate(result):
                        st.write(f"**Answer {i+1}:** {ans['answer']}, **Score:** {ans['score']:.2f}")

        
        # Text Similarity
        elif selected_task == "Text Similarity":
            text1 = st.text_area("Enter first text:", value=EXAMPLES["Text Similarity"][0])
            text2 = st.text_area("Enter second text:", value=EXAMPLES["Text Similarity"][1])
            if st.button("Check Similarity"):
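                # Rough heuristic: the second text is passed as a zero-shot candidate
                # label, so the entailment score stands in for similarity. A dedicated
                # sentence-embedding model would usually give a better similarity measure.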
                similarity = pipeline("zero-shot-classification")
                result = similarity(text1, candidate_labels=[text2])
                st.write(f"**Similarity Score:** {result['scores'][0]:.2f}")
        
        # Zero-Shot Classification
        elif selected_task == "Zero-Shot Classification":
            text = st.text_area("Enter text for classification:", value=EXAMPLES["Zero-Shot Classification"]["text"])
            labels = st.text_input(
                "Enter Candidate Labels (Comma-Separated):",
                value=EXAMPLES["Zero-Shot Classification"]["labels"],
                help="Provide a list of possible categories for classification, separated by commas."
            )
            multi_label = st.checkbox(
                "Allow Multiple Labels",
                value=False,
                help="Check this if the text can belong to more than one category."
            )
            if st.button("Classify"):
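                # The default zero-shot pipeline is an NLI model (facebook/bart-large-mnli
                # at the time of writing); multi_label=True scores each label independently.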
                zero_shot = pipeline("zero-shot-classification")
                result = zero_shot(text, candidate_labels=[label.strip() for label in labels.split(",")], multi_label=multi_label)
                for label, score in zip(result["labels"], result["scores"]):
                    st.write(f"**Label:** {label}, **Score:** {score:.2f}")
        
        
        # Keyword Extraction
        elif selected_task == "Keyword Extraction":
            text = st.text_area("Enter text to extract keywords:", value=EXAMPLES["Keyword Extraction"])
            if st.button("Extract Keywords"):
                try:
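                    # This checkpoint is a token-classification model trained to tag
                    # keyphrases, so reusing the "ner" pipeline with simple aggregation
                    # yields whole keyphrase spans rather than individual word pieces.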
                    model_name = "ml6team/keyphrase-extraction-distilbert-inspec"
                    tokenizer = AutoTokenizer.from_pretrained(model_name)
                    model = AutoModelForTokenClassification.from_pretrained(model_name)
                    extractor = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
                    result = extractor(text)
                    keywords = [item["word"] for item in result]
                    st.write("**Extracted Keywords:**")
                    st.write(", ".join(keywords) if keywords else "No keywords found.")
                except Exception as e:
                    st.error(f"An error occurred during keyword extraction: {str(e)}")

        
        # Emotion Detection
        elif selected_task == "Emotion Detection":
            text = st.text_area("Enter text to analyze emotions:", value=EXAMPLES["Emotion Detection"])
            emotion_to_emoji = {
                "anger": "😡",
                "fear": "😨",
                "joy": "😊",
                "love": "😍",
                "sadness": "😢",
                "surprise": "😲",
                "neutral": "😐",
            }
    
    
            if st.button("Detect Emotion"):
                try:
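                    # top_k=None asks the classification pipeline to return scores for
                    # every emotion class instead of only the single best label.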
            
                    emotion_model = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion")
                    result = emotion_model(text, top_k=None)

                    st.write("**Detected Emotions with Confidence Scores:**")
                    for item in result:
                        emotion = item['label']
                        emoji = emotion_to_emoji.get(emotion, "❓")
                        st.write(f"- {emotion.capitalize()} {emoji}: {item['score']:.2f}")
                except Exception as e:
                    st.error(f"An error occurred while detecting emotions: {str(e)}")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")