from model_functions import *
from preprocessor import *
import streamlit as st
import pandas as pd


@st.cache_data
def load_example_file(file):
    # Read an example chat archive from disk so it can be offered for download.
    with open(file, "rb") as f:
        return f.read()


def main():
    # Load models
    tokenizer_sentiment, model_sentiment = load_sentiment_analyzer()
    tokenizer_summary, model_summary = load_summarizer()
    pipe_ner = load_NER()

    st.title("WhatsApp Analysis Tool")
    st.markdown(
        "This app summarizes WhatsApp chats and provides named entity recognition "
        "as well as sentiment analysis for the conversation."
    )
    st.markdown(
        "**NOTE**: *This app only accepts chats exported from iOS, as the exported "
        "chat format differs from Android's.*"
    )
    st.markdown(
        "Download your WhatsApp chat by going to Settings > Chats > Export Chat, "
        "selecting the chat you want to summarize, and choosing 'Without Media'."
    )
    st.markdown("**Example Files**: Download example zip files to test the app:")

    # Offer the bundled example chats as downloads.
    example_files = {
        "Example 1": "example1.zip",
        "Example 2": "example2.zip",
        "Example 3": "example3.zip",
    }
    for name, file in example_files.items():
        data = load_example_file(file)
        st.download_button(label=name, data=data, file_name=file, mime="application/zip")

    # File uploader
    uploaded_file = st.file_uploader("Choose a file (.zip)", type=['zip'])

    if uploaded_file is not None:
        file_type = detect_file_type(uploaded_file.name)
        if file_type == "zip":
            # Process the file
            data = preprocess_whatsapp_messages(uploaded_file, file_type)
            if data.empty:
                st.write("No messages found or the file could not be processed.")
            else:
                # Date selector
                date_options = data['date'].dt.strftime('%Y-%m-%d').unique()
                selected_date = st.selectbox("Select a date for analysis:", date_options)

                if selected_date:
                    text_for_analysis = get_dated_input(data, selected_date)
                    with st.expander("Show/Hide Original Conversation"):
                        st.markdown(f"```\n{text_for_analysis}\n```", unsafe_allow_html=True)

                    process = st.button('Process')
                    if process:
                        # Perform analysis
                        sentiment = get_sentiment_analysis(text_for_analysis, tokenizer_sentiment, model_sentiment)
                        summary = generate_summary(text_for_analysis, tokenizer_summary, model_summary)
                        ner_results = get_NER(summary, pipe_ner)

                        # Display results
                        st.subheader("Sentiment Analysis")
                        st.write("Sentiment:", sentiment)
                        st.subheader("Summary")
                        st.write("Summary:", summary)
                        st.subheader("Named Entity Recognition")
                        ner_df = pd.DataFrame(ner_results, columns=["Word", "Entity Group"])
                        st.write(ner_df)
        else:
            st.error("Unsupported file type. Please upload a .zip file.")
    else:
        st.info("Please upload a file to proceed.")


if __name__ == "__main__":
    main()
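

# ---------------------------------------------------------------------------
# Assumed helper interfaces (sketch only, not the actual modules).
# model_functions.py and preprocessor.py are not shown in this file; the
# signatures below are inferred from how the helpers are called in main() and
# should be treated as assumptions, not a definitive reference.
#
# model_functions.py
#     load_sentiment_analyzer()  -> (tokenizer, model) pair for sentiment
#     load_summarizer()          -> (tokenizer, model) pair for summarization
#     load_NER()                 -> NER pipeline object
#     get_sentiment_analysis(text, tokenizer, model) -> sentiment result
#     generate_summary(text, tokenizer, model)       -> summary string
#     get_NER(text, pipe)        -> iterable of (word, entity_group) pairs
#
# preprocessor.py
#     detect_file_type(filename)                    -> file extension, e.g. "zip"
#     preprocess_whatsapp_messages(file, file_type) -> pandas DataFrame with a
#                                                      datetime "date" column
#     get_dated_input(df, date_str)                 -> that day's messages as a
#                                                      single string
# ---------------------------------------------------------------------------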