"""Streamlit app that scores emails and free text for phishing risk.

The app offers two entry points: pasting text manually, or connecting to an
IMAP mailbox and scoring the most recent messages. Scoring is done by a
fine-tuned BERT sequence classifier loaded from a local directory.
"""

import email
import imaplib
import re
from email.header import decode_header

import streamlit as st
import torch
from transformers import BertTokenizer, BertForSequenceClassification


class EmailProcessor:
    """Helpers for fetching messages over IMAP and reducing them to plain text."""

    @staticmethod
    def decode_email_content(content, default_charset='utf-8'):
        """Return *content* as ``str``.

        Bytes are decoded with *default_charset*; if that charset is unknown
        or the bytes are invalid for it, ISO-8859-1 is used instead. Any
        non-bytes value is passed through ``str()``.
        """
        if isinstance(content, bytes):
            try:
                return content.decode(default_charset)
            except (UnicodeDecodeError, LookupError):
                # LookupError: the message declared a charset Python does not
                # know. ISO-8859-1 maps every byte value, so this cannot fail.
                return content.decode('iso-8859-1')
        return str(content)

    @staticmethod
    def clean_text(text):
        """Strip HTML-like tags from *text* and collapse whitespace runs."""
        text = re.sub(r'<[^>]+>', '', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def decode_subject(raw_subject):
        """Decode a (possibly RFC 2047 encoded) Subject header to ``str``.

        Every fragment returned by ``decode_header`` is decoded with its own
        charset and the results are concatenated. A missing header yields
        an empty string.
        """
        if raw_subject is None:
            return ""
        pieces = []
        for fragment, charset in decode_header(raw_subject):
            if isinstance(fragment, bytes):
                pieces.append(
                    EmailProcessor.decode_email_content(fragment, charset or 'utf-8')
                )
            else:
                pieces.append(fragment)
        return ''.join(pieces)

    @staticmethod
    def extract_body(message):
        """Return the textual body of *message* as an undecorated string.

        For multipart messages the ``text/plain`` parts are concatenated; if
        there are none, the ``text/html`` parts are used instead so that
        HTML-only mail is not scored as empty text. For single-part messages
        the decoded payload is returned regardless of content type.
        """
        if not message.is_multipart():
            payload = message.get_payload(decode=True)
            if not payload:
                return ""
            charset = message.get_content_charset() or 'utf-8'
            return EmailProcessor.decode_email_content(payload, charset)

        plain_parts = []
        html_parts = []
        for part in message.walk():
            content_type = part.get_content_type()
            if content_type not in ("text/plain", "text/html"):
                continue
            payload = part.get_payload(decode=True)
            if not payload:
                continue
            charset = part.get_content_charset() or 'utf-8'
            text = EmailProcessor.decode_email_content(payload, charset)
            if content_type == "text/plain":
                plain_parts.append(text)
            else:
                html_parts.append(text)
        return ''.join(plain_parts or html_parts)

    @staticmethod
    def get_emails(email_address, password, imap_server, imap_port, limit=5):
        """Fetch the most recent messages from the account's INBOX.

        Args:
            email_address: Login name for the IMAP account.
            password: Password (or app password) for the account.
            imap_server: Host name of the IMAP server.
            imap_port: TLS port of the IMAP server.
            limit: Maximum number of messages to fetch, taken from the end of
                the mailbox's message list. Values <= 0 fetch nothing.

        Returns:
            ``(emails, None)`` on success, where ``emails`` is a list of
            ``{'subject': str, 'content': str}`` dicts, or ``(None, message)``
            if anything went wrong while talking to the server.
        """
        try:
            # The context manager logs out even when a later step raises.
            with imaplib.IMAP4_SSL(imap_server, int(imap_port)) as imap:
                imap.login(email_address, password)
                # Read-only so that fetching does not mark messages as seen.
                imap.select('INBOX', readonly=True)
                _, message_numbers = imap.search(None, 'ALL')

                if message_numbers and message_numbers[0]:
                    all_ids = message_numbers[0].split()
                else:
                    all_ids = []
                recent_ids = all_ids[-limit:] if limit > 0 else []

                emails = []
                for num in recent_ids:
                    _, msg_data = imap.fetch(num, '(RFC822)')
                    # A successful fetch yields a (header, bytes) tuple first;
                    # anything else means this message could not be read.
                    if not msg_data or not isinstance(msg_data[0], tuple):
                        continue
                    message = email.message_from_bytes(msg_data[0][1])
                    emails.append({
                        'subject': EmailProcessor.decode_subject(message["subject"]),
                        'content': EmailProcessor.clean_text(
                            EmailProcessor.extract_body(message)
                        ),
                    })

                imap.close()
                return emails, None
        except Exception as e:
            # UI boundary: report the failure to the caller instead of raising.
            return None, str(e)


class PhishingDetector:
    """Wraps a BERT sequence classifier and its tokenizer for phishing scoring."""

    def __init__(self, model_path="./phishing_model"):
        """Load tokenizer and model from *model_path* and put the model in eval mode."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        self.model = BertForSequenceClassification.from_pretrained(
            model_path,
            num_labels=2
        ).to(self.device)
        self.model.eval()

    @torch.no_grad()
    def predict(self, text):
        """Return the softmax probability of class index 1 for *text*.

        The text is cleaned with ``EmailProcessor.clean_text`` and truncated
        to 512 tokens before being passed to the model.
        """
        cleaned_text = EmailProcessor.clean_text(text)
        inputs = self.tokenizer(
            cleaned_text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        outputs = self.model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
        return probabilities[0][1].item()


def show_risk(phishing_score, subject):
    """Render the score and a risk banner; *subject* is 'text' or 'email'."""
    st.markdown(f"**Phishing Risk Score:** {phishing_score:.2%}")
    if phishing_score > 0.8:
        st.error(f"⚠️ High Risk: This {subject} shows strong indicators of being a phishing attempt!")
    elif phishing_score > 0.5:
        st.warning(f"⚠️ Medium Risk: This {subject} shows some suspicious characteristics.")
    else:
        st.success(f"✅ Low Risk: This {subject} appears to be legitimate.")


# Initialize the app
st.title("📧 Email Phishing Detector")
st.write("Connect your email account to analyze messages for potential phishing attempts.")

# Email configuration in sidebar
with st.sidebar:
    st.header("Email Settings")
    email_address = st.text_input("Email Address", key="email_address_input")
    password = st.text_input("Password", type="password", key="password_input")
    imap_server = st.text_input("IMAP Server", value="imap.gmail.com", key="imap_server_input")
    imap_port = st.number_input("IMAP Port", value=993, key="imap_port_input")


# Initialize the model using st.cache_resource
@st.cache_resource
def load_detector():
    """Build the detector once and let Streamlit reuse it across reruns."""
    return PhishingDetector()


try:
    detector = load_detector()
    model_loaded = True
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    detector = None
    model_loaded = False

# Add manual text analysis option
st.markdown("### 📝 Manual Text Analysis")
manual_text = st.text_area("Enter text to analyze:", height=100, key="manual_text_input")

# Guarded by model_loaded so a failed model load cannot reach detector.predict.
if model_loaded and st.button("Analyze Text", key="analyze_text_btn") and manual_text.strip():
    with st.spinner("Analyzing text..."):
        phishing_score = detector.predict(manual_text)
        show_risk(phishing_score, "text")

st.markdown("### 📨 Email Analysis")
if model_loaded and st.button("Analyze Emails", key="analyze_emails_btn"):
    if not email_address or not password:
        st.warning("Please enter your email credentials.")
    else:
        with st.spinner("Connecting to email..."):
            emails, error = EmailProcessor.get_emails(email_address, password, imap_server, imap_port)

            if error:
                st.error(f"Error connecting to email: {error}")
            elif emails:
                st.success("Successfully retrieved emails!")

                for i, email_data in enumerate(emails):
                    with st.expander(f"Email {i+1}: {email_data['subject']}"):
                        phishing_score = detector.predict(email_data['content'])
                        show_risk(phishing_score, "email")
                        st.text_area("Email Content", email_data['content'], height=100, key=f"email_content_{i}")
            else:
                st.warning("No emails found in inbox.")

st.sidebar.markdown("---")
st.sidebar.markdown("""
### Instructions
1. Enter your email credentials
2. For Gmail:
   - Use an App Password instead of your regular password
   - Enable 2FA and generate an App Password from Google Account settings
3. Click "Analyze Emails" to scan your recent emails
""")

st.sidebar.markdown("---")
st.sidebar.markdown("""
### About
This application uses a BERT-based model to detect phishing attempts in emails.
You can either:
1. Analyze your emails directly by connecting your email account
2. Manually input text to analyze for phishing content
""")