import streamlit as st
import imaplib
import email
from email.header import decode_header
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import re

class EmailProcessor:
    """Namespace of static helpers for fetching mail over IMAP and cleaning text.

    The class holds no state; every method is a ``@staticmethod``.
    """

    @staticmethod
    def decode_email_content(content, default_charset='utf-8'):
        """Decode message bytes into text without raising on bad charsets.

        Args:
            content: Value to decode. Anything that is not ``bytes`` is
                returned as ``str(content)``.
            default_charset: Charset to try first (usually the one declared
                by the message part).

        Returns:
            The decoded text.
        """
        if not isinstance(content, bytes):
            return str(content)

        # The declared charset comes from the message itself, so it may be
        # wrong for the data (UnicodeDecodeError) or not a codec Python knows
        # at all (LookupError). UTF-8 is tried next as the most likely guess.
        for charset in (default_charset or 'utf-8', 'utf-8'):
            try:
                return content.decode(charset)
            except (UnicodeDecodeError, LookupError):
                continue

        # ISO-8859-1 maps every byte value to a code point, so this last
        # resort cannot fail.
        return content.decode('iso-8859-1')

    @staticmethod
    def clean_text(text):
        """Remove tag-like ``<...>`` spans and collapse whitespace runs.

        Args:
            text: The string to clean.

        Returns:
            The text with tags removed, every whitespace run replaced by a
            single space, and leading/trailing whitespace stripped.
        """
        text = re.sub(r'<[^>]+>', '', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def _decode_subject(raw_subject):
        """Return the Subject header as text, decoding every encoded fragment."""
        # Local import keeps this block self-contained.
        from email.errors import HeaderParseError

        if raw_subject is None:
            # Messages are not required to carry a Subject header.
            return '(no subject)'

        try:
            fragments = decode_header(raw_subject)
        except HeaderParseError:
            # Malformed encoded-word: show the raw header rather than fail.
            return str(raw_subject)

        decoded = []
        for value, charset in fragments:
            if isinstance(value, bytes):
                value = EmailProcessor.decode_email_content(value, charset or 'utf-8')
            decoded.append(value)
        return ''.join(decoded)

    @staticmethod
    def _extract_body(message):
        """Return the text body of a parsed message, or '' when there is none."""
        if not message.is_multipart():
            payload = message.get_payload(decode=True)
            if not payload:
                return ''
            charset = message.get_content_charset() or 'utf-8'
            return EmailProcessor.decode_email_content(payload, charset)

        plain_parts = []
        html_parts = []
        for part in message.walk():
            content_type = part.get_content_type()
            if content_type not in ('text/plain', 'text/html'):
                continue
            payload = part.get_payload(decode=True)
            if not payload:
                continue
            charset = part.get_content_charset() or 'utf-8'
            text = EmailProcessor.decode_email_content(payload, charset)
            if content_type == 'text/plain':
                plain_parts.append(text)
            else:
                html_parts.append(text)

        # Prefer plain text; HTML is only a fallback for messages that carry
        # no text/plain part (the caller strips the tags via clean_text).
        return ''.join(plain_parts) or ''.join(html_parts)

    @staticmethod
    def get_emails(email_address, password, imap_server, imap_port, limit=5):
        """Fetch messages from the account's INBOX over IMAP-over-SSL.

        Args:
            email_address: Login name for the IMAP account.
            password: Password for the IMAP account.
            imap_server: IMAP host name.
            imap_port: IMAP SSL port; converted with ``int()``.
            limit: Maximum number of messages to return, taken from the end
                of the search result. Defaults to 5.

        Returns:
            ``(emails, None)`` on success, where ``emails`` is a list of
            dicts with ``'subject'`` and ``'content'`` keys, or
            ``(None, error_message)`` when any step fails.
        """
        try:
            # The context manager logs out even if a later step raises, so a
            # failure mid-fetch does not leave the connection open.
            with imaplib.IMAP4_SSL(imap_server, int(imap_port)) as imap:
                imap.login(email_address, password)
                # Read-only selection: scanning the inbox must not mark the
                # user's messages as read or alter the mailbox.
                imap.select('INBOX', readonly=True)

                _, message_numbers = imap.search(None, 'ALL')
                # The search data can be [None] or [b''] for an empty result.
                if message_numbers and message_numbers[0]:
                    message_ids = message_numbers[0].split()
                else:
                    message_ids = []
                # A slice of [-0:] would select everything, hence the guard.
                recent_ids = message_ids[-limit:] if limit > 0 else []

                emails = []
                for num in recent_ids:
                    _, msg_data = imap.fetch(num, '(RFC822)')
                    # Skip responses that carry no (envelope, body) tuple.
                    if not msg_data or not isinstance(msg_data[0], tuple):
                        continue
                    message = email.message_from_bytes(msg_data[0][1])

                    emails.append({
                        'subject': EmailProcessor._decode_subject(message["subject"]),
                        'content': EmailProcessor.clean_text(
                            EmailProcessor._extract_body(message)
                        ),
                    })

                imap.close()
            return emails, None

        except Exception as e:
            # Boundary for the UI: report the failure as text, do not raise.
            return None, str(e)

class PhishingDetector:
    """Wrap a two-label BERT sequence classifier and score text with it."""

    def __init__(self, model_path="./phishing_model"):
        """Load the tokenizer and model from ``model_path``.

        The model is moved to CUDA when available (CPU otherwise) and put
        into evaluation mode.
        """
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        classifier = BertForSequenceClassification.from_pretrained(
            model_path,
            num_labels=2
        )
        self.model = classifier.to(self.device)
        self.model.eval()

    def predict(self, text):
        """Return the softmax probability of class index 1 for ``text``.

        The text is cleaned with ``EmailProcessor.clean_text`` and tokenized
        with truncation to 512 tokens. Callers in this file treat the result
        as the phishing score (presumably label 1 is "phishing"; confirm
        against the training setup).
        """
        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            encoded = self.tokenizer(
                EmailProcessor.clean_text(text),
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            )

            # Tensors must live on the same device as the model.
            on_device = {name: tensor.to(self.device) for name, tensor in encoded.items()}
            logits = self.model(**on_device).logits
            class_probabilities = torch.nn.functional.softmax(logits, dim=1)
            return class_probabilities[0][1].item()

# Page header
st.title("📧 Email Phishing Detector")
st.write("Connect your email account to analyze messages for potential phishing attempts.")

# IMAP connection settings, collected from widgets in the sidebar
st.sidebar.header("Email Settings")
email_address = st.sidebar.text_input("Email Address", key="email_address_input")
password = st.sidebar.text_input("Password", type="password", key="password_input")
imap_server = st.sidebar.text_input("IMAP Server", value="imap.gmail.com", key="imap_server_input")
imap_port = st.sidebar.number_input("IMAP Port", value=993, key="imap_port_input")

# Build the detector through st.cache_resource so the decorated loader is
# what the script calls each time it runs.
@st.cache_resource
def load_detector():
    """Construct a PhishingDetector with its default model path and return it."""
    detector_instance = PhishingDetector()
    return detector_instance

# Load the model once up front. On failure, show the error and record that
# no detector is available via `model_loaded`.
try:
    detector = load_detector()
except Exception as load_error:
    model_loaded = False
    st.error(f"Error loading model: {load_error}")
else:
    model_loaded = True

# Manual text analysis: score free-form text entered by the user.
st.markdown("### 📝 Manual Text Analysis")
manual_text = st.text_area("Enter text to analyze:", height=100, key="manual_text_input")
if st.button("Analyze Text", key="analyze_text_btn"):
    if not model_loaded:
        # `detector` is only bound when loading succeeded; calling it after a
        # failed load would raise NameError, so report the problem instead.
        st.error("The phishing model is not available, so the text cannot be analyzed.")
    elif not manual_text.strip():
        # Give feedback rather than silently ignoring the click.
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner("Analyzing text..."):
            phishing_score = detector.predict(manual_text)
            risk_color = "red" if phishing_score > 0.5 else "green"
            # unsafe_allow_html is needed for the colored span; both
            # interpolated values are generated here, not user-supplied.
            st.markdown(f"**Phishing Risk Score:** <span style='color:{risk_color}'>{phishing_score:.2%}</span>", unsafe_allow_html=True)

            if phishing_score > 0.8:
                st.error("⚠️ High Risk: This text shows strong indicators of being a phishing attempt!")
            elif phishing_score > 0.5:
                st.warning("⚠️ Medium Risk: This text shows some suspicious characteristics.")
            else:
                st.success("✅ Low Risk: This text appears to be legitimate.")

# Inbox analysis: fetch messages with the sidebar credentials and score each.
st.markdown("### 📨 Email Analysis")
if model_loaded and st.button("Analyze Emails", key="analyze_emails_btn"):
    if email_address and password:
        with st.spinner("Connecting to email..."):
            emails, error = EmailProcessor.get_emails(email_address, password, imap_server, imap_port)

            if error:
                st.error(f"Error connecting to email: {error}")
            elif not emails:
                st.warning("No emails found in inbox.")
            else:
                st.success("Successfully retrieved emails!")

                # One expander per message, numbered from 1 for display.
                for position, message_info in enumerate(emails, start=1):
                    subject_line = message_info['subject']
                    body_text = message_info['content']

                    with st.expander(f"Email {position}: {subject_line}"):
                        phishing_score = detector.predict(body_text)

                        if phishing_score > 0.5:
                            risk_color = "red"
                        else:
                            risk_color = "green"
                        st.markdown(f"**Phishing Risk Score:** <span style='color:{risk_color}'>{phishing_score:.2%}</span>", unsafe_allow_html=True)

                        if phishing_score > 0.8:
                            st.error("⚠️ High Risk: This email shows strong indicators of being a phishing attempt!")
                        elif phishing_score > 0.5:
                            st.warning("⚠️ Medium Risk: This email shows some suspicious characteristics.")
                        else:
                            st.success("✅ Low Risk: This email appears to be legitimate.")

                        # Widget keys stay zero-based so each text area keeps a unique key.
                        st.text_area("Email Content", body_text, height=100, key=f"email_content_{position - 1}")
    else:
        st.warning("Please enter your email credentials.")

# Sidebar help text: each section is rendered beneath its own horizontal rule.
_SIDEBAR_SECTIONS = (
    """
### Instructions
1. Enter your email credentials
2. For Gmail:
   - Use an App Password instead of your regular password
   - Enable 2FA and generate an App Password from Google Account settings
3. Click "Analyze Emails" to scan your recent emails
""",
    """
### About
This application uses a BERT-based model to detect phishing attempts in emails.
You can either:
1. Analyze your emails directly by connecting your email account
2. Manually input text to analyze for phishing content
""",
)

for _section_markdown in _SIDEBAR_SECTIONS:
    st.sidebar.markdown("---")
    st.sidebar.markdown(_section_markdown)