File size: 7,793 Bytes
7730772
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import streamlit as st
import imaplib
import email
from email.header import decode_header
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import re

class EmailProcessor:
    """Fetch recent messages over IMAP and reduce them to plain text.

    Every member is a static method; the class is only a namespace.
    """

    @staticmethod
    def decode_email_content(content, default_charset='utf-8'):
        """Decode *content* to ``str`` without raising on bad encodings.

        Args:
            content: Raw payload. ``bytes`` are decoded; any other value is
                converted with ``str()``.
            default_charset: Charset tried first (normally the one the
                message part declares).

        Returns:
            The decoded text.
        """
        if not isinstance(content, bytes):
            return str(content)
        try:
            return content.decode(default_charset)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the declared charset is unknown to Python
            # (e.g. "unknown-8bit"). ISO-8859-1 maps every byte value, so
            # this fallback cannot fail.
            return content.decode('iso-8859-1')

    @staticmethod
    def clean_text(text):
        """Strip HTML-like tags and collapse whitespace runs to one space.

        Returns an empty string for ``None`` or empty input.
        """
        if not text:
            return ''
        text = re.sub(r'<[^>]+>', '', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def _decode_subject(raw_subject):
        """Return the full decoded Subject header as ``str``."""
        if raw_subject is None:
            # Messages without a Subject header are legal.
            return "(no subject)"
        try:
            fragments = decode_header(raw_subject)
        except email.errors.HeaderParseError:
            # Malformed encoded-word: show the raw header instead of failing.
            return str(raw_subject)
        decoded = []
        for value, charset in fragments:
            if isinstance(value, bytes):
                value = EmailProcessor.decode_email_content(
                    value, charset or 'utf-8'
                )
            decoded.append(value)
        return ''.join(decoded)

    @staticmethod
    def _extract_text(message):
        """Concatenate the decoded text of *message*.

        For multipart messages only ``text/plain`` parts are used; for a
        single-part message the payload is used whatever its content type.
        """
        if message.is_multipart():
            parts = [
                part for part in message.walk()
                if part.get_content_type() == "text/plain"
            ]
        else:
            parts = [message]

        chunks = []
        for part in parts:
            payload = part.get_payload(decode=True)
            if payload:
                charset = part.get_content_charset() or 'utf-8'
                chunks.append(
                    EmailProcessor.decode_email_content(payload, charset)
                )
        return ''.join(chunks)

    @staticmethod
    def _disconnect(imap):
        """Best-effort close of the mailbox and logout of the session."""
        for action in (imap.close, imap.logout):
            try:
                action()
            except (imaplib.IMAP4.error, OSError):
                # close() is illegal unless a mailbox is selected and the
                # socket may already be gone; neither matters during cleanup.
                pass

    @staticmethod
    def get_emails(email_address, password, imap_server, imap_port,
                   max_emails=5):
        """Fetch the most recent messages from the account's INBOX.

        Args:
            email_address: Login name for the IMAP account.
            password: Password (or app password) for the account.
            imap_server: Host name of the IMAP-over-SSL server.
            imap_port: Port of the IMAP-over-SSL server.
            max_emails: Maximum number of messages to return, taken from the
                end of the search result. Defaults to 5.

        Returns:
            ``(emails, None)`` on success, where ``emails`` is a list of
            ``{'subject': str, 'content': str}`` dicts, or ``(None, message)``
            when anything goes wrong.
        """
        imap = None
        try:
            imap = imaplib.IMAP4_SSL(imap_server, int(imap_port))
            imap.login(email_address, password)
            imap.select('INBOX')

            _, message_numbers = imap.search(None, 'ALL')
            ids = []
            if message_numbers and message_numbers[0]:
                ids = message_numbers[0].split()
            # A plain [-0:] slice would return everything, so guard it.
            recent_ids = ids[-max_emails:] if max_emails > 0 else []

            emails = []
            for num in recent_ids:
                _, msg_data = imap.fetch(num, '(RFC822)')
                # A successful fetch yields a (header, body) tuple first;
                # skip anything else (e.g. a message expunged meanwhile).
                if not msg_data or not isinstance(msg_data[0], tuple):
                    continue
                message = email.message_from_bytes(msg_data[0][1])
                emails.append({
                    'subject': EmailProcessor._decode_subject(
                        message["subject"]
                    ),
                    'content': EmailProcessor.clean_text(
                        EmailProcessor._extract_text(message)
                    ),
                })
            return emails, None

        except Exception as e:
            # UI boundary: report every failure as a message, never raise.
            return None, str(e)
        finally:
            # Release the connection on both the success and error paths.
            if imap is not None:
                EmailProcessor._disconnect(imap)

class PhishingDetector:
    """Wrapper around a BERT sequence classifier that scores text.

    The tokenizer and the two-label model are both loaded from
    ``model_path``; the model is moved to CUDA when available (CPU
    otherwise) and switched to evaluation mode.
    """

    def __init__(self, model_path="./phishing_model"):
        """Load tokenizer and model from *model_path*."""
        if torch.cuda.is_available():
            device_name = "cuda"
        else:
            device_name = "cpu"
        self.device = torch.device(device_name)

        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        classifier = BertForSequenceClassification.from_pretrained(
            model_path,
            num_labels=2
        )
        self.model = classifier.to(self.device)
        self.model.eval()

    def predict(self, text):
        """Return the softmax probability of class index 1 for *text*.

        The text is cleaned with ``EmailProcessor.clean_text`` and truncated
        to at most 512 tokens before being fed to the model. The app treats
        the returned value as the phishing score.
        """
        with torch.no_grad():
            encoded = self.tokenizer(
                EmailProcessor.clean_text(text),
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            )

            # Move every input tensor to the device the model lives on.
            on_device = {}
            for name, tensor in encoded.items():
                on_device[name] = tensor.to(self.device)

            logits = self.model(**on_device).logits
            class_probs = torch.nn.functional.softmax(logits, dim=1)
            return class_probs[0][1].item()

# Page header. The emoji in the title was stored as mojibake (UTF-8 bytes
# mis-decoded through a legacy code page); it is restored here.
st.title("📧 Email Phishing Detector")
st.write("Connect your email account to analyze messages for potential phishing attempts.")

# Email configuration in sidebar
with st.sidebar:
    st.header("Email Settings")
    email_address = st.text_input("Email Address", key="email_address_input")
    password = st.text_input("Password", type="password", key="password_input")
    imap_server = st.text_input("IMAP Server", value="imap.gmail.com", key="imap_server_input")
    # All-integer arguments keep the widget returning an int and limit the
    # value to the valid TCP port range.
    imap_port = st.number_input(
        "IMAP Port",
        min_value=1,
        max_value=65535,
        value=993,
        step=1,
        key="imap_port_input",
    )

# Build the detector once; st.cache_resource hands the cached instance back
# on later calls.
@st.cache_resource
def load_detector():
    """Create the PhishingDetector with its default model path."""
    detector_instance = PhishingDetector()
    return detector_instance

# Load the model up front; on failure show the error and record the outcome
# in model_loaded.
try:
    detector = load_detector()
except Exception as load_error:
    st.error(f"Error loading model: {str(load_error)}")
    model_loaded = False
else:
    model_loaded = True

# Manual text analysis: score text pasted by the user.
# The emoji in the header and in the low-risk message were stored as
# mojibake and are restored here.
st.markdown("### 📝 Manual Text Analysis")
manual_text = st.text_area("Enter text to analyze:", height=100, key="manual_text_input")
if st.button("Analyze Text", key="analyze_text_btn") and manual_text.strip():
    if not model_loaded:
        # `detector` is never bound when loading failed; report that instead
        # of crashing with a NameError.
        st.error("The phishing model is not loaded, so the text cannot be analyzed.")
    else:
        with st.spinner("Analyzing text..."):
            phishing_score = detector.predict(manual_text)
            risk_color = "red" if phishing_score > 0.5 else "green"
            st.markdown(f"**Phishing Risk Score:** <span style='color:{risk_color}'>{phishing_score:.2%}</span>", unsafe_allow_html=True)

            # Thresholds: above 0.8 is high, above 0.5 is medium, else low.
            if phishing_score > 0.8:
                st.error("⚠️ High Risk: This text shows strong indicators of being a phishing attempt!")
            elif phishing_score > 0.5:
                st.warning("⚠️ Medium Risk: This text shows some suspicious characteristics.")
            else:
                st.success("✅ Low Risk: This text appears to be legitimate.")

# Email analysis: fetch recent messages over IMAP and score each one.
# The emoji in the header and in the low-risk message were stored as
# mojibake and are restored here.
st.markdown("### 📨 Email Analysis")
# The button is only offered when the model loaded, so `detector` is bound.
if model_loaded and st.button("Analyze Emails", key="analyze_emails_btn"):
    if not email_address or not password:
        st.warning("Please enter your email credentials.")
    else:
        with st.spinner("Connecting to email..."):
            # Pass the port as a plain int regardless of the widget's type.
            emails, error = EmailProcessor.get_emails(email_address, password, imap_server, int(imap_port))

            if error:
                st.error(f"Error connecting to email: {error}")
            elif emails:
                st.success("Successfully retrieved emails!")

                for i, email_data in enumerate(emails):
                    with st.expander(f"Email {i+1}: {email_data['subject']}"):
                        phishing_score = detector.predict(email_data['content'])

                        risk_color = "red" if phishing_score > 0.5 else "green"
                        st.markdown(f"**Phishing Risk Score:** <span style='color:{risk_color}'>{phishing_score:.2%}</span>", unsafe_allow_html=True)

                        # Thresholds: above 0.8 is high, above 0.5 is medium, else low.
                        if phishing_score > 0.8:
                            st.error("⚠️ High Risk: This email shows strong indicators of being a phishing attempt!")
                        elif phishing_score > 0.5:
                            st.warning("⚠️ Medium Risk: This email shows some suspicious characteristics.")
                        else:
                            st.success("✅ Low Risk: This email appears to be legitimate.")

                        # The index keeps each widget key unique.
                        st.text_area("Email Content", email_data['content'], height=100, key=f"email_content_{i}")
            else:
                st.warning("No emails found in inbox.")

# Static help text shown at the bottom of the sidebar.
_INSTRUCTIONS_MD = """
### Instructions
1. Enter your email credentials
2. For Gmail:
   - Use an App Password instead of your regular password
   - Enable 2FA and generate an App Password from Google Account settings
3. Click "Analyze Emails" to scan your recent emails
"""

_ABOUT_MD = """
### About
This application uses a BERT-based model to detect phishing attempts in emails.
You can either:
1. Analyze your emails directly by connecting your email account
2. Manually input text to analyze for phishing content
"""

# Each section is preceded by a horizontal rule.
for _section_md in (_INSTRUCTIONS_MD, _ABOUT_MD):
    st.sidebar.markdown("---")
    st.sidebar.markdown(_section_md)