Spaces:
Sleeping
Sleeping
File size: 7,793 Bytes
7730772 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import streamlit as st
import imaplib
import email
from email.header import decode_header
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import re
class EmailProcessor:
    """Fetch messages over IMAP and reduce them to plain text.

    Every method is static; the class is only used as a namespace.
    """

    # How many of the most recent inbox messages ``get_emails`` returns.
    _RECENT_COUNT = 5

    @staticmethod
    def decode_email_content(content, default_charset='utf-8'):
        """Return *content* as ``str``, decoding bytes as leniently as needed.

        Args:
            content: Bytes to decode, or any other object (returned via
                ``str()``).
            default_charset: Charset to try first. Falsy values mean UTF-8.

        Returns:
            The decoded text. Bytes are tried as ``default_charset``, then
            UTF-8, then ISO-8859-1.
        """
        if not isinstance(content, bytes):
            return str(content)

        candidates = [default_charset or 'utf-8']
        if 'utf-8' not in candidates:
            candidates.append('utf-8')

        for charset in candidates:
            try:
                return content.decode(charset)
            except (UnicodeDecodeError, LookupError):
                # LookupError: the message declared a charset Python does not
                # know. Treat it like undecodable data, not a fatal error.
                continue

        # ISO-8859-1 maps every byte to a code point, so this cannot fail.
        return content.decode('iso-8859-1')

    @staticmethod
    def clean_text(text):
        """Strip HTML-like tags from *text* and collapse whitespace runs."""
        without_tags = re.sub(r'<[^>]+>', '', text)
        collapsed = re.sub(r'\s+', ' ', without_tags)
        return collapsed.strip()

    @staticmethod
    def _decode_subject(message):
        """Return the full decoded Subject header of *message*."""
        raw_subject = message["subject"]
        if raw_subject is None:
            # decode_header() raises on None; a missing header is legal.
            return "(no subject)"

        fragments = []
        for value, charset in decode_header(raw_subject):
            if isinstance(value, bytes):
                value = EmailProcessor.decode_email_content(
                    value, charset or 'utf-8'
                )
            fragments.append(value)
        return ''.join(fragments)

    @staticmethod
    def _decode_part(part):
        """Return the decoded text payload of one message part, or ``""``."""
        payload = part.get_payload(decode=True)
        if not payload:
            return ""
        charset = part.get_content_charset() or 'utf-8'
        return EmailProcessor.decode_email_content(payload, charset)

    @staticmethod
    def _extract_text(message):
        """Return the text body of *message*.

        For multipart messages all ``text/plain`` parts are concatenated.
        If none of them carries text, the ``text/html`` parts are used
        instead, so HTML-only mail is not reported as empty.
        """
        if not message.is_multipart():
            return EmailProcessor._decode_part(message)

        plain_chunks = []
        html_chunks = []
        for part in message.walk():
            content_type = part.get_content_type()
            if content_type == "text/plain":
                plain_chunks.append(EmailProcessor._decode_part(part))
            elif content_type == "text/html":
                html_chunks.append(EmailProcessor._decode_part(part))
        return ''.join(plain_chunks) or ''.join(html_chunks)

    @staticmethod
    def _disconnect(imap):
        """Close the mailbox and log out, ignoring failures of either step."""
        for step in (imap.close, imap.logout):
            try:
                step()
            except (imaplib.IMAP4.error, OSError):
                # Best-effort cleanup: close() is rejected when no mailbox
                # was selected, and the socket may already be gone.
                pass

    @staticmethod
    def get_emails(email_address, password, imap_server, imap_port):
        """Fetch the most recent inbox messages over IMAP with SSL.

        Args:
            email_address: Login name for the IMAP account.
            password: Password (or app password) for the account.
            imap_server: Host name of the IMAP server.
            imap_port: Port of the IMAP server.

        Returns:
            ``(emails, None)`` on success, where ``emails`` is a list of
            ``{'subject': str, 'content': str}`` dicts for up to the last
            five messages. ``(None, error_message)`` on any failure.
        """
        imap = None
        try:
            imap = imaplib.IMAP4_SSL(imap_server, int(imap_port))
            imap.login(email_address, password)
            imap.select('INBOX')
            _, message_numbers = imap.search(None, 'ALL')

            emails = []
            recent = message_numbers[0].split()[-EmailProcessor._RECENT_COUNT:]
            for num in recent:
                _, msg_data = imap.fetch(num, '(RFC822)')
                # Skip results that carry no (envelope, body) tuple.
                if not msg_data or not isinstance(msg_data[0], tuple):
                    continue
                message = email.message_from_bytes(msg_data[0][1])
                body = EmailProcessor._extract_text(message)
                emails.append({
                    'subject': EmailProcessor._decode_subject(message),
                    'content': EmailProcessor.clean_text(body)
                })
            return emails, None
        except Exception as e:
            # Callers test the error string for truthiness, so never return
            # an empty message.
            return None, str(e) or type(e).__name__
        finally:
            # Release the connection on the error paths as well.
            if imap is not None:
                EmailProcessor._disconnect(imap)
class PhishingDetector:
    """Score text with a BERT sequence classifier loaded from disk."""

    def __init__(self, model_path="./phishing_model"):
        """Load tokenizer and two-label classifier from *model_path*.

        The model is moved to CUDA when available, otherwise to the CPU,
        and switched to evaluation mode.
        """
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        classifier = BertForSequenceClassification.from_pretrained(
            model_path,
            num_labels=2
        )
        self.model = classifier.to(self.device)
        self.model.eval()

    def predict(self, text):
        """Return the softmax probability of label index 1 for *text*.

        The text is cleaned with ``EmailProcessor.clean_text`` and truncated
        to at most 512 tokens before it is passed to the model. Callers in
        this file use the result as the phishing risk score.
        """
        with torch.no_grad():
            encoded = self.tokenizer(
                EmailProcessor.clean_text(text),
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            )
            # Move every input tensor to the device the model lives on.
            on_device = {}
            for name, tensor in encoded.items():
                on_device[name] = tensor.to(self.device)

            logits = self.model(**on_device).logits
            class_probs = torch.nn.functional.softmax(logits, dim=1)
            return class_probs[0][1].item()
# Initialize the app
# The emoji is written as an escape so this line stays ASCII-only; the
# literal character had been corrupted by a bad re-encoding.
st.title("\U0001F4E7 Email Phishing Detector")
st.write("Connect your email account to analyze messages for potential phishing attempts.")

# Email configuration in sidebar
with st.sidebar:
    st.header("Email Settings")
    email_address = st.text_input("Email Address", key="email_address_input")
    password = st.text_input("Password", type="password", key="password_input")
    imap_server = st.text_input("IMAP Server", value="imap.gmail.com", key="imap_server_input")
    # Restrict the port to the valid TCP range so an out-of-range value is
    # rejected by the widget instead of failing later during connect.
    imap_port = st.number_input(
        "IMAP Port",
        min_value=1,
        max_value=65535,
        value=993,
        step=1,
        key="imap_port_input",
    )
# Build the detector once; st.cache_resource reuses it across reruns.
@st.cache_resource
def load_detector():
    """Create and return the ``PhishingDetector`` used by the app."""
    detector_instance = PhishingDetector()
    return detector_instance
# Load the detector up front and record whether that worked in
# ``model_loaded``; a failure is shown in the page instead of raised.
try:
    detector = load_detector()
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    model_loaded = False
else:
    model_loaded = True
# Add manual text analysis option
# Emoji are written as escapes so the source stays ASCII-only; the literal
# characters had been corrupted by a bad re-encoding.
# NOTE(review): the heading icon could not be recovered exactly from the
# corrupted bytes; the memo emoji is a reconstruction - confirm.
st.markdown("### \U0001F4DD Manual Text Analysis")
manual_text = st.text_area("Enter text to analyze:", height=100, key="manual_text_input")

# Only offer analysis when the model loaded: ``detector`` is not defined
# otherwise. This matches the guard used by the email analysis section.
if model_loaded and st.button("Analyze Text", key="analyze_text_btn") and manual_text.strip():
    with st.spinner("Analyzing text..."):
        phishing_score = detector.predict(manual_text)

    risk_color = "red" if phishing_score > 0.5 else "green"
    st.markdown(f"**Phishing Risk Score:** <span style='color:{risk_color}'>{phishing_score:.2%}</span>", unsafe_allow_html=True)

    # Thresholds: above 0.8 is high risk, above 0.5 medium, otherwise low.
    if phishing_score > 0.8:
        st.error("\u26a0\ufe0f High Risk: This text shows strong indicators of being a phishing attempt!")
    elif phishing_score > 0.5:
        st.warning("\u26a0\ufe0f Medium Risk: This text shows some suspicious characteristics.")
    else:
        st.success("\u2705 Low Risk: This text appears to be legitimate.")
# Email analysis: fetch recent inbox messages and score each one.
# Emoji are written as escapes so the source stays ASCII-only; the literal
# characters had been corrupted by a bad re-encoding.
st.markdown("### \U0001F4E8 Email Analysis")

if model_loaded and st.button("Analyze Emails", key="analyze_emails_btn"):
    if not email_address or not password:
        st.warning("Please enter your email credentials.")
    else:
        with st.spinner("Connecting to email..."):
            emails, error = EmailProcessor.get_emails(email_address, password, imap_server, imap_port)

        if error:
            st.error(f"Error connecting to email: {error}")
        elif emails:
            st.success("Successfully retrieved emails!")
            for i, email_data in enumerate(emails):
                with st.expander(f"Email {i+1}: {email_data['subject']}"):
                    phishing_score = detector.predict(email_data['content'])
                    risk_color = "red" if phishing_score > 0.5 else "green"
                    st.markdown(f"**Phishing Risk Score:** <span style='color:{risk_color}'>{phishing_score:.2%}</span>", unsafe_allow_html=True)

                    # Thresholds: above 0.8 high risk, above 0.5 medium,
                    # otherwise low.
                    if phishing_score > 0.8:
                        st.error("\u26a0\ufe0f High Risk: This email shows strong indicators of being a phishing attempt!")
                    elif phishing_score > 0.5:
                        st.warning("\u26a0\ufe0f Medium Risk: This email shows some suspicious characteristics.")
                    else:
                        st.success("\u2705 Low Risk: This email appears to be legitimate.")

                    # The key must be unique per widget, hence the index.
                    st.text_area("Email Content", email_data['content'], height=100, key=f"email_content_{i}")
        else:
            st.warning("No emails found in inbox.")
# Static sidebar help: each section is preceded by a horizontal rule.
_INSTRUCTIONS_MD = """
### Instructions
1. Enter your email credentials
2. For Gmail:
- Use an App Password instead of your regular password
- Enable 2FA and generate an App Password from Google Account settings
3. Click "Analyze Emails" to scan your recent emails
"""

_ABOUT_MD = """
### About
This application uses a BERT-based model to detect phishing attempts in emails.
You can either:
1. Analyze your emails directly by connecting your email account
2. Manually input text to analyze for phishing content
"""

for _section_md in (_INSTRUCTIONS_MD, _ABOUT_MD):
    st.sidebar.markdown("---")
    st.sidebar.markdown(_section_md)
|