web-phishing-detection

Sleeping

rmdhirr committed on Jun 15

Commit

8cd35aa

•

1 Parent(s): b685318

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,11 +2,11 @@ import gradio as gr
 import tensorflow as tf
 import numpy as np
 import nltk
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.preprocessing.text import Tokenizer
 import re
 # Load the model
@@ -51,12 +51,11 @@ max_url_length = 180
 max_html_length = 2000
 max_words = 10000
-url_tokenizer = Tokenizer(num_words=max_words, char_level=True)
-html_tokenizer = Tokenizer(num_words=max_words)
-# Dummy fit to initialize tokenizers
-url_tokenizer.fit_on_texts(["dummy"])
-html_tokenizer.fit_on_texts(["dummy"])
 def preprocess_input(input_text, tokenizer, max_length):
     sequences = tokenizer.texts_to_sequences([input_text])
@@ -80,9 +79,9 @@ def get_prediction(input_text, input_type):
 def phishing_detection(input_text, input_type):
     prediction = get_prediction(input_text, input_type)
     if prediction > 0.7:
-        return f"Warning: This site is likely a phishing site!"
     else:
-        return f"Safe: This site is not likely a phishing site."
 iface = gr.Interface(
     fn=phishing_detection,

 import tensorflow as tf
 import numpy as np
 import nltk
+import pickle
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import re
 # Load the model
 max_html_length = 2000
 max_words = 10000
+# Load tokenizers
+with open('url_tokenizer.pkl', 'rb') as f:
+    url_tokenizer = pickle.load(f)
+with open('html_tokenizer.pkl', 'rb') as f:
+    html_tokenizer = pickle.load(f)
 def preprocess_input(input_text, tokenizer, max_length):
     sequences = tokenizer.texts_to_sequences([input_text])
 def phishing_detection(input_text, input_type):
     prediction = get_prediction(input_text, input_type)
     if prediction > 0.7:
+        return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
     else:
+        return f"Safe: This site is not likely a phishing site. ({prediction:.2f})"
 iface = gr.Interface(
     fn=phishing_detection,