Spaces:

Reaumur
/

Credit-Card-Fraud-Detection

Sleeping

App Files Community

Reaumur committed on May 9, 2024

Commit

1fef517

verified ·

1 Parent(s): fc56f9b

Upload 6 files

Browse files

Files changed (3) hide show

app.py +7 -3
eda.py +16 -10
prediction.py +0 -1

app.py CHANGED Viewed

@@ -3,15 +3,19 @@ import numpy as np
 import pandas as pd
 import joblib
 import eda
 from eda import eda_page
 import prediction
 from prediction import model_page
 #Load data
-data = pd.read_csv("fraud_test.csv")
 st.header('Milestone 2')
 st.write("""

 import pandas as pd
 import joblib
 import eda
 from eda import eda_page
 import prediction
 from prediction import model_page
 #Load data
+fraud = pd.read_csv('fraud_test.csv')
+# Define the percentage of data you want to sample
+sample_percentage = 50  # Adjust this percentage as needed
+# Randomly sample the data based on the percentage
+data = fraud.sample(frac=sample_percentage/100, random_state=22)  # Set a random seed for reproducibility
 st.header('Milestone 2')
 st.write("""

eda.py CHANGED Viewed

@@ -4,8 +4,14 @@ import numpy as np
 import pandas as pd
 import seaborn as sns
-# Load data from a CSV file
-data = pd.read_csv('fraud_test.csv')
 # def annotate_bar(ax, custom_y_func, font_size=14):
@@ -50,10 +56,10 @@ def eda_page():
         st.pyplot(fig)
     st.write("**Explanation**:")
     markdown_text = """
-    * From the top 10 transaction amount by job we can see `Therapist` have the most fraud with almost 4000 transaction amount meanwhile `Film/Video editor` are the most non fraud with 30.000 transaction
-    * From the top 10 transaction amount by state we can see `NY` have the most fraud with 10.000 transaction amount meanwhile `TX`are the most non fraud with above 250.000 transaction
-    * From the top 10 transaction amount by city we can see `Camden` have the most fraud with 3500 transaction amount meanwhile `Naples` are the most non fraud with 250.000 transaction
-    * From the top 10 transaction amount by merchant we can see `Commier` have most fraud with 3000 transaction amount meanwhile `Corwin-Romaguera` are the most non fraud with almost 250.000 transaction
     """
     st.markdown(markdown_text)
@@ -84,10 +90,10 @@ def eda_page():
     st.write("**Explanation**:")
     markdown_text = """
-    * From the top 10 transaction by job we can see `Color Technologist` have the most fraud with above 20 transaction meanwhile `Film/Video editor` are the most non fraud with 2.000 transaction
-    * From the top 10 transaction by state we can see `NY` have the most fraud with above 80 transaction meanwhile `TX`are the most non fraud with 20.000 transaction
-    * From the top 10 transaction by city we can see `Camden` have the most fraud above 20 transaction meanwhile `Birmingham` are the most non fraud with almost 1.200 transaction
-    * From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most non fraud with almost 1.000 transaction
     """
     st.markdown(markdown_text)

 import pandas as pd
 import seaborn as sns
+#Load data
+fraud = pd.read_csv('fraud_test.csv')
+# Define the percentage of data you want to sample
+sample_percentage = 50  # Adjust this percentage as needed
+# Randomly sample the data based on the percentage
+data = fraud.sample(frac=sample_percentage/100, random_state=22)
 # def annotate_bar(ax, custom_y_func, font_size=14):
         st.pyplot(fig)
     st.write("**Explanation**:")
     markdown_text = """
+    * From the top 10 transaction amount by job we can see `Science Writer` have the most fraud with over 10.000 transaction amount meanwhile `Film/Video editor` are the most non fraud with almost 160.000 transaction
+    * From the top 10 transaction amount by state we can see `NY` have the most fraud with almost 60.000 transaction amount meanwhile `TX`are the most non fraud with above 1.400.000 transaction
+    * From the top 10 transaction amount by city we can see `Camden` have the most fraud with over 10.000 transaction amount meanwhile `Meridian` are the most non fraud with almost 100.000 transaction
+    * From the top 10 transaction amount by merchant we can see `Heathcote, Yost and Kertzmann` have most fraud with almost 10.000 transaction amount meanwhile `Killback-LLC` are the most non fraud with over 80.000 transaction
     """
     st.markdown(markdown_text)
     st.write("**Explanation**:")
     markdown_text = """
+    * From the top 10 transaction by job we can see `Color Technologist` have the most fraud with over 20 transaction meanwhile `Film/Video editor` are the most not fraud with over 2.000 transaction
+    * From the top 10 transaction by state we can see `NY` have the most fraud with over 80 transaction meanwhile `TX`are the most not fraud with 20.000 transaction
+    * From the top 10 transaction by city we can see `Camden` have the most fraud over 20 transaction meanwhile `Birmingham` are the most not fraud with almost 1.200 transaction
+    * From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most not fraud with almost 1.000 transaction
     """
     st.markdown(markdown_text)

prediction.py CHANGED Viewed

@@ -8,7 +8,6 @@ from scipy.stats import randint
 from datetime import datetime, timedelta
 from sklearn.utils import shuffle
-#from scikit-learn.utils import shuffle
 def model_page():
     st.title("Model Prediction of Credit Card Fault")

 from datetime import datetime, timedelta
 from sklearn.utils import shuffle
 def model_page():
     st.title("Model Prediction of Credit Card Fault")