Upload 6 files
- app.py +7 -3
- eda.py +16 -10
- prediction.py +0 -1
app.py
CHANGED
@@ -3,15 +3,19 @@ import numpy as np
 import pandas as pd
 import joblib
 
-
 import eda
 from eda import eda_page
 import prediction
 from prediction import model_page
 
-
 #Load data
-
+fraud = pd.read_csv('fraud_test.csv')
+
+# Define the percentage of data you want to sample
+sample_percentage = 50  # Adjust this percentage as needed
+
+# Randomly sample the data based on the percentage
+data = fraud.sample(frac=sample_percentage/100, random_state=22)  # Set a random seed for reproducibility
 
 st.header('Milestone 2')
 st.write("""
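For context, here is a minimal, hedged sketch of what the newly added sampling step does, using a tiny made-up DataFrame instead of the real `fraud_test.csv` (the `amt`/`is_fraud` columns below are illustrative only): `frac=0.5` keeps roughly half of the rows, and a fixed `random_state` returns the same rows on every Streamlit rerun.

```python
import pandas as pd

# Illustrative stand-in for fraud_test.csv; the real file has many more columns.
fraud = pd.DataFrame({
    "amt": [12.5, 80.0, 3.2, 250.0, 41.9, 7.7],
    "is_fraud": [0, 1, 0, 0, 1, 0],
})

sample_percentage = 50  # same knob as in app.py

# frac=0.5 keeps ~50% of the rows; random_state=22 makes the draw reproducible.
data = fraud.sample(frac=sample_percentage / 100, random_state=22)

print(len(data))             # 3 of the 6 rows
print(data.index.tolist())   # identical indices on every run, thanks to random_state
```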
eda.py
CHANGED
@@ -4,8 +4,14 @@ import numpy as np
 import pandas as pd
 import seaborn as sns
 
-#
-
+#Load data
+fraud = pd.read_csv('fraud_test.csv')
+
+# Define the percentage of data you want to sample
+sample_percentage = 50  # Adjust this percentage as needed
+
+# Randomly sample the data based on the percentage
+data = fraud.sample(frac=sample_percentage/100, random_state=22)
 
 # def annotate_bar(ax, custom_y_func, font_size=14):
 
@@ -50,10 +56,10 @@ def eda_page():
 st.pyplot(fig)
 st.write("**Explanation**:")
 markdown_text = """
-* From the top 10 transaction amount by job we can see `
-* From the top 10 transaction amount by state we can see `NY` have the most fraud with
-* From the top 10 transaction amount by city we can see `Camden` have the most fraud with
-* From the top 10 transaction amount by merchant we can see `
+* From the top 10 transaction amounts by job, we can see `Science Writer` has the most fraud, with over 10,000 in transaction amount, while `Film/Video editor` has the most non-fraud, with almost 160,000
+* From the top 10 transaction amounts by state, we can see `NY` has the most fraud, with almost 60,000 in transaction amount, while `TX` has the most non-fraud, with over 1,400,000
+* From the top 10 transaction amounts by city, we can see `Camden` has the most fraud, with over 10,000 in transaction amount, while `Meridian` has the most non-fraud, with almost 100,000
+* From the top 10 transaction amounts by merchant, we can see `Heathcote, Yost and Kertzmann` has the most fraud, with almost 10,000 in transaction amount, while `Killback-LLC` has the most non-fraud, with over 80,000
 """
 st.markdown(markdown_text)
 
@@ -84,10 +90,10 @@ def eda_page():
 
 st.write("**Explanation**:")
 markdown_text = """
-* From the top 10 transaction by job we can see `Color Technologist` have the most fraud with
-* From the top 10 transaction by state we can see `NY` have the most fraud with
-* From the top 10 transaction by city we can see `Camden` have the most fraud
-* From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most
+* From the top 10 transaction counts by job, we can see `Color Technologist` has the most fraud, with over 20 transactions, while `Film/Video editor` has the most non-fraud, with over 2,000 transactions
+* From the top 10 transaction counts by state, we can see `NY` has the most fraud, with over 80 transactions, while `TX` has the most non-fraud, with 20,000 transactions
+* From the top 10 transaction counts by city, we can see `Camden` has the most fraud, with over 20 transactions, while `Birmingham` has the most non-fraud, with almost 1,200 transactions
+* From the top 10 transaction counts by merchant, we can see `Healthcore LLC.` has the most fraud, with 10 transactions, while `Killback LLC.` has the most non-fraud, with almost 1,000 transactions
 """
 st.markdown(markdown_text)
 
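The explanation bullets above describe "top 10 transaction amount by job/state/city/merchant" charts, but the plotting code itself sits outside the changed hunks. Below is a hedged sketch of one way such a figure could be built with seaborn, assuming the sampled frame exposes `job`, `amt`, and `is_fraud` columns (column names assumed, not shown in this diff; `top10_amount_by` is a hypothetical helper):

```python
import matplotlib.pyplot as plt
import seaborn as sns

def top10_amount_by(data, column):
    # Sum the transaction amount per category and per fraud flag.
    totals = (
        data.groupby([column, "is_fraud"])["amt"]
        .sum()
        .reset_index()
    )
    # Keep only the 10 categories with the largest overall amount.
    top_categories = data.groupby(column)["amt"].sum().nlargest(10).index
    top = totals[totals[column].isin(top_categories)]

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(data=top, x="amt", y=column, hue="is_fraud", ax=ax)
    ax.set_title(f"Top 10 transaction amount by {column}")
    return fig

# Inside eda_page() this could feed straight into Streamlit:
# st.pyplot(top10_amount_by(data, "job"))
```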
prediction.py
CHANGED
@@ -8,7 +8,6 @@ from scipy.stats import randint
 
 from datetime import datetime, timedelta
 from sklearn.utils import shuffle
-#from scikit-learn.utils import shuffle
 
 def model_page():
 st.title("Model Prediction of Credit Card Fault")