Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- app.py +3 -7
- eda.py +10 -16
- prediction.py +1 -0
app.py
CHANGED
@@ -3,19 +3,15 @@ import numpy as np
|
|
3 |
import pandas as pd
|
4 |
import joblib
|
5 |
|
|
|
6 |
import eda
|
7 |
from eda import eda_page
|
8 |
import prediction
|
9 |
from prediction import model_page
|
10 |
|
11 |
-
#Load data
|
12 |
-
fraud = pd.read_csv('fraud_test.csv')
|
13 |
-
|
14 |
-
# Define the percentage of data you want to sample
|
15 |
-
sample_percentage = 50 # Adjust this percentage as needed
|
16 |
|
17 |
-
#
|
18 |
-
data =
|
19 |
|
20 |
st.header('Milestone 2')
|
21 |
st.write("""
|
|
|
3 |
import pandas as pd
|
4 |
import joblib
|
5 |
|
6 |
+
|
7 |
import eda
|
8 |
from eda import eda_page
|
9 |
import prediction
|
10 |
from prediction import model_page
|
11 |
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
#Load data
|
14 |
+
data = pd.read_csv("fraud_test.csv")
|
15 |
|
16 |
st.header('Milestone 2')
|
17 |
st.write("""
|
eda.py
CHANGED
@@ -4,14 +4,8 @@ import numpy as np
|
|
4 |
import pandas as pd
|
5 |
import seaborn as sns
|
6 |
|
7 |
-
#Load data
|
8 |
-
|
9 |
-
|
10 |
-
# Define the percentage of data you want to sample
|
11 |
-
sample_percentage = 50 # Adjust this percentage as needed
|
12 |
-
|
13 |
-
# Randomly sample the data based on the percentage
|
14 |
-
data = fraud.sample(frac=sample_percentage/100, random_state=22)
|
15 |
|
16 |
# def annotate_bar(ax, custom_y_func, font_size=14):
|
17 |
|
@@ -56,10 +50,10 @@ def eda_page():
|
|
56 |
st.pyplot(fig)
|
57 |
st.write("**Explanation**:")
|
58 |
markdown_text = """
|
59 |
-
* From the top 10 transaction amount by job we can see `
|
60 |
-
* From the top 10 transaction amount by state we can see `NY` have the most fraud with
|
61 |
-
* From the top 10 transaction amount by city we can see `Camden` have the most fraud with
|
62 |
-
* From the top 10 transaction amount by merchant we can see `
|
63 |
"""
|
64 |
st.markdown(markdown_text)
|
65 |
|
@@ -90,10 +84,10 @@ def eda_page():
|
|
90 |
|
91 |
st.write("**Explanation**:")
|
92 |
markdown_text = """
|
93 |
-
* From the top 10 transaction by job we can see `Color Technologist` have the most fraud with
|
94 |
-
* From the top 10 transaction by state we can see `NY` have the most fraud with
|
95 |
-
* From the top 10 transaction by city we can see `Camden` have the most fraud
|
96 |
-
* From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most
|
97 |
"""
|
98 |
st.markdown(markdown_text)
|
99 |
|
|
|
4 |
import pandas as pd
|
5 |
import seaborn as sns
|
6 |
|
7 |
+
# Load data from a CSV file
|
8 |
+
data = pd.read_csv('fraud_test.csv')
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# def annotate_bar(ax, custom_y_func, font_size=14):
|
11 |
|
|
|
50 |
st.pyplot(fig)
|
51 |
st.write("**Explanation**:")
|
52 |
markdown_text = """
|
53 |
+
* From the top 10 transaction amount by job we can see `Therapist` have the most fraud with almost 4000 transaction amount meanwhile `Film/Video editor` are the most non fraud with 30.000 transaction
|
54 |
+
* From the top 10 transaction amount by state we can see `NY` have the most fraud with 10.000 transaction amount meanwhile `TX` are the most non fraud with above 250.000 transaction
|
55 |
+
* From the top 10 transaction amount by city we can see `Camden` have the most fraud with 3500 transaction amount meanwhile `Naples` are the most non fraud with 250.000 transaction
|
56 |
+
* From the top 10 transaction amount by merchant we can see `Commier` have most fraud with 3000 transaction amount meanwhile `Corwin-Romaguera` are the most non fraud with almost 250.000 transaction
|
57 |
"""
|
58 |
st.markdown(markdown_text)
|
59 |
|
|
|
84 |
|
85 |
st.write("**Explanation**:")
|
86 |
markdown_text = """
|
87 |
+
* From the top 10 transaction by job we can see `Color Technologist` have the most fraud with above 20 transaction meanwhile `Film/Video editor` are the most non fraud with 2.000 transaction
|
88 |
+
* From the top 10 transaction by state we can see `NY` have the most fraud with above 80 transaction meanwhile `TX` are the most non fraud with 20.000 transaction
|
89 |
+
* From the top 10 transaction by city we can see `Camden` have the most fraud above 20 transaction meanwhile `Birmingham` are the most non fraud with almost 1.200 transaction
|
90 |
+
* From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most non fraud with almost 1.000 transaction
|
91 |
"""
|
92 |
st.markdown(markdown_text)
|
93 |
|
prediction.py
CHANGED
@@ -8,6 +8,7 @@ from scipy.stats import randint
|
|
8 |
|
9 |
from datetime import datetime, timedelta
|
10 |
from sklearn.utils import shuffle
|
|
|
11 |
|
12 |
def model_page():
|
13 |
st.title("Model Prediction of Credit Card Fault")
|
|
|
8 |
|
9 |
from datetime import datetime, timedelta
|
10 |
from sklearn.utils import shuffle
|
11 |
+
#from scikit-learn.utils import shuffle
|
12 |
|
13 |
def model_page():
|
14 |
st.title("Model Prediction of Credit Card Fault")
|