Reaumur committed on
Commit
fc56f9b
·
verified ·
1 Parent(s): de870a3

Upload 5 files

Browse files
Files changed (3) hide show
  1. app.py +3 -7
  2. eda.py +10 -16
  3. prediction.py +1 -0
app.py CHANGED
@@ -3,19 +3,15 @@ import numpy as np
3
  import pandas as pd
4
  import joblib
5
 
 
6
  import eda
7
  from eda import eda_page
8
  import prediction
9
  from prediction import model_page
10
 
11
- #Load data
12
- fraud = pd.read_csv('fraud_test.csv')
13
-
14
- # Define the percentage of data you want to sample
15
- sample_percentage = 50 # Adjust this percentage as needed
16
 
17
- # Randomly sample the data based on the percentage
18
- data = fraud.sample(frac=sample_percentage/100, random_state=22) # Set a random seed for reproducibility
19
 
20
  st.header('Milestone 2')
21
  st.write("""
 
3
  import pandas as pd
4
  import joblib
5
 
6
+
7
  import eda
8
  from eda import eda_page
9
  import prediction
10
  from prediction import model_page
11
 
 
 
 
 
 
12
 
13
+ #Load data
14
+ data = pd.read_csv("fraud_test.csv")
15
 
16
  st.header('Milestone 2')
17
  st.write("""
eda.py CHANGED
@@ -4,14 +4,8 @@ import numpy as np
4
  import pandas as pd
5
  import seaborn as sns
6
 
7
- #Load data
8
- fraud = pd.read_csv('fraud_test.csv')
9
-
10
- # Define the percentage of data you want to sample
11
- sample_percentage = 50 # Adjust this percentage as needed
12
-
13
- # Randomly sample the data based on the percentage
14
- data = fraud.sample(frac=sample_percentage/100, random_state=22)
15
 
16
  # def annotate_bar(ax, custom_y_func, font_size=14):
17
 
@@ -56,10 +50,10 @@ def eda_page():
56
  st.pyplot(fig)
57
  st.write("**Explanation**:")
58
  markdown_text = """
59
- * From the top 10 transaction amount by job we can see `Science Writer` have the most fraud with over 10.000 transaction amount meanwhile `Film/Video editor` are the most non fraud with almost 160.000 transaction
60
- * From the top 10 transaction amount by state we can see `NY` have the most fraud with almost 60.000 transaction amount meanwhile `TX`are the most non fraud with above 1.400.000 transaction
61
- * From the top 10 transaction amount by city we can see `Camden` have the most fraud with over 10.000 transaction amount meanwhile `Meridian` are the most non fraud with almost 100.000 transaction
62
- * From the top 10 transaction amount by merchant we can see `Heathcote, Yost and Kertzmann` have most fraud with almost 10.000 transaction amount meanwhile `Killback-LLC` are the most non fraud with over 80.000 transaction
63
  """
64
  st.markdown(markdown_text)
65
 
@@ -90,10 +84,10 @@ def eda_page():
90
 
91
  st.write("**Explanation**:")
92
  markdown_text = """
93
- * From the top 10 transaction by job we can see `Color Technologist` have the most fraud with over 20 transaction meanwhile `Film/Video editor` are the most not fraud with over 2.000 transaction
94
- * From the top 10 transaction by state we can see `NY` have the most fraud with over 80 transaction meanwhile `TX`are the most not fraud with 20.000 transaction
95
- * From the top 10 transaction by city we can see `Camden` have the most fraud over 20 transaction meanwhile `Birmingham` are the most not fraud with almost 1.200 transaction
96
- * From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most not fraud with almost 1.000 transaction
97
  """
98
  st.markdown(markdown_text)
99
 
 
4
  import pandas as pd
5
  import seaborn as sns
6
 
7
+ # Load data from a CSV file
8
+ data = pd.read_csv('fraud_test.csv')
 
 
 
 
 
 
9
 
10
  # def annotate_bar(ax, custom_y_func, font_size=14):
11
 
 
50
  st.pyplot(fig)
51
  st.write("**Explanation**:")
52
  markdown_text = """
53
+ * From the top 10 transaction amount by job we can see `Therapist` have the most fraud with almost 4000 transaction amount meanwhile `Film/Video editor` are the most non fraud with 30.000 transaction
54
+ * From the top 10 transaction amount by state we can see `NY` have the most fraud with 10.000 transaction amount meanwhile `TX`are the most non fraud with above 250.000 transaction
55
+ * From the top 10 transaction amount by city we can see `Camden` have the most fraud with 3500 transaction amount meanwhile `Naples` are the most non fraud with 250.000 transaction
56
+ * From the top 10 transaction amount by merchant we can see `Commier` have most fraud with 3000 transaction amount meanwhile `Corwin-Romaguera` are the most non fraud with almost 250.000 transaction
57
  """
58
  st.markdown(markdown_text)
59
 
 
84
 
85
  st.write("**Explanation**:")
86
  markdown_text = """
87
+ * From the top 10 transaction by job we can see `Color Technologist` have the most fraud with above 20 transaction meanwhile `Film/Video editor` are the most non fraud with 2.000 transaction
88
+ * From the top 10 transaction by state we can see `NY` have the most fraud with above 80 transaction meanwhile `TX`are the most non fraud with 20.000 transaction
89
+ * From the top 10 transaction by city we can see `Camden` have the most fraud above 20 transaction meanwhile `Birmingham` are the most non fraud with almost 1.200 transaction
90
+ * From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most non fraud with almost 1.000 transaction
91
  """
92
  st.markdown(markdown_text)
93
 
prediction.py CHANGED
@@ -8,6 +8,7 @@ from scipy.stats import randint
8
 
9
  from datetime import datetime, timedelta
10
  from sklearn.utils import shuffle
 
11
 
12
  def model_page():
13
  st.title("Model Prediction of Credit Card Fault")
 
8
 
9
  from datetime import datetime, timedelta
10
  from sklearn.utils import shuffle
11
+ #from scikit-learn.utils import shuffle
12
 
13
  def model_page():
14
  st.title("Model Prediction of Credit Card Fault")