Reaumur committed on
Commit
1fef517
·
verified ·
1 Parent(s): fc56f9b

Upload 6 files

Browse files
Files changed (3) hide show
  1. app.py +7 -3
  2. eda.py +16 -10
  3. prediction.py +0 -1
app.py CHANGED
@@ -3,15 +3,19 @@ import numpy as np
3
  import pandas as pd
4
  import joblib
5
 
6
-
7
  import eda
8
  from eda import eda_page
9
  import prediction
10
  from prediction import model_page
11
 
12
-
13
  #Load data
14
- data = pd.read_csv("fraud_test.csv")
 
 
 
 
 
 
15
 
16
  st.header('Milestone 2')
17
  st.write("""
 
3
  import pandas as pd
4
  import joblib
5
 
 
6
  import eda
7
  from eda import eda_page
8
  import prediction
9
  from prediction import model_page
10
 
 
11
  #Load data
12
+ fraud = pd.read_csv('fraud_test.csv')
13
+
14
+ # Define the percentage of data you want to sample
15
+ sample_percentage = 50 # Adjust this percentage as needed
16
+
17
+ # Randomly sample the data based on the percentage
18
+ data = fraud.sample(frac=sample_percentage/100, random_state=22) # Set a random seed for reproducibility
19
 
20
  st.header('Milestone 2')
21
  st.write("""
eda.py CHANGED
@@ -4,8 +4,14 @@ import numpy as np
4
  import pandas as pd
5
  import seaborn as sns
6
 
7
- # Load data from a CSV file
8
- data = pd.read_csv('fraud_test.csv')
 
 
 
 
 
 
9
 
10
  # def annotate_bar(ax, custom_y_func, font_size=14):
11
 
@@ -50,10 +56,10 @@ def eda_page():
50
  st.pyplot(fig)
51
  st.write("**Explanation**:")
52
  markdown_text = """
53
- * From the top 10 transaction amount by job we can see `Therapist` have the most fraud with almost 4000 transaction amount meanwhile `Film/Video editor` are the most non fraud with 30.000 transaction
54
- * From the top 10 transaction amount by state we can see `NY` have the most fraud with 10.000 transaction amount meanwhile `TX`are the most non fraud with above 250.000 transaction
55
- * From the top 10 transaction amount by city we can see `Camden` have the most fraud with 3500 transaction amount meanwhile `Naples` are the most non fraud with 250.000 transaction
56
- * From the top 10 transaction amount by merchant we can see `Commier` have most fraud with 3000 transaction amount meanwhile `Corwin-Romaguera` are the most non fraud with almost 250.000 transaction
57
  """
58
  st.markdown(markdown_text)
59
 
@@ -84,10 +90,10 @@ def eda_page():
84
 
85
  st.write("**Explanation**:")
86
  markdown_text = """
87
- * From the top 10 transaction by job we can see `Color Technologist` have the most fraud with above 20 transaction meanwhile `Film/Video editor` are the most non fraud with 2.000 transaction
88
- * From the top 10 transaction by state we can see `NY` have the most fraud with above 80 transaction meanwhile `TX`are the most non fraud with 20.000 transaction
89
- * From the top 10 transaction by city we can see `Camden` have the most fraud above 20 transaction meanwhile `Birmingham` are the most non fraud with almost 1.200 transaction
90
- * From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most non fraud with almost 1.000 transaction
91
  """
92
  st.markdown(markdown_text)
93
 
 
4
  import pandas as pd
5
  import seaborn as sns
6
 
7
+ #Load data
8
+ fraud = pd.read_csv('fraud_test.csv')
9
+
10
+ # Define the percentage of data you want to sample
11
+ sample_percentage = 50 # Adjust this percentage as needed
12
+
13
+ # Randomly sample the data based on the percentage
14
+ data = fraud.sample(frac=sample_percentage/100, random_state=22)
15
 
16
  # def annotate_bar(ax, custom_y_func, font_size=14):
17
 
 
56
  st.pyplot(fig)
57
  st.write("**Explanation**:")
58
  markdown_text = """
59
+ * From the top 10 transaction amount by job we can see `Science Writer` have the most fraud with over 10.000 transaction amount meanwhile `Film/Video editor` are the most non fraud with almost 160.000 transaction
60
+ * From the top 10 transaction amount by state we can see `NY` have the most fraud with almost 60.000 transaction amount meanwhile `TX`are the most non fraud with above 1.400.000 transaction
61
+ * From the top 10 transaction amount by city we can see `Camden` have the most fraud with over 10.000 transaction amount meanwhile `Meridian` are the most non fraud with almost 100.000 transaction
62
+ * From the top 10 transaction amount by merchant we can see `Heathcote, Yost and Kertzmann` have most fraud with almost 10.000 transaction amount meanwhile `Killback-LLC` are the most non fraud with over 80.000 transaction
63
  """
64
  st.markdown(markdown_text)
65
 
 
90
 
91
  st.write("**Explanation**:")
92
  markdown_text = """
93
+ * From the top 10 transaction by job we can see `Color Technologist` have the most fraud with over 20 transaction meanwhile `Film/Video editor` are the most not fraud with over 2.000 transaction
94
+ * From the top 10 transaction by state we can see `NY` have the most fraud with over 80 transaction meanwhile `TX`are the most not fraud with 20.000 transaction
95
+ * From the top 10 transaction by city we can see `Camden` have the most fraud over 20 transaction meanwhile `Birmingham` are the most not fraud with almost 1.200 transaction
96
+ * From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most not fraud with almost 1.000 transaction
97
  """
98
  st.markdown(markdown_text)
99
 
prediction.py CHANGED
@@ -8,7 +8,6 @@ from scipy.stats import randint
8
 
9
  from datetime import datetime, timedelta
10
  from sklearn.utils import shuffle
11
- #from scikit-learn.utils import shuffle
12
 
13
  def model_page():
14
  st.title("Model Prediction of Credit Card Fault")
 
8
 
9
  from datetime import datetime, timedelta
10
  from sklearn.utils import shuffle
 
11
 
12
  def model_page():
13
  st.title("Model Prediction of Credit Card Fault")