masa729406 committed on
Commit
b3c9d45
·
1 Parent(s): fe17720

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -79
app.py CHANGED
@@ -1,97 +1,104 @@
1
- # import altair
2
 
3
  import gradio as gr
4
  from math import sqrt
5
- import pandas as pd
6
- import numpy as np
7
- from datetime import datetime
8
  import matplotlib
 
9
  matplotlib.use("Agg")
 
10
  import matplotlib.pyplot as plt
 
 
11
 
 
12
 
13
- # # pip install beautifulsoup4
14
- # # pip install requests_html
15
- # import requests
16
- # from bs4 import BeautifulSoup as bs
17
- # from requests_html import AsyncHTMLSession
18
-
19
- # # Webページを取得して解析する
20
- # load_url = "https://www.football-lab.jp/kyot/match/"
21
- # html = requests.get(load_url)
22
- # soup = bs(html.content, "html.parser")
23
-
24
- # url23 = 'https://www.football-lab.jp/ka-f/match/'
25
- # dfs23 = pd.read_html(url23)
26
- # url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
27
- # dfs22 = pd.read_html(url22)
28
- # url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
29
- # dfs21 = pd.read_html(url21)
30
- # url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
31
- # dfs20 = pd.read_html(url20)
32
-
33
- # #シーズン毎に分類
34
- # res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
35
- # res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
36
- # res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
37
- # res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
38
-
39
- # df_tmp = pd.concat([res23, res22, res21, res20])
40
- # df = df_tmp
41
-
42
- # df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
43
- # df = df.query('stadium=="等々力"').reset_index()
44
- # df = df.query('audience.notna()', engine='python').reset_index()
45
- # df = df[['audience', 'year', 'date']]
46
- # #seasonカラムから年を抽出
47
- # df["year"] = df["year"].apply(lambda x: str(x)[1:5])
48
- # #開催日から月と日を分割
49
- # df['month'] = df['date'].str.split(pat='.', expand=True)[0]
50
- # df['day'] = df['date'].str.split(pat='.', expand=True)[1]
51
- # #数値データを日付データに変換
52
- # df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
53
- # #日付昇順に並び替える
54
- # df = df.sort_values('date', ascending=True)
55
- # df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
56
- # df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
57
- # df["date_ymd"] = df["date_ymd"].astype(int)
58
- # df['date_before'] = df['date_ymd'] - 1
59
- # df["date_before"] = df["date_before"]
60
- # df = df[['audience', 'date_ymd', 'date_before']]
61
- # df['last_audience'] = df['audience'].shift(1)
62
-
63
- # df_aji = pd.read_csv('fish_price.csv')
64
-
65
- # df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
66
-
67
- # df_train = df_train.query('date > 20201202')
68
- # df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
69
- # df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
70
- # df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
71
 
72
- df_train = pd.read_csv('df_train.csv')
 
 
73
 
 
74
  X = df_train.drop('audience', axis=1)
75
  y = df_train['audience']
76
-
77
- import sklearn
78
- from sklearn.linear_model import LinearRegression
79
- from sklearn.metrics import log_loss
80
- from sklearn.preprocessing import StandardScaler
81
  linear_regression = LinearRegression()
82
- linear_regression.fit(X,y)
83
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  def outbreak(date):
86
- if date:
87
-
88
-
89
-
90
- if __name__ == "__main__":
91
- start_date = d_today
92
- end_date = d_tom
93
- df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
94
- # df_aji_pre.to_csv("fish_price_pre.csv", index=False)
 
95
 
96
  df_pre = df.tail(1).reset_index()
97
  df_pre = df_pre.drop('index', axis=1)
@@ -99,6 +106,9 @@ def outbreak(date):
99
  df_aji_ft_pre = df_aji_ft_pre[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
100
  df_aji_ft_pre = df_aji_ft_pre.rename(columns={'audience': 'last_audience', 0: 'year', '開催日': 'date', '観客数': 'audience'})
101
 
 
 
 
102
  pred = linear_regression.predict(df_aji_ft_pre)
103
  df_aji_ft_pre['audience_pred'] = pred
104
  df_aji_ft_pre['date'] = df_aji_ft_pre['date'].astype(int)
 
1
+ import altair
2
 
3
  import gradio as gr
4
  from math import sqrt
 
 
 
5
  import matplotlib
6
+
7
  matplotlib.use("Agg")
8
+
9
  import matplotlib.pyplot as plt
10
+ import numpy as np
11
+ import pandas as pd
12
 
13
+ import datetime
14
 
15
+ from sklearn.linear_model import LinearRegression
16
+ from sklearn.metrics import log_loss
17
+ from sklearn.preprocessing import StandardScaler
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ import requests
20
+ from bs4 import BeautifulSoup as bs
21
+ from requests_html import AsyncHTMLSession
22
 
23
+ df_train = pd.read_csv('df_train.csv')
24
  X = df_train.drop('audience', axis=1)
25
  y = df_train['audience']
 
 
 
 
 
26
  linear_regression = LinearRegression()
27
+ model = linear_regression.fit(X,y)
28
+
29
+ d_today = datetime.date.today()
30
+ d_tom = datetime.date.today() + datetime.timedelta(days = 1)
31
+
32
+ # 動作確認
33
+ d_y = datetime.date.today() + datetime.timedelta(days = -1)
34
+ if __name__ == "__main__":
35
+ start_date = d_y
36
+ end_date = d_today
37
+ df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
38
+ df_aji_pre['date'] = df_aji_pre['date'].astype(int)
39
+
40
+ # if __name__ == "__main__":
41
+ # start_date = d_today
42
+ # end_date = d_tom
43
+ # df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
44
+ # df_aji_pre['date'] = df_aji_pre['date'].astype(int)
45
+
46
+ url23 = 'https://www.football-lab.jp/ka-f/match/'
47
+ dfs23 = pd.read_html(url23)
48
+
49
+ #シーズン毎に分類
50
+ res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
51
+
52
+ df = res23
53
+
54
+ df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
55
+ df = df.query('stadium=="等々力"').reset_index()
56
+ df = df.query('audience.notna()', engine='python').reset_index()
57
+ df = df[['audience', 'year', 'date']]
58
+ #seasonカラムから年を抽出
59
+ df["year"] = df["year"].apply(lambda x: str(x)[1:5])
60
+ #開催日から月と日を分割
61
+ df['month'] = df['date'].str.split(pat='.', expand=True)[0]
62
+ df['day'] = df['date'].str.split(pat='.', expand=True)[1]
63
+ #数値データを日付データに変換
64
+ df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
65
+ #日付昇順に並び替える
66
+ df = df.sort_values('date', ascending=True)
67
+ df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
68
+ df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
69
+ df["date_ymd"] = df["date_ymd"].astype(int)
70
+ df['date_before'] = df['date_ymd'] - 1
71
+ df["date_before"] = df["date_before"]
72
+ df = df[['audience', 'date_ymd', 'date_before']]
73
+ df['last_audience'] = df['audience'].shift(1)
74
+
75
+ # df_pre = pd.merge(df, df_aji_pre, left_on='date_before', right_on='date', how='left')
76
+
77
+ # df_pre = df_pre.drop(['date_before', 'date_ymd'], axis=1)
78
+ # df_pre["audience"] = df_pre["audience"].str.replace(",", "").astype(int)
79
+ # df_pre["last_audience"] = df_pre["last_audience"].str.replace(",", "").astype(int)
80
+
81
+ # start_date = int(start_date)
82
+ # df_pre = df.query('date <= start_date')
83
+
84
+
85
+ df_pre = df.tail(1).reset_index()
86
+ df_pre = df_pre.drop('index', axis=1)
87
+ df_aji_ft_pre = pd.concat([df_pre, df_aji_pre], axis=1)
88
+ df_aji_ft_pre = df_aji_ft_pre[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
89
+ df_aji_ft_pre = df_aji_ft_pre.rename(columns={'audience': 'last_audience', 0: 'year', '開催日': 'date', '観客数': 'audience'})
90
 
91
  def outbreak(date):
92
+ if date:
93
+
94
+ # if __name__ == "__main__":
95
+ # import datetime
96
+ # d_today = datetime.date.today()
97
+ # d_tom = datetime.date.today() + datetime.timedelta(days = 1)
98
+ # start_date = d_today
99
+ # end_date = d_tom
100
+ # df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
101
+ # # df_aji_pre.to_csv("fish_price_pre.csv", index=False)
102
 
103
  df_pre = df.tail(1).reset_index()
104
  df_pre = df_pre.drop('index', axis=1)
 
106
  df_aji_ft_pre = df_aji_ft_pre[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
107
  df_aji_ft_pre = df_aji_ft_pre.rename(columns={'audience': 'last_audience', 0: 'year', '開催日': 'date', '観客数': 'audience'})
108
 
109
+ X = df_train.drop('audience', axis=1)
110
+ y = df_train['audience']
111
+
112
  pred = linear_regression.predict(df_aji_ft_pre)
113
  df_aji_ft_pre['audience_pred'] = pred
114
  df_aji_ft_pre['date'] = df_aji_ft_pre['date'].astype(int)