Spaces:
Runtime error
Runtime error
masa729406
committed on
Commit
·
359e36e
1
Parent(s):
23d8057
Update app.py
Browse files
app.py
CHANGED
@@ -8,37 +8,131 @@ matplotlib.use("Agg")
|
|
8 |
|
9 |
import matplotlib.pyplot as plt
|
10 |
import numpy as np
|
11 |
-
|
12 |
import pandas as pd
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
filename = 'model.pkl'
|
18 |
-
loaded_rf_model = pickle.load(open(filename, 'rb'))
|
19 |
-
X_test = pd.DataFrame(
|
20 |
-
data={'saba': [300, 200, 3030, 400],
|
21 |
-
'date_ymd': [20230328, 20230329, 20230330, 20230331]}
|
22 |
-
)
|
23 |
-
y_pred_ = loaded_rf_model.predict(X_test, num_iteration=gbm.best_iteration)
|
24 |
|
25 |
def outbreak(date):
|
26 |
if date:
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
fig = plt.figure()
|
31 |
-
plt.plot(
|
32 |
-
plt.
|
|
|
33 |
plt.ylabel("audience")
|
34 |
plt.xlabel("Days since Day 0")
|
35 |
return fig
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
demo.launch()
|
|
|
8 |
|
9 |
import matplotlib.pyplot as plt
|
10 |
import numpy as np
|
11 |
+
import plotly.express as px
|
12 |
import pandas as pd
|
13 |
|
14 |
+
# Webページを取得して解析する
|
15 |
+
load_url = "https://www.football-lab.jp/kyot/match/"
|
16 |
+
html = requests.get(load_url)
|
17 |
+
soup = bs(html.content, "html.parser")
|
18 |
+
|
19 |
+
!pip install beautifulsoup4
|
20 |
+
!pip install requests_html
|
21 |
+
import requests
|
22 |
+
from bs4 import BeautifulSoup as bs
|
23 |
+
from requests_html import AsyncHTMLSession
|
24 |
+
|
25 |
+
import pandas as pd
|
26 |
+
from datetime import datetime
|
27 |
+
from IPython.display import display
|
28 |
+
|
29 |
+
url23 = 'https://www.football-lab.jp/ka-f/match/'
|
30 |
+
dfs23 = pd.read_html(url23)
|
31 |
+
url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
|
32 |
+
dfs22 = pd.read_html(url22)
|
33 |
+
url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
|
34 |
+
dfs21 = pd.read_html(url21)
|
35 |
+
url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
|
36 |
+
dfs20 = pd.read_html(url20)
|
37 |
+
|
38 |
+
#シーズン毎に分類
|
39 |
+
res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
|
40 |
+
res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
|
41 |
+
res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
|
42 |
+
res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
|
43 |
+
|
44 |
+
df_tmp = pd.concat([res23, res22, res21, res20])
|
45 |
+
df = df_tmp
|
46 |
+
|
47 |
+
df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
|
48 |
+
df = df.query('stadium=="等々力"').reset_index()
|
49 |
+
df = df.query('audience.notna()', engine='python').reset_index()
|
50 |
+
df = df[['audience', 'year', 'date']]
|
51 |
+
#seasonカラムから年を抽出
|
52 |
+
df["year"] = df["year"].apply(lambda x: str(x)[1:5])
|
53 |
+
#開催日から月と日を分割
|
54 |
+
df['month'] = df['date'].str.split(pat='.', expand=True)[0]
|
55 |
+
df['day'] = df['date'].str.split(pat='.', expand=True)[1]
|
56 |
+
#数値データを日付データに変換
|
57 |
+
df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
|
58 |
+
#日付昇順に並び替える
|
59 |
+
df = df.sort_values('date', ascending=True)
|
60 |
+
df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
|
61 |
+
df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
|
62 |
+
df["date_ymd"] = df["date_ymd"].astype(int)
|
63 |
+
df['date_before'] = df['date_ymd'] - 1
|
64 |
+
df["date_before"] = df["date_before"]
|
65 |
+
df = df[['audience', 'date_ymd', 'date_before']]
|
66 |
+
df['last_audience'] = df['audience'].shift(1)
|
67 |
+
|
68 |
+
df_aji = pd.read_csv('fish_price.csv')
|
69 |
+
|
70 |
+
df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
|
71 |
+
|
72 |
+
df_train = df_train.query('date > 20201202')
|
73 |
+
df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
|
74 |
+
df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
|
75 |
+
df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
|
76 |
+
|
77 |
+
X = df_train.drop('audience', axis=1)
|
78 |
+
y = df_train['audience']
|
79 |
+
|
80 |
+
from sklearn.linear_model import LinearRegression
|
81 |
+
from sklearn.metrics import log_loss
|
82 |
+
from sklearn.preprocessing import StandardScaler
|
83 |
+
linear_regression = LinearRegression()
|
84 |
+
linear_regression.fit(X,y)
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
def outbreak(date):
|
88 |
if date:
|
89 |
+
|
90 |
+
|
91 |
+
|
92 |
+
if __name__ == "__main__":
|
93 |
+
start_date = d_today
|
94 |
+
end_date = d_tom
|
95 |
+
df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
|
96 |
+
# df_aji_pre.to_csv("fish_price_pre.csv", index=False)
|
97 |
+
|
98 |
+
df_pre = df.tail(1).reset_index()
|
99 |
+
df_pre = df_pre.drop('index', axis=1)
|
100 |
+
df_aji_ft_pre = pd.concat([df_pre, df_aji_pre], axis=1)
|
101 |
+
df_aji_ft_pre = df_aji_ft_pre[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
|
102 |
+
df_aji_ft_pre = df_aji_ft_pre.rename(columns={'audience': 'last_audience', 0: 'year', '開催日': 'date', '観客数': 'audience'})
|
103 |
+
|
104 |
+
pred = linear_regression.predict(df_aji_ft_pre)
|
105 |
+
df_aji_ft_pre['audience_pred'] = pred
|
106 |
+
df_aji_ft_pre['date'] = df_aji_ft_pre['date'].astype(int)
|
107 |
+
|
108 |
fig = plt.figure()
|
109 |
+
plt.plot(df_train['date'], df_train['audience'], label='original')
|
110 |
+
plt.plot(df_aji_ft_pre['date'], df_aji_ft_pre['audience_pred'], '*', label='predict')
|
111 |
+
plt.title("prediction of audince")
|
112 |
plt.ylabel("audience")
|
113 |
plt.xlabel("Days since Day 0")
|
114 |
return fig
|
115 |
|
116 |
+
with gr.Blocks() as demo:
|
117 |
+
gr.Markdown(
|
118 |
+
"""
|
119 |
+
# 川崎フロンターレの観客動員数の予測
|
120 |
+
川崎フロンターレの等々力陸上競技場での試合の観客数を「あじ」の価格をもとに予測する。
|
121 |
+
## 使用データ
|
122 |
+
* 東京卸売市場日報
|
123 |
+
* Football Lab
|
124 |
+
## 予測ロジック
|
125 |
+
観客動員数は雨天か否かで左右されると考えられる。そこで雨天の可能性をあじの価格を利用し表した。
|
126 |
+
一般的に雨天の場合、低気圧の影響で海面が上昇し漁に出ることが難しくなる。
|
127 |
+
そのため漁獲量が減少し、あじの価格が上昇すると考えられる。
|
128 |
+
"""
|
129 |
+
)
|
130 |
+
with gr.Row():
|
131 |
+
with gr.Column():
|
132 |
+
date_input = gr.Checkbox(label='please input date')
|
133 |
+
prediction_btn = gr.Button(value="predict")
|
134 |
+
with gr.Column():
|
135 |
+
prediction = gr.Plot(label = "時系列プロット")
|
136 |
+
prediction_btn.click(outbreak, inputs=date_input, outputs=prediction)
|
137 |
|
138 |
+
demo.launch()
|
|