# Spaces: Runtime error  (page status banner captured with this file; not part of the program)
import altair | |
import gradio as gr | |
from math import sqrt | |
import matplotlib | |
matplotlib.use("Agg") | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import datetime | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import log_loss | |
from sklearn.preprocessing import StandardScaler | |
import requests | |
from bs4 import BeautifulSoup as bs | |
from requests_html import AsyncHTMLSession | |
# ---------------------------------------------------------------------------
# Model training
# ---------------------------------------------------------------------------
# Fit a linear regression on the prepared training table; every column other
# than 'audience' is used as a feature.
df_train = pd.read_csv('df_train.csv')
X = df_train.drop(columns='audience')
y = df_train['audience']
linear_regression = LinearRegression()
model = linear_regression.fit(X, y)

# ---------------------------------------------------------------------------
# Fish-price window
# ---------------------------------------------------------------------------
# Read "today" once and derive the neighbouring days from it so the three
# dates cannot disagree when the script starts right around midnight.
d_today = datetime.date.today()
d_tom = d_today + datetime.timedelta(days=1)
d_y = d_today - datetime.timedelta(days=1)

# Smoke-test window: yesterday -> today.  (An earlier variant of this block
# requested today -> tomorrow instead.)
# NOTE(review): get_fish_price_data is neither defined nor imported in the
# visible part of this file -- confirm where it is supposed to come from.
# NOTE(review): df_aji_pre is only created when the file runs as a script,
# but it is used unconditionally further down.
if __name__ == "__main__":
    start_date = d_y
    end_date = d_today
    df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
    df_aji_pre['date'] = df_aji_pre['date'].astype(int)

# ---------------------------------------------------------------------------
# Match results scrape
# ---------------------------------------------------------------------------
url23 = 'https://www.football-lab.jp/ka-f/match/'
dfs23 = pd.read_html(url23)

# Tag every row with its season.  Only the first scraped table is joined:
# its length already sizes the season column, and newer pandas releases
# reject lsuffix/rsuffix when `other` is a list of frames.  No suffix is
# needed because the season column is the integer label 0.
res23 = pd.DataFrame([['S2023']] * len(dfs23[0])).join(dfs23[0])
df = res23
df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})

# Keep matches at the 等々力 stadium that already have an audience figure.
df = df[df['stadium'] == '等々力']
df = df[df['audience'].notna()]
df = df[['audience', 'year', 'date']].reset_index(drop=True)

# Extract the four-digit year from the season tag ('S2023' -> '2023').
df['year'] = df['year'].astype(str).str[1:5]

# Split the match day ("month.day") into its month and day parts.
month_day = df['date'].str.split(pat='.', expand=True)
df['month'] = month_day[0]
df['day'] = month_day[1]

# Assemble a real datetime from the year / month / day pieces.
df['date'] = pd.to_datetime(df[['year', 'month', 'day']])

# Sort chronologically (oldest first).
df = df.sort_values('date', ascending=True)
df['date_ymd'] = df['date'].dt.strftime('%Y%m%d').astype(int)

# Previous calendar day as a YYYYMMDD integer.  Subtract on the datetime, not
# on the integer: 20230401 - 1 would give the non-existent date 20230400.
df['date_before'] = (
    (df['date'] - pd.Timedelta(days=1)).dt.strftime('%Y%m%d').astype(int)
)

df = df[['audience', 'date_ymd', 'date_before']].copy()
df['last_audience'] = df['audience'].shift(1)

# ---------------------------------------------------------------------------
# Feature row for prediction
# ---------------------------------------------------------------------------
# NOTE: joining the fish prices onto the matches via 'date_before' is
# currently disabled; only the most recent match row is paired with the
# fetched price rows.
df_pre = df.tail(1).reset_index(drop=True)
df_aji_ft_pre = pd.concat([df_pre, df_aji_pre], axis=1)
df_aji_ft_pre = df_aji_ft_pre[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
# The latest match's audience is the "last audience" feature for the next one.
df_aji_ft_pre = df_aji_ft_pre.rename(columns={'audience': 'last_audience'})
def outbreak(date):
    """Plot the training audience series together with the model's prediction.

    Reads the module-level ``df``, ``df_aji_pre``, ``df_train`` and
    ``linear_regression`` objects; none of them is modified.

    Args:
        date: Value of the UI input wired to this handler. Any falsy value
            skips the prediction.

    Returns:
        A Matplotlib figure showing the training audience line and the
        predicted point(s), or ``None`` when ``date`` is falsy.
    """
    if not date:
        return None

    # Build the figure without pyplot: plt.figure() registers every figure in
    # pyplot's global state, so a long-running server would accumulate one
    # unclosed figure per click.
    from matplotlib.figure import Figure

    # Pair the most recent match row with the fetched fish-price rows.
    latest_match = df.tail(1).reset_index(drop=True)
    features = pd.concat([latest_match, df_aji_pre], axis=1)
    features = features[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
    # The latest match's audience is the "last audience" feature.
    features = features.rename(columns={'audience': 'last_audience'})

    features['audience_pred'] = linear_regression.predict(features)
    features['date'] = features['date'].astype(int)

    fig = Figure()
    ax = fig.subplots()
    ax.plot(df_train['date'], df_train['audience'], label='original')
    ax.plot(features['date'], features['audience_pred'], '*', label='predict')
    ax.set_title("prediction of audince")
    ax.set_ylabel("audience")
    ax.set_xlabel("Days since Day 0")
    # The series carry labels; draw the legend so they are actually shown.
    ax.legend()
    return fig
# Introductory Markdown shown at the top of the page (user-facing copy, kept
# verbatim).
_INTRO_MARKDOWN = """
# 川崎フロンターレの観客動員数の予測
川崎フロンターレの等々力陸上競技場での試合の観客数を「あじ」の価格をもとに予測する。
## 使用データ
* 東京卸売市場日報
* Football Lab
## 予測ロジック
観客動員数は雨天か否かで左右されると考えられる。そこで雨天の可能性をあじの価格を利用し表した。
一般的に雨天の場合、低気圧の影響で海面が上昇し漁に出ることが難しくなる。
そのため漁獲量が減少し、あじの価格が上昇すると考えられる。
"""

# Page layout: description on top, then one row with the controls on the left
# and the resulting plot on the right.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)
    with gr.Row():
        with gr.Column():
            date_input = gr.Checkbox(label='please input date')
            prediction_btn = gr.Button("predict")
        with gr.Column():
            prediction = gr.Plot(label="時系列プロット")
    # Clicking the button sends the checkbox state to outbreak() and renders
    # the returned figure in the plot component.
    prediction_btn.click(fn=outbreak, inputs=date_input, outputs=prediction)

demo.launch()