chEstyleU / app.py
masa729406's picture
Update app.py
aba8ab1
raw
history blame
5.56 kB
# import altair
import gradio as gr
from math import sqrt
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# # pip install beautifulsoup4
# # pip install requests_html
# import requests
# from bs4 import BeautifulSoup as bs
# from requests_html import AsyncHTMLSession
# # Webページを取得して解析する
# load_url = "https://www.football-lab.jp/kyot/match/"
# html = requests.get(load_url)
# soup = bs(html.content, "html.parser")
# url23 = 'https://www.football-lab.jp/ka-f/match/'
# dfs23 = pd.read_html(url23)
# url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
# dfs22 = pd.read_html(url22)
# url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
# dfs21 = pd.read_html(url21)
# url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
# dfs20 = pd.read_html(url20)
# #シーズン毎に分類
# res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
# res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
# res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
# res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
# df_tmp = pd.concat([res23, res22, res21, res20])
# df = df_tmp
# df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
# df = df.query('stadium=="等々力"').reset_index()
# df = df.query('audience.notna()', engine='python').reset_index()
# df = df[['audience', 'year', 'date']]
# #seasonカラムから年を抽出
# df["year"] = df["year"].apply(lambda x: str(x)[1:5])
# #開催日から月と日を分割
# df['month'] = df['date'].str.split(pat='.', expand=True)[0]
# df['day'] = df['date'].str.split(pat='.', expand=True)[1]
# #数値データを日付データに変換
# df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
# #日付昇順に並び替える
# df = df.sort_values('date', ascending=True)
# df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
# df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
# df["date_ymd"] = df["date_ymd"].astype(int)
# df['date_before'] = df['date_ymd'] - 1
# df["date_before"] = df["date_before"]
# df = df[['audience', 'date_ymd', 'date_before']]
# df['last_audience'] = df['audience'].shift(1)
# df_aji = pd.read_csv('fish_price.csv')
# df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
# df_train = df_train.query('date > 20201202')
# df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
# df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
# df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
# Load the pre-built training table from disk. The target is the 'audience'
# column; every remaining column is used as a feature.
df_train = pd.read_csv('df_train.csv')
y = df_train['audience']
X = df_train.drop(columns=['audience'])
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
# Fit the linear model once at module load; `fit` returns the estimator
# itself, so construction and training are chained.
linear_regression = LinearRegression().fit(X, y)
def outbreak(date):
    """Plot the training attendance series together with a new prediction.

    Args:
        date: Value of the Gradio input wired to this callback. Any falsy
            value (for example an unticked checkbox) skips the prediction.

    Returns:
        A Matplotlib ``Figure`` holding the training series and the
        predicted point(s), or ``None`` when ``date`` is falsy.

    NOTE(review): ``d_today``, ``d_tom``, ``get_fish_price_data`` and ``df``
    are read as module-level names but are not defined in the visible part
    of this file -- confirm they exist before deploying.
    """
    if not date:
        return None

    # Imported locally so the early return above needs no plotting backend.
    from matplotlib.figure import Figure

    # The fetch is unconditional: a `__name__ == "__main__"` check inside a
    # callback would leave `df_aji_pre` unbound whenever this module is
    # imported instead of being run as a script.
    df_aji_pre = get_fish_price_data(start_date=d_today, end_date=d_tom)
    # df_aji_pre.to_csv("fish_price_pre.csv", index=False)

    # The most recent row of `df` supplies the "previous attendance" feature.
    df_pre = df.tail(1).reset_index(drop=True)
    df_aji_ft_pre = pd.concat([df_pre, df_aji_pre], axis=1)
    feature_columns = [
        'audience', 'date', 'low_price', 'center_price', 'high_price',
        'quantity',
    ]
    df_aji_ft_pre = df_aji_ft_pre[feature_columns].rename(
        columns={'audience': 'last_audience'}
    )
    # Cast before predicting so the model and the plot see the same integer
    # dates; this is a no-op when the column is already integral.
    df_aji_ft_pre['date'] = df_aji_ft_pre['date'].astype(int)

    df_aji_ft_pre['audience_pred'] = linear_regression.predict(df_aji_ft_pre)

    # Build the figure without pyplot: pyplot keeps a reference to every
    # figure it creates, which accumulates across clicks in a server process.
    fig = Figure()
    ax = fig.subplots()
    ax.plot(df_train['date'], df_train['audience'], label='original')
    ax.plot(
        df_aji_ft_pre['date'],
        df_aji_ft_pre['audience_pred'],
        '*',
        label='predict',
    )
    ax.set_title("prediction of audience")
    ax.set_ylabel("audience")
    # NOTE(review): the x values are the `date` columns, so this label looks
    # like a leftover from another project -- confirm the intended wording.
    ax.set_xlabel("Days since Day 0")
    # Without a legend the `label=` arguments above are never displayed.
    ax.legend()
    return fig
# Introductory markdown rendered at the top of the page (text kept verbatim).
_INTRO_MARKDOWN = """
# 川崎フロンターレの観客動員数の予測
川崎フロンターレの等々力陸上競技場での試合の観客数を「あじ」の価格をもとに予測する。
## 使用データ
* 東京卸売市場日報
* Football Lab
## 予測ロジック
観客動員数は雨天か否かで左右されると考えられる。そこで雨天の可能性をあじの価格を利用し表した。
一般的に雨天の場合、低気圧の影響で海面が上昇し漁に出ることが難しくなる。
そのため漁獲量が減少し、あじの価格が上昇すると考えられる。
"""

# Page layout: description on top, then one row with the controls in the
# left column and the resulting plot in the right column.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)
    with gr.Row():
        with gr.Column():
            date_input = gr.Checkbox(label='please input date')
            prediction_btn = gr.Button("predict")
        with gr.Column():
            prediction = gr.Plot(label="時系列プロット")
    # Clicking the button passes the checkbox state to `outbreak` and shows
    # whatever it returns in the plot component.
    prediction_btn.click(fn=outbreak, inputs=date_input, outputs=prediction)

demo.launch()