File size: 5,845 Bytes
b3c9d45
763a979
4f59094
 
7e798dd
b3c9d45
7e798dd
b3c9d45
4f59094
b3c9d45
 
7e798dd
b3c9d45
7e798dd
b3c9d45
 
 
7e798dd
b3c9d45
 
 
359e36e
b3c9d45
359e36e
 
 
b3c9d45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a2c3c8
4f59094
b3c9d45
 
 
 
 
 
 
 
 
 
359e36e
 
 
 
 
 
 
b3c9d45
 
 
359e36e
 
 
 
4f59094
359e36e
 
 
7e493b4
4f59094
 
 
359e36e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a2c3c8
359e36e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import altair

import gradio as gr
from math import sqrt
import matplotlib

matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import datetime

from sklearn.linear_model import LinearRegression
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler

import requests
from bs4 import BeautifulSoup as bs
from requests_html import AsyncHTMLSession

# Fit the audience regressor once, at import time, on the bundled training CSV.
# Every column except 'audience' is used as a feature.
df_train = pd.read_csv('df_train.csv')
y = df_train['audience']
X = df_train.drop(columns='audience')
linear_regression = LinearRegression()
linear_regression.fit(X, y)
# fit() returns the estimator itself, so `model` is the same fitted object.
model = linear_regression

# Read the clock once so that today / tomorrow / yesterday always describe
# consecutive days, even if the module is loaded right around midnight.
d_today = datetime.date.today()
d_tom = d_today + datetime.timedelta(days=1)

# Smoke check: fetch prices for the window yesterday -> today.
d_y = d_today - datetime.timedelta(days=1)
if __name__ == "__main__":
    start_date = d_y
    end_date = d_today
    # NOTE(review): get_fish_price_data is neither defined nor imported in the
    # visible part of this file -- confirm where it is supposed to come from.
    # NOTE(review): df_aji_pre is only bound when the file runs as a script,
    # yet later module-level code and outbreak() read it unconditionally.
    df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
    # The date column is cast to int; presumably a yyyymmdd value -- TODO confirm.
    df_aji_pre['date'] = df_aji_pre['date'].astype(int)

# if __name__ == "__main__":
#     start_date = d_today
#     end_date = d_tom
#     df_aji_pre = get_fish_price_data(start_date=start_date, end_date=end_date)
#     df_aji_pre['date'] = df_aji_pre['date'].astype(int)

# Match list page scraped for the 2023 season; read_html returns every table
# found on the page as a list of DataFrames.
url23 = 'https://www.football-lab.jp/ka-f/match/'
dfs23 = pd.read_html(url23)

# Tag each row with its season label.
# Join against the first table only: that is the table used to size the label
# column, and recent pandas versions raise when a suffix is combined with a
# list of frames.
res23 = pd.DataFrame([['S2023']] * len(dfs23[0])).join(dfs23[0], lsuffix='0')

df = res23

df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
# Keep only rows for the "等々力" venue that already have an audience figure.
df = df.query('stadium=="等々力"').reset_index()
df = df.query('audience.notna()', engine='python').reset_index()
# Explicit copy so the column assignments below do not write into a slice.
df = df[['audience', 'year', 'date']].copy()
# Extract the year from the season label ("S2023" -> "2023").
df["year"] = df["year"].apply(lambda x: str(x)[1:5])
# Split the match date ("month.day") into month and day.
_date_parts = df['date'].str.split(pat='.', expand=True)
df['month'] = _date_parts[0]
df['day'] = _date_parts[1]
# Convert the numeric parts into a real datetime column.
df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
# Sort by date, ascending.
df = df.sort_values('date', ascending=True)
df['date_ymd'] = df['date'].dt.strftime('%Y%m%d').astype(int)
# Previous calendar day as a yyyymmdd integer. Computed on the datetime, not
# as `date_ymd - 1`, which yields invalid values such as 20230400 on the
# first day of a month.
df['date_before'] = (df['date'] - pd.Timedelta(days=1)).dt.strftime('%Y%m%d').astype(int)
df = df[['audience', 'date_ymd', 'date_before']].copy()
# Audience of the previous match in date order (NaN for the earliest row).
df['last_audience'] = df['audience'].shift(1)

# df_pre = pd.merge(df, df_aji_pre, left_on='date_before', right_on='date', how='left')

# df_pre = df_pre.drop(['date_before', 'date_ymd'], axis=1)
# df_pre["audience"] = df_pre["audience"].str.replace(",", "").astype(int)
# df_pre["last_audience"] = df_pre["last_audience"].str.replace(",", "").astype(int)

# start_date = int(start_date)
# df_pre = df.query('date <= start_date')


# Most recent match row, re-indexed from zero.
df_pre = df.tail(1).reset_index(drop=True)
# Place the match row and the price row side by side (concat aligns on the
# index; presumably df_aji_pre holds a single row at index 0 -- TODO confirm),
# then keep only the columns that are fed to the model.
df_aji_ft_pre = pd.concat([df_pre, df_aji_pre], axis=1)[
    ['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']
]
# The latest known audience becomes the 'last_audience' feature.
df_aji_ft_pre = df_aji_ft_pre.rename(
    columns={'audience': 'last_audience', 0: 'year', '開催日': 'date', '観客数': 'audience'}
)

def outbreak(date):
    """Plot the training audience series together with the model's prediction.

    Reads the module-level ``df``, ``df_aji_pre``, ``df_train`` and
    ``linear_regression`` objects.

    Args:
        date: Truthy flag that enables the prediction. The UI in this file
            wires a checkbox to this parameter, so it is a bool in practice.

    Returns:
        A matplotlib figure with the training series and the predicted point,
        or ``None`` when ``date`` is falsy.
    """
    if not date:
        return None

    # Latest match row next to the fish-price row, reduced to the model inputs.
    latest_match = df.tail(1).reset_index(drop=True)
    features = pd.concat([latest_match, df_aji_pre], axis=1)
    features = features[['audience', 'date', 'low_price', 'center_price', 'high_price', 'quantity']]
    # The latest known audience is the 'last_audience' feature.
    features = features.rename(columns={'audience': 'last_audience'})

    # predict() runs before the new column is attached, so it only sees the
    # six feature columns.
    features['audience_pred'] = linear_regression.predict(features)
    features['date'] = features['date'].astype(int)

    fig, ax = plt.subplots()
    ax.plot(df_train['date'], df_train['audience'], label='original')
    ax.plot(features['date'], features['audience_pred'], '*', label='predict')
    ax.set_title("prediction of audience")
    ax.set_ylabel("audience")
    ax.set_xlabel("Days since Day 0")
    # Without a legend the line labels above are never shown.
    ax.legend()
    # Deregister the figure from pyplot so repeated clicks do not accumulate
    # open figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig

# Page layout: a static description on top, then one row with the controls
# (checkbox + button) on the left and the plot output on the right.
with gr.Blocks() as demo:
    gr.Markdown(
        """
                            # 川崎フロンターレの観客動員数の予測
                             川崎フロンターレの等々力陸上競技場での試合の観客数を「あじ」の価格をもとに予測する。
                            ## 使用データ
                             * 東京卸売市場日報
                             * Football Lab 
                            ## 予測ロジック
                            観客動員数は雨天か否かで左右されると考えられる。そこで雨天の可能性をあじの価格を利用し表した。
                            一般的に雨天の場合、低気圧の影響で海面が上昇し漁に出ることが難しくなる。
                            そのため漁獲量が減少し、あじの価格が上昇すると考えられる。
                             """
    )
    with gr.Row():
        with gr.Column():
            date_input = gr.Checkbox(label='please input date')
            prediction_btn = gr.Button(value="predict")
        with gr.Column():
            prediction = gr.Plot(label="時系列プロット")
    # Clicking the button passes the checkbox state to outbreak() and shows
    # the figure it returns in the plot component.
    prediction_btn.click(fn=outbreak, inputs=date_input, outputs=prediction)

# Start the web server as soon as the module is executed.
demo.launch()