Spaces:
Runtime error
Runtime error
masa729406
committed on
Commit
·
7e798dd
1
Parent(s):
ac4a734
Update app.py
Browse files
app.py
CHANGED
@@ -5,67 +5,71 @@ from math import sqrt
|
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
from datetime import datetime
|
|
|
|
|
8 |
import matplotlib.pyplot as plt
|
9 |
-
|
10 |
-
|
11 |
-
# pip install beautifulsoup4
|
12 |
-
# pip install requests_html
|
13 |
-
import requests
|
14 |
-
from bs4 import BeautifulSoup as bs
|
15 |
-
from requests_html import AsyncHTMLSession
|
16 |
-
|
17 |
-
# Webページを取得して解析する
|
18 |
-
load_url = "https://www.football-lab.jp/kyot/match/"
|
19 |
-
html = requests.get(load_url)
|
20 |
-
soup = bs(html.content, "html.parser")
|
21 |
-
|
22 |
-
url23 = 'https://www.football-lab.jp/ka-f/match/'
|
23 |
-
dfs23 = pd.read_html(url23)
|
24 |
-
url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
|
25 |
-
dfs22 = pd.read_html(url22)
|
26 |
-
url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
|
27 |
-
dfs21 = pd.read_html(url21)
|
28 |
-
url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
|
29 |
-
dfs20 = pd.read_html(url20)
|
30 |
-
|
31 |
-
#シーズン毎に分類
|
32 |
-
res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
|
33 |
-
res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
|
34 |
-
res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
|
35 |
-
res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
|
36 |
-
|
37 |
-
df_tmp = pd.concat([res23, res22, res21, res20])
|
38 |
-
df = df_tmp
|
39 |
-
|
40 |
-
df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
|
41 |
-
df = df.query('stadium=="等々力"').reset_index()
|
42 |
-
df = df.query('audience.notna()', engine='python').reset_index()
|
43 |
-
df = df[['audience', 'year', 'date']]
|
44 |
-
#seasonカラムから年を抽出
|
45 |
-
df["year"] = df["year"].apply(lambda x: str(x)[1:5])
|
46 |
-
#開催日から月と日を分割
|
47 |
-
df['month'] = df['date'].str.split(pat='.', expand=True)[0]
|
48 |
-
df['day'] = df['date'].str.split(pat='.', expand=True)[1]
|
49 |
-
#数値データを日付データに変換
|
50 |
-
df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
|
51 |
-
#日付昇順に並び替える
|
52 |
-
df = df.sort_values('date', ascending=True)
|
53 |
-
df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
|
54 |
-
df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
|
55 |
-
df["date_ymd"] = df["date_ymd"].astype(int)
|
56 |
-
df['date_before'] = df['date_ymd'] - 1
|
57 |
-
df["date_before"] = df["date_before"]
|
58 |
-
df = df[['audience', 'date_ymd', 'date_before']]
|
59 |
-
df['last_audience'] = df['audience'].shift(1)
|
60 |
-
|
61 |
-
df_aji = pd.read_csv('fish_price.csv')
|
62 |
-
|
63 |
-
df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
|
64 |
-
|
65 |
-
df_train = df_train.query('date > 20201202')
|
66 |
-
df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
|
67 |
-
df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
|
68 |
-
df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
|
|
|
|
|
69 |
|
70 |
X = df_train.drop('audience', axis=1)
|
71 |
y = df_train['audience']
|
|
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
from datetime import datetime
|
8 |
+
import matplotlib
|
9 |
+
matplotlib.use("Agg")
|
10 |
import matplotlib.pyplot as plt
|
11 |
+
|
12 |
+
|
13 |
+
# # pip install beautifulsoup4
|
14 |
+
# # pip install requests_html
|
15 |
+
# import requests
|
16 |
+
# from bs4 import BeautifulSoup as bs
|
17 |
+
# from requests_html import AsyncHTMLSession
|
18 |
+
|
19 |
+
# # Webページを取得して解析する
|
20 |
+
# load_url = "https://www.football-lab.jp/kyot/match/"
|
21 |
+
# html = requests.get(load_url)
|
22 |
+
# soup = bs(html.content, "html.parser")
|
23 |
+
|
24 |
+
# url23 = 'https://www.football-lab.jp/ka-f/match/'
|
25 |
+
# dfs23 = pd.read_html(url23)
|
26 |
+
# url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
|
27 |
+
# dfs22 = pd.read_html(url22)
|
28 |
+
# url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
|
29 |
+
# dfs21 = pd.read_html(url21)
|
30 |
+
# url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
|
31 |
+
# dfs20 = pd.read_html(url20)
|
32 |
+
|
33 |
+
# #シーズン毎に分類
|
34 |
+
# res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
|
35 |
+
# res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
|
36 |
+
# res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
|
37 |
+
# res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
|
38 |
+
|
39 |
+
# df_tmp = pd.concat([res23, res22, res21, res20])
|
40 |
+
# df = df_tmp
|
41 |
+
|
42 |
+
# df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
|
43 |
+
# df = df.query('stadium=="等々力"').reset_index()
|
44 |
+
# df = df.query('audience.notna()', engine='python').reset_index()
|
45 |
+
# df = df[['audience', 'year', 'date']]
|
46 |
+
# #seasonカラムから年を抽出
|
47 |
+
# df["year"] = df["year"].apply(lambda x: str(x)[1:5])
|
48 |
+
# #開催日から月と日を分割
|
49 |
+
# df['month'] = df['date'].str.split(pat='.', expand=True)[0]
|
50 |
+
# df['day'] = df['date'].str.split(pat='.', expand=True)[1]
|
51 |
+
# #数値データを日付データに変換
|
52 |
+
# df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
|
53 |
+
# #日付昇順に並び替える
|
54 |
+
# df = df.sort_values('date', ascending=True)
|
55 |
+
# df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
|
56 |
+
# df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
|
57 |
+
# df["date_ymd"] = df["date_ymd"].astype(int)
|
58 |
+
# df['date_before'] = df['date_ymd'] - 1
|
59 |
+
# df["date_before"] = df["date_before"]
|
60 |
+
# df = df[['audience', 'date_ymd', 'date_before']]
|
61 |
+
# df['last_audience'] = df['audience'].shift(1)
|
62 |
+
|
63 |
+
# df_aji = pd.read_csv('fish_price.csv')
|
64 |
+
|
65 |
+
# df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
|
66 |
+
|
67 |
+
# df_train = df_train.query('date > 20201202')
|
68 |
+
# df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
|
69 |
+
# df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
|
70 |
+
# df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
|
71 |
+
|
72 |
+
df_train = pd.read_csv('df_train.csv')
|
73 |
|
74 |
X = df_train.drop('audience', axis=1)
|
75 |
y = df_train['audience']
|