masa729406 committed on
Commit
7e798dd
·
1 Parent(s): ac4a734

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -60
app.py CHANGED
@@ -5,67 +5,71 @@ from math import sqrt
5
  import pandas as pd
6
  import numpy as np
7
  from datetime import datetime
 
 
8
  import matplotlib.pyplot as plt
9
- # matplotlib.use("Agg")
10
-
11
- # pip install beautifulsoup4
12
- # pip install requests_html
13
- import requests
14
- from bs4 import BeautifulSoup as bs
15
- from requests_html import AsyncHTMLSession
16
-
17
- # Webページを取得して解析する
18
- load_url = "https://www.football-lab.jp/kyot/match/"
19
- html = requests.get(load_url)
20
- soup = bs(html.content, "html.parser")
21
-
22
- url23 = 'https://www.football-lab.jp/ka-f/match/'
23
- dfs23 = pd.read_html(url23)
24
- url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
25
- dfs22 = pd.read_html(url22)
26
- url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
27
- dfs21 = pd.read_html(url21)
28
- url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
29
- dfs20 = pd.read_html(url20)
30
-
31
- #シーズン毎に分類
32
- res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
33
- res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
34
- res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
35
- res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
36
-
37
- df_tmp = pd.concat([res23, res22, res21, res20])
38
- df = df_tmp
39
-
40
- df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
41
- df = df.query('stadium=="等々力"').reset_index()
42
- df = df.query('audience.notna()', engine='python').reset_index()
43
- df = df[['audience', 'year', 'date']]
44
- #seasonカラムから年を抽出
45
- df["year"] = df["year"].apply(lambda x: str(x)[1:5])
46
- #開催日から月と日を分割
47
- df['month'] = df['date'].str.split(pat='.', expand=True)[0]
48
- df['day'] = df['date'].str.split(pat='.', expand=True)[1]
49
- #数値データを日付データに変換
50
- df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
51
- #日付昇順に並び替える
52
- df = df.sort_values('date', ascending=True)
53
- df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
54
- df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
55
- df["date_ymd"] = df["date_ymd"].astype(int)
56
- df['date_before'] = df['date_ymd'] - 1
57
- df["date_before"] = df["date_before"]
58
- df = df[['audience', 'date_ymd', 'date_before']]
59
- df['last_audience'] = df['audience'].shift(1)
60
-
61
- df_aji = pd.read_csv('fish_price.csv')
62
-
63
- df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
64
-
65
- df_train = df_train.query('date > 20201202')
66
- df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
67
- df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
68
- df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
 
 
69
 
70
  X = df_train.drop('audience', axis=1)
71
  y = df_train['audience']
 
5
  import pandas as pd
6
  import numpy as np
7
  from datetime import datetime
8
+ import matplotlib
9
+ matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
11
+
12
+
13
+ # # pip install beautifulsoup4
14
+ # # pip install requests_html
15
+ # import requests
16
+ # from bs4 import BeautifulSoup as bs
17
+ # from requests_html import AsyncHTMLSession
18
+
19
+ # # Webページを取得して解析する
20
+ # load_url = "https://www.football-lab.jp/kyot/match/"
21
+ # html = requests.get(load_url)
22
+ # soup = bs(html.content, "html.parser")
23
+
24
+ # url23 = 'https://www.football-lab.jp/ka-f/match/'
25
+ # dfs23 = pd.read_html(url23)
26
+ # url22 = 'https://www.football-lab.jp/ka-f/match/?year=2022'
27
+ # dfs22 = pd.read_html(url22)
28
+ # url21 = 'https://www.football-lab.jp/ka-f/match/?year=2021'
29
+ # dfs21 = pd.read_html(url21)
30
+ # url20 = 'https://www.football-lab.jp/ka-f/match/?year=2020'
31
+ # dfs20 = pd.read_html(url20)
32
+
33
+ # #シーズン毎に分類
34
+ # res23 = pd.DataFrame([['S2023']]*len(dfs23[0])).join(dfs23, lsuffix='0')
35
+ # res22 = pd.DataFrame([['S2022']]*len(dfs22[0])).join(dfs22, lsuffix='0')
36
+ # res21 = pd.DataFrame([['S2021']]*len(dfs21[0])).join(dfs21, lsuffix='0')
37
+ # res20 = pd.DataFrame([['S2020']]*len(dfs20[0])).join(dfs20, lsuffix='0')
38
+
39
+ # df_tmp = pd.concat([res23, res22, res21, res20])
40
+ # df = df_tmp
41
+
42
+ # df = df.rename(columns={'会場': 'stadium', 0: 'year', '開催日': 'date', '観客数': 'audience'})
43
+ # df = df.query('stadium=="等々力"').reset_index()
44
+ # df = df.query('audience.notna()', engine='python').reset_index()
45
+ # df = df[['audience', 'year', 'date']]
46
+ # #seasonカラムから年を抽出
47
+ # df["year"] = df["year"].apply(lambda x: str(x)[1:5])
48
+ # #開催日から月と日を分割
49
+ # df['month'] = df['date'].str.split(pat='.', expand=True)[0]
50
+ # df['day'] = df['date'].str.split(pat='.', expand=True)[1]
51
+ # #数値データを日付データに変換
52
+ # df['date'] = pd.to_datetime({'year': df['year'], 'month': df['month'], 'day': df['day']})
53
+ # #日付昇順に並び替える
54
+ # df = df.sort_values('date', ascending=True)
55
+ # df['date_ymd'] = pd.to_datetime(df['date']).dt.strftime('%Y%m%d')
56
+ # df['date_ym'] = pd.to_datetime(df['date']).dt.strftime('%Y%m')
57
+ # df["date_ymd"] = df["date_ymd"].astype(int)
58
+ # df['date_before'] = df['date_ymd'] - 1
59
+ # df["date_before"] = df["date_before"]
60
+ # df = df[['audience', 'date_ymd', 'date_before']]
61
+ # df['last_audience'] = df['audience'].shift(1)
62
+
63
+ # df_aji = pd.read_csv('fish_price.csv')
64
+
65
+ # df_train = pd.merge(df, df_aji, left_on='date_before', right_on='date', how='left')
66
+
67
+ # df_train = df_train.query('date > 20201202')
68
+ # df_train = df_train.drop(['date_before', 'date_ymd'], axis=1)
69
+ # df_train["audience"] = df_train["audience"].str.replace(",", "").astype(int)
70
+ # df_train["last_audience"] = df_train["last_audience"].str.replace(",", "").astype(int)
71
+
72
+ df_train = pd.read_csv('df_train.csv')
73
 
74
  X = df_train.drop('audience', axis=1)
75
  y = df_train['audience']