mmmapms committed
Commit 7fe8c1e · verified · 1 Parent(s): 0f35109

Update app.py

Files changed (1):
  app.py (+54 -46)
app.py CHANGED
@@ -6,24 +6,57 @@ import requests
 from io import StringIO
 import base64
 
-#@st.cache_data(ttl=86400) # TTL is set for 86400 seconds (24 hours)
 def load_data_predictions(github_token):
-    url = 'https://api.github.com/repos/mmmapms/Forecast_DAM_V2/contents/Predictions.csv'
-    headers = {'Authorization': f'token {github_token}'}
-    response = requests.get(url, headers=headers)
-    st.write("Status code: ", response.status_code)
-    st.write("Response JSON:", response.json())
-    if response.status_code == 200:
-        file_content = response.json()['content']
-        decoded_content = base64.b64decode(file_content).decode('utf-8')
-        csv_content = StringIO(decoded_content)
-        df = pd.read_csv(csv_content, encoding='utf-8')
-        df = df.rename(columns={
+    """
+    Fetch Predictions.csv from the GitHub 'Forecast_DAM_V2' repository
+    via the blob SHA. This works for files larger than 1 MB.
+    """
+    owner = "mmmapms"
+    repo = "Forecast_DAM_V2"
+    file_path = "Predictions.csv"
+
+    # 1. Get file metadata (including SHA) from the "contents" endpoint
+    url_contents = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
+    headers_contents = {
+        "Authorization": f"token {github_token}",
+    }
+    response_contents = requests.get(url_contents, headers=headers_contents)
+    st.write("Status code (contents):", response_contents.status_code)
+    st.write("Response JSON (contents):", response_contents.json())
+
+    if response_contents.status_code != 200:
+        st.error("Failed to download file metadata. Check token and file path.")
+        return pd.DataFrame(), pd.DataFrame()
+
+    json_data = response_contents.json()
+    # We expect "sha" to be present for the file
+    if "sha" not in json_data:
+        st.error("No 'sha' field found in JSON response. File might be missing.")
+        return pd.DataFrame(), pd.DataFrame()
+
+    sha = json_data["sha"]
+
+    # 2. Use the "blobs" endpoint to fetch the raw file content
+    url_blob = f"https://api.github.com/repos/{owner}/{repo}/git/blobs/{sha}"
+    headers_blob = {
+        "Authorization": f"token {github_token}",
+        "Accept": "application/vnd.github.v3.raw",  # crucial for large files
+    }
+    response_blob = requests.get(url_blob, headers=headers_blob)
+
+    if response_blob.status_code != 200:
+        st.error(f"Failed to fetch raw blob. Status code: {response_blob.status_code}")
+        return pd.DataFrame(), pd.DataFrame()
+
+    # The response body is the raw CSV text
+    csv_text = response_blob.text
+    csv_content = StringIO(csv_text)
+
+    # 3. Read the CSV into a Pandas DataFrame
+    df = pd.read_csv(csv_content, encoding='utf-8')
+
+    # 4. Rename columns as needed
+    df = df.rename(columns={
         'Price': 'Real Price',
         'DNN1': 'Neural Network 1',
         'DNN2': 'Neural Network 2',
@@ -36,48 +69,23 @@ def load_data_predictions(github_token):
         'Persis': 'Persistence Model',
         'Hybrid_Ensemble': 'Hybrid Ensemble',
         'Weighted_Ensemble': 'Weighted Ensemble'
-        })
-        df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
-        df_filtered = df.dropna(subset=['Real Price'])
-        return df, df_filtered
-    else:
-        st.error("Failed to download data. Please check your GitHub token and repository details.")
-        return pd.DataFrame(), pd.DataFrame()
+    })
+
+    # 5. Parse dates and filter
+    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
+    df_filtered = df.dropna(subset=['Real Price'])
+
+    return df, df_filtered
 
 
 github_token = st.secrets["GitHub_Token_Margarida"]
 
 if github_token:
     df, df_filtered = load_data_predictions(github_token)
-
 else:
     st.warning("Please enter your GitHub Personal Access Token to proceed.")
 
 
-#@st.cache_data
-#def load_data_predictions():
-#    df = pd.read_csv('Predictions.csv')
-#    df = df.rename(columns={
-#        'Price': 'Real Price',
-#        'DNN1': 'Neural Network 1',
-#        'DNN2': 'Neural Network 2',
-#        'DNN3': 'Neural Network 3',
-#        'DNN4': 'Neural Network 4',
-#        'DNN_Ensemble': 'Neural Network Ensemble',
-#        'LEAR56': 'Regularized Linear Model 1',
-#        'LEAR84': 'Regularized Linear Model 2',
-#        'LEAR112': 'Regularized Linear Model 3',
-#        'LEAR730': 'Regularized Linear Model 4',
-#        'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
-#        'Persis': 'Persistence Model',
-#        'Hybrid_Ensemble': 'Hybrid Ensemble'
-#    })
-#    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
-#    df_filtered = df.dropna(subset=['Real Price'])
-#    return df, df_filtered
-
-#df, df_filtered = load_data_predictions()
-
 min_date_allowed_pred = df_filtered['Date'].min().date()
 max_date_allowed_pred = df_filtered['Date'].max().date()
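
One caveat on the new error handling: every failure path in load_data_predictions returns two empty DataFrames, yet the trailing context lines still index df_filtered['Date'], which raises a KeyError on an empty frame with no columns. A minimal guard, sketched here and not part of this commit (st.stop() is Streamlit's standard call for halting a script run early), could sit just before the date-range lines:

    # Sketch only, not part of this commit: bail out early if the GitHub
    # fetch failed and load_data_predictions returned empty DataFrames.
    if df_filtered.empty:
        st.error("No prediction data available; cannot compute the date range.")
        st.stop()  # halts this run before df_filtered['Date'] raises a KeyError

    min_date_allowed_pred = df_filtered['Date'].min().date()
    max_date_allowed_pred = df_filtered['Date'].max().date()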
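
Separately, the removed code carried a commented-out @st.cache_data(ttl=86400) decorator that this commit drops entirely. Once the debug st.write calls are removed, the loader could be cached again so the GitHub API is hit at most once per 24 hours per token. A sketch under that assumption, with the function body left as in the diff:

    import streamlit as st

    # Sketch only: re-enable the daily cache the old code had commented out.
    # 86400 seconds = 24 hours; the token argument becomes part of the cache key.
    @st.cache_data(ttl=86400)
    def load_data_predictions(github_token):
        ...  # body as in the diff above, with the st.write debug calls removed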