Chiragkumar Savani committed on
Commit
42c790d
1 Parent(s): 7155478

Fixes for name changes in columns for various Excel files

Browse files
Files changed (1) hide show
  1. app.py +22 -30
app.py CHANGED
@@ -67,41 +67,34 @@ def get_output_value(value1, value2, is_high=False):
67
  def process_csv(file):
68
  df = pd.read_csv(file)
69
  df.columns = df.columns.str.strip() # Remove trailing spaces from column names
 
 
 
 
 
 
 
 
 
 
70
 
71
  # Add three empty columns between LOW PRICE and CLOSE PRICE
72
- low_price_index = df.columns.get_loc('LOW PRICE')
73
  df.insert(low_price_index + 1, 'HIGH Result', '')
74
  df.insert(low_price_index + 2, 'LOW Result', '')
75
  df.insert(low_price_index + 3, 'Empty Column', '')
76
 
77
  # Convert DATE to datetime
78
- df['DATE'] = pd.to_datetime(df['DATE'], format='%d-%b-%Y')
79
 
80
  # Detect the last Thursday of each month and insert an empty row after it
81
- df['Last_Thursday'] = df['DATE'].apply(last_thursday)
82
- # is_last_thursday = df['DATE'] == df['Last_Thursday']
83
-
84
- # rows = []
85
- # for i, row in df.iterrows():
86
- # rows.append(row)
87
- # if is_last_thursday[i]:
88
- # # Append an empty row (NaN values) after the last Thursday
89
- # empty_row = pd.Series([None] * len(df.columns), index=df.columns)
90
- # rows.append(empty_row)
91
-
92
- # df = pd.DataFrame(rows)
93
- # print(df)
94
- # df = df.drop(columns=['Last_Thursday'])
95
 
96
  indices_to_insert = []
97
 
98
  for i in range(len(df)):
99
- if df.loc[i, 'DATE'] == df.loc[i, 'Last_Thursday']:
100
  indices_to_insert.append(i)
101
-
102
- # Insert empty rows
103
- # for idx in reversed(indices_to_insert):
104
- # df = pd.concat([df.iloc[:idx], pd.DataFrame([{}]), df.iloc[idx:]]).reset_index(drop=True)
105
  df['Separator'] = ''
106
 
107
  # Insert empty rows and update the Last_Thursday column
@@ -109,13 +102,12 @@ def process_csv(file):
109
  # Insert an empty row
110
  df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
111
 
112
- # df['HIGH PRICE'] = df['HIGH PRICE'].str.replace(',', '')
113
- price_columns = ['HIGH PRICE', 'LOW PRICE']
114
  df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
115
 
116
  # Calculate global thresholds for HIGH PRICE and LOW PRICE columns
117
- high_price_threshold = calculate_threshold(df['HIGH PRICE'].max(), is_high_price=True)
118
- low_price_threshold = calculate_threshold(df['LOW PRICE'].min(), is_high_price=False)
119
 
120
  # Process HIGH PRICE and LOW PRICE columns
121
  def process_column(df, style_df, column_name, result_column_name, threshold):
@@ -132,7 +124,7 @@ def process_csv(file):
132
  for j in range(i - 1, -1, -1):
133
  diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
134
  if diff < threshold and not element_used[rows[j]]:
135
- output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'HIGH' in column_name)
136
  # print(f"i {rows[i]} j {rows[j]} {column_name}")
137
  # print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
138
  df.at[rows[j], result_column_name] = output_value
@@ -144,7 +136,7 @@ def process_csv(file):
144
  style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
145
 
146
  # check if there is higher or lower value, if yes, then colorize it
147
- response = check_condition_passed(df, column_name, rows[j], output_value, 'HIGH' in column_name)
148
  if response:
149
  style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
150
  break
@@ -156,11 +148,11 @@ def process_csv(file):
156
  style_df = pd.DataFrame('', index=df.index, columns=df.columns)
157
  output_file = file.replace(".csv", "_processed.xlsx")
158
 
159
- process_column(df, style_df, 'HIGH PRICE', 'HIGH Result', high_price_threshold)
160
- process_column(df, style_df, 'LOW PRICE', 'LOW Result', low_price_threshold)
161
 
162
  # add an empty row before the new month
163
- df['DATE'] = df['DATE'].dt.strftime('%d-%b-%Y')
164
  # df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
165
 
166
  styled_df = df.style.apply(lambda _: style_df, axis=None)
 
67
  def process_csv(file):
68
  df = pd.read_csv(file)
69
  df.columns = df.columns.str.strip() # Remove trailing spaces from column names
70
+ HIGH_NAME = "HIGH PRICE"
71
+ if HIGH_NAME not in df.columns:
72
+ HIGH_NAME = "HIGH"
73
+ LOW_NAME = "LOW PRICE"
74
+ if LOW_NAME not in df.columns:
75
+ LOW_NAME = "LOW"
76
+
77
+ DATE_NAME = "DATE"
78
+ if DATE_NAME not in df.columns:
79
+ DATE_NAME = "Date"
80
 
81
  # Add three empty columns between LOW PRICE and CLOSE PRICE
82
+ low_price_index = df.columns.get_loc(LOW_NAME)
83
  df.insert(low_price_index + 1, 'HIGH Result', '')
84
  df.insert(low_price_index + 2, 'LOW Result', '')
85
  df.insert(low_price_index + 3, 'Empty Column', '')
86
 
87
  # Convert DATE to datetime
88
+ df[DATE_NAME] = pd.to_datetime(df[DATE_NAME], format='%d-%b-%Y')
89
 
90
  # Detect the last Thursday of each month and insert an empty row after it
91
+ df['Last_Thursday'] = df[DATE_NAME].apply(last_thursday)
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  indices_to_insert = []
94
 
95
  for i in range(len(df)):
96
+ if df.loc[i, DATE_NAME] == df.loc[i, 'Last_Thursday']:
97
  indices_to_insert.append(i)
 
 
 
 
98
  df['Separator'] = ''
99
 
100
  # Insert empty rows and update the Last_Thursday column
 
102
  # Insert an empty row
103
  df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
104
 
105
+ price_columns = [HIGH_NAME, LOW_NAME]
 
106
  df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
107
 
108
  # Calculate global thresholds for HIGH PRICE and LOW PRICE columns
109
+ high_price_threshold = calculate_threshold(df[HIGH_NAME].max(), is_high_price=True)
110
+ low_price_threshold = calculate_threshold(df[LOW_NAME].min(), is_high_price=False)
111
 
112
  # Process HIGH PRICE and LOW PRICE columns
113
  def process_column(df, style_df, column_name, result_column_name, threshold):
 
124
  for j in range(i - 1, -1, -1):
125
  diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
126
  if diff < threshold and not element_used[rows[j]]:
127
+ output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower())
128
  # print(f"i {rows[i]} j {rows[j]} {column_name}")
129
  # print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
130
  df.at[rows[j], result_column_name] = output_value
 
136
  style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
137
 
138
  # check if there is higher or lower value, if yes, then colorize it
139
+ response = check_condition_passed(df, column_name, rows[j], output_value, 'high' in column_name.lower())
140
  if response:
141
  style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
142
  break
 
148
  style_df = pd.DataFrame('', index=df.index, columns=df.columns)
149
  output_file = file.replace(".csv", "_processed.xlsx")
150
 
151
+ process_column(df, style_df, HIGH_NAME, 'HIGH Result', high_price_threshold)
152
+ process_column(df, style_df, LOW_NAME, 'LOW Result', low_price_threshold)
153
 
154
  # add an empty row before the new month
155
+ df[DATE_NAME] = df[DATE_NAME].dt.strftime('%d-%b-%Y')
156
  # df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
157
 
158
  styled_df = df.style.apply(lambda _: style_df, axis=None)