Chiragkumar Savani committed on
Commit
7ad44eb
1 Parent(s): 47b49e7

Finally grouping of a month and checking final condition

Browse files
Files changed (1) hide show
  1. app.py +129 -95
app.py CHANGED
@@ -1,24 +1,25 @@
1
- import os
2
  import pandas as pd
3
- import numpy as np
4
  import gradio as gr
5
- from openpyxl import Workbook, load_workbook
6
- from openpyxl.styles import PatternFill
7
  import random
8
 
 
9
  def generate_random_light_color():
10
  min_brightness = 0.7
11
  while True:
12
  r, g, b = [random.randint(128, 255) for _ in range(3)]
13
  brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255
14
  if brightness >= min_brightness:
15
- return f'{r:02X}{g:02X}{b:02X}'
 
16
 
 
 
 
 
 
 
17
  def calculate_threshold(value, is_high_price=True):
18
- """
19
- Determine the threshold based on the provided ranges.
20
- The function checks if it's for High Price or Low Price and returns the respective threshold.
21
- """
22
  if 0 <= value <= 200:
23
  return 0.20
24
  elif 201 <= value <= 500:
@@ -35,107 +36,140 @@ def calculate_threshold(value, is_high_price=True):
35
  return 5.0
36
  else:
37
  return 5.0
 
 
 
 
 
 
 
38
 
39
- def process_section(ws, start_row, end_row, col_index1, col_index2, output_col_index1, output_col_index2, high_threshold, low_threshold):
40
- colored_pairs = set()
41
-
42
- # Process first column
43
- for i in range(end_row, start_row - 1, -1):
44
- for j in range(i - 1, start_row - 1, -1):
45
- cell_value_i = ws.cell(i, col_index1).value
46
- cell_value_j = ws.cell(j, col_index1).value
47
- if not (isinstance(cell_value_i, (int, float)) and isinstance(cell_value_j, (int, float))):
48
- continue
49
- if abs(cell_value_i - cell_value_j) <= high_threshold:
50
- if (i, col_index1) not in colored_pairs and (j, col_index1) not in colored_pairs:
51
- color = generate_random_light_color()
52
- fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
53
- ws.cell(i, col_index1).fill = fill
54
- ws.cell(j, col_index1).fill = fill
55
- colored_pairs.add((i, col_index1))
56
- colored_pairs.add((j, col_index1))
57
-
58
- output_value1 = max(int(cell_value_i), int(cell_value_j)) + 1
59
- ws.cell(j, output_col_index1).value = output_value1
60
-
61
- for k in range(j - 1, start_row - 1, -1):
62
- cell_value_k = ws.cell(k, col_index1).value
63
- if isinstance(cell_value_k, (int, float)) and cell_value_k > output_value1:
64
- if ws.cell(k, 6).value > max(cell_value_i, cell_value_j):
65
- ws.cell(j, output_col_index1).fill = fill
66
- break
67
 
68
- # Process second column
69
- for i in range(end_row, start_row - 1, -1):
70
- for j in range(i - 1, start_row - 1, -1):
71
- cell_value_i = ws.cell(i, col_index2).value
72
- cell_value_j = ws.cell(j, col_index2).value
73
- if not (isinstance(cell_value_i, (int, float)) and isinstance(cell_value_j, (int, float))):
74
- continue
75
- if abs(cell_value_i - cell_value_j) <= low_threshold:
76
- if (i, col_index2) not in colored_pairs and (j, col_index2) not in colored_pairs:
77
- color = generate_random_light_color()
78
- fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
79
- ws.cell(i, col_index2).fill = fill
80
- ws.cell(j, col_index2).fill = fill
81
- colored_pairs.add((i, col_index2))
82
- colored_pairs.add((j, col_index2))
83
-
84
- output_value2 = min(int(cell_value_i), int(cell_value_j)) - 1
85
- ws.cell(j, output_col_index2).value = output_value2
86
-
87
- for k in range(j - 1, start_row - 1, -1):
88
- cell_value_k = ws.cell(k, col_index2).value
89
- if isinstance(cell_value_k, (int, float)) and cell_value_k < output_value2:
90
- if ws.cell(k, 6).value < min(cell_value_i, cell_value_j):
91
- ws.cell(j, output_col_index2).fill = fill
92
- break
93
 
94
- def highlight_pairs_with_rgb(input_file):
95
- df = pd.read_csv(input_file)
96
- df.columns = df.columns.str.strip()
97
 
98
- # Convert to Excel and add three empty columns
99
- excel_filename = input_file.replace(".csv", ".xlsx")
100
- with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
101
- # Reorder and create Excel file with empty columns
102
- df_new = pd.concat([df.iloc[:, :8], pd.DataFrame(columns=['', '', '']), df.iloc[:, 8:]], axis=1)
103
- df_new.to_excel(writer, index=False, sheet_name="Sheet1")
104
 
105
- wb = load_workbook(excel_filename)
106
- ws = wb.active
 
 
 
 
 
 
107
 
108
- # Determine thresholds for High Price (Column 6) and Low Price (Column 7)
109
- high_price_col = df["HIGH PRICE"].dropna()
110
- low_price_col = df["LOW PRICE"].dropna()
111
 
112
- high_threshold = calculate_threshold(high_price_col.max(), is_high_price=True)
113
- low_threshold = calculate_threshold(low_price_col.min(), is_high_price=False)
 
 
 
 
 
114
 
115
- last_row = ws.max_row
116
- col_index1, col_index2 = 6, 7 # HIGH PRICE and LOW PRICE columns in Excel
117
- output_col_index1, output_col_index2 = 9, 10 # Empty columns added earlier
118
 
119
- ws.cell(1, output_col_index1).value = "High Result"
120
- ws.cell(1, output_col_index2).value = "Low Result"
121
 
122
- start_row = 2
123
- while start_row <= last_row:
124
- end_row = start_row
125
- while end_row <= last_row and ws.cell(end_row, 1).value is not None:
126
- end_row += 1
127
- end_row -= 1
128
 
129
- process_section(ws, start_row, end_row, col_index1, col_index2, output_col_index1, output_col_index2, high_threshold, low_threshold)
130
- start_row = end_row + 2
 
 
131
 
132
- wb.save(excel_filename)
133
- return excel_filename
 
 
134
 
135
- def gradio_interface(input_file):
136
- output_file = highlight_pairs_with_rgb(input_file.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  return output_file
138
 
 
 
 
 
139
  # Gradio app interface
140
  iface = gr.Interface(
141
  fn=gradio_interface,
 
 
1
  import pandas as pd
2
+ from pandas.tseries.offsets import MonthEnd
3
  import gradio as gr
 
 
4
  import random
5
 
6
def generate_random_light_color(min_brightness=0.7):
    """Return a random light colour as a lowercase ``#rrggbb`` hex string.

    Every RGB channel is drawn uniformly from 128..255. Candidates are
    re-drawn until their weighted brightness,
    ``(0.299 * r + 0.587 * g + 0.114 * b) / 255``, reaches ``min_brightness``.

    Args:
        min_brightness: Lowest accepted brightness. Defaults to 0.7, the
            value that was previously hard-coded. Values close to 1.0 are
            accepted but need many more draws before a candidate qualifies.

    Returns:
        A seven-character string such as ``'#c8e6a0'``.

    Raises:
        ValueError: If ``min_brightness`` is not below 1.0. The brightness of
            a candidate never exceeds 1.0, so such a floor could make the
            retry loop spin forever.
    """
    if not min_brightness < 1.0:
        raise ValueError("min_brightness must be below 1.0")

    while True:
        r, g, b = (random.randint(128, 255) for _ in range(3))
        brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255
        if brightness >= min_brightness:
            return f'#{r:02x}{g:02x}{b:02x}'
14
+
15
 
16
def set_cell_color(styles_df, index, column, hex_color):
    """Write a CSS background rule into one cell of a style frame.

    The cell at (``index``, ``column``) of ``styles_df`` is overwritten in
    place with ``'background-color: <hex_color>'``; the same frame object is
    returned so calls can be chained or reassigned.
    """
    css_rule = 'background-color: {}'.format(hex_color)
    styles_df.at[index, column] = css_rule
    return styles_df
20
+
21
+ # Function to calculate the threshold
22
  def calculate_threshold(value, is_high_price=True):
 
 
 
 
23
  if 0 <= value <= 200:
24
  return 0.20
25
  elif 201 <= value <= 500:
 
36
  return 5.0
37
  else:
38
  return 5.0
39
+
40
def last_thursday(dt):
    """Return the last Thursday of the month that contains ``dt``.

    ``dt`` is rolled forward to its month end (a date already on the month
    end stays put), then stepped back by however many days separate that
    day from the preceding Thursday (zero when the month ends on a Thursday).
    """
    thursday = 3  # Monday is 0 in ``weekday()`` numbering
    month_end = dt + MonthEnd(0)
    days_back = (month_end.weekday() - thursday) % 7
    step = pd.Timedelta(days=days_back)
    return month_end - step
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
def check_condition_passed(df, column_name, max_index, output_value, is_high=True):
    """Tell whether any earlier row lies beyond ``output_value``.

    Only rows whose index label is strictly below ``max_index`` are
    considered.

    Args:
        df: Frame holding the column to inspect.
        column_name: Name of the column whose values are compared.
        max_index: Exclusive upper bound on the index labels examined.
        output_value: Value the column entries are compared against.
        is_high: When true, look for entries greater than ``output_value``;
            otherwise look for entries smaller than it.

    Returns:
        ``True`` if at least one qualifying row exists, else ``False``.
        Missing values never qualify because comparisons with NaN are false.
    """
    values = df[column_name]
    beyond = values > output_value if is_high else values < output_value
    # Reduce the boolean mask directly instead of materialising a filtered
    # frame just to test whether it is empty.
    return bool(((df.index < max_index) & beyond).any())
58
+
59
def get_output_value(value1, value2, is_high=False):
    """Derive the result value for a pair of prices.

    Both inputs are truncated with ``int()``. For the high side the result
    is one above the larger of the two; otherwise it is one below the
    smaller of the two.
    """
    first, second = int(value1), int(value2)
    return max(first, second) + 1 if is_high else min(first, second) - 1
 
 
 
 
 
 
 
 
 
64
 
 
 
 
65
 
66
def _mark_close_pairs(df, style_df, column_name, result_column_name, threshold):
    """Pair up close values of one price column inside each row group.

    Groups are the runs of rows delimited by separator rows (rows whose
    'Separator' cell equals 'Separator'). Within a group, rows are scanned
    from the bottom up; a row is paired with the nearest unused row above it
    whose value differs by less than ``threshold``. For every pair:

    * the derived result value is written to ``result_column_name`` on the
      upper row of the pair (``df`` is modified in place);
    * both price cells get the same random light background in ``style_df``;
    * the result cell gets that background too when
      ``check_condition_passed`` finds a row above the pair lying beyond the
      result value.
    """
    is_high = 'HIGH' in column_name
    element_used = [False] * len(df)

    # A new group starts at every separator row, so the running count of
    # separators seen so far serves as the group key.
    group_keys = (df['Separator'] == 'Separator').cumsum()
    for _, group in df.groupby(group_keys):
        rows = group[group['Separator'] != 'Separator'].index.tolist()
        for i in range(len(rows) - 1, -1, -1):
            if element_used[rows[i]]:
                continue
            for j in range(i - 1, -1, -1):
                if element_used[rows[j]]:
                    continue
                value_i = df.at[rows[i], column_name]
                value_j = df.at[rows[j], column_name]
                # Written as "not <" so that a NaN difference never pairs.
                if not abs(value_i - value_j) < threshold:
                    continue

                output_value = get_output_value(value_i, value_j, is_high)
                df.at[rows[j], result_column_name] = output_value
                element_used[rows[i]] = True
                element_used[rows[j]] = True

                color = generate_random_light_color()
                set_cell_color(style_df, index=rows[i], column=column_name, hex_color=color)
                set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)

                if check_condition_passed(df, column_name, rows[j], output_value, is_high):
                    set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)

                # Each row takes part in at most one pair, so stop scanning
                # once a partner has been found.
                # NOTE(review): confirm against upstream app.py that the
                # original `break` sits at this level.
                break


def process_csv(file):
    """Read a price CSV, highlight close HIGH/LOW pairs and write a styled workbook.

    Steps:

    1. Load the CSV and strip whitespace from the column names.
    2. Insert 'HIGH Result', 'LOW Result' and 'Empty Column' right after
       'LOW PRICE'.
    3. Parse 'DATE' (``%d-%b-%Y``) and insert a separator row in front of
       every row whose date is the last Thursday of its month.
    4. Pair up close values in 'HIGH PRICE' and in 'LOW PRICE' within each
       separator-delimited group, recording results and cell colours.
    5. Write the styled frame to an ``.xlsx`` file next to the input.

    Args:
        file: Path of the input CSV as a string.

    Returns:
        Path of the written workbook: the input path with a trailing
        ``.csv`` (any letter case) removed and ``_processed.xlsx`` appended.
    """
    df = pd.read_csv(file)
    df.columns = df.columns.str.strip()  # headers may carry stray spaces

    low_price_index = df.columns.get_loc('LOW PRICE')
    new_columns = ('HIGH Result', 'LOW Result', 'Empty Column')
    for offset, new_column in enumerate(new_columns, start=1):
        df.insert(low_price_index + offset, new_column, '')

    df['DATE'] = pd.to_datetime(df['DATE'], format='%d-%b-%Y')
    df['Last_Thursday'] = df['DATE'].apply(last_thursday)

    # Positions of the rows dated on the last Thursday of their month.
    on_last_thursday = (df['DATE'] == df['Last_Thursday']).tolist()
    separator_positions = [pos for pos, hit in enumerate(on_last_thursday) if hit]

    df['Separator'] = ''
    # Work from the bottom up so earlier positions stay valid while rows are
    # inserted. The marker row lands directly in front of the matching row.
    for pos in reversed(separator_positions):
        marker_row = pd.DataFrame([{'Separator': 'Separator'}])
        df = pd.concat([df.iloc[:pos], marker_row, df.iloc[pos:]]).reset_index(drop=True)

    # One threshold per column, derived from the column-wide extreme.
    high_price_threshold = calculate_threshold(df['HIGH PRICE'].max(), is_high_price=True)
    low_price_threshold = calculate_threshold(df['LOW PRICE'].min(), is_high_price=False)

    # Style frame with the same shape as df; '' means "no styling".
    style_df = pd.DataFrame('', index=df.index, columns=df.columns)

    # Strip only a trailing extension, in any letter case, so the output
    # name can never equal the input path and overwrite the source file.
    suffix = '.csv'
    base_name = file[:-len(suffix)] if file.lower().endswith(suffix) else file
    output_file = base_name + '_processed.xlsx'

    _mark_close_pairs(df, style_df, 'HIGH PRICE', 'HIGH Result', high_price_threshold)
    _mark_close_pairs(df, style_df, 'LOW PRICE', 'LOW Result', low_price_threshold)

    # Back to the input's textual date format for the workbook.
    df['DATE'] = df['DATE'].dt.strftime('%d-%b-%Y')

    # NOTE(review): the helper columns 'Last_Thursday' and 'Separator' are
    # still written to the workbook, as before; confirm whether they should
    # be dropped.
    styled_df = df.style.apply(lambda _: style_df, axis=None)
    styled_df.to_excel(output_file, engine='openpyxl', index=False)

    return output_file
168
 
169
def gradio_interface(file):
    """Gradio callback: process the uploaded CSV and return the workbook path.

    Args:
        file: The uploaded file as handed over by Gradio. Either a path
            string or a file wrapper object exposing the path as ``.name``.

    Returns:
        Path of the workbook produced by ``process_csv``.
    """
    # process_csv needs a plain path string; fall back to the value itself
    # when it carries no ``.name`` attribute.
    csv_path = getattr(file, "name", file)
    return process_csv(csv_path)
172
+
173
  # Gradio app interface
174
  iface = gr.Interface(
175
  fn=gradio_interface,