import pandas as pd from pandas.tseries.offsets import MonthEnd import gradio as gr import random # Function to generate a random light color def generate_random_light_color(): min_brightness = 0.7 while True: r, g, b = [random.randint(128, 255) for _ in range(3)] brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255 if brightness >= min_brightness: return '#{:02x}{:02x}{:02x}'.format(*(r, g, b)) # Function to set the background color of a specific cell def set_cell_color(styles_df, index, column, hex_color): styles_df.at[index, column] = f'background-color: {hex_color}' return styles_df # Function to calculate the threshold def calculate_threshold(value, is_high_price=True): if 0 <= value <= 200: return 0.20 elif 201 <= value <= 500: return 0.70 elif 501 <= value <= 1000: return 1.0 elif 1001 <= value <= 2000: return 2.0 elif 2001 <= value <= 3000: return 3.0 elif 3001 <= value <= 4000: return 4.0 elif 4001 <= value <= 5000: return 5.0 else: return 5.0 def last_thursday(dt): # Get the last day of the month last_day_of_month = dt + MonthEnd(0) # Calculate how many days to subtract to get the last Thursday offset = (last_day_of_month.weekday() - 3) % 7 return last_day_of_month - pd.Timedelta(days=offset) def check_condition_passed(df, column_name, max_index, output_value, close_column, value1, value2, is_high = True): # print(f"Max index: {max_index}") for index_maybe in range(max_index-1, -1, -1): # print(f"At index : {index_maybe}") if is_high: if df.loc[index_maybe, column_name] > output_value: close_value = df.loc[index_maybe, close_column] # print(f"current value: {df.loc[index_maybe, column_name]} - close value {close_value}") if close_value > value1 and close_value > value2: return True break else: if df.loc[index_maybe, column_name] < output_value: close_value = df.loc[index_maybe, close_column] if close_value < value1 and close_value < value2: return True break return False # if is_high: # filtered_df = df[(df.index < max_index) & (df[column_name] > output_value)] # else: # filtered_df = df[(df.index < max_index) & (df[column_name] < output_value)] # if not filtered_df.empty: # first_valid_row = filtered_df.iloc[0] # print(f"Respective close row: {first_valid_row}") # close_value = first_valid_row[close_column] # print(f"Respective close value: {close_value}") # if is_high: # if close_value > value1 and close_value > value2: # return True # else: # if close_value < value1 and close_value < value2: # return True # else: # return False def get_output_value(value1, value2, is_high=False): if is_high: return max(int(value1), int(value2)) + 1 else: return min(int(value1), int(value2)) - 1 # Function to read CSV and generate Excel with modifications def process_csv(file): df = pd.read_csv(file) df.columns = df.columns.str.strip() # Remove trailing spaces from column names HIGH_NAME = "HIGH PRICE" if HIGH_NAME not in df.columns: HIGH_NAME = "HIGH" LOW_NAME = "LOW PRICE" if LOW_NAME not in df.columns: LOW_NAME = "LOW" CLOSE_NAME = "CLOSE PRICE" if CLOSE_NAME not in df.columns: CLOSE_NAME = "close" DATE_NAME = "DATE" if DATE_NAME not in df.columns: DATE_NAME = "Date" # Add three empty columns between LOW PRICE and CLOSE PRICE low_price_index = df.columns.get_loc(CLOSE_NAME) df.insert(low_price_index + 1, 'HIGH Result', '') df.insert(low_price_index + 2, 'LOW Result', '') df.insert(low_price_index + 3, 'Empty Column', '') # Convert DATE to datetime df[DATE_NAME] = pd.to_datetime(df[DATE_NAME], format='%d-%b-%Y') # Detect the last Thursday of each month and insert an empty row after it df['Last_Thursday'] = df[DATE_NAME].apply(last_thursday) indices_to_insert = [] for i in range(len(df)): if df.loc[i, DATE_NAME] == df.loc[i, 'Last_Thursday']: indices_to_insert.append(i) df['Separator'] = '' # Insert empty rows and update the Last_Thursday column for idx in reversed(indices_to_insert): # Insert an empty row df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True) price_columns = [HIGH_NAME, LOW_NAME, CLOSE_NAME] df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce') # Calculate global thresholds for HIGH PRICE and LOW PRICE columns high_price_threshold = calculate_threshold(df[HIGH_NAME].max(), is_high_price=True) low_price_threshold = calculate_threshold(df[LOW_NAME].min(), is_high_price=False) # Process HIGH PRICE and LOW PRICE columns def process_column(df, style_df, column_name, result_column_name, threshold): element_used = [False] * len(df[column_name]) # for last_thurday_date, group in df.groupby('Last_Thursday', sort=False): grouped_df = df.groupby((df['Separator'] == 'Separator').cumsum()) for group_name, group in grouped_df: group = group[group['Separator'] != 'Separator'] rows = group.index.tolist() print(rows) for i in range(len(rows) - 1, -1, -1): if not element_used[rows[i]]: for j in range(i - 1, -1, -1): diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name]) if diff < threshold and not element_used[rows[j]]: output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower()) # print(f"i {rows[i]} j {rows[j]} {column_name}") # print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}") df.at[rows[j], result_column_name] = output_value element_used[rows[i]] = True element_used[rows[j]] = True color = generate_random_light_color() style_df = set_cell_color(style_df, index=rows[i], column=column_name, hex_color=color) style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color) # check if there is higher or lower value, if yes, then colorize it response = check_condition_passed(df, column_name, rows[j], output_value, CLOSE_NAME, df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower()) if response: style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color) break # Create a dictionary to map column names to Excel letters column_letter_map = {v: k for k, v in enumerate(df.columns, start=1)} # Save to an Excel file and get the workbook style_df = pd.DataFrame('', index=df.index, columns=df.columns) output_file = file.replace(".csv", "_processed.xlsx") process_column(df, style_df, HIGH_NAME, 'HIGH Result', high_price_threshold) process_column(df, style_df, LOW_NAME, 'LOW Result', low_price_threshold) # add an empty row before the new month df[DATE_NAME] = df[DATE_NAME].dt.strftime('%d-%b-%Y') # df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y') styled_df = df.style.apply(lambda _: style_df, axis=None) styled_df.to_excel(output_file, engine='openpyxl', index=False) return output_file # Gradio Interface def gradio_interface(file): return process_csv(file) # Gradio app interface iface = gr.Interface( fn=gradio_interface, inputs=gr.File(label="Upload CSV File (.csv)", file_count="single"), outputs=gr.File(label="Download Processed Excel File"), title="CSV to Excel Processor with Cell Highlighting", description="Upload a CSV file with stock data, and download a processed Excel file with highlighted cells." ) if __name__ == "__main__": iface.launch()