Spaces:

cs70
/

test-excel

Sleeping

App Files Files Community

Chiragkumar Savani committed on Aug 31, 2024

Commit

7ad44eb

1 Parent(s): 47b49e7

Finally grouping of a month and checking final condition

Browse files

Files changed (1) hide show

app.py +129 -95

app.py CHANGED Viewed

@@ -1,24 +1,25 @@
-import os
 import pandas as pd
-import numpy as np
 import gradio as gr
-from openpyxl import Workbook, load_workbook
-from openpyxl.styles import PatternFill
 import random
 def generate_random_light_color():
     min_brightness = 0.7
     while True:
         r, g, b = [random.randint(128, 255) for _ in range(3)]
         brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255
         if brightness >= min_brightness:
-            return f'{r:02X}{g:02X}{b:02X}'
 def calculate_threshold(value, is_high_price=True):
-    """
-    Determine the threshold based on the provided ranges.
-    The function checks if it's for High Price or Low Price and returns the respective threshold.
-    """
     if 0 <= value <= 200:
         return 0.20
     elif 201 <= value <= 500:
@@ -35,107 +36,140 @@ def calculate_threshold(value, is_high_price=True):
         return 5.0
     else:
         return 5.0
-def process_section(ws, start_row, end_row, col_index1, col_index2, output_col_index1, output_col_index2, high_threshold, low_threshold):
-    colored_pairs = set()
-    # Process first column
-    for i in range(end_row, start_row - 1, -1):
-        for j in range(i - 1, start_row - 1, -1):
-            cell_value_i = ws.cell(i, col_index1).value
-            cell_value_j = ws.cell(j, col_index1).value
-            if not (isinstance(cell_value_i, (int, float)) and isinstance(cell_value_j, (int, float))):
-                continue
-            if abs(cell_value_i - cell_value_j) <= high_threshold:
-                if (i, col_index1) not in colored_pairs and (j, col_index1) not in colored_pairs:
-                    color = generate_random_light_color()
-                    fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
-                    ws.cell(i, col_index1).fill = fill
-                    ws.cell(j, col_index1).fill = fill
-                    colored_pairs.add((i, col_index1))
-                    colored_pairs.add((j, col_index1))
-                    output_value1 = max(int(cell_value_i), int(cell_value_j)) + 1
-                    ws.cell(j, output_col_index1).value = output_value1
-                    for k in range(j - 1, start_row - 1, -1):
-                        cell_value_k = ws.cell(k, col_index1).value
-                        if isinstance(cell_value_k, (int, float)) and cell_value_k > output_value1:
-                            if ws.cell(k, 6).value > max(cell_value_i, cell_value_j):
-                                ws.cell(j, output_col_index1).fill = fill
-                            break
-    # Process second column
-    for i in range(end_row, start_row - 1, -1):
-        for j in range(i - 1, start_row - 1, -1):
-            cell_value_i = ws.cell(i, col_index2).value
-            cell_value_j = ws.cell(j, col_index2).value
-            if not (isinstance(cell_value_i, (int, float)) and isinstance(cell_value_j, (int, float))):
-                continue
-            if abs(cell_value_i - cell_value_j) <= low_threshold:
-                if (i, col_index2) not in colored_pairs and (j, col_index2) not in colored_pairs:
-                    color = generate_random_light_color()
-                    fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
-                    ws.cell(i, col_index2).fill = fill
-                    ws.cell(j, col_index2).fill = fill
-                    colored_pairs.add((i, col_index2))
-                    colored_pairs.add((j, col_index2))
-                    output_value2 = min(int(cell_value_i), int(cell_value_j)) - 1
-                    ws.cell(j, output_col_index2).value = output_value2
-                    for k in range(j - 1, start_row - 1, -1):
-                        cell_value_k = ws.cell(k, col_index2).value
-                        if isinstance(cell_value_k, (int, float)) and cell_value_k < output_value2:
-                            if ws.cell(k, 6).value < min(cell_value_i, cell_value_j):
-                                ws.cell(j, output_col_index2).fill = fill
-                            break
-def highlight_pairs_with_rgb(input_file):
-    df = pd.read_csv(input_file)
-    df.columns = df.columns.str.strip()
-    # Convert to Excel and add three empty columns
-    excel_filename = input_file.replace(".csv", ".xlsx")
-    with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
-        # Reorder and create Excel file with empty columns
-        df_new = pd.concat([df.iloc[:, :8], pd.DataFrame(columns=['', '', '']), df.iloc[:, 8:]], axis=1)
-        df_new.to_excel(writer, index=False, sheet_name="Sheet1")
-    wb = load_workbook(excel_filename)
-    ws = wb.active
-    # Determine thresholds for High Price (Column 6) and Low Price (Column 7)
-    high_price_col = df["HIGH PRICE"].dropna()
-    low_price_col = df["LOW PRICE"].dropna()
-    high_threshold = calculate_threshold(high_price_col.max(), is_high_price=True)
-    low_threshold = calculate_threshold(low_price_col.min(), is_high_price=False)
-    last_row = ws.max_row
-    col_index1, col_index2 = 6, 7  # HIGH PRICE and LOW PRICE columns in Excel
-    output_col_index1, output_col_index2 = 9, 10  # Empty columns added earlier
-    ws.cell(1, output_col_index1).value = "High Result"
-    ws.cell(1, output_col_index2).value = "Low Result"
-    start_row = 2
-    while start_row <= last_row:
-        end_row = start_row
-        while end_row <= last_row and ws.cell(end_row, 1).value is not None:
-            end_row += 1
-        end_row -= 1
-        process_section(ws, start_row, end_row, col_index1, col_index2, output_col_index1, output_col_index2, high_threshold, low_threshold)
-        start_row = end_row + 2
-    wb.save(excel_filename)
-    return excel_filename
-def gradio_interface(input_file):
-    output_file = highlight_pairs_with_rgb(input_file.name)
     return output_file
 # Gradio app interface
 iface = gr.Interface(
     fn=gradio_interface,

 import pandas as pd
+from pandas.tseries.offsets import MonthEnd
 import gradio as gr
 import random
+# Function to generate a random light color
 def generate_random_light_color():
     min_brightness = 0.7
     while True:
         r, g, b = [random.randint(128, 255) for _ in range(3)]
         brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255
         if brightness >= min_brightness:
+            return '#{:02x}{:02x}{:02x}'.format(*(r, g, b))
+# Function to set the background color of a specific cell
def set_cell_color(styles_df, index, column, hex_color):
    """Write a background-colour CSS rule into one cell of a style frame.

    Args:
        styles_df: DataFrame of CSS strings; it is modified in place.
        index: Row label of the cell to colour.
        column: Column label of the cell to colour.
        hex_color: Colour value placed after ``background-color: ``.

    Returns:
        The same ``styles_df`` object, to allow reassignment at the call site.
    """
    css_rule = 'background-color: {}'.format(hex_color)
    styles_df.at[index, column] = css_rule
    return styles_df
+# Function to calculate the threshold
 def calculate_threshold(value, is_high_price=True):
     if 0 <= value <= 200:
         return 0.20
     elif 201 <= value <= 500:
         return 5.0
     else:
         return 5.0
def last_thursday(dt):
    """Return the last Thursday of the month that contains ``dt``.

    Args:
        dt: A ``pd.Timestamp`` or any value ``pd.Timestamp`` can parse
            (for example an ISO date string or a ``datetime``).

    Returns:
        The ``pd.Timestamp`` of that month's final Thursday, or ``pd.NaT``
        when ``dt`` is a missing value.
    """
    ts = pd.Timestamp(dt)
    if pd.isna(ts):
        # A blank date has no month; pass the missing value through rather
        # than feeding NaN into the day arithmetic below.
        return pd.NaT
    # MonthEnd(0) rolls forward to the month's last day (no-op if already there).
    month_end = ts + MonthEnd(0)
    # weekday() is 0 for Monday, so Thursday is 3; the modulo gives the number
    # of days the month end lies past the most recent Thursday.
    days_past_thursday = (month_end.weekday() - 3) % 7
    return month_end - pd.Timedelta(days=days_past_thursday)
def check_condition_passed(df, column_name, max_index, output_value, is_high = True):
    """Report whether any row before ``max_index`` goes beyond ``output_value``.

    Args:
        df: Frame holding the column to inspect.
        column_name: Name of the column whose values are compared.
        max_index: Only rows whose index label is strictly below this are
            considered.
        output_value: Reference value for the comparison.
        is_high: When true, look for values greater than ``output_value``;
            otherwise look for values smaller than it.

    Returns:
        ``True`` if at least one qualifying row exists, else ``False``.
    """
    earlier_rows = df.index < max_index
    if is_high:
        beyond_value = df[column_name] > output_value
    else:
        beyond_value = df[column_name] < output_value
    # A boolean mask is enough; no need to materialise the filtered frame.
    return bool((earlier_rows & beyond_value).any())
def get_output_value(value1, value2, is_high=False):
    """Derive the result value for a matched pair of prices.

    Both inputs are truncated with ``int()`` first. For a high pair the result
    is one above the larger truncated value; otherwise it is one below the
    smaller truncated value.
    """
    first, second = int(value1), int(value2)
    return max(first, second) + 1 if is_high else min(first, second) - 1
+# Function to read CSV and generate Excel with modifications
def process_csv(file):
    """Convert a price CSV into a colour-coded Excel workbook.

    Steps:
      1. Read the CSV, strip whitespace from the headers and insert the
         ``HIGH Result``, ``LOW Result`` and ``Empty Column`` columns right
         after ``LOW PRICE``.
      2. Parse ``DATE`` (``%d-%b-%Y``) and insert a separator row immediately
         before every row dated on the last Thursday of its month.
      3. Within each separator-delimited group, pair up ``HIGH PRICE`` values
         (and, separately, ``LOW PRICE`` values) that differ by less than the
         threshold, colour both cells of a pair alike and write the derived
         value into the matching result column.
      4. Write the styled frame to ``<input name>_processed.xlsx``.

    The helper columns ``Last_Thursday`` and ``Separator`` are kept in the
    exported sheet.

    Args:
        file: Path of the input CSV.

    Returns:
        Path of the generated ``.xlsx`` file.

    Raises:
        KeyError: If a required column such as ``LOW PRICE`` is missing.
        ValueError: If a ``DATE`` value does not match ``%d-%b-%Y``.
    """
    import os  # function-scope import: ``os`` is not imported at module level

    df = pd.read_csv(file)
    df.columns = df.columns.str.strip()  # headers may carry stray whitespace

    # Three blank columns directly after LOW PRICE.
    low_price_index = df.columns.get_loc('LOW PRICE')
    df.insert(low_price_index + 1, 'HIGH Result', '')
    df.insert(low_price_index + 2, 'LOW Result', '')
    df.insert(low_price_index + 3, 'Empty Column', '')

    df['DATE'] = pd.to_datetime(df['DATE'], format='%d-%b-%Y')
    df['Last_Thursday'] = df['DATE'].apply(last_thursday)

    # Rows dated exactly on the last Thursday of their month. read_csv yields
    # a 0..n-1 index, so these labels double as positions for iloc below.
    is_last_thursday = df['DATE'] == df['Last_Thursday']
    indices_to_insert = df.index[is_last_thursday].tolist()

    df['Separator'] = ''
    # Insert from the bottom up so earlier positions stay valid.
    for idx in reversed(indices_to_insert):
        df = pd.concat(
            [df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]
        ).reset_index(drop=True)

    # One threshold per price column, derived from the whole file.
    high_price_threshold = calculate_threshold(df['HIGH PRICE'].max(), is_high_price=True)
    low_price_threshold = calculate_threshold(df['LOW PRICE'].min(), is_high_price=False)

    def process_column(df, style_df, column_name, result_column_name, threshold):
        """Pair close values of one column group by group; mutates df and style_df."""
        is_high = 'HIGH' in column_name
        element_used = [False] * len(df)
        # Each separator row starts a new group via the running count.
        group_ids = (df['Separator'] == 'Separator').cumsum()
        for _, group in df.groupby(group_ids):
            rows = group.index[group['Separator'] != 'Separator'].tolist()
            # Walk from the bottom of the group upwards; each row is paired at
            # most once, with the nearest unused row above it that is close enough.
            for i in range(len(rows) - 1, -1, -1):
                if element_used[rows[i]]:
                    continue
                for j in range(i - 1, -1, -1):
                    lower_value = df.loc[rows[i], column_name]
                    upper_value = df.loc[rows[j], column_name]
                    diff = abs(lower_value - upper_value)
                    if diff < threshold and not element_used[rows[j]]:
                        output_value = get_output_value(lower_value, upper_value, is_high)
                        df.at[rows[j], result_column_name] = output_value
                        element_used[rows[i]] = True
                        element_used[rows[j]] = True
                        color = generate_random_light_color()
                        style_df = set_cell_color(style_df, index=rows[i], column=column_name, hex_color=color)
                        style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
                        # Colour the result cell too when some earlier row
                        # already goes beyond the derived value.
                        if check_condition_passed(df, column_name, rows[j], output_value, is_high):
                            style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
                        break

    style_df = pd.DataFrame('', index=df.index, columns=df.columns)
    process_column(df, style_df, 'HIGH PRICE', 'HIGH Result', high_price_threshold)
    process_column(df, style_df, 'LOW PRICE', 'LOW Result', low_price_threshold)

    # Restore the original textual date format for the export.
    df['DATE'] = df['DATE'].dt.strftime('%d-%b-%Y')

    # Derive the output name from the path stem so an input without a
    # lowercase ".csv" suffix can never resolve to the input file itself.
    root, _ = os.path.splitext(file)
    output_file = f"{root}_processed.xlsx"

    styled_df = df.style.apply(lambda _: style_df, axis=None)
    styled_df.to_excel(output_file, engine='openpyxl', index=False)
    return output_file
+# Gradio Interface
def gradio_interface(file):
    """Gradio callback: process the uploaded CSV and return the workbook path.

    Args:
        file: The uploaded file, either as a path string or as a file-like
            wrapper exposing the path through its ``name`` attribute.

    Returns:
        Path of the Excel file produced by ``process_csv``.
    """
    if isinstance(file, str):
        csv_path = file
    else:
        # Upload wrappers carry the temporary file's path in ``.name``;
        # anything else is forwarded unchanged.
        csv_path = getattr(file, "name", file)
    return process_csv(csv_path)
 # Gradio app interface
 iface = gr.Interface(
     fn=gradio_interface,