Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
import re | |
from collections import Counter | |
import os | |
from openpyxl import load_workbook | |
from openpyxl.drawing.image import Image | |
def _count_keywords(product_names):
    """Count, for every keyword, how many product names contain it.

    Each name is stripped of punctuation (anything that is neither a word
    character nor whitespace) and split on whitespace. A keyword is counted
    at most once per name.

    Args:
        product_names: Iterable of cell values. Non-string values are
            converted with ``str`` before tokenizing.

    Returns:
        A ``collections.Counter`` mapping keyword -> number of names in
        which it appears.
    """
    punctuation = re.compile(r'[^\w\s]')
    counts = Counter()
    for name in product_names:
        # Spreadsheet cells can hold numbers or dates; coerce to text so the
        # regex does not raise TypeError on non-string values.
        words = punctuation.sub('', str(name)).split()
        # dict.fromkeys de-duplicates within one name while keeping word
        # order, so the ordering of equally frequent keywords is reproducible.
        counts.update(dict.fromkeys(words, 1))
    return counts


def process_excel(file):
    """Build a keyword-frequency workbook from column D of an uploaded sheet.

    Reads the uploaded Excel file, tokenizes the values in its fourth column
    (column D), counts in how many rows each keyword occurs, and writes the
    result to ``output/keyword_counts.xlsx`` with the table starting at row 4
    and ``ssboost-logo.png`` anchored at cell A1.

    Args:
        file: The uploaded file, either a filesystem path or an object whose
            ``name`` attribute is the path (which form is passed presumably
            depends on the Gradio version/configuration).

    Returns:
        Path of the generated workbook.

    Raises:
        ValueError: If no file was provided.
        IndexError: If the sheet has fewer than four columns.
    """
    if file is None:
        raise ValueError("No Excel file was provided.")

    # Accept both a plain path and a temp-file wrapper exposing ``.name``.
    source_path = file if isinstance(file, (str, os.PathLike)) else file.name

    # Read the uploaded workbook.
    df = pd.read_excel(source_path)

    # Column D is the fourth column, i.e. zero-based position 3.
    if df.shape[1] < 4:
        raise IndexError(
            "The Excel file must have at least 4 columns (column D is required)."
        )
    product_names = df.iloc[:, 3].dropna()

    # Extract keywords and count their frequency.
    keyword_counts = _count_keywords(product_names)

    # most_common() yields rows already sorted by descending frequency, with
    # ties kept in first-seen order.
    result_df = pd.DataFrame(
        keyword_counts.most_common(), columns=['Keyword', 'Frequency']
    )

    # Make sure the output directory exists (no check-then-create race).
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "keyword_counts.xlsx")

    # startrow=3 puts the header on the 4th row (A4, B4), leaving room for
    # the logo above the table.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        result_df.to_excel(writer, index=False, startrow=3)

    # Re-open the written workbook and anchor the logo image at cell A1.
    wb = load_workbook(output_file)
    ws = wb.active
    img = Image("ssboost-logo.png")
    ws.add_image(img, "A1")

    # Save the modified workbook.
    wb.save(output_file)
    return output_file
# Gradio interface definition: one file upload in, one generated file out.
iface = gr.Interface(
    title="Excel Keyword Extractor with Image",
    description="μμ νμΌμ Dμ΄μμ ν€μλλ₯Ό μΆμΆνκ³ λΉλλ₯Ό κ³μ°ν ν, A1 μ μ μ΄λ―Έμ§λ₯Ό μ½μ νμ¬ μλ‘μ΄ μμ νμΌλ‘ μΆλ ₯ν©λλ€.",
    fn=process_excel,
    # Restrict the upload widget to Excel (.xlsx) files only.
    inputs=gr.File(file_types=[".xlsx"]),
    outputs="file",
)

# Start the web UI only when this module is executed as a script.
if __name__ == "__main__":
    iface.launch()