Spaces:
Sleeping
Sleeping
File size: 2,464 Bytes
0b61b2b 10af70f 0b61b2b 10af70f 2ce3912 67d017e 238794e 0b61b2b 10af70f 2ce3912 46e9dda 0b61b2b 46e9dda 0b61b2b 10af70f 0b61b2b 2ce3912 46e9dda 6c1add2 0b61b2b 67d017e 1cd778f 67d017e 46e9dda 0b61b2b 33e3849 46e9dda 67d017e 0b61b2b 238794e 10af70f 0b61b2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import gradio as gr
import pandas as pd
import re
from collections import Counter
import os
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
def process_excel(file):
    """Build a keyword-frequency workbook from column D of an Excel file.

    Every non-empty cell in the fourth column (column D) is treated as a
    product name. Punctuation is stripped, the name is split on whitespace,
    and each distinct word is counted once per name. The counts are written,
    most frequent first, to ``output/keyword_counts.xlsx`` starting at row 4,
    and ``ssboost-logo.png`` is anchored at cell A1.

    Args:
        file: The uploaded workbook, either a path (``str`` / ``os.PathLike``)
            or an object whose ``name`` attribute holds the path.

    Returns:
        The path of the generated workbook as a string.

    Raises:
        IndexError: If the input sheet has fewer than four columns.
        FileNotFoundError: Presumably raised by the image loader when
            ``ssboost-logo.png`` is missing from the working directory --
            confirm against the installed openpyxl/Pillow versions.
    """
    # Accept a plain path as well as an upload object exposing ``.name``;
    # anything else still fails on ``.name`` exactly as before.
    source_path = file if isinstance(file, (str, os.PathLike)) else file.name
    df = pd.read_excel(source_path)

    # Column D is positional index 3 (columns are zero-based).
    product_names = df.iloc[:, 3].dropna()

    # Strip punctuation, split on whitespace, and count each word at most
    # once per product name.
    punctuation = re.compile(r'[^\w\s]')
    keyword_counts = Counter()
    for name in product_names:
        # Cells may hold numbers or dates; coerce to text so the regex does
        # not raise TypeError on non-string values.
        words = punctuation.sub('', str(name)).split()
        keyword_counts.update(set(words))

    # Tabulate the counts, most frequent keyword first.
    result_df = pd.DataFrame(
        list(keyword_counts.items()), columns=['Keyword', 'Frequency']
    )
    result_df = result_df.sort_values(
        by='Frequency', ascending=False
    ).reset_index(drop=True)

    # Make sure the output directory exists before writing.
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "keyword_counts.xlsx")

    # startrow=3 puts the table's header row on sheet row 4 (A4, B4),
    # leaving rows 1-3 free for the logo.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        result_df.to_excel(writer, index=False, startrow=3)

    # Re-open the saved workbook to place the logo at A1.
    wb = load_workbook(output_file)
    ws = wb.active
    img = Image("ssboost-logo.png")

    # Target size is 1.54 cm high by 5.69 cm wide. openpyxl interprets
    # Image.width/height as pixels (96 per inch), not points, so convert
    # centimetres with 96 / 2.54 px per cm.
    px_per_cm = 96 / 2.54
    img.height = 1.54 * px_per_cm
    img.width = 5.69 * px_per_cm
    ws.add_image(img, "A1")

    # Persist the workbook with the embedded image.
    wb.save(output_file)
    return output_file
# Gradio interface definition.
# The description (Korean) tells the user that keywords are extracted from
# column D of the uploaded Excel file, their frequencies are counted, and a new
# Excel file is produced with an image inserted at cell A1.
iface = gr.Interface(
    fn=process_excel,
    inputs=gr.File(file_types=[".xlsx"]),  # restrict uploads to .xlsx files
    outputs="file",
    title="Excel Keyword Extractor with Image",
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산한 후, A1 셀에 이미지를 삽입하여 새로운 엑셀 파일로 출력합니다.",
)
# Launch the Gradio app only when this file is run as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    iface.launch()
|