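# KEYWORD_1 / app.py
# (Residue of the Hugging Face file-view page, kept as comments so the module parses:
#  "CSB261's picture" · "Update app.py" · commit a5ab553 (verified) · raw · history · blame · 2.2 kB)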
import gradio as gr
import pandas as pd
import re
from collections import Counter
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
def process_excel(file):
    """Count keyword frequencies in product names and export them to Excel.

    Reads the uploaded workbook, takes the fourth column (column D) from
    DataFrame row index 3 onward, splits every product name into word
    tokens, and counts in how many product names each keyword occurs (a
    keyword repeated inside a single name is counted once). The resulting
    table is written to ``keyword_result.xlsx`` with its header in A4:B4,
    and a bar chart of the counts is anchored at E4.

    Args:
        file: Anything ``pandas.read_excel`` accepts as its first argument
            (this is the value Gradio passes for the uploaded file).

    Returns:
        The path of the generated workbook, ``'keyword_result.xlsx'``.
    """
    df = pd.read_excel(file)

    # NOTE(review): read_excel treats the first sheet row as the header, so
    # DataFrame row 3 is spreadsheet row 5, while the original comment says
    # the data should start at D4. Left unchanged; confirm against a real
    # input file before adjusting.
    product_names = df.iloc[3:, 3].dropna().astype(str)

    keyword_count = Counter()
    for name in product_names:
        tokens = re.findall(r'\b\w+\b', name)  # drops punctuation/symbols
        # De-duplicate within one product name while keeping first-seen
        # order, so the order of equally frequent keywords is reproducible.
        # (A list is passed on purpose: Counter.update would treat a dict
        # as a mapping of counts.)
        keyword_count.update(list(dict.fromkeys(tokens)))

    # most_common() sorts by count, descending; ties keep insertion order.
    result_df = pd.DataFrame(
        keyword_count.most_common(), columns=['키워드', '빈도']
    )

    output_path = 'keyword_result.xlsx'
    header_row = 4  # 1-based sheet row of the table header (A4:B4)

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        result_df.to_excel(
            writer, index=False, startrow=header_row - 1, startcol=0
        )
        sheet = writer.sheets['Sheet1']

        # An empty table has no data rows to chart.
        if not result_df.empty:
            last_row = header_row + len(result_df)

            chart = BarChart()
            # The value range includes the header cell because
            # titles_from_data=True consumes it as the series title.
            values = Reference(
                sheet, min_col=2, max_col=2,
                min_row=header_row, max_row=last_row,
            )
            # The category range must skip the header so that each label
            # lines up with its own count.
            categories = Reference(
                sheet, min_col=1,
                min_row=header_row + 1, max_row=last_row,
            )
            chart.add_data(values, titles_from_data=True)
            chart.set_categories(categories)
            chart.title = "키워드 빈도수"
            chart.x_axis.title = "키워드"
            chart.y_axis.title = "빈도"

            # Must happen inside the ``with`` block: the workbook is saved
            # when the writer closes.
            sheet.add_chart(chart, "E4")

    return output_path
# Build the Gradio interface: one uploaded Excel file in, one generated
# result workbook out. Labels read "Excel file upload" and "Analysis result
# file".
interface = gr.Interface(
    fn=process_excel,
    inputs=gr.File(label="엑셀 파일 업로드"),
    outputs=gr.File(label="분석 결과 파일"),
)

# Launch the web UI only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()