File size: 2,464 Bytes
0b61b2b
10af70f
0b61b2b
10af70f
2ce3912
67d017e
 
238794e
0b61b2b
10af70f
2ce3912
46e9dda
0b61b2b
46e9dda
0b61b2b
 
10af70f
0b61b2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce3912
 
 
 
 
46e9dda
6c1add2
 
 
 
0b61b2b
67d017e
 
 
 
 
 
1cd778f
 
 
 
 
67d017e
 
 
 
 
46e9dda
0b61b2b
 
 
 
33e3849
46e9dda
67d017e
 
0b61b2b
238794e
10af70f
0b61b2b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
import pandas as pd
import re
from collections import Counter
import os
from openpyxl import load_workbook
from openpyxl.drawing.image import Image

def process_excel(file):
    """Build a keyword-frequency workbook from column D of an uploaded file.

    Every non-empty value in the fourth column (column D) of the first sheet
    is stripped of punctuation and split on whitespace. Each distinct word is
    counted once per row, so the resulting frequency is the number of rows a
    word appears in. The counts are written, most frequent first, to
    ``output/keyword_counts.xlsx`` with the header on row 4, and
    ``ssboost-logo.png`` (looked up relative to the working directory) is
    placed at cell A1.

    Args:
        file: The upload handed over by Gradio: either an object exposing the
            temporary file path as ``.name`` or the path itself.

    Returns:
        Path of the generated workbook.

    Raises:
        IndexError: If the uploaded sheet has fewer than four columns.
    """
    # Depending on the Gradio version/configuration the File component hands
    # over a temp-file wrapper (with ``.name``) or a plain path string.
    source_path = getattr(file, "name", file)
    df = pd.read_excel(source_path)

    if df.shape[1] < 4:
        raise IndexError(
            "The uploaded sheet has fewer than four columns; column D is required."
        )

    # Column D is the fourth column, i.e. positional index 3.
    product_names = df.iloc[:, 3].dropna()

    keyword_counts = Counter()
    for name in product_names:
        # Cells may hold numbers or dates; coerce so the regex always gets text.
        # Punctuation is removed, then the name is split on whitespace.
        words = re.sub(r'[^\w\s]', '', str(name)).split()
        # Count each word once per row. dict.fromkeys dedupes while keeping
        # first-seen order, which makes the order of tied keywords repeatable.
        keyword_counts.update(list(dict.fromkeys(words)))

    # most_common() sorts by descending count and keeps first-seen order
    # among equal counts.
    result_df = pd.DataFrame(
        keyword_counts.most_common(), columns=['Keyword', 'Frequency']
    )

    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "keyword_counts.xlsx")

    # startrow=3 puts the header on the 4th row (A4/B4), leaving room above
    # for the logo.
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        result_df.to_excel(writer, index=False, startrow=3)

    # Re-open the saved workbook to anchor the logo at A1.
    wb = load_workbook(output_file)
    ws = wb.active

    img = Image("ssboost-logo.png")

    # Target size is 1.54 cm high by 5.69 cm wide. openpyxl interprets
    # Image.width/height as pixels at 96 dpi (not points), so convert
    # centimetres with 96 / 2.54 pixels per cm.
    pixels_per_cm = 96 / 2.54
    img.height = 1.54 * pixels_per_cm
    img.width = 5.69 * pixels_per_cm

    ws.add_image(img, "A1")

    wb.save(output_file)

    return output_file

# Gradio interface definition: one .xlsx upload in, one generated workbook out.
iface = gr.Interface(
    fn=process_excel,
    inputs=gr.File(file_types=[".xlsx"]),  # only allow Excel workbooks to be uploaded
    outputs="file",
    title="Excel Keyword Extractor with Image",
    # The description had been stored mis-encoded (UTF-8 bytes decoded with a
    # single-byte code page); the intended Korean text is restored here.
    description="엑셀 파일의 D열에서 키워드를 추출하고 빈도를 계산한 후, A1 셀에 이미지를 삽입하여 새로운 엑셀 파일로 출력합니다."
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()