# capstone_gpt / app.py
# sssssungk's picture
# Update app.py
# 5c70a45 verified
from openai import OpenAI
import openai
import base64
import requests, json
from docx import Document
from docx.shared import Cm, Inches
from PIL import Image
import gradio as gr
import tempfile
import os
import shutil
# Image handling
def encode_image(image_path):
    """Return the file at ``image_path`` as a base64-encoded text string.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8
    so the result is a plain ``str``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Document generation
def word(img, gpt_text, text, img_prob, voice_prob):
    """Build the deepfake report as a Word document and return its path.

    Args:
        img: Image handed to ``doc.add_picture``; only embedded when
            ``img_prob`` is greater than 0.5.
        gpt_text: Report body, written under the second heading.
        text: Transcribed utterances, paired position-by-position with
            ``voice_prob``. A bare string is treated as a single utterance.
        img_prob: Deepfake probability of the image (a number).
        voice_prob: Per-utterance deepfake probabilities. A bare number is
            treated as a one-element list.

    Returns:
        The path of the saved document (always ``"sample.docx"``).
    """
    doc = Document()
    # Level 0 is the document title.
    doc.add_heading("๋”ฅํŽ˜์ดํฌ ์‹ ๊ณ  ์ž‘์„ฑ", level=0)

    # Image section: the picture is attached only when it is flagged.
    image_paragraph = doc.add_paragraph(f"๋™์˜์ƒ ๋”ฅํŽ˜์ดํฌ ํ™•๋ฅ : {img_prob}")
    if img_prob > 0.5:
        image_paragraph.add_run("\n๋ฌธ์ œ๊ฐ€ ๋˜๋Š” ์˜์ƒ ์ด๋ฏธ์ง€ :")
        doc.add_picture(img, width=Cm(6), height=Cm(3))

    # Voice section: the paragraph shows the value exactly as received.
    voice_paragraph = doc.add_paragraph(f"์Œ์„ฑ ๋”ฅํŽ˜์ดํฌ ํ™•๋ฅ : {voice_prob}")

    # Accept a bare number / bare string so a user who omits the list
    # brackets does not crash the report.
    probabilities = [voice_prob] if isinstance(voice_prob, (int, float)) else voice_prob
    utterances = [text] if isinstance(text, str) else text

    # zip() pairs each probability with its utterance and stops at the shorter
    # sequence, so a missing transcript no longer raises IndexError. Extra
    # trailing utterances (e.g. a full-transcript entry) are ignored, exactly
    # as the index-based loop ignored them.
    for probability, utterance in zip(probabilities, utterances):
        if probability > 0.5:
            voice_paragraph.add_run(f"\n๋ฌธ์ œ๊ฐ€ ๋˜๋Š” ํ…์ŠคํŠธ ๋‚ด์šฉ: \n\"{utterance}\"")

    doc.add_heading("๋”ฅํŽ˜์ดํฌ ์‹ ๊ณ  ๋‚ด์šฉ", level=2)
    doc.add_paragraph(gpt_text)

    # NOTE(review): fixed file name in the working directory; concurrent
    # requests overwrite each other's report. Kept because callers rely on
    # the returned path/name.
    file_path = "sample.docx"
    doc.save(file_path)
    return file_path
def gpt_prompt(voice_prob, img_prob, text1, crop_img, first_img):
    """Assemble the chat messages sent to the model for one report.

    Args:
        voice_prob: Voice deepfake probabilities, interpolated into the prompt.
        img_prob: Image deepfake probability, interpolated into the prompt.
        text1: Transcribed text, interpolated into the prompt.
        crop_img: Base64 text of the cropped image (second image attached).
        first_img: Base64 text of the first-frame image (first image attached).

    Returns:
        A list of four message dicts in this order: system, user (written
        instructions), user (short text plus the two images as data URLs),
        assistant (the report layout).
    """
    # Sets how the model should behave.
    system_message = {
        "role": "system",
        "content": "You are an expert on deepfake criminal activity.",
    }

    # The actual request, with the probabilities and transcript filled in.
    # The text lines stay at column 0 because they are part of the string.
    instructions = f"""
์ฒซ๋ฒˆ์งธ๋กœ ์ž…๋ ฅ๋œ ์ด๋ฏธ์ง€๋Š” ์˜์ƒ์˜ ์ฒซ ํ”„๋ ˆ์ž„ ์ด๋ฏธ์ง€์•ผ. ๊ทธ ์ด๋ฏธ์ง€์™€ ํ…์ŠคํŠธ๋ฅผ ๋ณด๊ณ  ์˜์ƒ์˜ ์‚ฌํ™ฉ๊ณผ ๋‚ด์šฉ์„ ์„ค๋ช…ํ•ด์ค˜. ex) ์–ด๋–ค์‚ฌ๋žŒ(์œ ๋ช…์ธ, ์ •์น˜์ธ, ์ผ๋ฐ˜์ธ)์ด ๋ฌด์—‡์„ ํ•˜๋Š”์ง€(๊ด‘๊ณ , ์ธํ„ฐ๋ทฐ, ๋‰ด์Šค ๋“ฑ๋“ฑ)
๋‘๋ฒˆ์งธ ์ด๋ฏธ์ง€๋Š” ์˜์ƒ์˜ ์ผ๋ถ€๋ฅผ ์ž˜๋ผ์˜จ๊ฑฐ์•ผ. ๋”ฅํŽ˜์ดํฌ ์˜์ƒ ํ™•๋ฅ : {img_prob}์€ ์ฃผ์–ด์ง„ ์ด๋ฏธ์ง€์— ๋Œ€ํ•œ ๋”ฅํŽ˜์ดํฌ ํ™•๋ฅ ์ด์•ผ. ์ด ๊ฐ’์ด 0.5๋ณด๋‹ค ํฌ๋ฉด, ๋‘๋ฒˆ์งธ ์ด๋ฏธ์ง€๋Š” ์กฐ์ž‘๋˜์—ˆ๋‹ค๋Š”๊ฑฐ์•ผ.
์Œ์„ฑ ๋”ฅํŽ˜์ดํฌ ํ™•๋ฅ : {voice_prob} / prob์˜ ๋ฆฌ์ŠคํŠธ ๊ธธ์ด๋Š” ๋ฐœํ™”์ž์˜ ๊ฐœ์ˆ˜์•ผ. ๊ฐ’์ด ํ•˜๋‚˜๋ฉด ํ•œ๋ช…์˜ ๋ชฉ์†Œ๋ฆฌ์ด๊ณ  ๋งํ•˜๋Š” ์ˆœ์„œ๋Œ€๋กœ ๋‚˜์—ด๋œ๊ฑฐ์•ผ. ๊ทธ ๊ฐ’์ด 0.5๋ณด๋‹ค ๋†’์œผ๋ฉด ์กฐ์ž‘๋œ ๋‚ด์šฉ์ด๋ผ๋Š” ๋œป์ด์•ผ.
๋ฐ˜๋Œ€๋กœ 0.5๋ณด๋‹ค ๋‚ฎ์œผ๋ฉด ์กฐ์ž‘๋˜์ง€ ์•Š์€ ์ง„์งœ ๋‚ด์šฉ์ด๋ผ๋Š” ์†Œ๋ฆฌ์•ผ
์Œ์„ฑ ๋‚ด์šฉ: {text1} / ์‰ผํ‘œ๋กœ ๊ตฌ๋ถ„๋œ ๊ฐ ๋ฌธ์žฅ์€ ์œ„์˜ prob์˜ index์˜ ๋งž๊ฒŒ ์Œ์„ฑ์„ ํƒ์ŠคํŠธ๋กœ ์ถ”์ถœํ•œ๊ฑฐ์•ผ. ํ…์ŠคํŠธ์˜ ๋‚ด์šฉ์ด ์กฐ์ž‘๋˜์—ˆ์„ ํ™•๋ฅ ์ด 0.5๋ฅผ ๋„˜๊ธฐ๋ฉด ๊ทธ ๋‚ด์šฉ์€ ๊ฐ€์งœ ๋‚ด์šฉ์ด์•ผ. ๋ฐ˜๋Œ€๋กœ ํ…์ŠคํŠธ๊ฐ€ ์กฐ์ž‘ ๋˜์—ˆ์„ํ™•๋ฅ ์ด 0.5๋ณด๋‹ค ๋‚ฎ์œผ๋ฉด
๊ทธ ๋‚ด์šฉ์€ ์ง„์งœ ๋‚ด์šฉ์ด์•ผ.
์‰ฝํ‘œ๋ฅผ ๊ธฐ์ค€์œผ๋กœ ๋งˆ์ง€๋ง‰ ๋ฌธ์žฅ์€ ์˜์ƒ์˜ ์ „์ฒด text์•ผ.
๋‹ต๋ณ€์— ์Œ์„ฑ๋‚ด์šฉ์˜ ์ฒซ๋ฒˆ์งธ ์‰ฝํ‘œ ๊นŒ์ง€์˜ ๋ฌธ์žฅ ์‚ฌ์šฉํ•ด์ค˜.
์ฃผ์–ด์ง„ ์ด๋ฏธ์ง€, ํ…์ŠคํŠธ, ๋”ฅํŽ˜์ดํฌ ํ™•๋ฅ ์„ ๊ฐ€์ง€๊ณ  ์‹ ๊ณ ์„œ ์ž‘์„ฑ์„ ๋ชฉ์ ์œผ๋กœ ์‹ ๊ณ ์„œ ์ž‘์„ฑ ์›Œ๋“œ ํŒŒ์ผ์„ ๋งŒ๋“œ๋ ค๊ณ  ํ•ด
1.์„ฑ๋ฒ”์ฃ„ 2.๋ช…์˜ˆํ›ผ์† 3. ํ—ˆ์œ„์˜์ƒ :๊ธˆ์œต ๋ฐ ์‚ฌ๊ธฐ (๊ฐ€์งœ ๊ด‘๊ณ ) 4. ์ •์น˜ ๋ฐ ์„ ๊ฑฐ ๊ฐœ์ž… (๊ฐ€์งœ ๋‰ด์Šค, ์„ ๊ฑฐ).
ํ•ด๋‹น ๋”ฅํŽ˜์ดํฌ ์˜์ƒ์˜ text ๊ฐ€ ์–ด๋–ค ๋ฒ”์ฃ„ ํ–‰์œ„์— ํ•ด๋‹นํ•˜๋Š”์ง€ ํ™•์ธํ•˜๊ณ  ์‹ ๊ณ ์„œ ์–‘์‹์— ๋งž๊ฒŒ ์‹ ๊ณ ์„œ ๋‚ด์šฉ์„ ์ž‘์„ฑํ•ด์ค˜.
์ด๋ฏธ์ง€์™€ ์Œ์„ฑ์˜ ๋”ฅํŽ˜์ดํฌ ํ™•๋ฅ  ๊ฐ’์€ ์–ด๋–ป๊ฒŒ ๋‚˜์™€์„œ ๋”ฅํŽ˜์ดํฌ๋กœ ์˜์‹ฌ๋˜๋Š”์ง€ ์„ค๋ช…ํ•˜๊ณ 
์–ด๋–ค ๋ฒ”์ฃ„ ํ–‰์œ„์ธ์ง€, text์— ๋ฌธ์ œ๊ฐ€ ์žˆ๋‹ค๋ฉด ์–ด๋–ค ๋‚ด์šฉ์ด๊ณ  ๋ญ๊ฐ€ ๋ฌธ์ œ์ธ์ง€์— ๋Œ€ํ•œ ๋‚ด์šฉ์„ ์ž์„ธํ•˜๊ฒŒ ์„ค๋ช…ํ•˜๊ณ  ํฌํ•จํ•ด์•ผ ํ•ด.
๋”ฅํŽ˜์ดํฌ๋กœ ํŒ์ •๋œ ์˜์ƒ์ด ๊ณ„์† ์‚ฌ์šฉ๋œ๋‹ค๋ฉด ์–ด๋–ค ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒ ํ•  ์ˆ˜ ์žˆ๋Š”์ง€๋„ ์ถ”๊ฐ€ํ•ด์ค˜.
์ƒˆ๋กœ์šด ๋ฌธ์žฅ ์•ž์—๋Š” \n์„ ์ถ”๊ฐ€ํ•˜๊ณ  ์‹œ์ž‘ํ•ด์ค˜.
**๋Š” ์‚ฌ์šฉํ•˜์ง€ ๋งˆ.
"""
    instruction_message = {"role": "user", "content": instructions}

    # Image input: a short caption followed by the first frame and the crop,
    # both embedded as base64 data URLs.
    image_caption = """ ์ฒซ๋ฒˆ์งธ๋กœ ์ž…๋ ฅ๋œ ์ด๋ฏธ์ง€๋Š” ์˜์ƒ์˜ ์ฒซ ํ”„๋ ˆ์ž„ ์ด๋ฏธ์ง€์•ผ. ๊ทธ ์ด๋ฏธ์ง€์™€ ํ…์ŠคํŠธ๋ฅผ ๋ณด๊ณ  ์˜์ƒ์˜ ์‚ฌํ™ฉ๊ณผ ๋‚ด์šฉ์„ ์„ค๋ช…ํ•ด์ค˜. ex) ์–ด๋–ค์‚ฌ๋žŒ(์œ ๋ช…์ธ, ์ •์น˜์ธ, ์ผ๋ฐ˜์ธ)์ด ๋ฌด์—‡์„ ํ•˜๋Š”์ง€(๊ด‘๊ณ , ์ธํ„ฐ๋ทฐ, ๋‰ด์Šค ๋“ฑ๋“ฑ)
๋‘๋ฒˆ์งธ ์ด๋ฏธ์ง€๋Š” ์˜์ƒ์˜ ์ผ๋ถ€๋ฅผ ์ž˜๋ผ์˜จ๊ฑฐ์•ผ. ๋”ฅํŽ˜์ดํฌ ์˜์ƒ ํ™•๋ฅ ์„ ํ™•์ธํ•ด์„œ ๊ทธ๊ทธ ๊ฐ’์ด 0.5๋ณด๋‹ค ํฌ๋ฉด, ๋‘๋ฒˆ์งธ ์ด๋ฏธ์ง€๋Š” ์กฐ์ž‘๋˜์—ˆ๋‹ค๋Š”๊ฑฐ์•ผ. """
    first_frame_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{first_img}"},
    }
    cropped_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{crop_img}"},
    }
    image_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": image_caption},
            first_frame_part,
            cropped_part,
        ],
    }

    # Assistant-role message describing the layout of the expected answer.
    layout_message = {"role": "assistant", "content": """
๋„ˆ๊ฐ€ ์ž‘์„ฑํ•ด์ค‘ ๋‚ด์šฉ
์‹ ๊ณ ์ œ๋ชฉ : ์‹ ๊ณ ์˜ ํ•ต์‹ฌ๋‚ด์šฉ
์‹ ๊ณ ๋‚ด์šฉ : ์‹ ๊ณ ์ด์œ  ๋ฐ ํ—ˆ์œ„์กฐ์ž‘์ฝ˜ํ…์ธ ๊ฐ€ ๋“œ๋Ÿฌ๋‚˜๋Š” ๋ถ€๋ถ„ ๋ช…์‹œ
์ฆ๊ฑฐ์ž๋ฃŒ : ๋ฌธ์ œ๊ฐ€ ๋˜๋Š” ํ™”๋ฉด์˜ ์บก์ฒ˜ ๊ทธ๋ฆผํŒŒ์ผ
๊ฐ ๋‚ด์šฉ๋‹น ์—ฌ๋Ÿฌ ๋‹จ๋ฝ์œผ๋กœ ๋‚˜๋ˆ ์„œ ๋…ผ๋ฆฌ์ ์œผ๋กœ ์ž‘์„ฑํ•ด์ค˜์ค˜.
๋‹ต๋ณ€์— **๋“ค์€ ์‚ญ์ œํ•ด์ค˜
"""}

    return [system_message, instruction_message, image_message, layout_message]
def _parse_literal(raw, field):
    """Parse one textbox value as a Python literal (number, list, string, ...).

    SECURITY: the value comes straight from a user-facing textbox. The
    previous implementation passed it to ``eval``, which executes arbitrary
    code; ``ast.literal_eval`` accepts the documented inputs (``0.74``,
    ``[0.90]``, ``['a', 'b']``) but evaluates literals only.

    Raises:
        ValueError: if ``raw`` is empty or not a valid Python literal.
    """
    import ast  # function-scope import: the file's import block has no ast

    if raw is None or not str(raw).strip():
        raise ValueError(f"{field} is empty")
    try:
        return ast.literal_eval(str(raw).strip())
    except (ValueError, SyntaxError) as err:
        raise ValueError(f"{field} is not a valid Python literal: {raw!r}") from err


def main(first_img, image, img_prob, voice_prob, voice_text):
    """Generate the deepfake report for one Gradio request.

    Args:
        first_img: File path of the first-frame image.
        image: File path of the cropped (suspected deepfake) image.
        img_prob: Textbox string holding the image probability, e.g. ``0.74``.
        voice_prob: Textbox string holding the voice probabilities,
            e.g. ``[0.90]``.
        voice_text: Textbox string holding the transcribed sentences,
            e.g. ``['a', 'b']``.

    Returns:
        A ``(doc, gpt_text)`` tuple: the path returned by ``word`` and the
        text returned by the model.

    Raises:
        ValueError: if an image is missing or a textbox value cannot be
            parsed as a Python literal.
    """
    if first_img is None or image is None:
        raise ValueError("both images must be provided")

    # Validate the textbox inputs before doing any file work or API call.
    img_prob = _parse_literal(img_prob, "img_prob")
    voice_prob = _parse_literal(voice_prob, "voice_prob")
    voice_text = _parse_literal(voice_text, "voice_text")

    # The API key is read from the environment (e.g. Hugging Face Secrets).
    client = OpenAI(api_key=os.getenv("GPT_api_key"))
    model = "gpt-4o-mini"

    # JPEG cannot store alpha/palette modes, so convert to RGB first; the
    # context manager closes the uploaded file's handle.
    with Image.open(image) as uploaded:
        rgb_frame = uploaded.convert("RGB")

    # Write the JPEG to a dedicated temp file instead of overwriting the
    # upload in place; the same file feeds both the prompt and the document.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as handle:
        jpeg_path = handle.name
    try:
        rgb_frame.save(jpeg_path, "JPEG")

        # NOTE(review): the first frame is sent as-is but labelled
        # image/jpeg in the prompt; confirm uploads are always JPEG.
        first_b64 = encode_image(first_img)
        crop_b64 = encode_image(jpeg_path)

        message = gpt_prompt(voice_prob, img_prob, voice_text, crop_b64, first_b64)
        response = client.chat.completions.create(
            model=model,
            messages=message,
        )
        gpt_text = response.choices[0].message.content

        doc = word(jpeg_path, gpt_text, voice_text, img_prob, voice_prob)
    finally:
        # The temp file used to be left behind on every request.
        os.remove(jpeg_path)

    return doc, gpt_text
# Build the Gradio interface: two image uploads and three textboxes go in,
# the report file and the model's text come out.
iface = gr.Interface(
    title="๋”ฅํŽ˜์ดํฌ ์‹ ๊ณ ์„œ ์ƒ์„ฑ๊ธฐ",
    fn=main,
    inputs=[
        # First-frame image input.
        gr.Image(type="filepath", label="์ฒซ ํ”„๋ ˆ์ž„ ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ"),
        # Suspected deepfake image input.
        gr.Image(type="filepath", label="๋”ฅํŽ˜์ดํฌ ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ"),
        # Image probability value.
        gr.Textbox(label="์ด๋ฏธ์ง€ ํ™•๋ฅ ๊ฐ’ ์ž…๋ ฅ (์˜ˆ: 0.74 )"),
        # Voice probability values.
        gr.Textbox(label="์Œ์„ฑ ํ™•๋ฅ ๊ฐ’ ์ž…๋ ฅ (์˜ˆ: [0.90] )"),
        # Transcribed text input.
        gr.Textbox(label="ํ…์ŠคํŠธ ์ž…๋ ฅ (์˜ˆ: ['๋ฌธ์žฅ1', '๋ฌธ์žฅ2'] )"),
    ],
    outputs=[
        # Report file offered for download.
        gr.File(label="๋‹ค์šด๋กœ๋“œํ•  ์‹ ๊ณ ์„œ ํŒŒ์ผ"),
        gr.Textbox(label="GPT_๋‚ด์šฉ"),
    ],
)

# Launch the interface only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()