"""Streamlit demo: search Unsplash images by text with a Korean CLIP model.

The app embeds the text typed by the user with the text tower of
``Bingsu/vitB32_bert_ko_small_clip`` and shows the images whose precomputed
embeddings (``img_emb.npy``) are most similar to it.
"""
import pickle

import numpy as np
import pandas as pd
import streamlit as st
import torch
from sentence_transformers.util import semantic_search
from transformers import VisionTextDualEncoderModel, VisionTextDualEncoderProcessor

MODEL_NAME = "Bingsu/vitB32_bert_ko_small_clip"


def _passthrough(func):
    """Fallback decorator for Streamlit versions without a resource cache."""
    return func


# Streamlit re-executes this script on every widget interaction, so the heavy
# loads below must be cached or they would run again for each new query.
# `cache_resource` is the current API; `experimental_singleton` is its
# predecessor on older releases.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or _passthrough
)


@_cache_resource
def load_model():
    """Return the ``(model, processor)`` pair for ``MODEL_NAME``."""
    model = VisionTextDualEncoderModel.from_pretrained(MODEL_NAME)
    processor = VisionTextDualEncoderProcessor.from_pretrained(MODEL_NAME)
    return model, processor


@_cache_resource
def load_index():
    """Return ``(info, img_id, img_emb)`` read from the local data files.

    ``info`` is the DataFrame read from ``info.csv``, ``img_id`` is the object
    unpickled from ``img_id.pkl`` and ``img_emb`` is the array stored in
    ``img_emb.npy``. The returned objects are shared between reruns and must
    be treated as read-only.
    """
    info = pd.read_csv("info.csv")
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because img_id.pkl is expected to be a trusted artifact
    # shipped with the app - never point this at user-supplied files.
    with open("img_id.pkl", "rb") as f:
        img_id = pickle.load(f)
    img_emb = np.load("img_emb.npy")
    return info, img_id, img_emb


st.title("VitB32 Bert Ko Small Clip Test")
st.markdown("Unsplash data에서 입력 텍스트와 가장 유사한 이미지를 검색합니다.")

with st.spinner("Loading model..."):
    model, processor = load_model()
    info, img_id, img_emb = load_index()

text = st.text_input("Input Text", value="검은 고양이")
tokens = processor(text=text, return_tensors="pt")

with st.spinner("Predicting..."):
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        text_emb = model.get_text_features(**tokens)

# One query was submitted, so take the hit list of the first (only) query.
result = semantic_search(text_emb, img_emb, top_k=6)[0]

# Two rows of three columns form the 2x3 result grid.
columns = st.columns(3) + st.columns(3)
# zip() stops at the shorter sequence, so a corpus with fewer than six
# entries no longer raises IndexError.
for col, hit in zip(columns, result):
    photo_id = img_id[hit["corpus_id"]]
    target_series = info.loc[info["photo_id"] == photo_id, "photo_image_url"]
    if target_series.empty:
        # No URL known for this photo id; leave the grid cell blank.
        continue
    img_url = target_series.iloc[0]
    col.image(img_url, use_column_width=True)