ncoop57
Update descriptions in both English and Spanish and add a page that describes the project in Spanish
830243e
import ffmpeg | |
import torch | |
import youtube_dl | |
import numpy as np | |
import streamlit as st | |
from sentence_transformers import SentenceTransformer, util, models | |
from clip import CLIPModel | |
from PIL import Image | |
def get_model():
    """Build the text and image encoders used for frame search.

    Both encoders are moved to the CPU as float32.

    Returns:
        A ``(txt_model, vis_model)`` tuple: a SentenceTransformer loaded from
        ``'clip-ViT-B-32-multilingual-v1'`` and a SentenceTransformer whose
        only module is a ``CLIPModel``.
    """
    cpu = torch.device('cpu')

    text_encoder = SentenceTransformer('clip-ViT-B-32-multilingual-v1')
    text_encoder = text_encoder.to(dtype=torch.float32, device=cpu)

    image_encoder = SentenceTransformer(modules=[CLIPModel()])
    image_encoder = image_encoder.to(dtype=torch.float32, device=cpu)

    return text_encoder, image_encoder
def get_embedding(txt_model, vis_model, query, video):
    """Encode a text query and a sequence of frames on the CPU.

    Args:
        txt_model: Encoder whose ``encode`` is called with ``query``.
        vis_model: Encoder whose ``encode`` is called with the PIL images.
        query: Text to embed.
        video: Iterable of arrays, each converted with ``Image.fromarray``.

    Returns:
        ``(text_emb, img_embs)`` exactly as returned by the two ``encode``
        calls.
    """
    query_embedding = txt_model.encode(query, device='cpu')
    # The image encoder is fed PIL images, so wrap each raw frame first.
    frames = [Image.fromarray(frame) for frame in video]
    frame_embeddings = vis_model.encode(frames, device='cpu')
    return query_embedding, frame_embeddings
def find_frames(url, txt_model, vis_model, desc, seconds, top_k):
    """Display the sampled frames of a video that best match ``desc``.

    The first ``seconds`` seconds of ``url`` are decoded to raw RGB frames
    with ffmpeg, every 10th frame is kept, and the kept frames are ranked by
    cosine similarity between their embeddings and the embedding of ``desc``.
    The ``top_k`` highest-scoring frames are rendered with ``st.image``,
    best match first.

    Args:
        url: Media URL or path that ffmpeg can probe and read.
        txt_model: Text encoder passed through to ``get_embedding``.
        vis_model: Image encoder passed through to ``get_embedding``.
        desc: Text query to search for.
        seconds: Duration passed to ffmpeg as the ``t`` input option.
        top_k: Maximum number of frames to display.

    Returns:
        None. Results (or an error message) are written to the Streamlit page.
    """
    status = st.text("Downloading video (Descargando video)...")
    probe = ffmpeg.probe(url)
    video_stream = next(
        (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
        None,
    )
    if video_stream is None:
        # Audio-only or otherwise unusable input: report it instead of
        # failing with a TypeError on the subscript below.
        status.empty()
        st.error("No video stream found (No se encontró ninguna pista de video).")
        return
    width = int(video_stream['width'])
    height = int(video_stream['height'])

    raw, _ = (
        ffmpeg
        .input(url, t=seconds)
        .output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True)
    )

    status.text("Processing video (Procesando video)...")
    # rgb24 is 3 bytes per pixel; keep only every 10th decoded frame.
    video = np.frombuffer(raw, np.uint8).reshape([-1, height, width, 3])[::10]
    if len(video) == 0:
        # Nothing was decoded, so there is nothing to embed or rank.
        status.empty()
        st.error("No frames could be decoded (No se pudo decodificar ningún fotograma).")
        return

    txt_embd, img_embds = get_embedding(txt_model, vis_model, desc, video)
    cos_scores = np.array(util.cos_sim(txt_embd, img_embds))
    # argsort is ascending, so the best matches are the last ``top_k``
    # entries; reverse them so the strongest match is displayed first.
    ids = np.argsort(cos_scores)[0][-top_k:][::-1]
    imgs = [Image.fromarray(video[i]) for i in ids]

    status.empty()
    st.image(imgs)
# Page bodies are Markdown files read once at import time. The encoding is
# pinned to UTF-8 so any non-ASCII characters (e.g. accented Spanish text)
# decode the same way regardless of the platform's locale default.
# NOTE(review): assumes both files are stored as UTF-8 — confirm.
with open("HOME.md", "r", encoding="utf-8") as f:
    HOME_PAGE = f.read()
with open("INICIO.md", "r", encoding="utf-8") as f:
    INICIO_PAGINA = f.read()
def main_page(txt_model, vis_model):
    """Render the English landing page.

    Both model arguments are accepted so this page can be called with the
    same arguments as the other pages; neither is used here.
    """
    st.title("Introducing Youtube CLIFS")
    st.markdown(HOME_PAGE)
def inicio_pagina(txt_model, vis_model):
    """Render the Spanish landing page.

    Both model arguments are accepted so this page can be called with the
    same arguments as the other pages; neither is used here.
    """
    st.title("Presentando Youtube CLIFS")
    st.markdown(INICIO_PAGINA)
def clifs_page(txt_model, vis_model):
    """Render the search page: sidebar controls plus results on submit.

    When the search button is pressed, the YouTube URL is resolved to a
    direct media URL with youtube_dl (no download) and handed to
    ``find_frames``. Resolution failures are reported on the page instead of
    surfacing as an unhandled exception.

    Args:
        txt_model: Text encoder forwarded to ``find_frames``.
        vis_model: Image encoder forwarded to ``find_frames``.
    """
    st.title("CLIFS")
    st.sidebar.markdown("### Controls (Controles):")
    seconds = st.sidebar.slider(
        "How many seconds of video to consider? (¿Cuántos segundos de video considerar?)",
        min_value=10,
        max_value=120,
        value=60,
        step=1,
    )
    top_k = st.sidebar.slider(
        "Top K",
        min_value=1,
        max_value=5,
        step=1,
    )
    desc = st.sidebar.text_input(
        "Search Query (Búsqueda de Consulta)",
        value="Pancake in the shape of an otter",
        help="Text description of what you want to find in the video (Descripción de texto de que desea encontrar en el video)",
    )
    url = st.sidebar.text_input(
        "Youtube Video URL (URL del Video de Youtube)",
        value='https://youtu.be/xUv6XgPwGaQ',
        help="Youtube video you want to search (Video de Youtube que desea búscar)",
    )

    if not st.sidebar.button("Search (Buscar)"):
        return

    ydl_opts = {"format": "mp4[height=360]"}
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=False)
    except youtube_dl.utils.DownloadError as err:
        # Raised for invalid/unavailable URLs or when the requested format
        # does not exist for this video.
        st.error(f"Could not load the video (No se pudo cargar el video): {err}")
        return

    video_url = info_dict.get("url") if info_dict else None
    if not video_url:
        # Without a direct media URL there is nothing for ffmpeg to read.
        st.error(
            "No playable video URL was found "
            "(No se encontró una URL de video reproducible)."
        )
        return

    find_frames(video_url, txt_model, vis_model, desc, seconds, top_k)
# Sidebar label -> page renderer. Insertion order is the order in which the
# labels are offered in the navigation radio.
PAGES = {
    "Home": main_page,
    "Inicio": inicio_pagina,
    "CLIFS": clifs_page,
}
def run():
    """Configure the Streamlit app and render the page chosen in the sidebar.

    The models are built with ``get_model`` and passed to whichever page
    function in ``PAGES`` matches the sidebar selection.
    """
    st.set_page_config(page_title="Youtube CLIFS")

    # main body
    txt_model, vis_model = get_model()
    st.sidebar.title("Navigation (Navegación)")
    selection = st.sidebar.radio("Go to (Ir a)", list(PAGES))
    # Page functions draw directly to the page; their return value is unused.
    PAGES[selection](txt_model, vis_model)


if __name__ == "__main__":
    run()