Spaces:

ncoop57
/

clifs

Build error

clifs / app.py

ncoop57

Update descriptions in both english and spanish and add page that describes project in spanish

830243e about 3 years ago

3.89 kB

	import ffmpeg
	import torch
	import youtube_dl

	import numpy as np
	import streamlit as st

	from sentence_transformers import SentenceTransformer, util, models
	from clip import CLIPModel
	from PIL import Image

	@st.cache(allow_output_mutation=True, max_entries=1)
	def get_model():
	    """Build the text and image encoders used for the search.

	    The text encoder is loaded from 'clip-ViT-B-32-multilingual-v1'; the
	    image encoder is a SentenceTransformer wrapping a single ``CLIPModel``
	    module. Both are moved to the CPU as float32. The ``st.cache``
	    decorator (one entry, output mutation allowed) governs reuse of the
	    returned pair.

	    Returns:
	        A ``(txt_model, vis_model)`` tuple.
	    """
	    cpu = torch.device('cpu')

	    text_encoder = SentenceTransformer('clip-ViT-B-32-multilingual-v1')
	    text_encoder = text_encoder.to(dtype=torch.float32, device=cpu)

	    clip_module = CLIPModel()
	    image_encoder = SentenceTransformer(modules=[clip_module])
	    image_encoder = image_encoder.to(dtype=torch.float32, device=cpu)

	    return text_encoder, image_encoder


	def get_embedding(txt_model, vis_model, query, video):
	    """Embed the text query and every frame of ``video`` on the CPU.

	    Args:
	        txt_model: Object whose ``encode`` embeds ``query``.
	        vis_model: Object whose ``encode`` embeds a list of PIL images.
	        query: Text to embed.
	        video: Iterable of frame arrays accepted by ``Image.fromarray``.

	    Returns:
	        A ``(text_emb, img_embs)`` tuple holding the two ``encode`` results.
	    """
	    query_embedding = txt_model.encode(query, device='cpu')

	    # The image encoder is fed PIL images, so wrap each raw frame first.
	    frames = [Image.fromarray(frame) for frame in video]
	    frame_embeddings = vis_model.encode(frames, device='cpu')

	    return query_embedding, frame_embeddings

	def find_frames(url, txt_model, vis_model, desc, seconds, top_k):
	    """Display the sampled frames of a video that best match ``desc``.

	    The first ``seconds`` seconds of ``url`` are decoded by ffmpeg into raw
	    RGB frames, every 10th frame is kept, and the kept frames are ranked by
	    cosine similarity between their embeddings and the embedding of ``desc``.
	    The ``top_k`` highest-scoring frames are rendered with ``st.image``.

	    Args:
	        url: Video location handed to ``ffmpeg.probe`` and ``ffmpeg.input``.
	        txt_model: Model used to embed ``desc``.
	        vis_model: Model used to embed the frames.
	        desc: Text query describing the frame to look for.
	        seconds: Duration limit passed to ffmpeg as ``t``.
	        top_k: Number of frames to show.

	    Returns:
	        None. Frames, or an error message when the video has no video
	        stream or yields no frames, are rendered through Streamlit.
	    """
	    text = st.text("Downloading video (Descargando video)...")
	    try:
	        probe = ffmpeg.probe(url)
	        video_stream = next(
	            (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
	            None,
	        )
	        if video_stream is None:
	            # Without this guard the lookups below fail with an opaque TypeError.
	            st.error(
	                "No video stream found at the given URL "
	                "(No se encontró ningún flujo de video en la URL)."
	            )
	            return
	        width = int(video_stream['width'])
	        height = int(video_stream['height'])
	        out, _ = (
	            ffmpeg
	            .input(url, t=seconds)
	            .output('pipe:', format='rawvideo', pix_fmt='rgb24')
	            .run(capture_stdout=True)
	        )

	        text.text("Processing video (Procesando video)...")
	        # rgb24 is 3 bytes per pixel, so the buffer reshapes to
	        # (frames, height, width, 3); only every 10th frame is kept.
	        video = (
	            np
	            .frombuffer(out, np.uint8)
	            .reshape([-1, height, width, 3])
	        )[::10]
	        if len(video) == 0:
	            st.error(
	                "No frames could be decoded from the video "
	                "(No se pudo decodificar ningún fotograma del video)."
	            )
	            return

	        txt_embd, img_embds = get_embedding(txt_model, vis_model, desc, video)
	        cos_scores = np.array(util.cos_sim(txt_embd, img_embds))
	        # argsort is ascending: the last top_k indices are the best matches,
	        # with the single best match in the final position.
	        ids = np.argsort(cos_scores)[0][-top_k:]

	        imgs = [Image.fromarray(video[i]) for i in ids]
	    finally:
	        # Always remove the status line, even when a step above fails.
	        text.empty()
	    st.image(imgs)

	# Markdown bodies of the two static pages, read once at import time.
	# The encoding is pinned to UTF-8 so decoding does not depend on the host
	# locale (the Spanish page presumably contains accented characters).
	with open("HOME.md", "r", encoding="utf-8") as f:
	    HOME_PAGE = f.read()

	with open("INICIO.md", "r", encoding="utf-8") as f:
	    INICIO_PAGINA = f.read()

	def main_page(txt_model, vis_model):
	    """Render the English introduction page.

	    Both model arguments are accepted but unused; every page function is
	    invoked with the same ``(txt_model, vis_model)`` pair.
	    """
	    st.title("Introducing Youtube CLIFS")

	    st.markdown(HOME_PAGE)

	def inicio_pagina(txt_model, vis_model):
	    """Render the Spanish introduction page.

	    Both model arguments are accepted but unused; every page function is
	    invoked with the same ``(txt_model, vis_model)`` pair.
	    """
	    st.title("Presentando Youtube CLIFS")

	    st.markdown(INICIO_PAGINA)

	def clifs_page(txt_model, vis_model):
	    """Render the search page: sidebar controls plus the frame search.

	    The sidebar collects the clip length, the number of results, the text
	    query and the video URL. When the search button is pressed, the URL is
	    resolved with youtube_dl (format ``mp4[height=360]``, no download) and
	    the resulting stream URL is handed to ``find_frames``.

	    Args:
	        txt_model: Text encoder forwarded to ``find_frames``.
	        vis_model: Image encoder forwarded to ``find_frames``.

	    Returns:
	        None. A resolution failure is reported with ``st.error`` rather
	        than propagating as an exception.
	    """
	    st.title("CLIFS")

	    st.sidebar.markdown("### Controls (Controles):")
	    seconds = st.sidebar.slider(
	        "How many seconds of video to consider? (¿Cuántos segundos de video considerar?)",
	        min_value=10,
	        max_value=120,
	        value=60,
	        step=1,
	    )
	    top_k = st.sidebar.slider(
	        "Top K",
	        min_value=1,
	        max_value=5,
	        step=1,
	    )
	    desc = st.sidebar.text_input(
	        "Search Query (Búsqueda de Consulta)",
	        value="Pancake in the shape of an otter",
	        help="Text description of what you want to find in the video (Descripción de texto de que desea encontrar en el video)",
	    )
	    url = st.sidebar.text_input(
	        "Youtube Video URL (URL del Video de Youtube)",
	        value='https://youtu.be/xUv6XgPwGaQ',
	        help="Youtube video you want to search (Video de Youtube que desea búscar)",
	    )

	    submit_button = st.sidebar.button("Search (Buscar)")
	    if not submit_button:
	        return

	    ydl_opts = {"format": "mp4[height=360]"}
	    try:
	        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	            info_dict = ydl.extract_info(url, download=False)
	    except youtube_dl.utils.DownloadError as err:
	        # Raised for invalid/unavailable videos or when the format is missing.
	        st.error(f"Could not retrieve the video (No se pudo obtener el video): {err}")
	        return

	    # Results without a direct "url" entry cannot be passed to ffmpeg.
	    video_url = (info_dict or {}).get("url", None)
	    if not video_url:
	        st.error(
	            "No direct video URL was found for this link "
	            "(No se encontró una URL directa de video para este enlace)."
	        )
	        return

	    find_frames(video_url, txt_model, vis_model, desc, seconds, top_k)

	# Navigation label -> function that renders that page. Insertion order is
	# the order in which the labels are offered in the sidebar.
	PAGES = {
	    "Home": main_page,
	    "Inicio": inicio_pagina,
	    "CLIFS": clifs_page,
	}



	def run():
	    """Configure the page, load the models and render the selected page.

	    The sidebar radio offers the keys of ``PAGES``; the function mapped to
	    the chosen key is called with the text and image models.
	    """
	    st.set_page_config(page_title="Youtube CLIFS")
	    # main body
	    txt_model, vis_model = get_model()

	    st.sidebar.title("Navigation (Navegación)")
	    selection = st.sidebar.radio("Go to (Ir a)", list(PAGES))

	    # Page functions render directly and return nothing, so the result is
	    # not kept.
	    PAGES[selection](txt_model, vis_model)




	# Start the app only when this file is executed as the main module.
	if __name__ == "__main__":
	    run()