Spaces:

ncoop57
/

clifs

Build error

App Files Files Community

ncoop57 committed on Oct 13, 2021

Commit

52636a1

•

1 Parent(s): 7f85c65

Update app with new example and configuration tools

Browse files

Files changed (1) hide show

app.py +31 -20

app.py CHANGED Viewed

@@ -11,41 +11,44 @@ from PIL import Image
 @st.cache(allow_output_mutation=True, max_entries=1)
 def get_model():
     clip = CLIPModel()
-    model = SentenceTransformer(modules=[clip]).to(dtype=torch.float32, device=torch.device('cpu'))
-    return model
-def get_embedding(model, query, video):
-    text_emb = model.encode(query, device='cpu')
     # Encode an image:
     images = []
     for img in video:
         images.append(Image.fromarray(img))
-    img_embs = model.encode(images, device='cpu')
     return text_emb, img_embs
-def find_frames(url, model, desc, top_k, text):
-    text.text("Processing video...")
     probe = ffmpeg.probe(url)
     video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
     width = int(video_stream['width'])
     height = int(video_stream['height'])
     out, _ = (
         ffmpeg
-        .input(url, t=60)
         .output('pipe:', format='rawvideo', pix_fmt='rgb24')
         .run(capture_stdout=True)
     )
     video = (
         np
         .frombuffer(out, np.uint8)
         .reshape([-1, height, width, 3])
     )[::10]
-    txt_embd, img_embds = get_embedding(model, desc, video)
     cos_scores = np.array(util.cos_sim(txt_embd, img_embds))
     ids = np.argsort(cos_scores)[0][-top_k:]
@@ -53,13 +56,25 @@ def find_frames(url, model, desc, top_k, text):
     text.empty()
     st.image(imgs)
-def main_page(model):
     st.title("Introducing Youtube CLIFS")
-def clifs_page(model):
     st.title("CLIFS")
     st.sidebar.markdown("### Controls:")
     top_k = st.sidebar.slider(
         "Top K",
         min_value=1,
@@ -68,26 +83,22 @@ def clifs_page(model):
     )
     desc = st.sidebar.text_input(
         "Search Description",
-        value="Two white puppies",
         help="Text description of what you want to find in the video",
     )
     url = st.sidebar.text_input(
         "Youtube Video URL",
-        value='https://youtu.be/I3AaW9ZevIU',
         help="Youtube video you'd like to search through",
     )
     submit_button = st.sidebar.button("Search")
     if submit_button:
-        text = st.text("Downloading video...")
-        hook = lambda d: my_hook(d, )
         ydl_opts = {"format": "mp4[height=360]"}
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             info_dict = ydl.extract_info(url, download=False)
             video_url = info_dict.get("url", None)
-            find_frames(video_url, model, desc, top_k, text)
-            print(video_url)
-            # ydl.download([url])
 PAGES = {
     "Home": main_page,
@@ -99,12 +110,12 @@ PAGES = {
 def run():
     st.set_page_config(page_title="Youtube CLIFS")
     # main body
-    model = get_model()
     st.sidebar.title('Navigation')
     selection = st.sidebar.radio("Go to", list(PAGES.keys()))
-    page = PAGES[selection](model)

 @st.cache(allow_output_mutation=True, max_entries=1)
 def get_model():
+    txt_model = SentenceTransformer('clip-ViT-B-32-multilingual-v1').to(dtype=torch.float32, device=torch.device('cpu'))
     clip = CLIPModel()
+    vis_model = SentenceTransformer(modules=[clip]).to(dtype=torch.float32, device=torch.device('cpu'))
+    return txt_model, vis_model
+def get_embedding(txt_model, vis_model, query, video):
+    text_emb = txt_model.encode(query, device='cpu')
     # Encode an image:
     images = []
     for img in video:
         images.append(Image.fromarray(img))
+    img_embs = vis_model.encode(images, device='cpu')
     return text_emb, img_embs
+def find_frames(url, txt_model, vis_model, desc, seconds, top_k):
+    text = st.text("Downloading video...")
     probe = ffmpeg.probe(url)
     video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
     width = int(video_stream['width'])
     height = int(video_stream['height'])
     out, _ = (
         ffmpeg
+        .input(url, t=seconds)
         .output('pipe:', format='rawvideo', pix_fmt='rgb24')
         .run(capture_stdout=True)
     )
+    text.text("Processing video...")
     video = (
         np
         .frombuffer(out, np.uint8)
         .reshape([-1, height, width, 3])
     )[::10]
+    txt_embd, img_embds = get_embedding(txt_model, vis_model, desc, video)
     cos_scores = np.array(util.cos_sim(txt_embd, img_embds))
     ids = np.argsort(cos_scores)[0][-top_k:]
     text.empty()
     st.image(imgs)
+with open("HOME.md", "r") as f:
+    HOME_PAGE = f.read()
+def main_page(txt_model, vis_model):
     st.title("Introducing Youtube CLIFS")
+    st.markdown(HOME_PAGE)
+def clifs_page(txt_model, vis_model):
     st.title("CLIFS")
     st.sidebar.markdown("### Controls:")
+    seconds = st.sidebar.slider(
+        "How many seconds of video to consider?",
+        min_value=10,
+        max_value=120,
+        value=60,
+        step=1,
+    )
     top_k = st.sidebar.slider(
         "Top K",
         min_value=1,
     )
     desc = st.sidebar.text_input(
         "Search Description",
+        value="Pancake in the shape of an otter", # panqueque en forma de nutria
         help="Text description of what you want to find in the video",
     )
     url = st.sidebar.text_input(
         "Youtube Video URL",
+        value='https://youtu.be/xUv6XgPwGaQ',
         help="Youtube video you'd like to search through",
     )
     submit_button = st.sidebar.button("Search")
     if submit_button:
         ydl_opts = {"format": "mp4[height=360]"}
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             info_dict = ydl.extract_info(url, download=False)
             video_url = info_dict.get("url", None)
+            find_frames(video_url, txt_model, vis_model, desc, seconds, top_k)
 PAGES = {
     "Home": main_page,
 def run():
     st.set_page_config(page_title="Youtube CLIFS")
     # main body
+    txt_model, vis_model = get_model()
     st.sidebar.title('Navigation')
     selection = st.sidebar.radio("Go to", list(PAGES.keys()))
+    page = PAGES[selection](txt_model, vis_model)