rynmurdock committed on
Commit
e55aab4
•
1 Parent(s): 430249a

update sol probs others

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +224 -211
  3. twitter_prompts.csv +47 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ __pycache__*
app.py CHANGED
@@ -6,21 +6,27 @@ import torch
 
 # lol
 DEVICE = 'cuda'
- STEPS = 6
 output_hidden_state = False
 device = "cuda"
 dtype = torch.bfloat16
- N_IMG_EMBS = 3
 
 import logging
 import os
 import imageio
 import gradio as gr
 import numpy as np
- from sklearn.svm import SVC
- from sklearn import preprocessing
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 
 import random
 import time
@@ -37,8 +43,12 @@ prevs_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'l
 import spaces
 start_time = time.time()
 
 ####################### Setup Model
- from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, LCMScheduler, AutoencoderTiny, UNet2DConditionModel, AutoencoderKL
 from transformers import CLIPTextModel
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
@@ -46,9 +56,8 @@ from PIL import Image
 from transformers import CLIPVisionModelWithProjection
 import uuid
 import av
- import torchvision
 
- def write_video(file_name, images, fps=17):
 container = av.open(file_name, mode="w")
 
 stream = container.add_stream("h264", rate=fps)
@@ -89,182 +98,133 @@ device_map='cuda')
89
  #unet = UNet2DConditionModel.from_pretrained(finetune_path+'/unet/').to(dtype)
90
  #text_encoder = CLIPTextModel.from_pretrained(finetune_path+'/text_encoder/').to(dtype)
91
 
92
 
93
- unet = UNet2DConditionModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='unet',).to(dtype).to('cpu')
94
- text_encoder = CLIPTextModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='text_encoder',
95
- device_map='cpu').to(dtype)
 
 
 
96
 
97
- adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
98
- pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", motion_adapter=adapter, image_encoder=image_encoder, torch_dtype=dtype,
99
- unet=unet, text_encoder=text_encoder)
100
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
101
- pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
102
- pipe.set_adapters(["lcm-lora"], [.95])
103
- pipe.fuse_lora()
104
 
 
 
105
 
106
- #pipe = AnimateDiffPipeline.from_pretrained('emilianJR/epiCRealism', torch_dtype=dtype, image_encoder=image_encoder)
107
- #pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
108
- #repo = "ByteDance/AnimateDiff-Lightning"
109
- #ckpt = f"animatediff_lightning_4step_diffusers.safetensors"
110
 
111
 
112
- pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15_vit-G.bin", map_location='cpu')
113
- # This IP adapter improves outputs substantially.
114
- pipe.set_ip_adapter_scale(.6)
115
- pipe.unet.fuse_qkv_projections()
116
- #pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
117
 
118
- pipe.to(device=DEVICE)
 
119
 
120
  #pipe.unet = torch.compile(pipe.unet)
121
  #pipe.vae = torch.compile(pipe.vae)
122
 
123
 
124
- #############################################################
125
-
126
- from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
127
-
128
- pali = PaliGemmaForConditionalGeneration.from_pretrained('google/paligemma-3b-mix-224', torch_dtype=dtype).eval().to('cuda')
129
- processor = AutoProcessor.from_pretrained('google/paligemma-3b-mix-224')
130
-
131
- #pali = torch.compile(pali)
132
 
133
  @spaces.GPU()
134
- def to_wanted_embs(image_outputs, input_ids, attention_mask, cache_position=None):
135
- inputs_embeds = pali.get_input_embeddings()(input_ids.to('cuda'))
136
- selected_image_feature = image_outputs.to(dtype).to('cuda')
137
- image_features = pali.multi_modal_projector(selected_image_feature)
138
-
139
- if cache_position is None:
140
- cache_position = torch.arange(inputs_embeds.shape[1], device=inputs_embeds.device)
141
- inputs_embeds, attention_mask, labels, position_ids = pali._merge_input_ids_with_image_features(
142
- image_features, inputs_embeds, input_ids, attention_mask, None, None, cache_position
143
- )
144
- return inputs_embeds
145
-
146
-
147
- # TODO cache descriptions?
148
- @spaces.GPU(duration=20)
149
- def generate_pali(n_embs):
150
- prompt = 'caption en'
151
- model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
152
- # we need to get im_embs taken in here.
153
-
154
- descs = ''
155
- for n, emb in enumerate(n_embs):
156
- if n < len(n_embs)-1:
157
- input_len = model_inputs["input_ids"].shape[-1]
158
- input_embeds = to_wanted_embs(emb,
159
- model_inputs["input_ids"].to(device),
160
- model_inputs["attention_mask"].to(device))
161
- generation = pali.generate(max_new_tokens=20, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
162
- decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
163
- descs += f'Description: {decoded}\n'
164
- else:
165
- prompt = f'en {descs} Describe a new image that is similar. Description:'
166
- model_inputs = processor(text=prompt, images=torch.zeros(1, 3, 224, 224), return_tensors="pt")
167
- input_len = model_inputs["input_ids"].shape[-1]
168
- input_embeds = to_wanted_embs(emb,
169
- model_inputs["input_ids"].to(device),
170
- model_inputs["attention_mask"].to(device))
171
- generation = pali.generate(max_new_tokens=20, do_sample=True, top_p=.94, temperature=1.2, inputs_embeds=input_embeds)
172
- decoded = processor.decode(generation[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
173
-
174
- return decoded
175
-
176
-
177
-
178
-
179
- #############################################################
180
-
181
-
182
-
183
- @spaces.GPU(duration=20)
184
  def generate_gpu(in_im_embs, prompt='the scene'):
185
  with torch.no_grad():
186
- in_im_embs = in_im_embs.to('cuda').unsqueeze(0).unsqueeze(0)
 
187
  output = pipe(prompt=prompt, guidance_scale=1, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
188
  im_emb, _ = pipe.encode_image(
189
- output.frames[0][len(output.frames[0])//2], 'cuda', 1, output_hidden_state
190
  )
191
  im_emb = im_emb.detach().to('cpu').to(torch.float32)
192
- im = torchvision.transforms.ToTensor()(output.frames[0][len(output.frames[0])//2]).unsqueeze(0)
193
- im = torch.nn.functional.interpolate(im, (224, 224)).to(dtype).to('cuda')
194
- im = (im - .5) * 2
195
- gemb = pali.vision_tower(im).last_hidden_state.detach().to('cpu').to(torch.float32)
196
- return output, im_emb, gemb
197
 
198
 
199
  def generate(in_im_embs, prompt='the scene'):
200
- output, im_emb, gemb = generate_gpu(in_im_embs, prompt)
201
- nsfw =maybe_nsfw(output.frames[0][len(output.frames[0])//2])
 
202
  name = str(uuid.uuid4()).replace("-", "")
203
- path = f"/tmp/{name}.mp4"
204
 
205
  if nsfw:
206
  gr.Warning("NSFW content detected.")
207
  # TODO could return an automatic dislike of auto dislike on the backend for neither as well; just would need refactoring.
208
- return None, im_emb, gemb
209
-
210
 
211
- output.frames[0] = output.frames[0] + list(reversed(output.frames[0]))
212
-
213
- write_video(path, output.frames[0])
214
- return path, im_emb, gemb
215
 
216
 
217
  #######################
218
 
219
  def get_user_emb(embs, ys):
220
- # handle case where every instance of calibration videos is 'Neither' or 'Like' or 'Dislike'
221
-
222
- if len(list(ys)) <= 10:
223
- aways = [torch.zeros_like(embs[0]) for i in range(10)]
224
- embs += aways
225
- awal = [0 for i in range(5)] + [1 for i in range(5)]
226
- ys += awal
227
-
228
- indices = list(range(len(embs)))
229
  # sample only as many negatives as there are positives
230
- pos_indices = [i for i in indices if ys[i] == 1]
231
- neg_indices = [i for i in indices if ys[i] == 0]
232
- #lower = min(len(pos_indices), len(neg_indices))
233
- #neg_indices = random.sample(neg_indices, lower)
234
- #pos_indices = random.sample(pos_indices, lower)
235
-
236
 
237
- # we may have just encountered a rare multi-threading diffusers issue (https://github.com/huggingface/diffusers/issues/5749);
238
- # this ends up adding a rating but losing an embedding, it seems.
239
- # let's take off a rating if so to continue without indexing errors.
240
- if len(ys) > len(embs):
241
- print('ys are longer than embs; popping latest rating')
242
- ys.pop(-1)
 
 
 
 
243
 
244
- feature_embs = torch.stack([embs[i].squeeze().to('cpu') for i in indices]).to('cpu')
245
- #scaler = preprocessing.StandardScaler().fit(feature_embs)
246
- #feature_embs = scaler.transform(feature_embs)
247
- chosen_y = np.array([ys[i] for i in indices])
248
 
249
- if feature_embs.norm() != 0:
250
- feature_embs = feature_embs / feature_embs.norm()
251
 
252
- #lin_class = Ridge(fit_intercept=False).fit(feature_embs, chosen_y)
253
- #class_weight='balanced'
254
- lin_class = SVC(max_iter=500, kernel='linear', C=.1, ).fit(feature_embs.squeeze(), chosen_y)
255
- coef_ = torch.tensor(lin_class.coef_, dtype=torch.float32).detach().to('cpu')
256
- coef_ = coef_ / coef_.abs().max()
257
 
258
- w = 1# if len(embs) % 2 == 0 else 0
259
- im_emb = w * coef_.to(dtype=dtype)
260
- return im_emb
261
 
262
 
263
  def pluck_img(user_id, user_emb):
264
  not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, 'gone') == 'gone' for i in prevs_df.iterrows()]]
265
  while len(not_rated_rows) == 0:
266
  not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, 'gone') == 'gone' for i in prevs_df.iterrows()]]
267
- time.sleep(.001)
268
  # TODO optimize this lol
269
  best_sim = -100000
270
  for i in not_rated_rows.iterrows():
@@ -274,8 +234,7 @@ def pluck_img(user_id, user_emb):
274
  best_sim = sim
275
  best_row = i[1]
276
  img = best_row['paths']
277
- text = best_row.get('text', '')
278
- return img, text
279
 
280
 
281
  def background_next_image():
@@ -283,10 +242,10 @@ def background_next_image():
283
  # only let it get N (maybe 3) ahead of the user
284
  #not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
285
  rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
286
- while len(rated_rows) < 5:
 
287
  # not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
288
- rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
289
- time.sleep(.01)
290
 
291
  user_id_list = set(rated_rows['latest_user_to_rate'].to_list())
292
  for uid in user_id_list:
@@ -300,22 +259,32 @@ def background_next_image():
300
  rated_from_user = rated_rows[[i[1]['from_user_id'] == uid for i in rated_rows.iterrows()]]
301
 
302
  # we pop previous ratings if there are > n
303
- if len(rated_from_user) >= 25:
304
  oldest = rated_from_user.iloc[0]['paths']
305
  prevs_df = prevs_df[prevs_df['paths'] != oldest]
306
  # we don't compute more after n are in the queue for them
307
- if len(unrated_from_user) >= 20:
308
  continue
309
 
310
- embs, ys, gembs = pluck_embs_ys(uid)
311
- user_emb = get_user_emb(embs, ys) * 3
312
- pos_gembs = [g for g, y in zip(gembs, ys) if y == 1]
313
- if len(pos_gembs) > 4:
314
- hist_gem = random.sample(pos_gembs, N_IMG_EMBS) # rng n embeddings
315
- text = generate_pali(hist_gem)
316
  else:
317
- text = 'the scene'
318
- img, embs, new_gem = generate(user_emb, text)
319
 
320
  if img:
321
  tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'latest_user_to_rate', 'text', 'gemb'])
@@ -324,7 +293,6 @@ def background_next_image():
324
  tmp_df['user:rating'] = [{' ': ' '}]
325
  tmp_df['from_user_id'] = [uid]
326
  tmp_df['text'] = [text]
327
- tmp_df['gemb'] = [new_gem]
328
  prevs_df = pd.concat((prevs_df, tmp_df))
329
  # we can free up storage by deleting the image
330
  if len(prevs_df) > 500:
@@ -340,37 +308,52 @@ def background_next_image():
340
 
341
  def pluck_embs_ys(user_id):
342
  rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) != None for i in prevs_df.iterrows()]]
343
 
344
  embs = rated_rows['embeddings'].to_list()
345
  ys = [i[user_id] for i in rated_rows['user:rating'].to_list()]
346
- gembs = rated_rows['gemb'].to_list()
347
- return embs, ys, gembs
348
 
349
  def next_image(calibrate_prompts, user_id):
350
  with torch.no_grad():
351
  if len(calibrate_prompts) > 0:
352
  cal_video = calibrate_prompts.pop(0)
353
  image = prevs_df[prevs_df['paths'] == cal_video]['paths'].to_list()[0]
354
- return image, calibrate_prompts, ''
355
  else:
356
- embs, ys, gembs = pluck_embs_ys(user_id)
357
- user_emb = get_user_emb(embs, ys) * 3
358
- image, text = pluck_img(user_id, user_emb)
359
- return image, calibrate_prompts, text
360
 
361
 
362
 
363
  def start(_, calibrate_prompts, user_id, request: gr.Request):
364
  user_id = int(str(time.time())[-7:].replace('.', ''))
365
- image, calibrate_prompts, text = next_image(calibrate_prompts, user_id)
366
  return [
367
- gr.Button(value='Like (L)', interactive=True),
368
  gr.Button(value='Neither (Space)', interactive=True, visible=False),
369
- gr.Button(value='Dislike (A)', interactive=True),
370
  gr.Button(value='Start', interactive=False),
 
 
371
  image,
372
  calibrate_prompts,
373
- user_id
 
374
  ]
375
 
376
 
@@ -378,27 +361,34 @@ def choose(img, choice, calibrate_prompts, user_id, request: gr.Request):
378
  global prevs_df
379
 
380
 
381
- if choice == 'Like (L)':
382
- choice = 1
383
  elif choice == 'Neither (Space)':
384
- img, calibrate_prompts, text = next_image(calibrate_prompts, user_id)
385
- return img, calibrate_prompts, text
386
  else:
387
- choice = 0
388
 
389
  # if we detected NSFW, leave that area of latent space regardless of how they rated chosen.
390
  # TODO skip allowing rating & just continue
391
- if img == None:
 
392
  print('NSFW -- choice is disliked')
393
- choice = 0
394
 
395
  row_mask = [p.split('/')[-1] in img for p in prevs_df['paths'].to_list()]
396
  # if it's still in the dataframe, add the choice
397
  if len(prevs_df.loc[row_mask, 'user:rating']) > 0:
398
  prevs_df.loc[row_mask, 'user:rating'][0][user_id] = choice
399
  prevs_df.loc[row_mask, 'latest_user_to_rate'] = [user_id]
400
- img, calibrate_prompts, text = next_image(calibrate_prompts, user_id)
401
- return img, calibrate_prompts, text
402
 
403
  css = '''.gradio-container{max-width: 700px !important}
404
  #description{text-align: center}
@@ -461,53 +451,71 @@ Explore the latent space without text prompts based on your preferences. Learn m
461
  user_id = gr.State()
462
  # calibration videos -- this is a misnomer now :D
463
  calibrate_prompts = gr.State([
464
- './first.mp4',
465
- './second.mp4',
466
- './third.mp4',
467
- './fourth.mp4',
468
- './fifth.mp4',
469
- './sixth.mp4',
470
  ])
471
  def l():
472
  return None
473
 
474
  with gr.Row(elem_id='output-image'):
475
- img = gr.Video(
476
  label='Lightning',
477
- autoplay=True,
478
  interactive=False,
479
- height=512,
480
- width=512,
481
  #include_audio=False,
482
- elem_id="video_output"
 
483
  )
484
- img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
485
- with gr.Row():
486
- text = gr.Textbox(interactive=False, visible=True, label='Text')
 
487
  with gr.Row(equal_height=True):
488
- b3 = gr.Button(value='Dislike (A)', interactive=False, elem_id="dislike")
 
489
  b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither", visible=False)
490
- b1 = gr.Button(value='Like (L)', interactive=False, elem_id="like")
491
  b1.click(
492
  choose,
493
  [img, b1, calibrate_prompts, user_id],
494
- [img, calibrate_prompts, text],
495
  )
496
  b2.click(
497
  choose,
498
  [img, b2, calibrate_prompts, user_id],
499
- [img, calibrate_prompts, text],
500
  )
501
  b3.click(
502
  choose,
503
  [img, b3, calibrate_prompts, user_id],
504
- [img, calibrate_prompts, text],
505
  )
506
  with gr.Row():
507
  b4 = gr.Button(value='Start')
508
  b4.click(start,
509
  [b4, calibrate_prompts, user_id],
510
- [b1, b2, b3, b4, img, calibrate_prompts, user_id]
511
  )
512
  with gr.Row():
513
  html = gr.HTML('''<div style='text-align:center; font-size:20px'>You will calibrate for several videos and then roam. </ div><br><br><br>
@@ -518,37 +526,42 @@ Explore the latent space without text prompts based on your preferences. Learn m
518
  </ div>''')
519
 
520
  # TODO quiet logging
521
- log = logging.getLogger('log_here')
522
- log.setLevel(logging.ERROR)
523
 
524
  scheduler = BackgroundScheduler()
525
- scheduler.add_job(func=background_next_image, trigger="interval", seconds=.5)
526
  scheduler.start()
527
 
528
 
529
  # prep our calibration videos
530
- for im in [
531
- './first.mp4',
532
- './second.mp4',
533
- './third.mp4',
534
- './fourth.mp4',
535
- './fifth.mp4',
536
- './sixth.mp4',
537
- './seventh.mp4',
538
- './eigth.mp4',
539
- './ninth.mp4',
540
- './tenth.mp4',
541
  ]:
542
  tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'text', 'gemb'])
543
  tmp_df['paths'] = [im]
544
  image = list(imageio.imiter(im))
545
  image = image[len(image)//2]
546
- tmp_df['embeddings'] = [torch.load(im.replace('mp4', 'im_.pt'))]
547
- tmp_df['gemb'] = [torch.load(im.replace('mp4', 'gemb_.pt'))]
 
548
  tmp_df['user:rating'] = [{' ': ' '}]
 
549
  prevs_df = pd.concat((prevs_df, tmp_df))
550
 
551
-
552
- demo.launch(share=True)
553
 
554
 
 
 
 # lol
 DEVICE = 'cuda'
+ STEPS = 8
 output_hidden_state = False
 device = "cuda"
 dtype = torch.bfloat16
 
+
+ import spaces
+
+ import matplotlib.pyplot as plt
+ import matplotlib
 import logging
+
 import os
 import imageio
 import gradio as gr
 import numpy as np
+ from sklearn.svm import LinearSVC
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+ import sched
+ import threading
 
 import random
 import time
 
 import spaces
 start_time = time.time()
 
+ prompt_list = [p for p in list(set(
+ pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
+
+
 ####################### Setup Model
+ from diffusers import EulerDiscreteScheduler, LCMScheduler, AutoencoderTiny, UNet2DConditionModel, AutoencoderKL, AutoPipelineForText2Image
 from transformers import CLIPTextModel
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 
 from transformers import CLIPVisionModelWithProjection
 import uuid
 import av
 
+ def write_video(file_name, images, fps=16):
 container = av.open(file_name, mode="w")
 
 stream = container.add_stream("h264", rate=fps)
98
  #unet = UNet2DConditionModel.from_pretrained(finetune_path+'/unet/').to(dtype)
99
  #text_encoder = CLIPTextModel.from_pretrained(finetune_path+'/text_encoder/').to(dtype)
100
 
101
+ #rynmurdock/Sea_Claws
102
+ model_id = "stabilityai/stable-diffusion-xl-base-1.0"
103
+ sdxl_lightening = "ByteDance/SDXL-Lightning"
104
+ ckpt = "sdxl_lightning_8step_unet.safetensors"
105
+ unet = UNet2DConditionModel.from_config(model_id, subfolder="unet", low_cpu_mem_usage=True, device_map=DEVICE).to(torch.float16)
106
+ unet.load_state_dict(load_file(hf_hub_download(sdxl_lightening, ckpt)))
107
 
108
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map=DEVICE)
109
+ pipe = AutoPipelineForText2Image.from_pretrained(model_id, unet=unet, torch_dtype=torch.float16, variant="fp16", image_encoder=image_encoder, low_cpu_mem_usage=True)
110
+ pipe.unet._load_ip_adapter_weights(torch.load(hf_hub_download('h94/IP-Adapter', 'sdxl_models/ip-adapter_sdxl_vit-h.bin')))
111
+ pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl_vit-h.bin")
112
+ pipe.register_modules(image_encoder = image_encoder)
113
+ pipe.set_ip_adapter_scale(0.8)
114
 
115
+ #pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16, low_cpu_mem_usage=True)
116
+ pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 
 
 
 
 
117
 
118
+ pipe.to(device=DEVICE).to(dtype=dtype)
119
+ output_hidden_state = False
120
 
 
 
 
 
121
 
122
 
 
 
 
 
 
123
 
124
+ # pipe.unet.fuse_qkv_projections()
125
+ #pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
126
 
127
  #pipe.unet = torch.compile(pipe.unet)
128
  #pipe.vae = torch.compile(pipe.vae)
129
 
130
 
131
 
132
  @spaces.GPU()
133
  def generate_gpu(in_im_embs, prompt='the scene'):
134
  with torch.no_grad():
135
+ print(prompt)
136
+ in_im_embs = in_im_embs.to('cuda').unsqueeze(0)
137
  output = pipe(prompt=prompt, guidance_scale=1, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
138
  im_emb, _ = pipe.encode_image(
139
+ output.images[0], 'cuda', 1, output_hidden_state
140
  )
141
  im_emb = im_emb.detach().to('cpu').to(torch.float32)
142
+ return output, im_emb
 
 
 
 
143
 
144
 
145
  def generate(in_im_embs, prompt='the scene'):
146
+ output, im_emb = generate_gpu(in_im_embs, prompt)
147
+ nsfw = maybe_nsfw(output.images[0])
148
+
149
  name = str(uuid.uuid4()).replace("-", "")
150
+ path = f"/tmp/{name}.png"
151
 
152
  if nsfw:
153
  gr.Warning("NSFW content detected.")
154
  # TODO could return an automatic dislike of auto dislike on the backend for neither as well; just would need refactoring.
155
+ return None, im_emb
 
156
 
157
+ output.images[0].save(path)
158
+ return path, im_emb
 
 
159
 
160
 
161
  #######################
162
 
163
+
164
+
165
+
166
+
167
+
168
+ @spaces.GPU()
169
+ def solver(embs, ys):
170
+ print('ys:', ys,'EMBS:', embs.shape, embs)
171
+ ys = torch.tensor(ys).to('cpu', dtype=torch.float32).squeeze().unsqueeze(1)
172
+
173
+ sol = LinearSVC(class_weight='balanced').fit(np.array(embs), np.array(torch.tensor(ys).float() * 2 - 1)).coef_
174
+ return torch.tensor(sol).to('cpu', dtype=torch.float32)
175
+
176
+
177
+
178
+
179
  def get_user_emb(embs, ys):
 
 
 
 
 
 
 
 
 
180
  # sample only as many negatives as there are positives
181
+ indices = range(len(ys))
182
+ pos_indices = [i for i in indices if ys[i] > .5]
183
+ neg_indices = [i for i in indices if ys[i] <= .5]
 
 
 
184
 
185
+ mini = min(len(pos_indices), len(neg_indices))
186
+
187
+ if len(ys) > 20: # drop earliest of whichever of neg or pos is most abundant
188
+ if len(pos_indices) > len(neg_indices):
189
+ ind = pos_indices[0]
190
+ else:
191
+ ind = neg_indices[0]
192
+ ys.pop(ind)
193
+ embs.pop(ind)
194
+ print('Dropping at 20')
195
 
196
+ if mini < 1:
197
+ feature_embs = torch.stack([torch.randn(1280), torch.randn(1280)])
198
+ ys_t = [0, 1]
199
+ print('Not enough ratings.')
200
+ else:
201
+ indices = range(len(ys))
202
+ ys_t = [ys[i] for i in indices]
203
+ feature_embs = torch.stack([embs[e].detach().cpu() for e in indices]).squeeze()
204
+
205
+ # scaler = preprocessing.StandardScaler().fit(feature_embs)
206
+ # feature_embs = scaler.transform(feature_embs)
207
+ # ys_t = ys
208
+
209
+ print(np.array(feature_embs).shape, np.array(ys_t).shape)
210
 
211
+ sol = solver(feature_embs.squeeze(), ys_t)
212
+ dif = torch.tensor(sol, dtype=dtype).to(device)
213
 
214
+ # could j have a base vector of a black image
215
+ latest_pos = (random.sample([feature_embs[i] for i in range(len(ys_t)) if ys_t[i] > .5], 1)[0]).to(device, dtype)
216
+
217
+ dif = ((dif / dif.std()) * latest_pos.std())
 
218
 
219
+ sol = (1*latest_pos + 3*dif)/4
220
+ return sol
 
221
 
222
 
223
  def pluck_img(user_id, user_emb):
224
  not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, 'gone') == 'gone' for i in prevs_df.iterrows()]]
225
  while len(not_rated_rows) == 0:
226
  not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, 'gone') == 'gone' for i in prevs_df.iterrows()]]
227
+ time.sleep(.1)
228
  # TODO optimize this lol
229
  best_sim = -100000
230
  for i in not_rated_rows.iterrows():
 
234
  best_sim = sim
235
  best_row = i[1]
236
  img = best_row['paths']
237
+ return img
 
238
 
239
 
240
  def background_next_image():
 
242
  # only let it get N (maybe 3) ahead of the user
243
  #not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
244
  rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
245
+ if len(rated_rows) < 4:
246
+ time.sleep(.1)
247
  # not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
248
+ return
 
249
 
250
  user_id_list = set(rated_rows['latest_user_to_rate'].to_list())
251
  for uid in user_id_list:
 
259
  rated_from_user = rated_rows[[i[1]['from_user_id'] == uid for i in rated_rows.iterrows()]]
260
 
261
  # we pop previous ratings if there are > n
262
+ if len(rated_from_user) >= 15:
263
  oldest = rated_from_user.iloc[0]['paths']
264
  prevs_df = prevs_df[prevs_df['paths'] != oldest]
265
  # we don't compute more after n are in the queue for them
266
+ if len(unrated_from_user) >= 10:
267
  continue
268
 
269
+ if len(rated_rows) < 5:
270
+ continue
271
+
272
+ embs, ys = pluck_embs_ys(uid)
273
+
274
+ user_emb = get_user_emb(embs, [y[1] for y in ys])
275
+
276
+
277
+ global glob_idx
278
+ glob_idx += 1
279
+ if glob_idx >= (len(prompt_list)-1):
280
+ glob_idx = 0
281
+
282
+
283
+ if glob_idx % 7 == 0:
284
+ text = prompt_list[glob_idx]
285
  else:
286
+ text = 'an image'
287
+ img, embs = generate(user_emb, text)
288
 
289
  if img:
290
  tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'latest_user_to_rate', 'text', 'gemb'])
 
293
  tmp_df['user:rating'] = [{' ': ' '}]
294
  tmp_df['from_user_id'] = [uid]
295
  tmp_df['text'] = [text]
 
296
  prevs_df = pd.concat((prevs_df, tmp_df))
297
  # we can free up storage by deleting the image
298
  if len(prevs_df) > 500:
 
308
 
309
  def pluck_embs_ys(user_id):
310
  rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) != None for i in prevs_df.iterrows()]]
311
+ #not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) == None for i in prevs_df.iterrows()]]
312
+ #while len(not_rated_rows) == 0:
313
+ # not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) == None for i in prevs_df.iterrows()]]
314
+ # rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) != None for i in prevs_df.iterrows()]]
315
+ # time.sleep(.01)
316
+ # print('current user has 0 not_rated_rows')
317
 
318
  embs = rated_rows['embeddings'].to_list()
319
  ys = [i[user_id] for i in rated_rows['user:rating'].to_list()]
320
+ return embs, ys
 
321
 
322
  def next_image(calibrate_prompts, user_id):
323
  with torch.no_grad():
324
  if len(calibrate_prompts) > 0:
325
  cal_video = calibrate_prompts.pop(0)
326
  image = prevs_df[prevs_df['paths'] == cal_video]['paths'].to_list()[0]
327
+ return image, calibrate_prompts,
328
  else:
329
+ embs, ys = pluck_embs_ys(user_id)
330
+ ys_here = [y[1] for y in ys]
331
+ user_emb = get_user_emb(embs, ys_here)
332
+ image = pluck_img(user_id, user_emb)
333
+ return image, calibrate_prompts,
334
+
335
+
336
+
337
+
338
+
339
+
340
 
341
 
342
 
343
  def start(_, calibrate_prompts, user_id, request: gr.Request):
344
  user_id = int(str(time.time())[-7:].replace('.', ''))
345
+ image, calibrate_prompts = next_image(calibrate_prompts, user_id)
346
  return [
347
+ gr.Button(value='👍', interactive=True),
348
  gr.Button(value='Neither (Space)', interactive=True, visible=False),
349
+ gr.Button(value='👎', interactive=True),
350
  gr.Button(value='Start', interactive=False),
351
+ gr.Button(value='👍 Content', interactive=True, visible=False),
352
+ gr.Button(value='👍 Style', interactive=True, visible=False),
353
  image,
354
  calibrate_prompts,
355
+ user_id,
356
+
357
  ]
358
 
359
 
 
361
  global prevs_df
362
 
363
 
364
+ if choice == '👍':
365
+ choice = [1, 1]
366
  elif choice == 'Neither (Space)':
367
+ img, calibrate_prompts, = next_image(calibrate_prompts, user_id)
368
+ return img, calibrate_prompts,
369
+ elif choice == '👎':
370
+ choice = [0, 0]
371
+ elif choice == '👍 Style':
372
+ choice = [0, 1]
373
+ elif choice == '👍 Content':
374
+ choice = [1, 0]
375
  else:
376
+ assert False, f'choice is {choice}'
377
 
378
  # if we detected NSFW, leave that area of latent space regardless of how they rated chosen.
379
  # TODO skip allowing rating & just continue
380
+
381
+ if img is None:
382
  print('NSFW -- choice is disliked')
383
+ choice = [0, 0]
384
 
385
  row_mask = [p.split('/')[-1] in img for p in prevs_df['paths'].to_list()]
386
  # if it's still in the dataframe, add the choice
387
  if len(prevs_df.loc[row_mask, 'user:rating']) > 0:
388
  prevs_df.loc[row_mask, 'user:rating'][0][user_id] = choice
389
  prevs_df.loc[row_mask, 'latest_user_to_rate'] = [user_id]
390
+ img, calibrate_prompts, = next_image(calibrate_prompts, user_id)
391
+ return img, calibrate_prompts
392
 
393
  css = '''.gradio-container{max-width: 700px !important}
394
  #description{text-align: center}
 
451
  user_id = gr.State()
452
  # calibration videos -- this is a misnomer now :D
453
  calibrate_prompts = gr.State([
454
+ './first.png',
455
+ './second.png',
456
+ './sixth.png',
457
+ './fifth.png',
458
+ './fourth.png',
 
459
  ])
460
  def l():
461
  return None
462
 
463
  with gr.Row(elem_id='output-image'):
464
+ img = gr.Image(
465
  label='Lightning',
466
+ # autoplay=True,
467
  interactive=False,
468
+ # height=512,
469
+ # width=512,
470
  #include_audio=False,
471
+ elem_id="video_output",
472
+ type='filepath',
473
  )
474
+ #img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
475
+
476
+
477
+
478
  with gr.Row(equal_height=True):
479
+ b3 = gr.Button(value='👎', interactive=False, elem_id="dislike")
480
+
481
  b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither", visible=False)
482
+
483
+ b1 = gr.Button(value='👍', interactive=False, elem_id="like")
484
+ with gr.Row(equal_height=True):
485
+ b6 = gr.Button(value='👍 Style', interactive=False, elem_id="dislike like", visible=False)
486
+
487
+ b5 = gr.Button(value='👍 Content', interactive=False, elem_id="like dislike", visible=False)
488
+
489
  b1.click(
490
  choose,
491
  [img, b1, calibrate_prompts, user_id],
492
+ [img, calibrate_prompts, ],
493
  )
494
  b2.click(
495
  choose,
496
  [img, b2, calibrate_prompts, user_id],
497
+ [img, calibrate_prompts, ],
498
  )
499
  b3.click(
500
  choose,
501
  [img, b3, calibrate_prompts, user_id],
502
+ [img, calibrate_prompts, ],
503
+ )
504
+ b5.click(
505
+ choose,
506
+ [img, b5, calibrate_prompts, user_id],
507
+ [img, calibrate_prompts, ],
508
+ )
509
+ b6.click(
510
+ choose,
511
+ [img, b6, calibrate_prompts, user_id],
512
+ [img, calibrate_prompts, ],
513
  )
514
  with gr.Row():
515
  b4 = gr.Button(value='Start')
516
  b4.click(start,
517
  [b4, calibrate_prompts, user_id],
518
+ [b1, b2, b3, b4, b5, b6, img, calibrate_prompts, user_id, ]
519
  )
520
  with gr.Row():
521
  html = gr.HTML('''<div style='text-align:center; font-size:20px'>You will calibrate for several videos and then roam. </ div><br><br><br>
 
526
  </ div>''')
527
 
528
  # TODO quiet logging
 
 
529
 
530
  scheduler = BackgroundScheduler()
531
+ scheduler.add_job(func=background_next_image, trigger="interval", seconds=.2)
532
  scheduler.start()
533
 
534
+ #thread = threading.Thread(target=background_next_image,)
535
+ #thread.start()
536
+
537
+ # TODO shouldn't call this before gradio launch, yeah?
538
+ @spaces.GPU()
539
+ def encode_space(x):
540
+ im_emb, _ = pipe.encode_image(
541
+ image, DEVICE, 1, output_hidden_state
542
+ )
543
+ return im_emb.detach().to('cpu').to(torch.float32)
544
 
545
  # prep our calibration videos
546
+ for im, txt in [ # TODO more movement
547
+ ('./first.png', 'describe the scene: a sketch'),
548
+ ('./second.png', 'describe the scene: omens in the suburbs'),
549
+ ('./sixth.png', 'describe the scene: geometric abstract art of a windmill'),
550
+ ('./fifth.png', 'describe the scene: memento mori'),
551
+ ('./fourth.png', 'describe the scene: a green plate with an espresso'),
 
 
 
 
 
552
  ]:
553
  tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'text', 'gemb'])
554
  tmp_df['paths'] = [im]
555
  image = list(imageio.imiter(im))
556
  image = image[len(image)//2]
557
+ im_emb = encode_space(image)
558
+
559
+ tmp_df['embeddings'] = [im_emb.detach().to('cpu')]
560
  tmp_df['user:rating'] = [{' ': ' '}]
561
+ tmp_df['text'] = [txt]
562
  prevs_df = pd.concat((prevs_df, tmp_df))
563
 
564
+ glob_idx = 0
565
+ demo.launch(share=True,)
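
For readers skimming the diff: the new `solver` / `get_user_emb` pair replaces the old SVC fit. It maps ratings {0,1} to {-1,+1}, fits a `LinearSVC(class_weight='balanced')` on the rated image embeddings, treats the coefficient vector as a preference direction, rescales it to the spread of a liked embedding, and blends the two 1:3. The snippet below is a minimal standalone sketch of that computation with toy data; the 1280-dim size, the label mapping, and the 1:3 blend come from the diff, while the helper name, the toy embeddings, and picking the last positive (the app samples a random liked embedding) are illustrative assumptions.

# Standalone sketch, not the app itself: turn liked/disliked embeddings into a
# preference direction the way solver()/get_user_emb() do in this commit.
import numpy as np
import torch
from sklearn.svm import LinearSVC

def preference_direction(embs: torch.Tensor, ys: list) -> torch.Tensor:
    # embs: (n, d) float32 embeddings; ys: per-item ratings in {0, 1}
    labels = np.array(ys) * 2 - 1                      # {0,1} -> {-1,+1}, as in the diff
    coef = LinearSVC(class_weight='balanced').fit(embs.numpy(), labels).coef_
    dif = torch.tensor(coef, dtype=torch.float32).squeeze()

    # rescale the direction to the spread of a liked embedding, then blend 1:3
    latest_pos = embs[[i for i, y in enumerate(ys) if y > .5][-1]]  # assumption: last positive
    dif = dif / dif.std() * latest_pos.std()
    return (1 * latest_pos + 3 * dif) / 4

# toy usage with random 1280-dim embeddings and six ratings
emb = torch.randn(6, 1280)
print(preference_direction(emb, [1, 0, 1, 0, 0, 1]).shape)  # torch.Size([1280])
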
twitter_prompts.csv ADDED
@@ -0,0 +1,47 @@
+ ,0
+ 0,a sunset
+ 1,a still life in blue
+ 2,last day on earth
+ 3,the conch shell
+ 4,the winds of change
+ 5,a surrealist eye
+ 6,a surrealist polaroid photo of an apple
+ 7,metaphysics
+ 8,the sun is setting into my glass of tea
+ 9,the moon at 3am
+ 10,a memento mori
+ 11,quaking aspen tree
+ 12,violets and daffodils
+ 13,espresso
+ 14,sisyphus
+ 15,high windows of stained glass
+ 16,a green dog
+ 17,an adorable companion; it is a pig
+ 18,bird of paradise
+ 19,a complex intricate machine
+ 20,a white clock
+ 21,a film featuring the landscape Salt Lake City Utah
+ 22,a creature
+ 23,a house set aflame.
+ 24,a gorgeous landscape by Cy Twombly
+ 25,smoke rises from the caterpillar's hookah
+ 26,corvid in red
+ 27,Monet's pond
+ 28,Genesis
+ 29,Death is a black camel that kneels down so we can ride
+ 30,a cherry tree made of fractals
+ 29,the end of the sidewalk
+ 30,a polaroid photo of a bustling city of lights and sky scrapers
+ 31,The Fig Tree metaphor
+ 32,God killed Van Gogh.
+ 33,a cosmic entity alien with four eyes.
+ 34,a horse with 128 eyes.
+ 35,a being with an infinite set of eyes (it is omniscient)
+ 36,A sticky-note magnum opus featuring birds
+ 37,Moka Pot
+ 38,the moon is a sickle cell
+ 39,The Penultimate Supper
+ 40,Art
+ 41,surrealism
+ 42,a god made of wires & dust
+ 43,a dandelion blown into the universe
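
As the diff to app.py shows, this file is read once at startup into `prompt_list` (column 1, deduplicated, strings only), and the background loop only pulls a prompt from it on every seventh generation, falling back to 'an image' otherwise. Below is a hedged standalone rendering of that logic, assuming the CSV sits in the working directory; the helper name and the modulo-based index reset are simplifications of the diff's `glob_idx` bookkeeping.

# Standalone sketch of how app.py consumes twitter_prompts.csv in this commit.
import pandas as pd

prompt_list = [p for p in set(pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())
               if isinstance(p, str)]

glob_idx = 0
def next_prompt() -> str:
    # advance the global index, wrapping before the end of the list,
    # and only use a real prompt on every 7th call
    global glob_idx
    glob_idx = (glob_idx + 1) % (len(prompt_list) - 1)
    return prompt_list[glob_idx] if glob_idx % 7 == 0 else 'an image'

print([next_prompt() for _ in range(8)])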