AmitIsraeli committed on
Commit
7a7d1a1
1 Parent(s): f6d4208

Lower default top_p to 0.5, remove the VAR_explained image, and add new dropdown choices

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -88,7 +88,7 @@ class InferenceTextVAR(nn.Module):
88
  self.var = get_peft_model(self.var, lora_config)
89
 
90
  @torch.no_grad()
91
- def generate_image(self, text, beta=1, seed=None, more_smooth=False, top_k=0, top_p=0.9):
92
  if seed is None:
93
  seed = random.randint(0, 2**32 - 1)
94
  inputs = self.text_processor([text], padding="max_length", return_tensors="pt").to(self.device)
@@ -159,9 +159,6 @@ if __name__ == '__main__':
159
  - **Model Fine-tuning:** Fine-tuned the [Visual AutoRegressive (VAR)](https://arxiv.org/abs/2404.02905) model, pretrained on ImageNet, to adapt it for Funko Pop! generation by injecting a custom embedding representing the "doll" class.
160
  - **Adapter Training:** Trained an adapter with the frozen [SigLIP image encoder](https://github.com/FoundationVision/VAR) and a lightweight LoRA module to map image embeddings to text representation in a large language model.
161
  - **Text-to-Image Generation:** Enabled text-to-image generation by replacing the SigLIP image encoder with its text encoder, retaining frozen components such as the VAE and generator for efficiency and quality.
162
-
163
- ![VAR Explained](VAR_explained.png)
164
-
165
 
166
  ## Generate Your Own Funko Pop!
167
  """)
@@ -226,9 +223,9 @@ if __name__ == '__main__':
226
  image = model.generate_image(prompt)
227
  return image
228
 
229
- famous_name_input = gr.Dropdown(choices=["None", "Donald Trump", "Johnny Depp", "Oprah Winfrey"], label="Famous Name", value="None")
230
  character_input = gr.Dropdown(choices=["None", "Alien", "Robot"], label="Character", value="None")
231
- action_input = gr.Dropdown(choices=["None", "Playing the Guitar", "Holding the Sword"], label="Action", value="None")
232
  custom_generate_button = gr.Button("Generate Custom Funko Pop!")
233
  custom_image_output = gr.Image(label="Custom Funko Pop!")
234
 
 
88
  self.var = get_peft_model(self.var, lora_config)
89
 
90
  @torch.no_grad()
91
+ def generate_image(self, text, beta=1, seed=None, more_smooth=False, top_k=0, top_p=0.5):
92
  if seed is None:
93
  seed = random.randint(0, 2**32 - 1)
94
  inputs = self.text_processor([text], padding="max_length", return_tensors="pt").to(self.device)
 
159
  - **Model Fine-tuning:** Fine-tuned the [Visual AutoRegressive (VAR)](https://arxiv.org/abs/2404.02905) model, pretrained on ImageNet, to adapt it for Funko Pop! generation by injecting a custom embedding representing the "doll" class.
160
  - **Adapter Training:** Trained an adapter with the frozen [SigLIP image encoder](https://github.com/FoundationVision/VAR) and a lightweight LoRA module to map image embeddings to text representation in a large language model.
161
  - **Text-to-Image Generation:** Enabled text-to-image generation by replacing the SigLIP image encoder with its text encoder, retaining frozen components such as the VAE and generator for efficiency and quality.
 
 
 
162
 
163
  ## Generate Your Own Funko Pop!
164
  """)
 
223
  image = model.generate_image(prompt)
224
  return image
225
 
226
+ famous_name_input = gr.Dropdown(choices=["None", "Donald Trump", "Johnny Depp", "Oprah Winfrey", "Lebron James"], label="Famous Name", value="None")
227
  character_input = gr.Dropdown(choices=["None", "Alien", "Robot"], label="Character", value="None")
228
+ action_input = gr.Dropdown(choices=["None", "Playing the Guitar", "Holding the Sword","wearing headphone"], label="Action", value="None")
229
  custom_generate_button = gr.Button("Generate Custom Funko Pop!")
230
  custom_image_output = gr.Image(label="Custom Funko Pop!")
231