DigiP-AI committed (verified) · commit f4ed65d · 1 parent: d13dd5a

Update app.py

Files changed (1):
  1. app.py +51 -0
app.py CHANGED
@@ -1,3 +1,13 @@
+import gradio as gr
+import random
+import os
+import torch
+import subprocess
+import numpy as np
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM
+from diffusers import DiffusionPipeline
+
 import gradio as gr
 import cv2
 import numpy as np
@@ -327,6 +337,47 @@ with gr.Blocks(theme=theme, css=css) as app:
             inputs=[image_input, filter_type],
             outputs=image_output
         )
+
+    with gr.Tab("Image to Prompt"):
+        subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+        # Initialize Florence model
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        florence_model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to(device).eval()
+        florence_processor = AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
+
+        # api_key = os.getenv("HF_READ_TOKEN")
+
+        def generate_caption(image):
+            if not isinstance(image, Image.Image):
+                image = Image.fromarray(image)
+
+            inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
+            generated_ids = florence_model.generate(
+                input_ids=inputs["input_ids"],
+                pixel_values=inputs["pixel_values"],
+                max_new_tokens=1024,
+                early_stopping=False,
+                do_sample=False,
+                num_beams=3,
+            )
+            generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+            parsed_answer = florence_processor.post_process_generation(
+                generated_text,
+                task="<MORE_DETAILED_CAPTION>",
+                image_size=(image.width, image.height)
+            )
+            prompt = parsed_answer["<MORE_DETAILED_CAPTION>"]
+            print("\n\nGeneration completed!:" + prompt)
+            return prompt
+
+        io = gr.Interface(generate_caption,
+                          inputs=[gr.Image(label="Input Image")],
+                          outputs=[gr.Textbox(label="Output Prompt", lines=2, show_copy_button=True),
+                                   # gr.Image(label="Output Image")
+                                  ]
+        )
+
     with gr.Tab("Image Upscaler"):
         with gr.Row():
             with gr.Column():
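
For reference, the captioning path added in this commit can be exercised outside the Gradio UI. The sketch below is a minimal standalone version of the committed generate_caption logic, assuming the same microsoft/Florence-2-base checkpoint and <MORE_DETAILED_CAPTION> task token from the diff; the caption() helper name and the example.jpg path are illustrative placeholders, not part of the app.

# Standalone sketch of the Florence-2 captioning used in the new "Image to Prompt" tab.
# Assumptions: microsoft/Florence-2-base and the <MORE_DETAILED_CAPTION> task come from the
# commit above; the file path and helper name are hypothetical.
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and processor exactly as the committed code does (custom remote code).
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Florence-2-base", trust_remote_code=True
).to(device).eval()
processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base", trust_remote_code=True
)

def caption(image_path: str) -> str:
    # Open the image and run the detailed-caption task with beam search, as in the app.
    image = Image.open(image_path).convert("RGB")
    inputs = processor(
        text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt"
    ).to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        do_sample=False,
        num_beams=3,
    )
    raw = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        raw, task="<MORE_DETAILED_CAPTION>", image_size=(image.width, image.height)
    )
    return parsed["<MORE_DETAILED_CAPTION>"]

if __name__ == "__main__":
    print(caption("example.jpg"))  # placeholder path

Running this once outside the app is also a quick way to confirm that the trust_remote_code download of the Florence-2 weights and processor works before wiring the function into the Blocks tab.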