# Source: Hugging Face Space "Zaherrr/KG_transform" (status when captured: Sleeping).
# File size: 12,706 bytes.
# Revisions listed in the page's per-line blame gutter: 6f4dba4, a9e6ec2, a7af971, 563b496, 636931b.


import os
import re
import subprocess
import sys
import time

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoProcessor, Idefics3ForConditionalGeneration
# Best-effort install of flash-attn at startup.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is layered on top of the inherited
# environment rather than replacing it, so pip still sees PATH, HOME, proxy
# settings and so on. The command is passed as an argument list to the
# running interpreter's pip, which avoids going through a shell.
# check=False keeps the previous behaviour: a failed install is not fatal.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)


# Checkpoint shared by the processor and the model weights.
MODEL_ID = "HuggingFaceM4/Idefics3-8B-Llama3"

processor = AutoProcessor.from_pretrained(MODEL_ID)

# Weights are loaded in bfloat16. Neither the flash_attention_2 attention
# implementation nor an explicit move to CUDA is enabled here.
model = Idefics3ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Token ids of the image placeholder tokens, and the end-of-sequence token id.
_image_marker_tokens = ["<image>", "<fake_token_around_image>"]
BAD_WORDS_IDS = processor.tokenizer(_image_marker_tokens, add_special_tokens=False).input_ids
EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]

# @spaces.GPU
def model_inference(
    images, text, assistant_prefix, decoding_strategy, temperature, max_new_tokens,
    repetition_penalty, top_p
):
    """Generate a reply to a text prompt, optionally grounded on image(s).

    Args:
        images: A single PIL image, a sequence of PIL images, or ``None`` when
            no image was supplied.
        text: The user's prompt; must not be empty.
        assistant_prefix: Optional text appended after the generation prompt so
            the model continues from it (e.g. "Let's think step by step.").
        decoding_strategy: Either "Greedy" or "Top P Sampling".
        temperature: Sampling temperature, used only with "Top P Sampling".
            A non-positive value falls back to greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Repetition penalty passed to ``generate`` for both
            decoding strategies.
        top_p: Nucleus sampling threshold, used only with "Top P Sampling".

    Returns:
        The decoded text of the newly generated tokens only; the prompt and the
        assistant prefix are not part of the returned string.

    Raises:
        gr.Error: If the prompt is empty or the decoding strategy is unknown.
    """
    # Normalise the image argument to a (possibly empty) list.
    if images is None:
        image_list = []
    elif isinstance(images, Image.Image):
        image_list = [images]
    else:
        image_list = list(images)

    # gr.Error only reaches the user when it is raised, not merely constructed.
    if not text or not text.strip():
        if image_list:
            raise gr.Error("Please input a text query along the image(s).")
        raise gr.Error("Please input a query and optionally image(s).")

    if decoding_strategy not in ("Greedy", "Top P Sampling"):
        raise gr.Error(f"Unknown decoding strategy: {decoding_strategy!r}")

    # One image placeholder per supplied image, followed by the text query.
    content = [{"type": "image"} for _ in image_list]
    content.append({"type": "text", "text": text})
    resulting_messages = [{"role": "user", "content": content}]

    prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
    if assistant_prefix:
        # Seed the assistant's turn so generation continues from the prefix.
        prompt = f"{prompt} {assistant_prefix}"

    # The processor expects one list of images per prompt; pass None for a
    # text-only query so no image inputs are produced.
    inputs = processor(
        text=prompt,
        images=[image_list] if image_list else None,
        return_tensors="pt",
    )
    # Keep the inputs on whatever device the model currently lives on.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

    generation_args = {
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": repetition_penalty,
    }

    # Sampling needs a strictly positive temperature; otherwise decode greedily.
    if decoding_strategy == "Top P Sampling" and temperature > 0:
        generation_args["do_sample"] = True
        generation_args["temperature"] = temperature
        generation_args["top_p"] = top_p
    else:
        generation_args["do_sample"] = False

    generated_ids = model.generate(**inputs, **generation_args)

    # Drop the prompt tokens so only the newly generated text is decoded.
    prompt_length = inputs["input_ids"].size(1)
    generated_texts = processor.batch_decode(
        generated_ids[:, prompt_length:], skip_special_tokens=True
    )
    return generated_texts[0]


def _sampling_controls_visibility(selection):
    """Return visibility updates for the sampling-only sliders.

    The two updates map, in order, to the temperature and Top P sliders; both
    are shown only when "Top P Sampling" is the selected decoding strategy.
    """
    is_sampling = selection == "Top P Sampling"
    return gr.Slider(visible=is_sampling), gr.Slider(visible=is_sampling)


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("## IDEFICS3-Llama 🐶")
    gr.Markdown("Play with [HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) in this demo. To get started, upload an image and text or try one of the examples.")
    gr.Markdown("**Disclaimer:** Idefics3 does not include an RLHF alignment stage, so it may not consistently follow prompts or handle complex tasks. However, this doesn't mean it is incapable of doing so. Adding a prefix to the assistant's response, such as `Let's think step by step.` for a reasoning question or `<html>` for HTML code generation, can significantly improve the output in practice. You could also play with the parameters such as the temperature in non-greedy mode.")
    with gr.Column():
        image_input = gr.Image(label="Upload your Image", type="pil", scale=1)
        query_input = gr.Textbox(label="Prompt")
        assistant_prefix = gr.Textbox(label="Assistant Prefix", placeholder="Let's think step by step.")

        submit_btn = gr.Button("Submit")
        output = gr.Textbox(label="Output")

    with gr.Accordion(label="Example Inputs and Advanced Generation Parameters"):
        # Example inputs (currently disabled).
        # examples=[
        #             ["example_images/mmmu_example.jpeg", "Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend?", "Let's think step by step.", "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/rococo_1.jpg", "What art era is this?", None, "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/paper_with_text.png", "Read what's written on the paper", None, "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/dragons_playing.png","What's unusual about this image?",None,  "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/example_images_ai2d_example_2.jpeg", "What happens to fish if pelicans increase?", None, "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/travel_tips.jpg", "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.", None, "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/dummy_pdf.png", "How much percent is the order status?", None, "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/art_critic.png", "As an art critic AI assistant, could you describe this painting in details and make a thorough critic?.",None,  "Greedy", 0.4, 512, 1.2, 0.8],
        #             ["example_images/s2w_example.png",  "What is this UI about?", None,"Greedy", 0.4, 512, 1.2, 0.8]]

        # Hyper-parameters for generation
        max_new_tokens = gr.Slider(
            minimum=8,
            maximum=1024,
            value=512,
            step=1,
            interactive=True,
            label="Maximum number of new tokens to generate",
        )
        # Always visible: the penalty is applied for both decoding strategies.
        repetition_penalty = gr.Slider(
            minimum=0.01,
            maximum=5.0,
            value=1.2,
            step=0.01,
            interactive=True,
            label="Repetition penalty",
            info="1.0 is equivalent to no penalty",
        )
        # Sampling-only controls start hidden because the default strategy is
        # "Greedy". Hidden sliders still pass their values to the click handler.
        # The minimum is above zero because sampling needs a strictly positive
        # temperature.
        temperature = gr.Slider(
            minimum=0.1,
            maximum=5.0,
            value=0.4,
            step=0.1,
            interactive=True,
            visible=False,
            label="Sampling temperature",
            info="Higher values will produce more diverse outputs.",
        )
        top_p = gr.Slider(
            minimum=0.01,
            maximum=0.99,
            value=0.8,
            step=0.01,
            interactive=True,
            visible=False,
            label="Top P",
            info="Higher values is equivalent to sampling more low-probability tokens.",
        )
        decoding_strategy = gr.Radio(
            [
                "Greedy",
                "Top P Sampling",
            ],
            value="Greedy",
            label="Decoding strategy",
            interactive=True,
            info="Greedy always picks the most likely next token; Top P Sampling samples using the temperature and Top P settings.",
        )
        # A single handler toggles both sampling-only sliders together.
        decoding_strategy.change(
            fn=_sampling_controls_visibility,
            inputs=decoding_strategy,
            outputs=[temperature, top_p],
        )
        # gr.Examples(
        #                 examples = examples,
        #                 inputs=[image_input, query_input, assistant_prefix, decoding_strategy, temperature,
        #                                                       max_new_tokens, repetition_penalty, top_p],
        #                 outputs=output,
        #                 fn=model_inference
        #             )

        # The input order must match the parameter order of model_inference.
        submit_btn.click(
            model_inference,
            inputs=[
                image_input,
                query_input,
                assistant_prefix,
                decoding_strategy,
                temperature,
                max_new_tokens,
                repetition_penalty,
                top_p,
            ],
            outputs=output,
        )


demo.launch(debug=True)

















# -----------------------------------------------------------------------------------------------------------------------------
# import gradio as gr
# import numpy as np
# import random
# from diffusers import DiffusionPipeline
# import torch

# device = "cuda" if torch.cuda.is_available() else "cpu"

# if torch.cuda.is_available():
#     torch.cuda.max_memory_allocated(device=device)
#     pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
#     pipe.enable_xformers_memory_efficient_attention()
#     pipe = pipe.to(device)
# else: 
#     pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
#     pipe = pipe.to(device)

# MAX_SEED = np.iinfo(np.int32).max
# MAX_IMAGE_SIZE = 1024

# def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):

#     if randomize_seed:
#         seed = random.randint(0, MAX_SEED)
        
#     generator = torch.Generator().manual_seed(seed)
    
#     image = pipe(
#         prompt = prompt, 
#         negative_prompt = negative_prompt,
#         guidance_scale = guidance_scale, 
#         num_inference_steps = num_inference_steps, 
#         width = width, 
#         height = height,
#         generator = generator
#     ).images[0] 
    
#     return image

# examples = [
#     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
#     "An astronaut riding a green horse",
#     "A delicious ceviche cheesecake slice",
# ]

# css="""
# #col-container {
#     margin: 0 auto;
#     max-width: 520px;
# }
# """

# if torch.cuda.is_available():
#     power_device = "GPU"
# else:
#     power_device = "CPU"

# with gr.Blocks(css=css) as demo:
    
#     with gr.Column(elem_id="col-container"):
#         gr.Markdown(f"""
#         # Text-to-Image Gradio Template
#         Currently running on {power_device}.
#         """)
        
#         with gr.Row():
            
#             prompt = gr.Text(
#                 label="Prompt",
#                 show_label=False,
#                 max_lines=1,
#                 placeholder="Enter your prompt",
#                 container=False,
#             )
            
#             run_button = gr.Button("Run", scale=0)
        
#         result = gr.Image(label="Result", show_label=False)

#         with gr.Accordion("Advanced Settings", open=False):
            
#             negative_prompt = gr.Text(
#                 label="Negative prompt",
#                 max_lines=1,
#                 placeholder="Enter a negative prompt",
#                 visible=False,
#             )
            
#             seed = gr.Slider(
#                 label="Seed",
#                 minimum=0,
#                 maximum=MAX_SEED,
#                 step=1,
#                 value=0,
#             )
            
#             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            
#             with gr.Row():
                
#                 width = gr.Slider(
#                     label="Width",
#                     minimum=256,
#                     maximum=MAX_IMAGE_SIZE,
#                     step=32,
#                     value=512,
#                 )
                
#                 height = gr.Slider(
#                     label="Height",
#                     minimum=256,
#                     maximum=MAX_IMAGE_SIZE,
#                     step=32,
#                     value=512,
#                 )
            
#             with gr.Row():
                
#                 guidance_scale = gr.Slider(
#                     label="Guidance scale",
#                     minimum=0.0,
#                     maximum=10.0,
#                     step=0.1,
#                     value=0.0,
#                 )
                
#                 num_inference_steps = gr.Slider(
#                     label="Number of inference steps",
#                     minimum=1,
#                     maximum=12,
#                     step=1,
#                     value=2,
#                 )
        
#         gr.Examples(
#             examples = examples,
#             inputs = [prompt]
#         )

#     run_button.click(
#         fn = infer,
#         inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
#         outputs = [result]
#     )

# demo.queue().launch()