taesiri committed
Commit 942f16c
1 Parent(s): 936d897
Files changed (2):
  1. app.py +199 -44
  2. requirements.txt +2 -1
app.py CHANGED
@@ -7,11 +7,24 @@ from peft import PeftModel
 from huggingface_hub import login
 import spaces
 import json
+import matplotlib.pyplot as plt
+import io
+import base64
+
+
+def check_environment():
+    required_vars = ["HF_TOKEN"]
+    missing_vars = [var for var in required_vars if var not in os.environ]
+
+    if missing_vars:
+        raise ValueError(
+            f"Missing required environment variables: {', '.join(missing_vars)}\n"
+            "Please set the HF_TOKEN environment variable with your Hugging Face token"
+        )
 
 
 # Login to Hugging Face
-if "HF_TOKEN" not in os.environ:
-    raise ValueError("Please set the HF_TOKEN environment variable with your Hugging Face token")
+check_environment()
 login(token=os.environ["HF_TOKEN"])
 
 # Load model and processor (do this outside the inference function to avoid reloading)
@@ -28,71 +41,213 @@ model = PeftModel.from_pretrained(model, lora_weights_path)
 model.tie_weights()
 
 
+def parse_json_response(json_str):
+    if not json_str:
+        return None
+
+    try:
+        # Handle potential JSON string escaping
+        json_str = json_str.strip()
+        if json_str.startswith('"') and json_str.endswith('"'):
+            json_str = json_str[1:-1]
+
+        first_parse = json.loads(json_str)
+        json_object = (
+            json.loads(first_parse) if isinstance(first_parse, str) else first_parse
+        )
+
+        # Validate expected keys
+        required_keys = [
+            "description",
+            "scene_description",
+            "character_list",
+            "object_list",
+        ]
+        if not all(key in json_object for key in required_keys):
+            print("Missing required keys in JSON response")
+            return None
+
+        return json_object
+    except json.JSONDecodeError as e:
+        print(f"JSON parsing error: {e}")
+        return None
+    except Exception as e:
+        print(f"Unexpected error during JSON parsing: {e}")
+        return None
+
+
+def create_color_palette_image(colors):
+    if not colors or not isinstance(colors, list):
+        return None
+
+    try:
+        # Validate color format
+        for color in colors:
+            if not isinstance(color, str) or not color.startswith("#"):
+                return None
+
+        # Create figure and axis
+        fig, ax = plt.subplots(figsize=(10, 2))
+
+        # Create rectangles for each color
+        for i, color in enumerate(colors):
+            ax.add_patch(plt.Rectangle((i, 0), 1, 1, facecolor=color))
+
+        # Set the view limits and aspect ratio
+        ax.set_xlim(0, len(colors))
+        ax.set_ylim(0, 1)
+        ax.set_xticks([])
+        ax.set_yticks([])
+
+        # Save to bytes buffer
+        buf = io.BytesIO()
+        plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
+        plt.close("all")  # Close all figures to prevent memory leaks
+        plt.close(fig)  # Explicitly close the current figure
+
+        # Return the palette as a PIL image
+        buf.seek(0)
+        return Image.open(buf)  # gr.Image expects a PIL image, not a raw BytesIO buffer
+    except Exception as e:
+        print(f"Error creating color palette: {e}")
+        return None
+
+
 @spaces.GPU
 def inference(image):
+    if image is None:
+        return ("Please provide an image", "", None, None, None, "", None, "", "No image provided", "Error")  # one value per output
+
+    if not isinstance(image, Image.Image):
+        try:
+            image = Image.fromarray(image)
+        except Exception as e:
+            print(f"Image conversion error: {e}")
+            return ("Invalid image format", "", None, None, None, "", None, "", "Invalid image format", "Error")
+
     # Prepare input
     messages = [
-        {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe the image in JSON"}]}
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": "Describe the image in JSON"},
+            ],
+        }
     ]
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(image, input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
-
-    # Run inference
-    with torch.no_grad():
-        output = model.generate(**inputs, max_new_tokens=2048)
-
+    try:
+        # Move inputs to the correct device
+        inputs = processor(
+            image, input_text, add_special_tokens=False, return_tensors="pt"
+        ).to(model.device)
+
+        # Run inference, then clear the CUDA cache
+        with torch.no_grad():
+            output = model.generate(**inputs, max_new_tokens=2048)
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+    except Exception as e:
+        print(f"Inference error: {e}")
+        return ("Error during inference", "", None, None, None, "", None, "", "Error during inference", "Error")
+
    # Decode output
     result = processor.decode(output[0], skip_special_tokens=True)
     json_str = result.strip().split("assistant\n")[1].strip()
-
-    try:
-        # First JSON parse to handle escaped JSON string
-        first_parse = json.loads(json_str)
-
-        try:
-            # Second JSON parse to get the actual JSON object
-            json_object = json.loads(first_parse)
-            # Return indented JSON string with 2 spaces
-            return json.dumps(json_object, indent=2)
-        except json.JSONDecodeError:
-            # If second parse fails, return the result of first parse indented
-            if isinstance(first_parse, (dict, list)):
-                return json.dumps(first_parse, indent=2)
-            return first_parse
-
-    except json.JSONDecodeError:
-        # If both JSON parses fail, return original string
-        return json_str
-
-    return None  # In case of unexpected errors
-
-# Create Gradio interface using Blocks
+
+    parsed_json = parse_json_response(json_str)
+    if parsed_json:
+        # Create color palette visualization
+        colors = parsed_json.get("color_palette", [])
+        color_image = create_color_palette_image(colors)
+
+        return (
+            parsed_json.get("description", "Not available"),
+            parsed_json.get("scene_description", "Not available"),
+            json.dumps(parsed_json.get("character_list", []), indent=2),
+            json.dumps(parsed_json.get("object_list", []), indent=2),
+            json.dumps(parsed_json.get("texture_details", []), indent=2),
+            parsed_json.get("lighting_details", "Not available"),
+            color_image,
+            json_str,
+            "",  # Error box
+            "Analysis complete",  # Status
+        )
+    return ("Error parsing response", "", None, None, None, "", None, json_str, "Failed to parse JSON", "Error")
+
+
+# Update Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# BugsBunny-LLama-3.2-11B-Base-Medium Demo")
+    gr.Markdown("# BugsBunny-LLama-3.2-11B-Base-Medium Demo")
 
     with gr.Row():
-        # Container for the image takes full width
         with gr.Column(scale=1):
             image_input = gr.Image(
                 type="pil",
                 label="Upload Image",
                 elem_id="large-image",
-                height=500,  # Increased height for larger display
+                height=500,
             )
 
-    with gr.Row():
-        # Container for the text output takes full width
-        with gr.Column(scale=1):
-            text_output = gr.Textbox(
-                label="Response",
-                elem_id="response-text",
+    with gr.Tabs():
+        with gr.Tab("Structured Results"):
+            with gr.Column(scale=1):
+                description_output = gr.Textbox(
+                    label="Description",
+                    lines=4,
+                )
+                scene_output = gr.Textbox(
+                    label="Scene Description",
+                    lines=2,
+                )
+                characters_output = gr.JSON(
+                    label="Characters",
+                )
+                objects_output = gr.JSON(
+                    label="Objects",
+                )
+                textures_output = gr.JSON(
+                    label="Texture Details",
+                )
+                lighting_output = gr.Textbox(
+                    label="Lighting Details",
+                    lines=2,
+                )
+                color_palette_output = gr.Image(
+                    label="Color Palette",
+                    height=100,
+                )
+
+        with gr.Tab("Raw Output"):
+            raw_output = gr.Textbox(
+                label="Raw JSON Response",
                 lines=25,
-                max_lines=10,
+                max_lines=30,
             )
 
-    # Button to trigger the analysis
     submit_btn = gr.Button("Analyze Image", variant="primary")
-    submit_btn.click(fn=inference, inputs=[image_input], outputs=[text_output])
+    error_box = gr.Textbox(label="Error Messages", visible=False)
+
+    with gr.Row():
+        status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
 
+    submit_btn.click(
+        fn=inference,
+        inputs=[image_input],
+        outputs=[
+            description_output,
+            scene_output,
+            characters_output,
+            objects_output,
+            textures_output,
+            lighting_output,
+            color_palette_output,
+            raw_output,
+            error_box,
+            status_text,
+        ],
+        api_name="analyze",
+    )
 
 demo.launch()
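
Note on the parsing change: the model sometimes emits a JSON object that has itself been serialized as a JSON string, so a single json.loads yields a str rather than a dict; parse_json_response therefore parses a second time when needed. A minimal standalone sketch of the same idea (the sample payloads are illustrative, not actual model output):

    import json

    def parse_twice(raw: str):
        # A double-encoded payload parses to a str first, then to a dict.
        first = json.loads(raw)
        return json.loads(first) if isinstance(first, str) else first

    print(parse_twice('{"description": "a red car"}'))            # plain JSON -> dict
    print(parse_twice('"{\\"description\\": \\"a red car\\"}"'))  # double-encoded -> dict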
requirements.txt CHANGED
@@ -9,4 +9,5 @@ accelerate
 huggingface_hub[cli]
 hf-transfer
 pillow
-gradio
+gradio
+matplotlib
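
Since matplotlib now runs inside a server process with no display, it can be worth pinning a non-interactive backend before pyplot is first imported. This is an optional hardening step, not part of this commit:

    # Optional (not in this commit): force a headless backend for server use.
    import matplotlib

    matplotlib.use("Agg")  # must run before importing matplotlib.pyplot
    import matplotlib.pyplot as plt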