"""Gradio demo: convert a document image to Markdown with NuMarkdown-8B-Thinking.

The UI posts the uploaded image to a local vLLM OpenAI-compatible server
(http://localhost:8000) and shows the model's thinking trace, the raw
Markdown, and the rendered Markdown.
"""

import base64
import os
from io import BytesIO

import gradio as gr
import requests
from PIL import Image

print("=== DEBUG: Starting app.py ===")

# Directory of bundled example images shown in the gr.Examples gallery.
example_dir = os.path.join(os.environ.get("HOME", "/home/user"), "app", "example_images")
example_images = []
if os.path.exists(example_dir):
    for filename in os.listdir(example_dir):
        if filename.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
            example_images.append(os.path.join(example_dir, filename))

print(f"Found {len(example_images)} example images")
print(f"Example dir: {example_dir}")


def encode_image_to_base64(image: Image.Image) -> str:
    """Serialize a PIL image as a JPEG data URL for an OpenAI-style image_url field."""
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"


def load_image_any(image):
    """Normalize the Gradio input to a PIL RGB image (or None).

    With gr.Image(type="filepath") the input is a str path. With other
    configurations it can already be a PIL.Image.
    """
    if image is None:
        return None
    if isinstance(image, str):
        return Image.open(image).convert("RGB")
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    # Best-effort fallback (e.g. a file-like object).
    return Image.open(image).convert("RGB")


def query_vllm_api(image, temperature, max_tokens=12_000):
    """Send the image to the local vLLM endpoint.

    Returns a (reasoning, raw_answer, rendered_answer) triple matching the
    three output widgets; on error all three slots carry the error message.
    """
    print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
    pil_img = load_image_any(image)
    if pil_img is None:
        return "No image provided", "No image provided", "Please upload an image first."

    try:
        # Optional resize to avoid huge uploads.
        max_size = 2048
        if max(pil_img.size) > max_size:
            ratio = max_size / max(pil_img.size)
            new_size = (int(pil_img.size[0] * ratio), int(pil_img.size[1] * ratio))
            pil_img = pil_img.resize(new_size, Image.Resampling.LANCZOS)

        image_b64 = encode_image_to_base64(pil_img)

        messages = [{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}}
            ],
        }]

        payload = {
            "model": "numind/NuMarkdown-8B-Thinking",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }

        print("=== DEBUG: About to make vLLM API request ===")
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            # BUGFIX: 60 s routinely times out when generating up to 12k
            # tokens; allow the full generation to finish.
            timeout=600,
        )
        response.raise_for_status()

        data = response.json()
        result = data["choices"][0]["message"]["content"]

        # BUGFIX: the tag separators had been lost in the source; splitting on
        # an empty string raises ValueError, so the fallback branch always
        # ran and the thinking trace was never extracted. NuMarkdown wraps
        # its output in <think>...</think> and <answer>...</answer> tags.
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except Exception:
            reasoning = "No thinking trace found"
            answer = result

        return reasoning, answer, answer

    except requests.exceptions.RequestException as e:
        error_msg = f"API request failed: {e}"
        print(f"=== DEBUG: Request error: {error_msg} ===")
        return error_msg, error_msg, error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        print(f"=== DEBUG: Unexpected error: {error_msg} ===")
        return error_msg, error_msg, error_msg


print("=== DEBUG: Creating Gradio interface ===")

# BUGFIX: theme= and css= are gr.Blocks() constructor arguments, not
# launch() arguments — passing them to launch() raises TypeError.
with gr.Blocks(
    title="NuMarkdown-8B-Thinking",
    theme=gr.themes.Soft(),
    css="""
    * { font-family: 'Inter', 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif !important; }
    """,
) as demo:
    # NOTE(review): the original HTML markup was garbled in this source (all
    # tags stripped); reconstructed from the surviving text — confirm the
    # link URLs against the deployed Space.
    gr.HTML(
        """
        <div style="text-align: center;">
            <h1>👁️ NuMarkdown-8B-Thinking</h1>
            <p>Upload an image to convert to Markdown!</p>
            <p>
                <a href="https://nuextract.ai" target="_blank">🖥️ API / Platform</a> |
                <a href="https://discord.gg/numind" target="_blank">🗣️ Discord</a> |
                <a href="https://github.com/numindai" target="_blank">🔗 GitHub</a> |
                <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" target="_blank">🤗 Model</a>
            </p>
            <p>NuMarkdown-8B-Thinking converts documents into clean Markdown, well suited for RAG applications.</p>
            <p><em>NOTE: We downsize large images and restrict max output tokens in this demo.</em></p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
            btn = gr.Button("Generate Response", variant="primary", size="lg")
            # Use filepath so preview works consistently with Examples and uploads.
            img_in = gr.Image(type="filepath", label="Upload Image")

        with gr.Column(scale=2):
            with gr.Accordion("🔍 Model Outputs", open=True):
                with gr.Tabs():
                    with gr.TabItem("🧠 Thinking Trace"):
                        thinking = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The model's reasoning process will appear here...",
                        )
                    with gr.TabItem("📄 Raw Markdown"):
                        raw_answer = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The raw model output will appear here...",
                        )
                    with gr.TabItem("📝 Rendered Markdown"):
                        output = gr.Markdown(label="📝 Generated Markdown")

    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )

    if example_images:
        gr.Examples(
            examples=[[p] for p in example_images[:5]],
            inputs=[img_in],
            label="📸 Try these example images",
        )

print("=== DEBUG: Gradio interface created ===")

if __name__ == "__main__":
    print("=== DEBUG: About to launch Gradio ===")
    # IMPORTANT: if you set allowed_paths, include Gradio's upload temp dir,
    # otherwise previews break. Uploads typically land in /tmp/gradio/...
    allowed = ["/tmp/gradio"]
    if os.path.exists(example_dir):
        allowed.append(example_dir)

    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        allowed_paths=allowed,  # include /tmp/gradio so uploaded previews render
    )
    print("=== DEBUG: Gradio launched ===")