# Hugging Face Space metadata (page header): Spaces — running on L40S hardware.
import base64
import os
from io import BytesIO

import gradio as gr
import requests
from PIL import Image
print("=== DEBUG: Starting app.py ===")

# Directory shipping the demo's bundled example images (inside the Space's app dir).
example_dir = os.path.join(os.environ.get("HOME", "/home/user"), "app", "example_images")

# Recognized raster-image extensions for the Examples gallery.
_IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".gif", ".bmp")

# Collect example images, if the directory exists at all (it may not locally).
example_images = []
if os.path.exists(example_dir):
    example_images = [
        os.path.join(example_dir, name)
        for name in os.listdir(example_dir)
        if name.lower().endswith(_IMAGE_EXTS)
    ]

print(f"Found {len(example_images)} example images")
print(f"Example dir: {example_dir}")
def encode_image_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a JPEG data-URI string.

    Args:
        image: Source image. Converted to RGB first because JPEG cannot
            store alpha or palette modes — Pillow raises
            ``OSError: cannot write mode RGBA as JPEG`` otherwise.

    Returns:
        A ``data:image/jpeg;base64,...`` string suitable for an
        OpenAI-style ``image_url`` message content entry.
    """
    # Defensive: callers normally pass RGB (via load_image_any), but saving
    # any non-RGB mode as JPEG would raise, so normalize here too.
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"
def load_image_any(image):
    """Normalize Gradio's image input to an RGB ``PIL.Image``.

    With ``gr.Image(type="filepath")`` the widget hands us a ``str`` path;
    other configurations may hand us a ``PIL.Image`` directly. ``None``
    passes through unchanged (nothing uploaded yet).
    """
    if image is None:
        return None
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    # str path — or, best-effort, anything else Image.open can handle.
    return Image.open(image).convert("RGB")
def _split_think_answer(result: str) -> tuple[str, str]:
    """Split a model completion into ``(reasoning, answer)``.

    NuMarkdown wraps its chain-of-thought in ``<think>...</think>`` and the
    final Markdown in ``<answer>...</answer>``. Each tag pair is parsed
    independently so a missing thinking trace does not discard a present
    answer (the previous all-or-nothing parse fell back to the raw tagged
    text whenever either tag was absent).
    """
    reasoning = "No thinking trace found"
    answer = result
    try:
        reasoning = result.split("<think>")[1].split("</think>")[0]
    except IndexError:
        pass
    try:
        answer = result.split("<answer>")[1].split("</answer>")[0]
    except IndexError:
        pass
    return reasoning, answer


def query_vllm_api(image, temperature, max_tokens=12_000):
    """Send *image* to the local vLLM server and return the model's output.

    Args:
        image: Filepath (str) or ``PIL.Image`` from the Gradio widget; may
            be ``None`` when nothing was uploaded.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion-length cap forwarded to the API.

    Returns:
        ``(reasoning, answer, answer)`` — wired to the thinking-trace
        textbox, the raw-Markdown textbox, and the rendered-Markdown pane.
        On any failure all three slots carry the same error message.
    """
    print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
    pil_img = load_image_any(image)
    if pil_img is None:
        return "No image provided", "No image provided", "Please upload an image first."
    try:
        # Downscale very large uploads so the base64 payload stays reasonable.
        max_size = 2048
        if max(pil_img.size) > max_size:
            ratio = max_size / max(pil_img.size)
            new_size = (int(pil_img.size[0] * ratio), int(pil_img.size[1] * ratio))
            pil_img = pil_img.resize(new_size, Image.Resampling.LANCZOS)
        image_b64 = encode_image_to_base64(pil_img)
        # OpenAI-compatible chat payload; the model takes no text prompt,
        # only the image.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}}
            ],
        }]
        payload = {
            "model": "numind/NuMarkdown-8B-Thinking",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        print("=== DEBUG: About to make vLLM API request ===")
        # NOTE(review): 60 s may be tight for a 12k-token completion —
        # confirm typical generation latency before raising max_tokens.
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()
        result = data["choices"][0]["message"]["content"]
        reasoning, answer = _split_think_answer(result)
        return reasoning, answer, answer
    except requests.exceptions.RequestException as e:
        error_msg = f"API request failed: {e}"
        print(f"=== DEBUG: Request error: {error_msg} ===")
        return error_msg, error_msg, error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        print(f"=== DEBUG: Unexpected error: {error_msg} ===")
        return error_msg, error_msg, error_msg
print("=== DEBUG: Creating Gradio interface ===")

# Top-level UI definition; `demo` is launched from the __main__ guard below.
with gr.Blocks(title="NuMarkdown-8B-Thinking") as demo:
    # Static header card with project links (HTML passed through verbatim;
    # some emoji are mojibake from the original source and kept byte-for-byte).
    gr.HTML(
        """
        <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">ποΈ NuMarkdown-8B-Thinking</h1>
        <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
        <div style="margin-top: 15px;">
        <a href="https://nuextract.ai/" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">π₯οΈ API / Platform</a>
        <span style="color: rgba(255,255,255,0.7);">|</span>
        <a href="https://discord.gg/3tsEtJNCDe" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">π£οΈ Discord</a>
        <span style="color: rgba(255,255,255,0.7);">|</span>
        <a href="https://github.com/numindai/NuMarkdown" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">π GitHub</a>
        <span style="color: rgba(255,255,255,0.7);">|</span>
        <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">π€ Model</a>
        </div>
        </div>
        <p>NuMarkdown-8B-Thinking converts documents into clean Markdown, well suited for RAG applications.</p>
        <p>NOTE: We downsize large images and restrict max output tokens in this demo.</p>
        """
    )
    with gr.Row():
        # Left column: inputs (temperature, trigger button, image upload).
        with gr.Column(scale=2):
            temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
            btn = gr.Button("Generate Response", variant="primary", size="lg")
            # Use filepath so the preview works consistently with Examples and uploads.
            img_in = gr.Image(type="filepath", label="Upload Image")
        # Right column: the three output views, one per tab.
        with gr.Column(scale=2):
            with gr.Accordion("π Model Outputs", open=True):
                with gr.Tabs():
                    with gr.TabItem("π§  Thinking Trace"):
                        thinking = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The model's reasoning process will appear here...",
                        )
                    with gr.TabItem("π Raw Markdown"):
                        raw_answer = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The raw model output will appear here...",
                        )
                    with gr.TabItem("π Rendered Markdown"):
                        output = gr.Markdown(label="π Generated Markdown")
    # Wire the button to the API call; query_vllm_api returns a 3-tuple that
    # maps onto (thinking, raw_answer, output) in order.
    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )
    # Offer up to five bundled example images, when any were found at startup.
    if example_images:
        gr.Examples(
            examples=[[p] for p in example_images[:5]],
            inputs=[img_in],
            label="πΈ Try these example images",
        )
print("=== DEBUG: Gradio interface created ===")
if __name__ == "__main__":
    print("=== DEBUG: About to launch Gradio ===")
    # IMPORTANT: if allowed_paths is set, Gradio's upload temp dir must be
    # included too, otherwise uploaded-image previews break. Uploads
    # typically land under /tmp/gradio/...
    allowed = ["/tmp/gradio"]
    if os.path.exists(example_dir):
        allowed.append(example_dir)
    # BUG FIX: `theme=` and `css=` are gr.Blocks() constructor arguments, not
    # Blocks.launch() arguments — launch() rejects unknown keyword arguments
    # with a TypeError, so passing them here crashed the app before it could
    # serve. If the Soft theme / Inter font CSS are wanted, move them to the
    # `gr.Blocks(...)` call where the interface is defined.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # NOTE(review): ignored (with a warning) on HF Spaces — confirm it is needed
        allowed_paths=allowed,  # include /tmp/gradio so uploaded previews render
    )
    print("=== DEBUG: Gradio launched ===")