# NuMarkdown-8B-Thinking demo -- app.py (Hugging Face Space by etiennebcp, commit 3750374)
import gradio as gr
import requests
import base64
from PIL import Image
from io import BytesIO
import os
print("=== DEBUG: Starting app.py ===")

# Directory that may hold bundled demo images (shipped with the Space).
example_dir = os.path.join(os.environ.get("HOME", "/home/user"), "app", "example_images")

# Gather every recognized image file in that directory, in listdir order.
# Stays an empty list when the directory does not exist.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif", ".bmp")
example_images = []
if os.path.exists(example_dir):
    example_images = [
        os.path.join(example_dir, name)
        for name in os.listdir(example_dir)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    ]
print(f"Found {len(example_images)} example images")
print(f"Example dir: {example_dir}")
def encode_image_to_base64(image: Image.Image) -> str:
    """Serialize *image* as JPEG and return it as a base64 data-URI string."""
    raw = BytesIO()
    image.save(raw, format="JPEG")
    encoded = base64.b64encode(raw.getvalue()).decode()
    return "data:image/jpeg;base64," + encoded
def load_image_any(image):
    """Normalize *image* to an RGB PIL image.

    With gr.Image(type="filepath") the input is a str path; other Gradio
    configurations may hand over a PIL.Image directly. Returns None when
    *image* is None.
    """
    if image is None:
        return None
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    # str paths and any other file-like inputs both go through Image.open.
    return Image.open(image).convert("RGB")
def query_vllm_api(image, temperature, max_tokens=12_000):
    """Send *image* to the local vLLM server and return (reasoning, answer, answer).

    The three return slots feed the thinking-trace, raw-markdown, and
    rendered-markdown widgets respectively; on failure all three carry the
    same error string so every tab shows what went wrong.
    """
    print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
    pil_img = load_image_any(image)
    if pil_img is None:
        return "No image provided", "No image provided", "Please upload an image first."
    try:
        # Cap the longest side at 2048 px to keep the base64 upload small.
        max_size = 2048
        longest = max(pil_img.size)
        if longest > max_size:
            scale = max_size / longest
            pil_img = pil_img.resize(
                (int(pil_img.size[0] * scale), int(pil_img.size[1] * scale)),
                Image.Resampling.LANCZOS,
            )
        # OpenAI-compatible chat payload: a single user turn with one image.
        payload = {
            "model": "numind/NuMarkdown-8B-Thinking",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": encode_image_to_base64(pil_img)},
                        }
                    ],
                }
            ],
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        print("=== DEBUG: About to make vLLM API request ===")
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()["choices"][0]["message"]["content"]
        # Split out the optional <think>...</think> / <answer>...</answer>
        # sections; fall back to the raw text when the tags are absent.
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except Exception:
            reasoning = "No thinking trace found"
            answer = result
        return reasoning, answer, answer
    except requests.exceptions.RequestException as e:
        error_msg = f"API request failed: {e}"
        print(f"=== DEBUG: Request error: {error_msg} ===")
        return error_msg, error_msg, error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        print(f"=== DEBUG: Unexpected error: {error_msg} ===")
        return error_msg, error_msg, error_msg
print("=== DEBUG: Creating Gradio interface ===")
# --- Gradio UI -------------------------------------------------------------
# Layout: HTML header banner, then a two-column row (controls + input image
# on the left, tabbed model outputs on the right), then optional examples.
# NOTE: nesting order of the `with` context managers defines the layout;
# do not reorder these statements.
with gr.Blocks(title="NuMarkdown-8B-Thinking") as demo:
    gr.HTML(
        """
    <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
        <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
        <div style="margin-top: 15px;">
            <a href="https://nuextract.ai/" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://discord.gg/3tsEtJNCDe" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://github.com/numindai/NuMarkdown" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🔗 GitHub</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
        </div>
    </div>
    <p>NuMarkdown-8B-Thinking converts documents into clean Markdown, well suited for RAG applications.</p>
    <p>NOTE: We downsize large images and restrict max output tokens in this demo.</p>
    """
    )
    with gr.Row():
        with gr.Column(scale=2):
            # Sampling temperature forwarded verbatim to the vLLM request.
            temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
            btn = gr.Button("Generate Response", variant="primary", size="lg")
            # ✅ Use filepath so preview works consistently with Examples and uploads
            img_in = gr.Image(type="filepath", label="Upload Image")
        with gr.Column(scale=2):
            with gr.Accordion("🔍 Model Outputs", open=True):
                with gr.Tabs():
                    # Tab 1: the model's <think> trace (first return slot).
                    with gr.TabItem("🧠 Thinking Trace"):
                        thinking = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The model's reasoning process will appear here...",
                        )
                    # Tab 2: the raw <answer> text (second return slot).
                    with gr.TabItem("📄 Raw Markdown"):
                        raw_answer = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The raw model output will appear here...",
                        )
                    # Tab 3: the same answer rendered as Markdown (third slot).
                    with gr.TabItem("📝 Rendered Markdown"):
                        output = gr.Markdown(label="📝 Generated Markdown")
    # Wire the button to the inference function; the three outputs line up
    # with query_vllm_api's (reasoning, answer, answer) return tuple.
    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )
    # Show up to five bundled example images, when any were found on disk.
    if example_images:
        gr.Examples(
            examples=[[p] for p in example_images[:5]],
            inputs=[img_in],
            label="📸 Try these example images",
        )
print("=== DEBUG: Gradio interface created ===")
if __name__ == "__main__":
    print("=== DEBUG: About to launch Gradio ===")
    # ✅ IMPORTANT:
    # If you set allowed_paths, include Gradio's upload temp dir, otherwise
    # previews break. Uploads typically land in /tmp/gradio/...
    allowed = ["/tmp/gradio"]
    if os.path.exists(example_dir):
        allowed.append(example_dir)
    # BUGFIX: `theme` and `css` are gr.Blocks() constructor options, not
    # Blocks.launch() parameters. Passing them to launch() raises
    # "TypeError: launch() got an unexpected keyword argument 'theme'" and
    # the app never starts, so they are dropped here. To style the demo,
    # pass theme=gr.themes.Soft() and the css string to gr.Blocks(...) where
    # the interface is created instead.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        allowed_paths=allowed,  # ✅ include /tmp/gradio so uploaded previews render
    )
    print("=== DEBUG: Gradio launched ===")