# NuMarkdown-8B-Thinking demo -- app.py (Hugging Face Space by etiennebcp, commit 3750374)
import gradio as gr
import requests
import base64
from PIL import Image
from io import BytesIO
import os
print("=== DEBUG: Starting app.py ===")

# Directory that may hold bundled demo images (shipped with the Space).
example_dir = os.path.join(os.environ.get("HOME", "/home/user"), "app", "example_images")

# Gather every recognized image file in that directory, in listdir order.
# Stays an empty list when the directory does not exist.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif", ".bmp")
example_images = []
if os.path.exists(example_dir):
    example_images = [
        os.path.join(example_dir, name)
        for name in os.listdir(example_dir)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    ]
print(f"Found {len(example_images)} example images")
print(f"Example dir: {example_dir}")
def encode_image_to_base64(image: Image.Image) -> str:
    """Serialize *image* as JPEG and return it as a base64 data-URI string."""
    raw = BytesIO()
    image.save(raw, format="JPEG")
    encoded = base64.b64encode(raw.getvalue()).decode()
    return "data:image/jpeg;base64," + encoded
def load_image_any(image):
    """Normalize *image* to an RGB PIL image.

    With gr.Image(type="filepath") the input is a str path; other Gradio
    configurations may hand over a PIL.Image directly. Returns None when
    *image* is None.
    """
    if image is None:
        return None
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    # str paths and any other file-like inputs both go through Image.open.
    return Image.open(image).convert("RGB")
def query_vllm_api(image, temperature, max_tokens=12_000):
    """Send *image* to the local vLLM server and return (reasoning, answer, answer).

    The three return slots feed the thinking-trace, raw-markdown, and
    rendered-markdown widgets respectively; on failure all three carry the
    same error string so every tab shows what went wrong.
    """
    print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
    pil_img = load_image_any(image)
    if pil_img is None:
        return "No image provided", "No image provided", "Please upload an image first."
    try:
        # Cap the longest side at 2048 px to keep the base64 upload small.
        max_size = 2048
        longest = max(pil_img.size)
        if longest > max_size:
            scale = max_size / longest
            pil_img = pil_img.resize(
                (int(pil_img.size[0] * scale), int(pil_img.size[1] * scale)),
                Image.Resampling.LANCZOS,
            )
        # OpenAI-compatible chat payload: a single user turn with one image.
        payload = {
            "model": "numind/NuMarkdown-8B-Thinking",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": encode_image_to_base64(pil_img)},
                        }
                    ],
                }
            ],
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        print("=== DEBUG: About to make vLLM API request ===")
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()["choices"][0]["message"]["content"]
        # Split out the optional <think>...</think> / <answer>...</answer>
        # sections; fall back to the raw text when the tags are absent.
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except Exception:
            reasoning = "No thinking trace found"
            answer = result
        return reasoning, answer, answer
    except requests.exceptions.RequestException as e:
        error_msg = f"API request failed: {e}"
        print(f"=== DEBUG: Request error: {error_msg} ===")
        return error_msg, error_msg, error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        print(f"=== DEBUG: Unexpected error: {error_msg} ===")
        return error_msg, error_msg, error_msg
print("=== DEBUG: Creating Gradio interface ===")
# --- Gradio UI -------------------------------------------------------------
# Layout: HTML header banner, then a two-column row (controls + input image
# on the left, tabbed model outputs on the right), then optional examples.
# NOTE: nesting order of the `with` context managers defines the layout;
# do not reorder these statements.
with gr.Blocks(title="NuMarkdown-8B-Thinking") as demo:
    gr.HTML(
        """
    <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
        <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
        <div style="margin-top: 15px;">
            <a href="https://nuextract.ai/" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://discord.gg/3tsEtJNCDe" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://github.com/numindai/NuMarkdown" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🔗 GitHub</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" target="_blank" rel="noopener noreferrer" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
        </div>
    </div>
    <p>NuMarkdown-8B-Thinking converts documents into clean Markdown, well suited for RAG applications.</p>
    <p>NOTE: We downsize large images and restrict max output tokens in this demo.</p>
    """
    )
    with gr.Row():
        with gr.Column(scale=2):
            # Sampling temperature forwarded verbatim to the vLLM request.
            temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
            btn = gr.Button("Generate Response", variant="primary", size="lg")
            # ✅ Use filepath so preview works consistently with Examples and uploads
            img_in = gr.Image(type="filepath", label="Upload Image")
        with gr.Column(scale=2):
            with gr.Accordion("🔍 Model Outputs", open=True):
                with gr.Tabs():
                    # Tab 1: the model's <think> trace (first return slot).
                    with gr.TabItem("🧠 Thinking Trace"):
                        thinking = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The model's reasoning process will appear here...",
                        )
                    # Tab 2: the raw <answer> text (second return slot).
                    with gr.TabItem("📄 Raw Markdown"):
                        raw_answer = gr.Textbox(
                            lines=15,
                            max_lines=25,
                            show_label=False,
                            placeholder="The raw model output will appear here...",
                        )
                    # Tab 3: the same answer rendered as Markdown (third slot).
                    with gr.TabItem("📝 Rendered Markdown"):
                        output = gr.Markdown(label="📝 Generated Markdown")
    # Wire the button to the inference function; the three outputs line up
    # with query_vllm_api's (reasoning, answer, answer) return tuple.
    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )
    # Show up to five bundled example images, when any were found on disk.
    if example_images:
        gr.Examples(
            examples=[[p] for p in example_images[:5]],
            inputs=[img_in],
            label="📸 Try these example images",
        )
print("=== DEBUG: Gradio interface created ===")
if __name__ == "__main__":
    print("=== DEBUG: About to launch Gradio ===")
    # ✅ IMPORTANT:
    # If you set allowed_paths, include Gradio's upload temp dir, otherwise
    # previews break. Uploads typically land in /tmp/gradio/...
    allowed = ["/tmp/gradio"]
    if os.path.exists(example_dir):
        allowed.append(example_dir)
    # BUGFIX: `theme` and `css` are gr.Blocks() constructor options, not
    # Blocks.launch() parameters. Passing them to launch() raises
    # "TypeError: launch() got an unexpected keyword argument 'theme'" and
    # the app never starts, so they are dropped here. To style the demo,
    # pass theme=gr.themes.Soft() and the css string to gr.Blocks(...) where
    # the interface is created instead.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        allowed_paths=allowed,  # ✅ include /tmp/gradio so uploaded previews render
    )
    print("=== DEBUG: Gradio launched ===")