Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Files Community

Thai-HandWriting-to-Text / app.py

Aekanun

fixed and run app.py

592ad8f about 2 months ago

raw

history blame

3.3 kB

	import torch
	from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
	from PIL import Image
	import gradio as gr

	# Shared inference objects. They start out as None and are assigned by
	# load_model_and_processor(); process_handwriting() reads them.
	processor = None
	model = None

	def load_model_and_processor():
	    """Load the 4-bit quantized model and its processor into module globals.

	    The processor is loaded from the base checkpoint
	    ``meta-llama/Llama-3.2-11B-Vision-Instruct`` and the model from
	    ``Aekanun/thai-handwriting-llm`` (NF4 4-bit weights with double
	    quantization, bfloat16 compute).

	    The module-level ``model`` and ``processor`` names are assigned only
	    after *both* loads have succeeded, so a failure part-way through never
	    leaves one of them set without the other.

	    Returns:
	        bool: True when both objects were loaded; False when any step
	        raised. The error is printed rather than propagated.
	    """
	    global model, processor

	    model_path = "Aekanun/thai-handwriting-llm"
	    base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"

	    try:
	        print("Loading processor...")
	        loaded_processor = AutoProcessor.from_pretrained(base_model_path)

	        print("Loading model...")
	        bnb_config = BitsAndBytesConfig(
	            load_in_4bit=True,
	            bnb_4bit_use_double_quant=True,
	            bnb_4bit_quant_type="nf4",
	            bnb_4bit_compute_dtype=torch.bfloat16,
	        )
	        loaded_model = AutoModelForVision2Seq.from_pretrained(
	            model_path,
	            device_map="auto",
	            torch_dtype=torch.bfloat16,
	            quantization_config=bnb_config,
	        )
	    except Exception as e:
	        # Start-up boundary: report the problem and let the caller decide
	        # what to do instead of crashing the import.
	        print(f"Error loading model: {e}")
	        return False

	    # Publish both objects together so readers never see a partial state.
	    model, processor = loaded_model, loaded_processor
	    return True

	def process_handwriting(image):
	    """Transcribe Thai handwriting from an image with the loaded model.

	    Args:
	        image: A ``PIL.Image.Image``, or any object ``Image.fromarray``
	            accepts. ``None`` means nothing was uploaded.

	    Returns:
	        str: The model's transcription (generated text only, without the
	        prompt). When ``image`` is None, a Thai message asking the user to
	        upload a picture. When anything raises, a Thai error message
	        followed by the exception text.
	    """
	    if image is None:
	        return "กรุณาอัพโหลดรูปภาพ"

	    try:
	        if not isinstance(image, Image.Image):
	            image = Image.fromarray(image)

	        # NOTE(review): the literal's second line keeps its leading
	        # whitespace, which becomes part of the prompt - confirm it matches
	        # the prompt used when the model was fine-tuned.
	        prompt = """Transcribe the Thai handwritten text from the provided image.
	Only return the transcription in Thai language."""

	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {"type": "image", "image": image}
	                ],
	            }
	        ]

	        # add_generation_prompt appends the assistant header so the model
	        # starts answering instead of having to emit the header itself.
	        text = processor.apply_chat_template(
	            messages,
	            add_generation_prompt=True,
	            tokenize=False,
	        )
	        # The templated text already carries the begin-of-text token, so
	        # the processor must not add special tokens a second time.
	        inputs = processor(
	            text=text,
	            images=image,
	            add_special_tokens=False,
	            return_tensors="pt",
	        )
	        inputs = {k: v.to(model.device) for k, v in inputs.items()}

	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=256,
	                do_sample=False,
	                pad_token_id=processor.tokenizer.pad_token_id
	            )

	        # generate() returns prompt tokens followed by the new tokens;
	        # drop the prompt so only the transcription is decoded.
	        prompt_length = inputs["input_ids"].shape[-1]
	        generated_ids = outputs[0][prompt_length:]
	        transcription = processor.decode(generated_ids, skip_special_tokens=True)
	        return transcription.strip()

	    except Exception as e:
	        # UI boundary: show the failure in the output box instead of raising.
	        return f"เกิดข้อผิดพลาด: {e}"

	# Start-up: load the model once when the module is executed, then build
	# the Gradio UI only if loading succeeded.
	print("Initializing application...")
	model_loaded = load_model_and_processor()

	if not model_loaded:
	    # Nothing to serve without a model; just report it.
	    print("Failed to load model and processor. Please check the logs.")
	else:
	    print("Creating Gradio interface...")
	    demo = gr.Interface(
	        title="Thai Handwriting to Text",
	        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
	        fn=process_handwriting,
	        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
	        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
	    )

	    # Launch the server only when run as a script, not when imported.
	    if __name__ == "__main__":
	        print("Launching application...")
	        demo.launch()