Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Files Community

Thai-HandWriting-to-Text / app.py

Aekanun

fixing app.py

948e2eb about 2 months ago

raw

history blame

5.18 kB

	import os
	import warnings
	import torch
	import gc
	from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
	from PIL import Image
	import gradio as gr
	from huggingface_hub import login

	# Basic setup: silence every Python warning and expose only GPU index 0
	# to this process.
	warnings.filterwarnings('ignore')
	os.environ["CUDA_VISIBLE_DEVICES"] = "0"

	# Global variables. Both start as None and are rebound by
	# load_model_and_processor() through its `global` declaration.
	model = None
	processor = None

	# Clear the CUDA cache (only when a CUDA device is actually available).
	if torch.cuda.is_available():
	    torch.cuda.empty_cache()
	    gc.collect()
	    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")

	# Log in to the Hugging Face Hub.
	# The token is read once and tested for truthiness, so a variable that is
	# set but empty is treated as "no token" instead of being passed to login().
	hf_token = os.environ.get('HUGGING_FACE_HUB_TOKEN')
	if hf_token:
	    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
	    login(token=hf_token)
	else:
	    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")

	def load_model_and_processor():
	    """Load the vision-language model and its processor into module globals.

	    The processor is loaded from the base model repository and the model
	    weights from the fine-tuned Hub repository, using 4-bit NF4
	    quantization with bfloat16 compute.

	    Returns:
	        bool: True when both objects were loaded; False when any exception
	        was raised (the error message is printed, not re-raised).
	    """
	    global model, processor
	    print("กำลังโหลดโมเดลและ processor...")

	    try:
	        # Repository paths
	        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
	        hub_model_path = "Aekanun/thai-handwriting-llm"

	        # Read the access token once; None when the variable is not set.
	        hf_token = os.environ.get('HUGGING_FACE_HUB_TOKEN')

	        # BitsAndBytes quantization settings
	        bnb_config = BitsAndBytesConfig(
	            load_in_4bit=True,
	            bnb_4bit_use_double_quant=True,
	            bnb_4bit_quant_type="nf4",
	            bnb_4bit_compute_dtype=torch.bfloat16,
	        )

	        # Load the processor from the base model.
	        # `token` replaces the deprecated `use_auth_token` keyword.
	        processor = AutoProcessor.from_pretrained(
	            base_model_path,
	            token=hf_token,
	        )

	        # Load the model from the Hub
	        print("กำลังโหลดโมเดลจาก Hub...")
	        model = AutoModelForVision2Seq.from_pretrained(
	            hub_model_path,
	            device_map="auto",
	            torch_dtype=torch.bfloat16,
	            quantization_config=bnb_config,
	            # NOTE(review): this allows Python code shipped in the Hub repo
	            # to run during loading — confirm it is actually required.
	            trust_remote_code=True,
	            token=hf_token,
	        )
	        print("โหลดโมเดลจาก Hub สำเร็จ!")

	        return True
	    except Exception as e:
	        # Startup boundary: report the failure and let the caller decide
	        # whether the application can start.
	        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
	        return False

	def process_handwriting(image):
	    """Transcribe Thai handwriting from an image; callback for the Gradio UI.

	    Args:
	        image: A PIL image, an array accepted by ``Image.fromarray``, or
	            None when nothing was uploaded.

	    Returns:
	        str: The transcription generated by the model, a request to upload
	        an image when ``image`` is None, or an error message when any
	        exception is raised while processing.
	    """
	    if image is None:
	        return "กรุณาอัพโหลดรูปภาพ"

	    try:
	        # Ensure the image is a PIL image
	        if not isinstance(image, Image.Image):
	            image = Image.fromarray(image)

	        # Convert to RGB if needed
	        if image.mode != "RGB":
	            image = image.convert("RGB")

	        # Transcription prompt. The literal (including the leading
	        # whitespace of its second line) is kept exactly as it was so the
	        # text sent to the model does not change.
	        prompt = """Transcribe the Thai handwritten text from the provided image.
	Only return the transcription in Thai language."""

	        # Chat-style input for the model
	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {"type": "image", "image": image},
	                ],
	            }
	        ]

	        # Render the chat template with the assistant header appended so
	        # generation starts directly with the answer.
	        text = processor.apply_chat_template(
	            messages,
	            tokenize=False,
	            add_generation_prompt=True,
	        )
	        # The rendered template already contains the special tokens, so the
	        # processor must not add them a second time.
	        inputs = processor(
	            text=text,
	            images=image,
	            add_special_tokens=False,
	            return_tensors="pt",
	        )
	        inputs = {k: v.to(model.device) for k, v in inputs.items()}
	        prompt_length = inputs["input_ids"].shape[-1]

	        # Predict
	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=256,
	                do_sample=False,
	                pad_token_id=processor.tokenizer.pad_token_id,
	            )

	        # generate() returns prompt + completion; decode only the newly
	        # generated tokens so the prompt is not echoed in the result.
	        generated_ids = outputs[0][prompt_length:]
	        transcription = processor.decode(generated_ids, skip_special_tokens=True)
	        return transcription.strip()

	    except Exception as e:
	        # UI boundary: show the error in the output box instead of raising.
	        return f"เกิดข้อผิดพลาด: {str(e)}"

	# Application start-up: load the model first, and only build the Gradio
	# interface when loading succeeded.
	print("กำลังเริ่มต้นแอปพลิเคชัน...")
	if not load_model_and_processor():
	    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")
	else:
	    # Input and output widgets of the interface
	    image_input = gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย")
	    text_output = gr.Textbox(label="ข้อความที่แปลงได้")

	    # Gradio interface wired to the transcription callback
	    demo = gr.Interface(
	        fn=process_handwriting,
	        inputs=image_input,
	        outputs=text_output,
	        title="Thai Handwriting Recognition",
	        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
	        examples=[["example1.jpg"], ["example2.jpg"]],
	    )

	    # Launch only when run as a script, not when imported.
	    if __name__ == "__main__":
	        demo.launch()