Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Files Community

Thai-HandWriting-to-Text / app.py

Aekanun

fixing app.py

5eb28b7 2 months ago

raw

history blame

4.46 kB

	import os
	import warnings
	import torch
	import gc
	from transformers import LlavaForConditionalGeneration, LlavaProcessor
	from PIL import Image
	import gradio as gr
	from huggingface_hub import login

	# Runtime configuration: expose only GPU 0 to this process and silence
	# every warning category.
	os.environ["CUDA_VISIBLE_DEVICES"] = "0"
	warnings.filterwarnings('ignore')

	# Module-level handles for the model and its processor; both start unset.
	model = processor = None

	# When CUDA is usable, release cached GPU memory and run a GC pass before
	# anything is loaded.
	if torch.cuda.is_available():
	    torch.cuda.empty_cache()
	    gc.collect()
	    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")

	# Authenticate against the Hugging Face Hub when a token is provided through
	# the environment; otherwise only emit a warning and continue.
	if os.environ.get('HUGGING_FACE_HUB_TOKEN') is None:
	    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
	else:
	    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
	    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])

	def load_model_and_processor():
	    """Load the LLaVA processor and the 4-bit quantized model from the Hub.

	    Both objects are first loaded into local variables and are published to
	    the module-level ``model`` / ``processor`` globals only after both loads
	    succeeded, so a failure never leaves the globals half-initialised.

	    Returns:
	        bool: True when the processor and the model were both loaded,
	        False when any step raised (the error message is printed).
	    """
	    global model, processor
	    print("กำลังโหลดโมเดลและ processor...")

	    hub_model_path = "Aekanun/thai-handwriting-llm"

	    try:
	        # Imported inside the try so that a missing symbol is reported the
	        # same way as any other load failure.
	        from transformers import BitsAndBytesConfig

	        # NOTE(security): trust_remote_code=True lets the Hub repository run
	        # its own Python code during loading; keep it only for trusted repos.
	        loaded_processor = LlavaProcessor.from_pretrained(
	            hub_model_path,
	            trust_remote_code=True,
	        )

	        print("กำลังโหลดโมเดลจาก Hub...")
	        loaded_model = LlavaForConditionalGeneration.from_pretrained(
	            hub_model_path,
	            device_map="auto",
	            torch_dtype=torch.bfloat16,
	            trust_remote_code=True,
	            # Passing load_in_4bit directly to from_pretrained is deprecated;
	            # the quantization settings belong in a BitsAndBytesConfig.
	            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
	        )
	    except Exception as e:
	        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
	        return False

	    # Publish both handles together, only after everything succeeded.
	    processor = loaded_processor
	    model = loaded_model
	    print("โหลดโมเดลสำเร็จ!")
	    return True

	def process_handwriting(image):
	    """Transcribe Thai handwriting from an image (Gradio callback).

	    Args:
	        image: A PIL image, or any array accepted by ``Image.fromarray``.
	            ``None`` means nothing was uploaded.

	    Returns:
	        str: The stripped transcription produced by the model, a Thai prompt
	        asking for an upload when ``image`` is None, or a Thai error message
	        containing the exception text when processing fails.
	    """
	    if image is None:
	        return "กรุณาอัพโหลดรูปภาพ"

	    try:
	        # Normalise the input to an RGB PIL image.
	        if not isinstance(image, Image.Image):
	            image = Image.fromarray(image)
	        if image.mode != "RGB":
	            image = image.convert("RGB")

	        # Built from explicit pieces so that source indentation can never
	        # leak into the second line of the prompt.
	        prompt = (
	            "Transcribe the Thai handwritten text from the provided image.\n"
	            "Only return the transcription in Thai language."
	        )

	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {"type": "image", "image": image},
	                ],
	            }
	        ]

	        # add_generation_prompt=True opens the assistant turn, so the model
	        # answers the request instead of continuing the user message.
	        text = processor.apply_chat_template(
	            messages,
	            tokenize=False,
	            add_generation_prompt=True,
	        )
	        inputs = processor(text=text, images=image, return_tensors="pt")
	        inputs = {k: v.to(model.device) for k, v in inputs.items()}
	        prompt_length = inputs["input_ids"].shape[-1]

	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=256,
	                do_sample=False,
	                pad_token_id=processor.tokenizer.pad_token_id,
	            )

	        # generate() returns prompt + completion; decode only the newly
	        # generated tokens so the instruction text is not echoed back.
	        generated_tokens = outputs[0][prompt_length:]
	        transcription = processor.decode(generated_tokens, skip_special_tokens=True)
	        return transcription.strip()

	    except Exception as e:
	        # Gradio boundary: surface the failure as text in the output box.
	        return f"เกิดข้อผิดพลาด: {str(e)}"

	# Application bootstrap: load the model first, then build the web UI.
	print("กำลังเริ่มต้นแอปพลิเคชัน...")
	if not load_model_and_processor():
	    # Without a loaded model no interface is created; only report it.
	    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")
	else:
	    # Single-image-in / text-out Gradio interface around process_handwriting.
	    demo = gr.Interface(
	        title="Thai Handwriting Recognition",
	        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
	        fn=process_handwriting,
	        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
	        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
	        examples=[["example1.jpg"], ["example2.jpg"]],
	    )

	    # Start the server only when run as a script, not when imported.
	    if __name__ == "__main__":
	        demo.launch()