"""Gradio app that transcribes Thai handwriting with a Llama 3.2 Vision fine-tune.

The model and processor are loaded once at import time; the Gradio interface
is only created when loading succeeds.
"""

import torch
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
from PIL import Image
import gradio as gr

# Fine-tuned checkpoint and the base model whose processor it reuses.
MODEL_PATH = "Aekanun/thai-handwriting-llm"
BASE_MODEL_PATH = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Instruction sent with every image. Written as two adjacent literals so the
# trailing space before the newline is explicit and survives editors that
# strip trailing whitespace.
TRANSCRIBE_PROMPT = (
    "Transcribe the Thai handwritten text from the provided image. \n"
    "Only return the transcription in Thai language."
)

# Upper bound on the number of tokens generated per transcription.
MAX_NEW_TOKENS = 256

# Global variables for model and processor
model = None
processor = None


def load_model_and_processor() -> bool:
    """Load the processor and the 4-bit quantized model into the module globals.

    Returns:
        True when both objects were loaded, False if any step raised. The
        error is printed rather than propagated so the caller can decide
        whether to build the UI.
    """
    global model, processor

    try:
        print("Loading processor...")
        processor = AutoProcessor.from_pretrained(BASE_MODEL_PATH)

        print("Loading model...")
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        model = AutoModelForVision2Seq.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return True
    except Exception as e:
        # Startup boundary: report the failure and let the caller skip the UI.
        print(f"Error loading model: {e}")
        return False


def process_handwriting(image) -> str:
    """Transcribe the Thai handwriting in ``image``.

    Args:
        image: A ``PIL.Image.Image``, or any array accepted by
            ``Image.fromarray``. ``None`` means nothing was uploaded.

    Returns:
        The generated transcription, or a Thai-language message when no image
        was supplied or an error occurred during processing.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": TRANSCRIBE_PROMPT},
                    {"type": "image", "image": image},
                ],
            }
        ]

        # add_generation_prompt appends the assistant header so the model
        # starts answering instead of continuing the user turn.
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # The chat template already emits the begin-of-text token; asking the
        # processor not to add special tokens avoids duplicating it.
        inputs = processor(
            text=text,
            images=image,
            add_special_tokens=False,
            return_tensors="pt",
        )
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
            )

        # generate() returns prompt + continuation; drop the prompt tokens so
        # only the newly generated transcription is shown to the user.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_tokens = outputs[0][prompt_length:]
        transcription = processor.decode(generated_tokens, skip_special_tokens=True)
        return transcription.strip()
    except Exception as e:
        # UI boundary: surface the error in the output textbox.
        return f"เกิดข้อผิดพลาด: {e}"


# Initialize application
print("Initializing application...")
model_loaded = load_model_and_processor()

if model_loaded:
    print("Creating Gradio interface...")
    demo = gr.Interface(
        fn=process_handwriting,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting to Text",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
    )

    if __name__ == "__main__":
        print("Launching application...")
        demo.launch()
else:
    print("Failed to load model and processor. Please check the logs.")