"""Gradio app that turns an image of Thai handwriting into text.

At import time the module optionally logs in to the Hugging Face Hub, loads
the processor and vision-to-sequence model, and builds the Gradio interface
as the module-level name ``demo``. The interface is launched only when the
file is executed as a script.
"""

import os

import gradio as gr
import torch
from huggingface_hub import login
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

# Hub repository the processor and model are loaded from.
MODEL_PATH = "Aekanun/thai-handwriting-llm"

# Example images offered in the UI; paths are resolved against the working
# directory.
EXAMPLE_IMAGES = ("example1.jpg", "example2.jpg")

# Login to Hugging Face Hub.
# An empty token is treated the same as a missing one so that login() is
# never called with an empty string.
_hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if _hf_token:
    print("Logging in to Hugging Face Hub...")
    login(token=_hf_token)
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")

# Global variables, populated by load_model().
model = None
processor = None


def load_model() -> bool:
    """Load the processor and model from ``MODEL_PATH`` into module globals.

    The model is moved to CUDA when ``torch.cuda.is_available()`` is true.

    Returns:
        True when both objects were loaded, False when any step raised. On
        failure the error is printed and the globals are left untouched.
    """
    global model, processor
    try:
        print(f"Loading model and processor from {MODEL_PATH}...")
        loaded_processor = AutoProcessor.from_pretrained(MODEL_PATH)
        loaded_model = AutoModelForVision2Seq.from_pretrained(MODEL_PATH)
        if torch.cuda.is_available():
            loaded_model = loaded_model.to("cuda")
    except Exception as e:
        # Startup boundary: report the failure and let the caller decide.
        print(f"Error loading model: {str(e)}")
        return False

    # Publish both objects together so a failure halfway through loading
    # never leaves a processor without a model.
    processor = loaded_processor
    model = loaded_model
    return True


def process_image(image) -> str:
    """Run the loaded model on one image and return the decoded text.

    Args:
        image: A PIL image, or any object ``Image.fromarray`` accepts.
            ``None`` means nothing was uploaded.

    Returns:
        The stripped text of the first decoded sequence. When ``image`` is
        ``None`` a Thai prompt asking the user to upload an image is
        returned; when anything raises, a Thai "an error occurred" message
        followed by the exception text is returned instead of propagating.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"  # "Please upload an image"

    try:
        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Convert to RGB if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Process image
        inputs = processor(images=image, return_tensors="pt")

        # Put the inputs on whatever device the model actually lives on, and
        # leave any non-tensor entries as they are.
        device = model.device
        inputs = {
            key: value.to(device) if isinstance(value, torch.Tensor) else value
            for key, value in inputs.items()
        }

        # Generate text
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                num_beams=4,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # Decode output
        predicted_text = processor.batch_decode(
            outputs, skip_special_tokens=True
        )[0]
        return predicted_text.strip()
    except Exception as e:
        # UI boundary: show the failure in the output box.
        return f"เกิดข้อผิดพลาด: {str(e)}"  # "An error occurred: ..."


def _available_examples():
    """Return Gradio example rows for the example images that exist.

    Returns ``None`` when no example file is present, so the interface is
    built without an examples section instead of pointing at missing files.
    """
    rows = [[path] for path in EXAMPLE_IMAGES if os.path.exists(path)]
    return rows or None


# Initialize
print("Initializing application...")
if load_model():
    # Create Gradio interface
    demo = gr.Interface(
        fn=process_image,
        # Label: "Upload an image of Thai handwriting"
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        # Label: "Converted text"
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        # Description: "Upload an image of Thai handwriting to convert it
        # to text"
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        examples=_available_examples(),
    )

    if __name__ == "__main__":
        demo.launch()
else:
    print("Failed to initialize the application")