import os
from huggingface_hub import login
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image
import gradio as gr

# Authenticate against the Hugging Face Hub when a token is provided through
# the environment; otherwise continue unauthenticated after a warning.
if os.getenv('HUGGING_FACE_HUB_TOKEN') is None:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")
else:
    print("Logging in to Hugging Face Hub...")
    login(token=os.getenv('HUGGING_FACE_HUB_TOKEN'))

# Module-level handles shared between load_model() and process_image().
# Both remain None until load_model() assigns them.
model = None
processor = None

def load_model(model_path="Aekanun/thai-handwriting-llm"):
    """Load the vision-to-text model and its processor into the module globals.

    Everything is loaded into local variables first and published to the
    ``model`` / ``processor`` globals only after every step has succeeded,
    so a failure part-way through never leaves the globals half-initialised.

    Args:
        model_path: Identifier handed to ``from_pretrained`` for both the
            processor and the model. Defaults to the Thai handwriting model.

    Returns:
        True when both globals were set; False when any step failed (the
        error is printed rather than raised).
    """
    global model, processor
    try:
        print(f"Loading model and processor from {model_path}...")

        loaded_processor = AutoProcessor.from_pretrained(model_path)
        loaded_model = AutoModelForVision2Seq.from_pretrained(model_path)

        if torch.cuda.is_available():
            loaded_model = loaded_model.to("cuda")
    except Exception as e:
        # Start-up boundary: report the problem and let the caller decide
        # what to do instead of crashing at import time.
        print(f"Error loading model: {e}")
        return False

    # Publish both handles together, only once loading fully succeeded.
    model, processor = loaded_model, loaded_processor
    return True

def process_image(image):
    """Transcribe the handwriting shown in ``image`` into text.

    Args:
        image: A PIL image, or any object ``Image.fromarray`` accepts.
            ``None`` means nothing was uploaded.

    Returns:
        The stripped text decoded from the first generated sequence; a Thai
        prompt asking for an upload when ``image`` is ``None``; or a Thai
        error message containing the exception text when any step fails.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        # Normalise the input to an RGB PIL image before preprocessing.
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")

        features = processor(images=pil_image, return_tensors="pt")
        if torch.cuda.is_available():
            # Inputs are moved to the GPU whenever one is available.
            features = {name: tensor.to("cuda") for name, tensor in features.items()}

        # Gradients are not needed for generation.
        with torch.no_grad():
            generate = model.generate
            tokenizer = processor.tokenizer
            generation_options = {
                "max_new_tokens": 100,
                "num_beams": 4,
                "pad_token_id": tokenizer.pad_token_id,
                "eos_token_id": tokenizer.eos_token_id,
            }
            generated_ids = generate(**features, **generation_options)

        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        return decoded[0].strip()
    except Exception as error:
        return f"เกิดข้อผิดพลาด: {error}"

# Start-up: load the model first; the Gradio interface is only built when
# loading succeeded, and only launched when this file is run as a script.
print("Initializing application...")
if not load_model():
    print("Failed to initialize the application")
else:
    demo = gr.Interface(
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        fn=process_image,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        examples=[["example1.jpg"], ["example2.jpg"]],
    )

    if __name__ == "__main__":
        demo.launch()