File size: 4,464 Bytes
1a517f1
a187193
592ad8f
a187193
5eb28b7
592ad8f
 
a187193
b31bef1
a187193
 
 
1a517f1
 
592ad8f
 
b31bef1
a187193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592ad8f
a187193
 
ddc67bf
a187193
 
 
5eb28b7
 
 
 
a187193
592ad8f
a187193
5eb28b7
a187193
 
 
5eb28b7
 
a187193
 
ddc67bf
592ad8f
 
a187193
592ad8f
 
a187193
 
 
 
592ad8f
 
a187193
592ad8f
1a517f1
592ad8f
 
1c8a6bd
 
 
 
592ad8f
a187193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592ad8f
 
 
a187193
 
 
592ad8f
a187193
1a517f1
a187193
 
592ad8f
ddc67bf
592ad8f
 
a187193
 
 
1a517f1
592ad8f
a187193
592ad8f
 
1a517f1
 
 
592ad8f
1a517f1
592ad8f
 
 
5eb28b7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import warnings
import torch
import gc
from transformers import LlavaForConditionalGeneration, LlavaProcessor
from PIL import Image
import gradio as gr
from huggingface_hub import login

# Runtime configuration: suppress every warning and pin CUDA to device 0.
warnings.simplefilter('ignore')
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Shared handles; they stay None until load_model_and_processor() fills them.
model = processor = None

# When a CUDA device is present, release cached GPU memory before loading.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    gc.collect()
    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")

# Authenticate against the Hugging Face Hub only if a token is in the
# environment; otherwise just print a warning and carry on.
if 'HUGGING_FACE_HUB_TOKEN' not in os.environ:
    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
else:
    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])

def load_model_and_processor():
    """Load the LLaVA processor and the 4-bit quantized model.

    Both objects are loaded into local variables first and published to the
    module-level ``model`` / ``processor`` globals only after both loads have
    succeeded, so a failure never leaves one global set without the other.

    Returns:
        bool: ``True`` when both objects were loaded, ``False`` when any step
        raised (the error text is printed).
    """
    global model, processor
    print("กำลังโหลดโมเดลและ processor...")

    # Model paths
    hub_model_path = "Aekanun/thai-handwriting-llm"

    try:
        # Imported locally so this function does not depend on the file's
        # top-level import list being changed.
        from transformers import BitsAndBytesConfig

        # NOTE(review): trust_remote_code=True allows code shipped in the
        # model repository to run locally -- confirm the repository is trusted.
        loaded_processor = LlavaProcessor.from_pretrained(
            hub_model_path,
            trust_remote_code=True,
        )

        print("กำลังโหลดโมเดลจาก Hub...")
        loaded_model = LlavaForConditionalGeneration.from_pretrained(
            hub_model_path,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            # Passing load_in_4bit directly to from_pretrained is deprecated;
            # the quantization settings belong in a BitsAndBytesConfig.
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        )
    except Exception as e:
        # Top-level boundary: report the failure and let the caller decide
        # whether the application can start.
        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
        return False

    processor = loaded_processor
    model = loaded_model
    print("โหลดโมเดลสำเร็จ!")
    return True

def process_handwriting(image):
    """Transcribe Thai handwriting from an image (Gradio callback).

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
            ``None`` when nothing was uploaded.

    Returns:
        str: The stripped text generated by the model. When ``image`` is
        ``None`` a Thai message asking for an upload is returned instead, and
        when any step raises, a Thai error message containing the exception
        text is returned.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Convert to RGB if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Create prompt
        prompt = """Transcribe the Thai handwritten text from the provided image.
Only return the transcription in Thai language."""

        # Create model inputs
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image", "image": image},
                ],
            }
        ]

        # add_generation_prompt=True ends the rendered prompt with the
        # assistant turn marker so generation starts a reply instead of
        # continuing the user turn.
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = processor(text=text, images=image, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate (greedy decoding, no gradients needed for inference)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
            )

        # generate() returns the prompt tokens followed by the new tokens;
        # drop the prompt so the instruction text is not echoed back.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = outputs[0][prompt_length:]

        # Decode output
        transcription = processor.decode(generated_ids, skip_special_tokens=True)
        return transcription.strip()

    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"เกิดข้อผิดพลาด: {str(e)}"

# Application start-up: the UI is built only when the model and processor
# loaded successfully; otherwise a failure message is printed.
print("กำลังเริ่มต้นแอปพลิเคชัน...")
if not load_model_and_processor():
    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")
else:
    # Gradio interface: one image in, one text box out.
    demo = gr.Interface(
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        fn=process_handwriting,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        examples=[["example1.jpg"], ["example2.jpg"]],
    )

    # Start the server only when this file is executed as a script.
    if __name__ == "__main__":
        demo.launch()