# Source: Hugging Face Spaces - Aekanun / Thai-HandWriting-to-Text
# Status shown on the page: Running on Zero
# File size: 3,289 Bytes
# Commit hashes listed alongside the file on the page:
#   1a517f1, a187193, 592ad8f, ef3ca12, b31bef1, ddc67bf, 17ba373, 1c8a6bd, 948e2eb

import os
import warnings
import torch
import gc
from transformers import pipeline, AutoTokenizer
from PIL import Image
import gradio as gr
from huggingface_hub import login

# Shared inference pipeline; stays None until load_pipeline() assigns it.
pipe = None

# Silence every warning category for the whole process.
warnings.simplefilter("ignore")

# Expose only GPU index 0 to CUDA.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# On GPU hosts, release cached CUDA memory and run a garbage-collection pass
# before any model is loaded.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    gc.collect()
    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")

def load_pipeline():
    """Create the image-to-text pipeline and store it in the module-level ``pipe``.

    The model ``Aekanun/thai-handwriting-llm`` is requested in bfloat16. When a
    CUDA device is available the weights are additionally loaded in 4-bit and
    placed through ``device_map``; otherwise the unquantized model is loaded on
    the CPU. The Hub token is read from ``HUGGING_FACE_HUB_TOKEN``.

    Returns:
        bool: True when the pipeline was created; False when loading raised.
        The error is printed, not propagated.
    """
    global pipe
    print("กำลังโหลด pipeline...")

    try:
        hub_model_path = "Aekanun/thai-handwriting-llm"

        if torch.cuda.is_available():
            # 4-bit (bitsandbytes) loading is only requested when a GPU is
            # present. A quantized model is placed by accelerate, so it is
            # positioned with device_map rather than device; the two options
            # are documented as conflicting.
            model_kwargs = {"load_in_4bit": True}
            placement = {"device_map": "auto"}
        else:
            model_kwargs = {}
            placement = {"device": "cpu"}

        # Build the pipeline.
        pipe = pipeline(
            "image-to-text",
            model=hub_model_path,
            # torch_dtype and trust_remote_code are dedicated pipeline()
            # parameters. pipeline() forwards its own trust_remote_code
            # together with model_kwargs, so repeating the key inside
            # model_kwargs collides with it.
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            model_kwargs=model_kwargs,
            token=os.environ.get('HUGGING_FACE_HUB_TOKEN'),
            **placement,
        )

        print("โหลด pipeline สำเร็จ!")
        return True
    except Exception as e:
        # Top-level boundary: report the failure and let the caller decide.
        print(f"เกิดข้อผิดพลาดในการโหลด pipeline: {str(e)}")
        return False

def process_handwriting(image):
    """Transcribe Thai handwriting from an image (callback for the Gradio UI).

    Args:
        image: A PIL image, or any object accepted by ``Image.fromarray``.
            ``None`` means nothing was uploaded.

    Returns:
        str: The stripped ``generated_text`` produced by the pipeline, or a
        Thai-language message asking for an upload or describing the error.
        Exceptions are never propagated to the caller.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    # Fail with a clear message instead of "'NoneType' object is not callable"
    # when the pipeline has not been loaded.
    if pipe is None:
        return "เกิดข้อผิดพลาด: ยังไม่ได้โหลด pipeline"

    try:
        # Ensure the image is a PIL image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Convert to RGB if needed.
        if image.mode != "RGB":
            image = image.convert("RGB")

        prompt = (
            "Transcribe the Thai handwritten text from the provided image.\n"
            "Only return the transcription in Thai language."
        )

        # Run the pipeline. Generation options other than max_new_tokens must
        # be passed through generate_kwargs; the image-to-text pipeline does
        # not accept do_sample as a direct call argument.
        result = pipe(
            image,
            prompt=prompt,
            max_new_tokens=256,
            generate_kwargs={"do_sample": False},
        )

        # The pipeline may return a single dict or a list of dicts.
        first = result[0] if isinstance(result, list) else result
        return first['generated_text'].strip()

    except Exception as e:
        # UI boundary: show the error text in the output box.
        return f"เกิดข้อผิดพลาด: {str(e)}"

# Initialize the application: the UI is built only when the model pipeline
# loaded successfully; otherwise a failure message is printed.
print("กำลังเริ่มต้นแอปพลิเคชัน...")
if load_pipeline():
    # Create the Gradio interface.
    demo = gr.Interface(
        fn=process_handwriting,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        # Only offer sample images that exist in the working directory, so a
        # missing example file does not break building the UI; with none
        # present, no examples section is requested.
        examples=[
            [path]
            for path in ("example1.jpg", "example2.jpg")
            if os.path.exists(path)
        ] or None,
    )

    # Launch only when executed as a script, not when imported.
    if __name__ == "__main__":
        demo.launch()
else:
    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")