Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Files Community

Aekanun committed on Nov 16

Commit

5a19e6b

•

1 Parent(s): dfb8587

revised app.py with 512 new tokens

Browse files

Files changed (2) hide show

app.py +2 -2
app.py.success.1 +132 -0

app.py CHANGED Viewed

@@ -102,7 +102,7 @@ Only return the transcription in Thai language."""
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=256,
                 do_sample=False,
                 pad_token_id=processor.tokenizer.pad_token_id
             )
@@ -121,7 +121,7 @@ if load_model_and_processor():
         fn=process_handwriting,
         inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
         outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
-        title="Thai Handwriting Recognition",
         description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
         examples=[["example1.jpg"], ["example2.jpg"]]
     )

         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
+                max_new_tokens=512,
                 do_sample=False,
                 pad_token_id=processor.tokenizer.pad_token_id
             )
         fn=process_handwriting,
         inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
         outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
+        title="Thai Handwriting Recognition and Vision-Language",
         description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
         examples=[["example1.jpg"], ["example2.jpg"]]
     )

app.py.success.1 ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+import warnings
+import torch
+import gc
+from transformers import AutoModelForVision2Seq, AutoProcessor
+from peft import PeftModel
+from PIL import Image
+import gradio as gr
+from huggingface_hub import login
+import spaces  # เพิ่ม import spaces
+# Basic settings
+warnings.filterwarnings('ignore')
+# Global variables
+model = None
+processor = None
+# Login to Hugging Face Hub
+if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
+    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
+    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
+else:
+    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
+def load_model_and_processor():
+    """โหลดโมเดลและ processor"""
+    global model, processor
+    print("กำลังโหลดโมเดลและ processor...")
+    try:
+        # Model paths
+        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+        adapter_path = "Aekanun/thai-handwriting-llm"
+        # Load processor from base model
+        print("กำลังโหลด processor...")
+        processor = AutoProcessor.from_pretrained(
+            base_model_path,
+            use_auth_token=True
+        )
+        # Load base model
+        print("กำลังโหลด base model...")
+        base_model = AutoModelForVision2Seq.from_pretrained(
+            base_model_path,
+            device_map="auto",
+            torch_dtype=torch.float16,  # เปลี่ยนกลับเป็น float16
+            trust_remote_code=True,
+            use_auth_token=True
+        )
+        # Load adapter
+        print("กำลังโหลด adapter...")
+        model = PeftModel.from_pretrained(
+            base_model,
+            adapter_path,
+            device_map="auto",  # ให้จัดการ device map อัตโนมัติ
+            torch_dtype=torch.float16,
+            use_auth_token=True
+        )
+        print("โหลดโมเดลสำเร็จ!")
+        return True
+    except Exception as e:
+        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
+        return False
+@spaces.GPU(duration=30)  # ใช้ GPU decorator กำหนดเวลาสูงสุด 30 วินาที
+def process_handwriting(image):
+    """ฟังก์ชันสำหรับ Gradio interface"""
+    global model, processor
+    if image is None:
+        return "กรุณาอัพโหลดรูปภาพ"
+    try:
+        # Ensure image is in PIL format
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        # Create prompt
+        prompt = """Transcribe the Thai handwritten text from the provided image.
+Only return the transcription in Thai language."""
+        # Create model inputs
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image", "image": image}
+                ],
+            }
+        ]
+        # Process with model
+        text = processor.apply_chat_template(messages, tokenize=False)
+        inputs = processor(text=text, images=image, return_tensors="pt")
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        # Generate
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                do_sample=False,
+                pad_token_id=processor.tokenizer.pad_token_id
+            )
+        # Decode output
+        transcription = processor.decode(outputs[0], skip_special_tokens=True)
+        return transcription.strip()
+    except Exception as e:
+        return f"เกิดข้อผิดพลาด: {str(e)}"
+# Initialize application
+print("กำลังเริ่มต้นแอปพลิเคชัน...")
+if load_model_and_processor():
+    # Create Gradio interface
+    demo = gr.Interface(
+        fn=process_handwriting,
+        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
+        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
+        title="Thai Handwriting Recognition",
+        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
+        examples=[["example1.jpg"], ["example2.jpg"]]
+    )
+    if __name__ == "__main__":
+        demo.launch(show_error=True)
+else:
+    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")