Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Community

Aekanun committed on Nov 16, 2024

Commit

069ee6d

1 Parent(s): bbf3ed3

fixing

Browse files

Files changed (1) hide show

app.py +62 -31

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import warnings
 import torch
 import gc
-from transformers import pipeline
 from PIL import Image
 import gradio as gr
 from huggingface_hub import login
@@ -11,39 +11,57 @@ warnings.filterwarnings('ignore')
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 # Global variables
-pipe = None
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
     print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
-def load_pipeline():
-    """โหลด pipeline"""
-    global pipe
-    print("กำลังโหลด pipeline...")
     try:
         hub_model_path = "Aekanun/thai-handwriting-llm"
-        # สร้าง pipeline
-        pipe = pipeline(
-            "image-to-text",
-            model=hub_model_path,
-            device="cuda" if torch.cuda.is_available() else "cpu",
-            trust_remote_code=True,
             token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
-        print("โหลด pipeline สำเร็จ!")
         return True
     except Exception as e:
-        print(f"เกิดข้อผิดพลาดในการโหลด pipeline: {str(e)}")
         return False
 def process_handwriting(image):
     """ฟังก์ชันสำหรับ Gradio interface"""
-    global pipe
     if image is None:
         return "กรุณาอัพโหลดรูปภาพ"
@@ -57,33 +75,46 @@ def process_handwriting(image):
         if image.mode != "RGB":
             image = image.convert("RGB")
-        # ใช้ pipeline ประมวลผล
-        result = pipe(
-            image,
-            prompt="""Transcribe the Thai handwritten text from the provided image.
-Only return the transcription in Thai language.""",
-            max_new_tokens=256,
-            do_sample=False
-        )
-        # รับผลลัพธ์
-        if isinstance(result, list):
-            return result[0]['generated_text'].strip()
-        return result['generated_text'].strip()
     except Exception as e:
         return f"เกิดข้อผิดพลาด: {str(e)}"
 # Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
-if load_pipeline():
     demo = gr.Interface(
         fn=process_handwriting,
         inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
         outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
         title="Thai Handwriting Recognition",
-        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
-        examples=[["example1.jpg"], ["example2.jpg"]]
     )
     if __name__ == "__main__":

 import warnings
 import torch
 import gc
+from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
 from PIL import Image
 import gradio as gr
 from huggingface_hub import login
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 # Global variables
+model = None
+processor = None
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
     print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
+def load_model_and_processor():
+    """โหลดโมเดลและ processor"""
+    global model, processor
+    print("กำลังโหลดโมเดลและ processor...")
     try:
+        # กำหนด paths
+        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
         hub_model_path = "Aekanun/thai-handwriting-llm"
+        # ตั้งค่า BitsAndBytes
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.bfloat16
+        )
+        # โหลด processor จาก base model
+        processor = AutoProcessor.from_pretrained(
+            base_model_path,
+            token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
+        )
+        # โหลดโมเดลจาก Hub
+        print("กำลังโหลดโมเดลจาก Hub...")
+        model = AutoModelForVision2Seq.from_pretrained(
+            hub_model_path,
+            device_map="auto",
+            torch_dtype=torch.bfloat16,
+            quantization_config=bnb_config,
             token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
+        print("โหลดโมเดลจาก Hub สำเร็จ!")
         return True
     except Exception as e:
+        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
         return False
 def process_handwriting(image):
     """ฟังก์ชันสำหรับ Gradio interface"""
+    global model, processor
     if image is None:
         return "กรุณาอัพโหลดรูปภาพ"
         if image.mode != "RGB":
             image = image.convert("RGB")
+        prompt = """Transcribe the Thai handwritten text from the provided image.
+Only return the transcription in Thai language."""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image", "image": image}
+                ],
+            }
+        ]
+        text = processor.apply_chat_template(messages, tokenize=False)
+        inputs = processor(text=text, images=image, return_tensors="pt")
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                do_sample=False,
+                pad_token_id=processor.tokenizer.pad_token_id
+            )
+        transcription = processor.decode(outputs[0], skip_special_tokens=True)
+        return transcription.strip()
     except Exception as e:
         return f"เกิดข้อผิดพลาด: {str(e)}"
 # Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
+if load_model_and_processor():
     demo = gr.Interface(
         fn=process_handwriting,
         inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
         outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
         title="Thai Handwriting Recognition",
+        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ"
     )
     if __name__ == "__main__":