Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Community

Aekanun committed on Nov 16, 2024

Commit

0502f94

1 Parent(s): 948e2eb

fixing

Browse files

Files changed (1) hide show

app.py +7 -29

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from PIL import Image
 import gradio as gr
 from huggingface_hub import login
-# ตั้งค่าพื้นฐาน
 warnings.filterwarnings('ignore')
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
@@ -15,30 +14,20 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 model = None
 processor = None
-# เคลียร์ CUDA cache
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
     print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
-# Login to Hugging Face Hub
-if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
-    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
-    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
-else:
-    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
 def load_model_and_processor():
     """โหลดโมเดลและ processor"""
     global model, processor
     print("กำลังโหลดโมเดลและ processor...")
     try:
-        # กำหนด paths
         base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
         hub_model_path = "Aekanun/thai-handwriting-llm"
-        # ตั้งค่า BitsAndBytes
         bnb_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_use_double_quant=True,
@@ -47,22 +36,22 @@ def load_model_and_processor():
         )
         # โหลด processor จาก base model
         processor = AutoProcessor.from_pretrained(
-            base_model_path,
-            use_auth_token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
         # โหลดโมเดลจาก Hub
-        print("กำลังโหลดโมเดลจาก Hub...")
         model = AutoModelForVision2Seq.from_pretrained(
             hub_model_path,
             device_map="auto",
             torch_dtype=torch.bfloat16,
             quantization_config=bnb_config,
-            trust_remote_code=True,
-            use_auth_token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
-        print("โหลดโมเดลจาก Hub สำเร็จ!")
         return True
     except Exception as e:
@@ -70,26 +59,21 @@ def load_model_and_processor():
         return False
 def process_handwriting(image):
-    """ฟังก์ชันสำหรับ Gradio interface"""
     global model, processor
     if image is None:
         return "กรุณาอัพโหลดรูปภาพ"
     try:
-        # Ensure image is in PIL format
         if not isinstance(image, Image.Image):
             image = Image.fromarray(image)
-        # Convert to RGB if needed
         if image.mode != "RGB":
             image = image.convert("RGB")
-        # สร้าง prompt สำหรับการถอดความ
         prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
-        # สร้าง input สำหรับโมเดล
         messages = [
             {
                 "role": "user",
@@ -100,12 +84,10 @@ Only return the transcription in Thai language."""
             }
         ]
-        # สร้าง inputs โดยตรงจาก processor
         text = processor.apply_chat_template(messages, tokenize=False)
         inputs = processor(text=text, images=image, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
-        # ทำนาย
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -114,24 +96,20 @@ Only return the transcription in Thai language."""
                 pad_token_id=processor.tokenizer.pad_token_id
             )
-        # แปลงผลลัพธ์
         transcription = processor.decode(outputs[0], skip_special_tokens=True)
         return transcription.strip()
     except Exception as e:
         return f"เกิดข้อผิดพลาด: {str(e)}"
-# Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
 if load_model_and_processor():
-    # Create Gradio interface
     demo = gr.Interface(
         fn=process_handwriting,
         inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
         outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
         title="Thai Handwriting Recognition",
-        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
-        examples=[["example1.jpg"], ["example2.jpg"]]
     )
     if __name__ == "__main__":

 import gradio as gr
 from huggingface_hub import login
 warnings.filterwarnings('ignore')
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 model = None
 processor = None
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
     print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
 def load_model_and_processor():
     """โหลดโมเดลและ processor"""
     global model, processor
     print("กำลังโหลดโมเดลและ processor...")
     try:
         base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
         hub_model_path = "Aekanun/thai-handwriting-llm"
         bnb_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_use_double_quant=True,
         )
         # โหลด processor จาก base model
+        print("Loading processor...")
         processor = AutoProcessor.from_pretrained(
+            base_model_path,
+            token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
         # โหลดโมเดลจาก Hub
+        print("Loading model...")
         model = AutoModelForVision2Seq.from_pretrained(
             hub_model_path,
             device_map="auto",
             torch_dtype=torch.bfloat16,
             quantization_config=bnb_config,
+            token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
+        print("Model loaded successfully!")
         return True
     except Exception as e:
         return False
 def process_handwriting(image):
     global model, processor
     if image is None:
         return "กรุณาอัพโหลดรูปภาพ"
     try:
         if not isinstance(image, Image.Image):
             image = Image.fromarray(image)
         if image.mode != "RGB":
             image = image.convert("RGB")
         prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
         messages = [
             {
                 "role": "user",
             }
         ]
         text = processor.apply_chat_template(messages, tokenize=False)
         inputs = processor(text=text, images=image, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 pad_token_id=processor.tokenizer.pad_token_id
             )
         transcription = processor.decode(outputs[0], skip_special_tokens=True)
         return transcription.strip()
     except Exception as e:
         return f"เกิดข้อผิดพลาด: {str(e)}"
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
 if load_model_and_processor():
     demo = gr.Interface(
         fn=process_handwriting,
         inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
         outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
         title="Thai Handwriting Recognition",
+        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ"
     )
     if __name__ == "__main__":