Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Community

Aekanun committed on Nov 16

Commit

643d69f

•

1 Parent(s): 279bd33

fixed app.py

Browse files

Files changed (1) hide show

app.py +112 -113

app.py CHANGED Viewed

@@ -18,131 +18,130 @@ processor = None
 # Login to Hugging Face Hub
 if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
-    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
-    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
 else:
-    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
 def load_model_and_processor():
-    """โหลดโมเดลและ processor"""
-    global model, processor
-    print("กำลังโหลดโมเดลและ processor...")
-    try:
-        # Model paths
-        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-        adapter_path = "Aekanun/thai-handwriting-llm"
-        # Load processor from base model
-        print("กำลังโหลด processor...")
-        processor = AutoProcessor.from_pretrained(
-            base_model_path,
-            use_auth_token=True,
-            low_memory=True  # เพิ่ม low memory option
-        )
-        # Load base model with CPU configurations
-        print("กำลังโหลด base model...")
-        base_model = AutoModelForVision2Seq.from_pretrained(
-            base_model_path,
-            device_map={"": "cpu"},
-            torch_dtype=torch.float32,
-            trust_remote_code=True,
-            use_auth_token=True,
-            low_cpu_mem_usage=True,  # เพิ่ม low memory usage
-            offload_folder="offload"  # เพิ่ม offload folder
-        )
-        # Load adapter with CPU configurations
-        print("กำลังโหลด adapter...")
-        model = PeftModel.from_pretrained(
-            base_model,
-            adapter_path,
-            torch_dtype=torch.float32,
-            device_map={"": "cpu"},
-            use_auth_token=True,
-            low_cpu_mem_usage=True  # เพิ่ม low memory usage
-        )
-        # Clear memory
-        gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
-        print("โหลดโมเดลสำเร็จ!")
-        return True
-    except Exception as e:
-        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
-        return False
 def process_handwriting(image):
-    """ฟังก์ชันสำหรับ Gradio interface"""
-    global model, processor
-    if image is None:
-        return "กรุณาอัพโหลดรูปภาพ"
-    try:
-        # Ensure image is in PIL format
-        if not isinstance(image, Image.Image):
-            image = Image.fromarray(image)
-        # Create prompt
-        prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
-        # Create model inputs
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image", "image": image}
-                ],
-            }
-        ]
-        # Process with model
-        text = processor.apply_chat_template(messages, tokenize=False)
-        inputs = processor(text=text, images=image, return_tensors="pt")
-        inputs = {k: v.to('cpu') for k, v in inputs.items()}
-        # Generate with memory optimization
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=64,
-                do_sample=False,
-                pad_token_id=processor.tokenizer.pad_token_id,
-                use_cache=True  # ใช้ cache เพื่อประหยัด memory
-            )
-        # Clear memory after generation
-        gc.collect()
-        # Decode output
-        transcription = processor.decode(outputs[0], skip_special_tokens=True)
-        return transcription.strip()
-    except Exception as e:
-        return f"เกิดข้อผิดพลาด: {str(e)}"
 # Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
 if load_model_and_processor():
-    # Create Gradio interface with lower memory usage
-    demo = gr.Interface(
-        fn=process_handwriting,
-        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
-        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
-        title="Thai Handwriting Recognition",
-        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
-        examples=[["example1.jpg"], ["example2.jpg"]],
-        cache_examples=False  # ไม่ cache examples เพื่อประหยัด memory
-    )
-    if __name__ == "__main__":
-        demo.launch(
-            share=False,  # ไม่แชร์ public URL
-            show_error=True,  # แสดง error messages
-            enable_queue=False  # ไม่ใช้ queue เพื่อประหยัด memory
-        )
 else:
-    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")

 # Login to Hugging Face Hub
 if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
+   print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
+   login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
 else:
+   print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
 def load_model_and_processor():
+   """โหลดโมเดลและ processor"""
+   global model, processor
+   print("กำลังโหลดโมเดลและ processor...")
+   try:
+       # Model paths
+       base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+       adapter_path = "Aekanun/thai-handwriting-llm"
+       # Load processor from base model
+       print("กำลังโหลด processor...")
+       processor = AutoProcessor.from_pretrained(
+           base_model_path,
+           use_auth_token=True,
+           low_memory=True  # เพิ่ม low memory option
+       )
+       # Load base model with CPU configurations
+       print("กำลังโหลด base model...")
+       base_model = AutoModelForVision2Seq.from_pretrained(
+           base_model_path,
+           device_map={"": "cpu"},
+           torch_dtype=torch.float32,
+           trust_remote_code=True,
+           use_auth_token=True,
+           low_cpu_mem_usage=True,  # เพิ่ม low memory usage
+           offload_folder="offload"  # เพิ่ม offload folder
+       )
+       # Load adapter with CPU configurations
+       print("กำลังโหลด adapter...")
+       model = PeftModel.from_pretrained(
+           base_model,
+           adapter_path,
+           torch_dtype=torch.float32,
+           device_map={"": "cpu"},
+           use_auth_token=True,
+           low_cpu_mem_usage=True  # เพิ่ม low memory usage
+       )
+       # Clear memory
+       gc.collect()
+       torch.cuda.empty_cache() if torch.cuda.is_available() else None
+       print("โหลดโมเดลสำเร็จ!")
+       return True
+   except Exception as e:
+       print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
+       return False
 def process_handwriting(image):
+   """ฟังก์ชันสำหรับ Gradio interface"""
+   global model, processor
+   if image is None:
+       return "กรุณาอัพโหลดรูปภาพ"
+   try:
+       # Ensure image is in PIL format
+       if not isinstance(image, Image.Image):
+           image = Image.fromarray(image)
+       # Create prompt
+       prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
+       # Create model inputs
+       messages = [
+           {
+               "role": "user",
+               "content": [
+                   {"type": "text", "text": prompt},
+                   {"type": "image", "image": image}
+               ],
+           }
+       ]
+       # Process with model
+       text = processor.apply_chat_template(messages, tokenize=False)
+       inputs = processor(text=text, images=image, return_tensors="pt")
+       inputs = {k: v.to('cpu') for k, v in inputs.items()}
+       # Generate with memory optimization
+       with torch.no_grad():
+           outputs = model.generate(
+               **inputs,
+               max_new_tokens=256,
+               do_sample=False,
+               pad_token_id=processor.tokenizer.pad_token_id,
+               use_cache=True  # ใช้ cache เพื่อประหยัด memory
+           )
+       # Clear memory after generation
+       gc.collect()
+       # Decode output
+       transcription = processor.decode(outputs[0], skip_special_tokens=True)
+       return transcription.strip()
+   except Exception as e:
+       return f"เกิดข้อผิดพลาด: {str(e)}"
 # Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
 if load_model_and_processor():
+   # Create Gradio interface with lower memory usage
+   demo = gr.Interface(
+       fn=process_handwriting,
+       inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
+       outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
+       title="Thai Handwriting Recognition",
+       description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
+       examples=[["example1.jpg"], ["example2.jpg"]],
+       cache_examples=False  # ไม่ cache examples เพื่อประหยัด memory
+   )
+   if __name__ == "__main__":
+       demo.launch(
+           share=False,  # ไม่แชร์ public URL
+           show_error=True  # แสดง error messages
+       )
 else:
+   print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")