Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Community

Aekanun committed on Nov 16

Commit

279bd33

•

1 Parent(s): 531f528

fixed app.py with low mem

Browse files

Files changed (1) hide show

app.py +115 -97

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ from huggingface_hub import login
 # Basic settings
 warnings.filterwarnings('ignore')
-os.environ["CUDA_VISIBLE_DEVICES"] = ""  # ปิดการใช้ CUDA
 # Global variables
 model = None
@@ -18,113 +18,131 @@ processor = None
 # Login to Hugging Face Hub
 if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
-   print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
-   login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
 else:
-   print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
 def load_model_and_processor():
-   """โหลดโมเดลและ processor"""
-   global model, processor
-   print("กำลังโหลดโมเดลและ processor...")
-   try:
-       # Model paths
-       base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-       adapter_path = "Aekanun/thai-handwriting-llm"
-       # Load processor from base model
-       print("กำลังโหลด processor...")
-       processor = AutoProcessor.from_pretrained(base_model_path, use_auth_token=True)
-       # Load base model
-       print("กำลังโหลด base model...")
-       base_model = AutoModelForVision2Seq.from_pretrained(
-           base_model_path,
-           device_map={"": "cpu"},  # ใช้ CPU
-           torch_dtype=torch.float32,  # ใช้ float32 แทน bfloat16
-           trust_remote_code=True,
-           use_auth_token=True
-       )
-       # Load adapter
-       print("กำลังโหลด adapter...")
-       model = PeftModel.from_pretrained(
-           base_model,
-           adapter_path,
-           torch_dtype=torch.float32,  # ใช้ float32
-           device_map={"": "cpu"},  # ใช้ CPU
-           use_auth_token=True
-       )
-       print("โหลดโมเดลสำเร็จ!")
-       return True
-   except Exception as e:
-       print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
-       return False
 def process_handwriting(image):
-   """ฟังก์ชันสำหรับ Gradio interface"""
-   global model, processor
-   if image is None:
-       return "กรุณาอัพโหลดรูปภาพ"
-   try:
-       # Ensure image is in PIL format
-       if not isinstance(image, Image.Image):
-           image = Image.fromarray(image)
-       # Create prompt
-       prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
-       # Create model inputs
-       messages = [
-           {
-               "role": "user",
-               "content": [
-                   {"type": "text", "text": prompt},
-                   {"type": "image", "image": image}
-               ],
-           }
-       ]
-       # Process with model
-       text = processor.apply_chat_template(messages, tokenize=False)
-       inputs = processor(text=text, images=image, return_tensors="pt")
-       # Move inputs to CPU
-       inputs = {k: v.to('cpu') for k, v in inputs.items()}
-       # Generate
-       with torch.no_grad():
-           outputs = model.generate(
-               **inputs,
-               max_new_tokens=256,
-               do_sample=False,
-               pad_token_id=processor.tokenizer.pad_token_id
-           )
-       # Decode output
-       transcription = processor.decode(outputs[0], skip_special_tokens=True)
-       return transcription.strip()
-   except Exception as e:
-       return f"เกิดข้อผิดพลาด: {str(e)}"
 # Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
 if load_model_and_processor():
-   # Create Gradio interface
-   demo = gr.Interface(
-       fn=process_handwriting,
-       inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
-       outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
-       title="Thai Handwriting Recognition",
-       description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
-       examples=[["example1.jpg"], ["example2.jpg"]]
-   )
-   if __name__ == "__main__":
-       demo.launch()
 else:
-   print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")

 # Basic settings
 warnings.filterwarnings('ignore')
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
 # Global variables
 model = None
 # Login to Hugging Face Hub
 if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
+    print("กำลังเข้าสู่ระบบ Hugging Face Hub...")
+    login(token=os.environ['HUGGING_FACE_HUB_TOKEN'])
 else:
+    print("คำเตือน: ไม่พบ HUGGING_FACE_HUB_TOKEN")
 def load_model_and_processor():
+    """โหลดโมเดลและ processor"""
+    global model, processor
+    print("กำลังโหลดโมเดลและ processor...")
+    try:
+        # Model paths
+        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+        adapter_path = "Aekanun/thai-handwriting-llm"
+        # Load processor from base model
+        print("กำลังโหลด processor...")
+        processor = AutoProcessor.from_pretrained(
+            base_model_path,
+            use_auth_token=True,
+            low_memory=True  # เพิ่ม low memory option
+        )
+        # Load base model with CPU configurations
+        print("กำลังโหลด base model...")
+        base_model = AutoModelForVision2Seq.from_pretrained(
+            base_model_path,
+            device_map={"": "cpu"},
+            torch_dtype=torch.float32,
+            trust_remote_code=True,
+            use_auth_token=True,
+            low_cpu_mem_usage=True,  # เพิ่ม low memory usage
+            offload_folder="offload"  # เพิ่ม offload folder
+        )
+        # Load adapter with CPU configurations
+        print("กำลังโหลด adapter...")
+        model = PeftModel.from_pretrained(
+            base_model,
+            adapter_path,
+            torch_dtype=torch.float32,
+            device_map={"": "cpu"},
+            use_auth_token=True,
+            low_cpu_mem_usage=True  # เพิ่ม low memory usage
+        )
+        # Clear memory
+        gc.collect()
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        print("โหลดโมเดลสำเร็จ!")
+        return True
+    except Exception as e:
+        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
+        return False
 def process_handwriting(image):
+    """ฟังก์ชันสำหรับ Gradio interface"""
+    global model, processor
+    if image is None:
+        return "กรุณาอัพโหลดรูปภาพ"
+    try:
+        # Ensure image is in PIL format
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        # Create prompt
+        prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
+        # Create model inputs
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image", "image": image}
+                ],
+            }
+        ]
+        # Process with model
+        text = processor.apply_chat_template(messages, tokenize=False)
+        inputs = processor(text=text, images=image, return_tensors="pt")
+        inputs = {k: v.to('cpu') for k, v in inputs.items()}
+        # Generate with memory optimization
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=64,
+                do_sample=False,
+                pad_token_id=processor.tokenizer.pad_token_id,
+                use_cache=True  # ใช้ cache เพื่อประหยัด memory
+            )
+        # Clear memory after generation
+        gc.collect()
+        # Decode output
+        transcription = processor.decode(outputs[0], skip_special_tokens=True)
+        return transcription.strip()
+    except Exception as e:
+        return f"เกิดข้อผิดพลาด: {str(e)}"
 # Initialize application
 print("กำลังเริ่มต้นแอปพลิเคชัน...")
 if load_model_and_processor():
+    # Create Gradio interface with lower memory usage
+    demo = gr.Interface(
+        fn=process_handwriting,
+        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
+        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
+        title="Thai Handwriting Recognition",
+        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
+        examples=[["example1.jpg"], ["example2.jpg"]],
+        cache_examples=False  # ไม่ cache examples เพื่อประหยัด memory
+    )
+    if __name__ == "__main__":
+        demo.launch(
+            share=False,  # ไม่แชร์ public URL
+            show_error=True,  # แสดง error messages
+            enable_queue=False  # ไม่ใช้ queue เพื่อประหยัด memory
+        )
 else:
+    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")