Aekanun committed on
Commit
1c8a6bd
·
1 Parent(s): a187193

fixed app.py with specific model type

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import warnings
3
  import torch
4
  import gc
5
- from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
6
  from PIL import Image
7
  import gradio as gr
8
  from huggingface_hub import login
@@ -35,7 +35,6 @@ def load_model_and_processor():
35
 
36
  try:
37
  # Model paths
38
- base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
39
  hub_model_path = "Aekanun/thai-handwriting-llm"
40
 
41
  # BitsAndBytes config
@@ -45,14 +44,18 @@ def load_model_and_processor():
45
  bnb_4bit_quant_type="nf4",
46
  bnb_4bit_compute_dtype=torch.bfloat16
47
  )
 
 
 
 
48
 
49
- # Load processor from base model
50
- processor = AutoProcessor.from_pretrained(base_model_path)
51
 
52
- # Load model from Hub
53
  print("กำลังโหลดโมเดลจาก Hub...")
54
  model = AutoModelForVision2Seq.from_pretrained(
55
  hub_model_path,
 
56
  device_map="auto",
57
  torch_dtype=torch.bfloat16,
58
  quantization_config=bnb_config,
@@ -76,6 +79,10 @@ def process_handwriting(image):
76
  # Ensure image is in PIL format
77
  if not isinstance(image, Image.Image):
78
  image = Image.fromarray(image)
 
 
 
 
79
 
80
  # Create prompt
81
  prompt = """Transcribe the Thai handwritten text from the provided image.
 
2
  import warnings
3
  import torch
4
  import gc
5
+ from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig, AutoConfig
6
  from PIL import Image
7
  import gradio as gr
8
  from huggingface_hub import login
 
35
 
36
  try:
37
  # Model paths
 
38
  hub_model_path = "Aekanun/thai-handwriting-llm"
39
 
40
  # BitsAndBytes config
 
44
  bnb_4bit_quant_type="nf4",
45
  bnb_4bit_compute_dtype=torch.bfloat16
46
  )
47
+
48
+ # Load model configuration
49
+ config = AutoConfig.from_pretrained(hub_model_path, trust_remote_code=True)
50
+ config.model_type = "llava" # กำหนด model_type
51
 
52
+ # Load processor and model
53
+ processor = AutoProcessor.from_pretrained(hub_model_path, trust_remote_code=True)
54
 
 
55
  print("กำลังโหลดโมเดลจาก Hub...")
56
  model = AutoModelForVision2Seq.from_pretrained(
57
  hub_model_path,
58
+ config=config,
59
  device_map="auto",
60
  torch_dtype=torch.bfloat16,
61
  quantization_config=bnb_config,
 
79
  # Ensure image is in PIL format
80
  if not isinstance(image, Image.Image):
81
  image = Image.fromarray(image)
82
+
83
+ # Convert to RGB if needed
84
+ if image.mode != "RGB":
85
+ image = image.convert("RGB")
86
 
87
  # Create prompt
88
  prompt = """Transcribe the Thai handwritten text from the provided image.