Aekanun committed on
Commit
17ba373
·
1 Parent(s): 0502f94
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import warnings
3
  import torch
4
  import gc
5
- from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
6
  from PIL import Image
7
  import gradio as gr
8
  from huggingface_hub import login
@@ -28,6 +28,14 @@ def load_model_and_processor():
28
  base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
29
  hub_model_path = "Aekanun/thai-handwriting-llm"
30
 
 
 
 
 
 
 
 
 
31
  bnb_config = BitsAndBytesConfig(
32
  load_in_4bit=True,
33
  bnb_4bit_use_double_quant=True,
@@ -46,6 +54,7 @@ def load_model_and_processor():
46
  print("Loading model...")
47
  model = AutoModelForVision2Seq.from_pretrained(
48
  hub_model_path,
 
49
  device_map="auto",
50
  torch_dtype=torch.bfloat16,
51
  quantization_config=bnb_config,
@@ -59,15 +68,18 @@ def load_model_and_processor():
59
  return False
60
 
61
  def process_handwriting(image):
 
62
  global model, processor
63
 
64
  if image is None:
65
  return "กรุณาอัพโหลดรูปภาพ"
66
 
67
  try:
 
68
  if not isinstance(image, Image.Image):
69
  image = Image.fromarray(image)
70
 
 
71
  if image.mode != "RGB":
72
  image = image.convert("RGB")
73
 
@@ -102,14 +114,17 @@ Only return the transcription in Thai language."""
102
  except Exception as e:
103
  return f"เกิดข้อผิดพลาด: {str(e)}"
104
 
 
105
  print("กำลังเริ่มต้นแอปพลิเคชัน...")
106
  if load_model_and_processor():
 
107
  demo = gr.Interface(
108
  fn=process_handwriting,
109
  inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
110
  outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
111
  title="Thai Handwriting Recognition",
112
- description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ"
 
113
  )
114
 
115
  if __name__ == "__main__":
 
2
  import warnings
3
  import torch
4
  import gc
5
+ from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig, AutoConfig
6
  from PIL import Image
7
  import gradio as gr
8
  from huggingface_hub import login
 
28
  base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
29
  hub_model_path = "Aekanun/thai-handwriting-llm"
30
 
31
+ # Load and set config
32
+ config = AutoConfig.from_pretrained(
33
+ hub_model_path,
34
+ trust_remote_code=True,
35
+ token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
36
+ )
37
+ config.model_type = "vision2seq"
38
+
39
  bnb_config = BitsAndBytesConfig(
40
  load_in_4bit=True,
41
  bnb_4bit_use_double_quant=True,
 
54
  print("Loading model...")
55
  model = AutoModelForVision2Seq.from_pretrained(
56
  hub_model_path,
57
+ config=config,
58
  device_map="auto",
59
  torch_dtype=torch.bfloat16,
60
  quantization_config=bnb_config,
 
68
  return False
69
 
70
  def process_handwriting(image):
71
+ """ฟังก์ชันสำหรับ Gradio interface"""
72
  global model, processor
73
 
74
  if image is None:
75
  return "กรุณาอัพโหลดรูปภาพ"
76
 
77
  try:
78
+ # Ensure image is in PIL format
79
  if not isinstance(image, Image.Image):
80
  image = Image.fromarray(image)
81
 
82
+ # Convert to RGB if needed
83
  if image.mode != "RGB":
84
  image = image.convert("RGB")
85
 
 
114
  except Exception as e:
115
  return f"เกิดข้อผิดพลาด: {str(e)}"
116
 
117
+ # Initialize application
118
  print("กำลังเริ่มต้นแอปพลิเคชัน...")
119
  if load_model_and_processor():
120
+ # Create Gradio interface
121
  demo = gr.Interface(
122
  fn=process_handwriting,
123
  inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
124
  outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
125
  title="Thai Handwriting Recognition",
126
+ description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
127
+ examples=[["example1.jpg"], ["example2.jpg"]]
128
  )
129
 
130
  if __name__ == "__main__":