Aekanun committed
Commit ef3ca12 · 1 Parent(s): 17ba373
Files changed (1):
  1. app.py +33 -69
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import warnings
 import torch
 import gc
-from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig, AutoConfig
+from transformers import pipeline, AutoTokenizer
 from PIL import Image
 import gradio as gr
 from huggingface_hub import login
@@ -11,65 +11,43 @@ warnings.filterwarnings('ignore')
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
 # Global variables
-model = None
-processor = None
+pipe = None
 
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
     print("CUDA cache cleared successfully")
 
-def load_model_and_processor():
-    """Load the model and processor"""
-    global model, processor
-    print("Loading model and processor...")
+def load_pipeline():
+    """Load the pipeline"""
+    global pipe
+    print("Loading pipeline...")
 
     try:
-        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
         hub_model_path = "Aekanun/thai-handwriting-llm"
 
-        # Load and set config
-        config = AutoConfig.from_pretrained(
-            hub_model_path,
-            trust_remote_code=True,
+        # Build the pipeline
+        pipe = pipeline(
+            "image-to-text",
+            model=hub_model_path,
+            device="cuda" if torch.cuda.is_available() else "cpu",
+            model_kwargs={
+                "torch_dtype": torch.bfloat16,
+                "load_in_4bit": True,
+                "trust_remote_code": True,
+            },
             token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
         )
-        config.model_type = "vision2seq"
-
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16
-        )
-
-        # Load the processor from the base model
-        print("Loading processor...")
-        processor = AutoProcessor.from_pretrained(
-            base_model_path,
-            token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
-        )
-
-        # Load the model from the Hub
-        print("Loading model...")
-        model = AutoModelForVision2Seq.from_pretrained(
-            hub_model_path,
-            config=config,
-            device_map="auto",
-            torch_dtype=torch.bfloat16,
-            quantization_config=bnb_config,
-            token=os.environ.get('HUGGING_FACE_HUB_TOKEN')
-        )
-        print("Model loaded successfully!")
 
+        print("Pipeline loaded successfully!")
         return True
     except Exception as e:
-        print(f"Error loading model: {str(e)}")
+        print(f"Error loading pipeline: {str(e)}")
         return False
 
 def process_handwriting(image):
     """Function for the Gradio interface"""
-    global model, processor
+    global pipe
 
     if image is None:
         return "Please upload an image"
@@ -83,40 +61,26 @@ def process_handwriting(image):
         if image.mode != "RGB":
             image = image.convert("RGB")
 
-        prompt = """Transcribe the Thai handwritten text from the provided image.
-        Only return the transcription in Thai language."""
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image", "image": image}
-                ],
-            }
-        ]
-
-        text = processor.apply_chat_template(messages, tokenize=False)
-        inputs = processor(text=text, images=image, return_tensors="pt")
-        inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=256,
-                do_sample=False,
-                pad_token_id=processor.tokenizer.pad_token_id
-            )
-
-        transcription = processor.decode(outputs[0], skip_special_tokens=True)
-        return transcription.strip()
+        # Run the image through the pipeline
+        result = pipe(
+            image,
+            prompt="""Transcribe the Thai handwritten text from the provided image.
+            Only return the transcription in Thai language.""",
+            max_new_tokens=256,
+            do_sample=False
+        )
+
+        # Collect the result
+        if isinstance(result, list):
+            return result[0]['generated_text'].strip()
+        return result['generated_text'].strip()
 
     except Exception as e:
         return f"Error: {str(e)}"
 
 # Initialize application
 print("Starting the application...")
-if load_model_and_processor():
+if load_pipeline():
     # Create Gradio interface
     demo = gr.Interface(
         fn=process_handwriting,
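
For reference, a minimal standalone sketch of the pipeline path this commit switches to. It mirrors load_pipeline() above; HUGGING_FACE_HUB_TOKEN is assumed to be set, and sample.png is a hypothetical test image, not a file from this repo:

import os
import torch
from PIL import Image
from transformers import pipeline

# Same construction as load_pipeline() in app.py. "load_in_4bit" requires the
# bitsandbytes package and a CUDA GPU; some transformers versions also reject
# an explicit device= together with 4-bit loading, in which case omit device
# and let accelerate place the model.
pipe = pipeline(
    "image-to-text",
    model="Aekanun/thai-handwriting-llm",
    device="cuda" if torch.cuda.is_available() else "cpu",
    model_kwargs={"torch_dtype": torch.bfloat16, "load_in_4bit": True},
    token=os.environ.get("HUGGING_FACE_HUB_TOKEN"),
)

image = Image.open("sample.png").convert("RGB")  # hypothetical input image
result = pipe(image, max_new_tokens=256)         # returns [{"generated_text": ...}]
print(result[0]["generated_text"].strip())

Compared with the removed AutoModelForVision2Seq path, the pipeline folds processor loading, chat templating, generation, and decoding into a single call, at the cost of the fine-grained quantization control (nf4, double quantization) that the old BitsAndBytesConfig provided.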