Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,644 Bytes
1a517f1 a187193 592ad8f a187193 17ba373 592ad8f a187193 b31bef1 a187193 1a517f1 592ad8f b31bef1 a187193 592ad8f a187193 ddc67bf 948e2eb a187193 17ba373 948e2eb 0502f94 948e2eb 0502f94 a187193 592ad8f 948e2eb 0502f94 948e2eb a187193 17ba373 a187193 948e2eb 0502f94 a187193 0502f94 ddc67bf 592ad8f a187193 592ad8f a187193 17ba373 a187193 592ad8f a187193 592ad8f 17ba373 592ad8f 1c8a6bd 17ba373 1c8a6bd 948e2eb a187193 592ad8f a187193 592ad8f a187193 592ad8f ddc67bf 592ad8f 17ba373 a187193 17ba373 592ad8f a187193 592ad8f 1a517f1 17ba373 592ad8f 1a517f1 592ad8f 948e2eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
import warnings
import torch
import gc
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig, AutoConfig
from PIL import Image
import gradio as gr
from huggingface_hub import login
# Suppress every warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Restrict CUDA to the first visible device.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Module-level handles; both stay None until load_model_and_processor() runs.
model = processor = None

# When a GPU is present, release cached CUDA memory and run a garbage
# collection pass before anything large is loaded.
cuda_available = torch.cuda.is_available()
if cuda_available:
    torch.cuda.empty_cache()
    gc.collect()
    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
def load_model_and_processor():
    """Load the model and processor into the module-level globals.

    The processor is taken from the base Llama 3.2 Vision checkpoint, while
    the weights come from the fine-tuned Hub repository and are loaded with
    4-bit NF4 quantization (double quantization, bfloat16 compute). The Hub
    access token is read from the ``HUGGING_FACE_HUB_TOKEN`` environment
    variable.

    Returns:
        True when both objects were loaded; False when any exception was
        raised along the way (the error message is printed, not re-raised).
    """
    global model, processor

    print("กำลังโหลดโมเดลและ processor...")
    try:
        base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
        hub_model_path = "Aekanun/thai-handwriting-llm"
        # Same credential for every Hub request below.
        hub_auth = {"token": os.environ.get('HUGGING_FACE_HUB_TOKEN')}

        # Fetch the fine-tuned repo's config and override its model type.
        model_config = AutoConfig.from_pretrained(
            hub_model_path,
            trust_remote_code=True,
            **hub_auth,
        )
        model_config.model_type = "vision2seq"

        quantization = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        # The processor comes from the base model.
        print("Loading processor...")
        processor = AutoProcessor.from_pretrained(base_model_path, **hub_auth)

        # The weights come from the fine-tuned Hub repository.
        print("Loading model...")
        model = AutoModelForVision2Seq.from_pretrained(
            hub_model_path,
            config=model_config,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            quantization_config=quantization,
            **hub_auth,
        )
    except Exception as e:
        print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
        return False

    print("Model loaded successfully!")
    return True
def process_handwriting(image):
    """Transcribe Thai handwriting from an uploaded image (Gradio callback).

    Reads the module-level ``model`` and ``processor``.

    Args:
        image: A PIL image, or any array accepted by ``Image.fromarray``.
            ``None`` means nothing was uploaded.

    Returns:
        The stripped transcription text. When ``image`` is ``None`` or any
        exception is raised, a Thai user-facing message is returned instead
        of raising, so the UI always receives a string.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        # Normalize the input to an RGB PIL image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        if image.mode != "RGB":
            image = image.convert("RGB")

        prompt = (
            "Transcribe the Thai handwritten text from the provided image.\n"
            "Only return the transcription in Thai language."
        )
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image", "image": image},
                ],
            }
        ]

        # add_generation_prompt appends the assistant header so the model
        # answers the request instead of continuing the user turn.
        text = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # The rendered template already carries the special tokens, so the
        # tokenizer must not prepend a second BOS.
        inputs = processor(
            text=text,
            images=image,
            add_special_tokens=False,
            return_tensors="pt",
        )
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
            )

        # generate() returns prompt + continuation; decode only the newly
        # generated tokens so the prompt is not echoed back to the user.
        new_tokens = outputs[0][prompt_length:]
        transcription = processor.decode(new_tokens, skip_special_tokens=True)
        return transcription.strip()
    except Exception as e:
        return f"เกิดข้อผิดพลาด: {str(e)}"
# Application start-up: load the model first, then build the UI only if
# loading succeeded.
print("กำลังเริ่มต้นแอปพลิเคชัน...")
if not load_model_and_processor():
    print("ไม่สามารถเริ่มต้นแอปพลิเคชันได้")
else:
    # Single image in, single text box out.
    demo = gr.Interface(
        fn=process_handwriting,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        examples=[["example1.jpg"], ["example2.jpg"]],
    )

    # Launch the server only when the file is executed as a script.
    if __name__ == "__main__":
        demo.launch()