# Hugging Face Space status: Running on Zero
import os
from pathlib import Path

import gradio as gr
import torch
from huggingface_hub import login
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
# Login to Hugging Face Hub
# The token is read from the environment. An unset *or empty* variable is
# treated as "no token": the script prints a warning and carries on without
# calling login().
_hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if _hf_token:
    print("Logging in to Hugging Face Hub...")
    login(token=_hf_token)
else:
    print("Warning: HUGGING_FACE_HUB_TOKEN not found")
# Global variables
# Both names are assigned by load_model() and read by process_image(); they
# remain None until load_model() assigns them.
model = None
processor = None
def load_model():
    """Load the handwriting model and its processor into the module globals.

    Both objects are fetched with ``from_pretrained`` and the model is moved
    to CUDA when ``torch.cuda.is_available()`` reports a device. The globals
    ``model`` and ``processor`` are assigned together, only after every step
    has succeeded, so a failure part-way through never leaves one of them set
    and the other still ``None``.

    Returns:
        bool: True when loading succeeded; False when any step raised. The
        error is printed rather than propagated.
    """
    global model, processor
    model_path = "Aekanun/thai-handwriting-llm"
    try:
        print(f"Loading model and processor from {model_path}...")
        loaded_processor = AutoProcessor.from_pretrained(model_path)
        loaded_model = AutoModelForVision2Seq.from_pretrained(model_path)
        if torch.cuda.is_available():
            loaded_model = loaded_model.to("cuda")
    except Exception as e:
        # Startup boundary: report the problem and let the caller decide what
        # to do with a failed load.
        print(f"Error loading model: {e}")
        return False

    # Commit both globals at once, after everything above has succeeded.
    model, processor = loaded_model, loaded_processor
    return True
def process_image(image):
    """Run the loaded model on an image and return the decoded text.

    Args:
        image: A ``PIL.Image.Image``, or any object ``Image.fromarray``
            accepts. ``None`` means no image was supplied.

    Returns:
        str: The decoded text with surrounding whitespace stripped; a fixed
        prompt asking for an upload when ``image`` is None; or a message
        starting with "เกิดข้อผิดพลาด: " when the model is not loaded or any
        step raises. Exceptions are never propagated to the caller.
    """
    if image is None:
        return "กรุณาอัพโหลดรูปภาพ"

    try:
        # Give a readable message instead of "'NoneType' object is not
        # callable" when the globals were never populated.
        if model is None or processor is None:
            return "เกิดข้อผิดพลาด: model is not loaded"

        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Convert to RGB if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Process image
        inputs = processor(images=image, return_tensors="pt")

        # Place tensor inputs on the device the model actually lives on, so
        # the two can never disagree; non-tensor entries are passed through.
        device = model.device
        inputs = {
            key: value.to(device) if torch.is_tensor(value) else value
            for key, value in inputs.items()
        }

        # Generate text
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                num_beams=4,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # Decode output
        predicted_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        return predicted_text.strip()
    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"เกิดข้อผิดพลาด: {e}"
# Initialize
print("Initializing application...")
if load_model():
    # Offer only the example images that exist (paths are relative to the
    # working directory), so Gradio is never handed a path to a missing file.
    # With no examples available, None is passed instead of an empty list.
    _example_rows = [
        [name] for name in ("example1.jpg", "example2.jpg") if Path(name).is_file()
    ]

    # Create Gradio interface
    demo = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="อัพโหลดรูปลายมือเขียนภาษาไทย"),
        outputs=gr.Textbox(label="ข้อความที่แปลงได้"),
        title="Thai Handwriting Recognition",
        description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
        examples=_example_rows or None,
    )

    # Launch only when executed as a script, not when imported.
    if __name__ == "__main__":
        demo.launch()
else:
    print("Failed to initialize the application")