Spaces:
Runtime error
Runtime error
File size: 1,938 Bytes
d6912cc 198fce8 1559fe8 8da78db 147cf50 d6912cc 2df3b4c d6912cc 1559fe8 198fce8 d6912cc 1559fe8 d6912cc 1559fe8 d8dd5f6 d6912cc 1559fe8 d6912cc 1559fe8 8884faf 1559fe8 d40505d 8884faf e3ba257 9db1a5d e584282 1559fe8 e960600 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from transformers import ViTFeatureExtractor, ViTForImageClassification
import gradio as gr
from datasets import load_dataset
import torch
# Disabled CIFAR-100 loading code, left in place for reference:
#dataset = load_dataset("cifar100")
#image = dataset["train"]["fine_label"]
#print("load and train dataset \n")

# The preprocessor and the classifier are loaded from the same pretrained
# checkpoint, so the identifier is spelled out once and shared.
_VIT_CHECKPOINT = 'google/vit-base-patch16-224'

feature_extractor = ViTFeatureExtractor.from_pretrained(_VIT_CHECKPOINT)
print("feature extractor \n")

model = ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT)
print("load model \n")
def classify(image):
    """Return the label the ViT classifier assigns to ``image``.

    The image is preprocessed with the module-level ``feature_extractor``,
    run through the module-level ``model`` with gradient tracking disabled,
    and the highest-scoring class index is mapped to its label via
    ``model.config.id2label``.

    NOTE(review): ``.item()`` needs a single-element result, so this
    presumably expects one image per call -- confirm against callers.
    """
    encoded = feature_extractor(images=image, return_tensors="pt")
    print("define input \n")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        scores = model(**encoded).logits

    # The model predicts one of the 1000 ImageNet classes.
    print("prediction \n")
    best_idx = scores.argmax(-1).item()
    return model.config.id2label[best_idx]
def image2speech(image):
    """Classify ``image`` and return spoken and written forms of the label.

    Args:
        image: The image to identify; passed straight to ``classify``.

    Returns:
        A 2-tuple ``(speech, text)`` where ``text`` is the label returned by
        ``classify`` (or ``"No object detected"`` if classification failed)
        and ``speech`` is whatever the module-level ``fastspeech`` interface
        returns for that text.
    """
    print("tts \n")
    try:
        txt = classify(image)
    except Exception as err:
        # Best-effort fallback: any classification failure still yields a
        # result for the UI. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt/SystemExit propagate, and the error is
        # printed so the failure is not silently hidden.
        print(f"classification failed: {err!r} \n")
        txt = "No object detected"
    return fastspeech(txt), txt
# Remote text-to-speech model, wrapped as a callable Gradio interface.
print("load tts interface \n")
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

# Input and output components for the app.
print("sets input and outputs \n")
camera = gr.inputs.Image(source="webcam", label="Image from your camera")
read = gr.outputs.Textbox(label="Text", type="auto")
speak = gr.outputs.Audio(label="Speech", type="auto")

# User-facing blurb; adjacent literals concatenate to the exact original text.
_DESCRIPTION = (
    "Takes a snapshot of an object, identifies it, and then tell you what it is. \n"
    " Intended use is to help the visually impaired. "
    "Models and dataset used is listed on the linked models and dataset"
)
_EXAMPLE_IMAGES = ["remotecontrol.jpg", "calculator.jpg", "cellphone.jpg"]

print("define interface \n")
app = gr.Interface(
    fn=image2speech,
    inputs=camera,
    outputs=[speak, read],  # same order as image2speech's (speech, text) return
    live=True,
    description=_DESCRIPTION,
    examples=_EXAMPLE_IMAGES,
)

print("launch interface \n")
app.launch(cache_examples=True)