import gradio as gr
from huggingface_hub import login
from transformers import AutoModelForVideoClassification, AutoFeatureExtractor, pipeline
import torch

# Optional: log in to the Hugging Face Hub. Read the API token from an
# environment variable, or enter it directly below.
# HUGGINGFACEHUB_API_TOKEN = "your_huggingface_api_token"
# login(HUGGINGFACEHUB_API_TOKEN)

# Checkpoint to serve: the classification model and its matching
# preprocessor are both loaded from Hugging Face under this name.
# model_name = "microsoft/xclip-base-patch32"
model_name = "facebook/timesformer-base-finetuned-k400"
model = AutoModelForVideoClassification.from_pretrained(model_name)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

# Place the model on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Wrap the loaded objects in a video classification pipeline. The pipeline
# takes a device index: 0 is the first CUDA device, -1 is the CPU.
video_pipeline = pipeline(
    task="video-classification",
    model=model,
    feature_extractor=feature_extractor,
    device=0 if torch.cuda.is_available() else -1,
)

# Define the function for video classification
def classify_video(video_path):
    """Classify a video and return a label-to-score mapping.

    Args:
        video_path: Location of the video to classify, handed unchanged to
            ``video_pipeline``. May be ``None`` or empty when the caller
            submits without providing a video.

    Returns:
        dict: Maps each predicted label to its score. Empty when no video
        was supplied.
    """
    # Without this guard an empty submission is forwarded to the pipeline,
    # which has nothing to classify and fails instead of returning a result.
    if not video_path:
        return {}

    predictions = video_pipeline(video_path)
    return {prediction['label']: prediction['score'] for prediction in predictions}

# Build the Gradio UI: a single video upload is passed to classify_video and
# the result is displayed in a label widget limited to the top five classes.
interface = gr.Interface(
    title="Video Classification using Hugging Face",
    description="Upload a video file and get the top 5 predicted classes using a Hugging Face video classification model.",
    fn=classify_video,
    inputs=gr.Video(label="Upload a video for classification"),
    outputs=gr.Label(label="Top 5 Predicted Classes", num_top_classes=5),
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()