# WAVE_AI — multi-purpose Streamlit ML app
# (text summarization, tag generation, image captioning, YouTube transcripts)
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
from youtube_transcript_api import YouTubeTranscriptApi

# Download NLTK sentence-tokenizer data; quiet=True avoids log spam on reruns.
nltk.download('punkt', quiet=True)


@st.cache_resource
def _load_models():
    """Load and cache all models/tokenizers once per server process.

    Streamlit re-executes the whole script on every user interaction;
    without st.cache_resource these large models would be rebuilt on
    each rerun, making the app unusably slow.
    """
    summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
    s_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
    s_tok = T5Tokenizer.from_pretrained(summary_model_name)
    t_tok = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
    t_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
    cap = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    return s_model, s_tok, t_tok, t_model, cap


# Same module-level names as before, so the rest of the script is unchanged.
summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner = _load_models()
# Function to summarize text
def summarize_text(text, prefix):
    """Summarize *text* with the multilingual T5 summary model.

    Args:
        text: Input text to summarize.
        prefix: Task prefix understood by the model (e.g. 'summary: '
            or 'summary brief: ').

    Returns:
        The decoded summary string.
    """
    src_text = prefix + text
    # truncation=True clips over-length input to the model's limit
    # instead of erroring out on long documents.
    input_ids = summary_tokenizer(src_text, return_tensors="pt", truncation=True)
    generated_tokens = summary_model.generate(**input_ids)
    result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]
# Function to fetch a YouTube transcript
def fetch_transcript(url):
    """Fetch the full transcript text for a YouTube video URL.

    Accepts both 'watch?v=...' and 'youtu.be/...' style URLs and
    ignores any extra query parameters after the video id (the original
    split on 'watch?v=' kept everything after it, so a URL like
    '...watch?v=ID&t=42s' produced an invalid id).

    Returns:
        The transcript text with entries joined by spaces, or the error
        message string on failure (callers inspect the returned string).
    """
    if 'youtu.be/' in url:
        video_id = url.split('youtu.be/')[-1]
    else:
        video_id = url.split('watch?v=')[-1]
    # Drop trailing query parameters such as '&t=42s' or '?si=...'.
    video_id = video_id.split('&')[0].split('?')[0]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as e:
        # Error text is returned rather than raised; the UI layer
        # decides how to present it.
        return str(e)
# Streamlit app title
st.title("Multi-purpose Machine Learning App: WAVE_AI")

# Create tabs for the four independent tools
tab1, tab2, tab3, tab4 = st.tabs(["Text Summarization", "Text Tag Generation", "Image Captioning", "YouTube Transcript"])
# Text Summarization Tab
with tab1:
    st.header("Text Summarization")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate Summaries"):
        if input_text:
            # Two task prefixes produce two differently-styled summaries.
            title1 = summarize_text(input_text, 'summary: ')
            title2 = summarize_text(input_text, 'summary brief: ')
            st.write("### Title 1")
            st.write(title1)
            st.write("### Title 2")
            st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")
# Text Tag Generation Tab
with tab2:
    st.header("Text Tag Generation")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
                output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
                decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
                # De-duplicate the comma-separated tags; note set() does
                # not preserve the model's original tag order.
                tags = list(set(decoded_output.strip().split(", ")))
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")
# Image Captioning Tab
with tab3:
    st.header("Image Captioning Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    # Runs as soon as a URL is entered (no button, unlike the other tabs).
    if image_url:
        try:
            st.image(image_url, caption="Provided Image", use_column_width=True)
            caption = captioner(image_url)
            st.write("**Generated Caption:**")
            st.write(caption[0]['generated_text'])
        except Exception as e:
            st.error(f"An error occurred: {e}")
# YouTube Transcript Tab
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if youtube_url:
            transcript = fetch_transcript(youtube_url)
            # NOTE(review): fetch_transcript returns the error message as
            # a plain string on failure, so this substring check is a
            # heuristic — a genuine transcript containing the word
            # 'error' would be misclassified. Consider returning a
            # (success, text) pair instead.
            if "error" not in transcript.lower():
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
            else:
                st.error(f"An error occurred: {transcript}")
        else:
            st.warning("Please enter a URL.")