Spaces:
Sleeping
Sleeping
File size: 4,127 Bytes
5fe4ba5 d6764d1 412e4aa 7c7cb02 412e4aa d6764d1 8e377cb 412e4aa d6764d1 412e4aa 7c7cb02 412e4aa 8e377cb 412e4aa d6764d1 412e4aa d6764d1 412e4aa d6764d1 412e4aa 8e377cb 412e4aa d6764d1 412e4aa d6764d1 8e377cb 7c7cb02 d6764d1 7c7cb02 8e377cb 7c7cb02 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
from youtube_transcript_api import YouTubeTranscriptApi
# Download NLTK data and load models and tokenizers (once per server process)
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


@st.cache_resource
def _load_resources():
    """Download the NLTK data and load every model, tokenizer and pipeline.

    Streamlit re-executes the whole script on each widget interaction, so
    loading at module level would re-read all three models on every click.
    ``st.cache_resource`` keeps the returned objects alive and hands the same
    instances back on later reruns.

    Returns:
        A tuple ``(summary_model, summary_tokenizer, tag_tokenizer,
        tag_model, captioner)`` in the order the objects are created.
    """
    nltk.download('punkt')
    return (
        T5ForConditionalGeneration.from_pretrained(summary_model_name),
        T5Tokenizer.from_pretrained(summary_model_name),
        AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation"),
        AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation"),
        pipeline("image-to-text", model="Salesforce/blip-image-captioning-base"),
    )


# Same module-level names as before, so the rest of the script is unaffected.
summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner = _load_resources()
# Function to summarize text
def summarize_text(text, prefix):
    """Generate a summary of *text* with the module-level summary model.

    Args:
        text: The text to summarize.
        prefix: Task prefix prepended to *text* before tokenization
            (this file passes ``'summary: '`` and ``'summary brief: '``).

    Returns:
        The first decoded output sequence, with special tokens removed.
    """
    encoded = summary_tokenizer(prefix + text, return_tensors="pt")
    output_ids = summary_model.generate(**encoded)
    decoded = summary_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
# Function to fetch YouTube transcript
def fetch_transcript(url):
    """Fetch the transcript of a YouTube video as a single string.

    Args:
        url: A YouTube link (``watch?v=``, ``youtu.be/``, ``/embed/``,
            ``/shorts/`` or ``/live/`` form, with or without extra query
            parameters) or a bare video ID.

    Returns:
        The transcript entries joined by single spaces. On failure the
        function does not raise; it returns the exception message (or an
        ``"Error: ..."`` message for blank input) as a string instead.
    """
    try:
        video_id = _extract_video_id(url)
        if not video_id:
            # Nothing to look up; report it without calling the API.
            return "Error: no YouTube URL or video ID was provided."
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as e:
        return str(e)


def _extract_video_id(url):
    """Return the video ID contained in *url*, or ``""`` for blank input."""
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    if not candidate:
        return ""

    try:
        # urlparse only recognises the host when a scheme is present.
        parsed = urlparse(candidate if "://" in candidate else "https://" + candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        parsed, host = None, ""

    if parsed is not None:
        segments = [part for part in parsed.path.split("/") if part]
        if host == "youtu.be" or host.endswith(".youtu.be"):
            # Short links carry the ID as the first path segment.
            if segments:
                return segments[0]
        elif host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
            (".youtube.com", ".youtube-nocookie.com")
        ):
            # parse_qs isolates "v" from parameters such as &t= or &list=.
            ids = parse_qs(parsed.query).get("v")
            if ids:
                return ids[0]
            if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
                return segments[1]

    # Fallback keeps the previous behaviour (bare IDs, unrecognised hosts),
    # additionally dropping any trailing "&param=value" pairs.
    return candidate.split('watch?v=')[-1].split('&')[0]
# Page heading displayed above the tabs
st.title("Multi-purpose Machine Learning App: WAVE_AI")
# One tab per feature; labels are listed in display order
tab_labels = ["Text Summarization", "Text Tag Generation", "Image Captioning", "YouTube Transcript"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
# Text Summarization Tab: produces two summaries of the entered text,
# one per task prefix.
with tab1:
    st.header("Text Summarization")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate Summaries"):
        if not input_text:
            st.warning("Please enter some text to summarize.")
        else:
            # Both summaries are generated before anything is rendered.
            title1, title2 = [
                summarize_text(input_text, prefix)
                for prefix in ('summary: ', 'summary brief: ')
            ]
            for heading, summary in (("### Title 1", title1), ("### Title 2", title2)):
                st.write(heading)
                st.write(summary)
# Text Tag Generation Tab: generates a comma-separated tag list for the
# entered text and shows the unique tags.
with tab2:
    st.header("Text Tag Generation")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
                output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
                decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
                # Split on "," and strip each piece so spacing differences do
                # not produce near-duplicate or empty tags.
                stripped = (piece.strip() for piece in decoded_output.split(","))
                # dict.fromkeys removes duplicates while keeping the order in
                # which the tags were generated; a set would display them in
                # an arbitrary order that changes between runs.
                tags = list(dict.fromkeys(tag for tag in stripped if tag))
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")
# Image Captioning Tab: displays the image at the given URL and the caption
# produced for it. Runs whenever the URL field is non-empty (no button).
with tab3:
    st.header("Image Captioning Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    if image_url:
        try:
            st.image(image_url, caption="Provided Image", use_column_width=True)
            caption = captioner(image_url)
            st.write("**Generated Caption:**")
            # Only the first result returned by the pipeline is shown.
            first_result = caption[0]
            st.write(first_result['generated_text'])
        except Exception as e:
            # Any failure while displaying or captioning is shown in the UI.
            st.error(f"An error occurred: {e}")
# YouTube Transcript Tab: fetches and displays the transcript for a URL.
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if not youtube_url:
            st.warning("Please enter a URL.")
        else:
            transcript = fetch_transcript(youtube_url)
            # NOTE(review): fetch_transcript returns a string on both success
            # and failure, so failure is inferred from the substring "error";
            # a transcript that contains that word is reported as a failure.
            looks_like_error = "error" in transcript.lower()
            if looks_like_error:
                st.error(f"An error occurred: {transcript}")
            else:
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
|