File size: 4,127 Bytes
5fe4ba5
d6764d1
412e4aa
7c7cb02
412e4aa
 
 
 
d6764d1
 
 
 
 
 
 
 
8e377cb
412e4aa
d6764d1
 
 
 
 
 
 
412e4aa
7c7cb02
 
 
 
 
 
 
 
 
 
412e4aa
8e377cb
412e4aa
 
d6764d1
412e4aa
d6764d1
412e4aa
d6764d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412e4aa
 
 
 
8e377cb
412e4aa
 
d6764d1
 
 
412e4aa
 
 
 
 
 
 
 
d6764d1
 
8e377cb
 
 
 
 
 
 
 
 
 
 
 
 
7c7cb02
d6764d1
7c7cb02
 
 
 
8e377cb
7c7cb02
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from urllib.parse import parse_qs, urlparse

import nltk
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from youtube_transcript_api import YouTubeTranscriptApi

# Download NLTK data and load models and tokenizers.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the heavy objects are built inside a cached loader instead of
# being re-created on each rerun.
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'

# st.cache_resource only exists on newer Streamlit releases; when it is missing
# fall back to an identity decorator, which simply loads on every run.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_resources():
    """Fetch the NLTK data and build every model/tokenizer used by the app.

    Returns:
        A tuple ``(summary_model, summary_tokenizer, tag_tokenizer,
        tag_model, captioner)`` in exactly that order.
    """
    nltk.download('punkt')

    loaded_summary_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
    loaded_summary_tokenizer = T5Tokenizer.from_pretrained(summary_model_name)

    loaded_tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
    loaded_tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")

    loaded_captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

    return (
        loaded_summary_model,
        loaded_summary_tokenizer,
        loaded_tag_tokenizer,
        loaded_tag_model,
        loaded_captioner,
    )


# Module-level names used by the rest of the script.
summary_model, summary_tokenizer, tag_tokenizer, tag_model, captioner = _load_resources()

# Function to summarize text
def summarize_text(text, prefix):
    """Generate a summary of ``text`` with the module-level summary model.

    Args:
        text: The text to summarize.
        prefix: String prepended to ``text`` before tokenization (the app
            passes ``'summary: '`` and ``'summary brief: '``).

    Returns:
        The first decoded output sequence, with special tokens removed.
    """
    # The tokenizer returns a mapping of tensors, which is unpacked straight
    # into generate() as keyword arguments.
    encoded = summary_tokenizer(prefix + text, return_tensors="pt")
    output_ids = summary_model.generate(**encoded)
    summaries = summary_tokenizer.batch_decode(
        output_ids,
        skip_special_tokens=True,
    )
    return summaries[0]

def _extract_video_id(url):
    """Return the video ID contained in ``url`` (``""`` if a YouTube URL has none).

    Recognised forms are ``youtube.com/watch?v=ID`` (with any extra query
    parameters), ``youtu.be/ID`` and the ``/embed/ID``, ``/shorts/ID``,
    ``/live/ID`` and ``/v/ID`` paths. Input that is not a YouTube URL falls
    back to the text after ``watch?v=`` (or the whole stripped input), so a
    bare video ID is still accepted.
    """
    text = url.strip()
    # urlparse only populates the host when a scheme is present.
    parsed = urlparse(text if "://" in text else "https://" + text)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be":
        return segments[0] if segments else ""

    youtube_domains = ("youtube.com", "youtube-nocookie.com")
    is_youtube = host in youtube_domains or host.endswith(
        tuple("." + domain for domain in youtube_domains)
    )
    if is_youtube:
        from_query = parse_qs(parsed.query).get("v")
        if from_query:
            return from_query[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]
        return ""

    # Unknown host or bare ID: keep the historical split, minus any trailing
    # query parameters.
    return text.split("watch?v=")[-1].split("&")[0]


# Function to fetch YouTube transcript
def fetch_transcript(url):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Args:
        url: A YouTube video URL (watch, short-link, embed or shorts form) or
            a bare video ID.

    Returns:
        The transcript text on success. On any failure a string starting with
        ``"Error: "`` is returned instead of raising, so callers that look for
        "error" in the result always detect the failure.
    """
    try:
        video_id = _extract_video_id(url)
        if not video_id:
            return f"Error: could not find a video ID in {url!r}"
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(entry['text'] for entry in transcript)
    except Exception as e:
        # Errors are reported in-band; the prefix guarantees the marker the
        # UI checks for is present whatever the exception message says.
        return f"Error: {e}"

# Page heading displayed above the tabs
st.title("Multi-purpose Machine Learning App: WAVE_AI")

# One tab per feature; the labels are listed in display order
tab_labels = [
    "Text Summarization",
    "Text Tag Generation",
    "Image Captioning",
    "YouTube Transcript",
]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)

# Text Summarization Tab
with tab1:
    st.header("Text Summarization")

    input_text = st.text_area("Enter the text to summarize:", height=300)

    if st.button("Generate Summaries"):
        # Whitespace-only input counts as empty so the model is never run on
        # blank text.
        if input_text.strip():
            try:
                # Same text, two different prompt prefixes.
                title1 = summarize_text(input_text, 'summary: ')
                title2 = summarize_text(input_text, 'summary brief: ')
            except Exception as e:
                # Report failures in the page, like the other tabs do,
                # instead of letting the traceback escape.
                st.error(f"An error occurred: {e}")
            else:
                st.write("### Title 1")
                st.write(title1)
                st.write("### Title 2")
                st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")

# Text Tag Generation Tab
with tab2:
    st.header("Text Tag Generation")

    text = st.text_area("Enter the text for tag extraction:", height=200)

    if st.button("Generate Tags"):
        # Whitespace-only input counts as empty.
        if text.strip():
            try:
                inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
                output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
                decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
                # The decoded output is a ", "-separated list. Drop blank
                # entries and de-duplicate with dict.fromkeys, which keeps the
                # order the model produced (a set would shuffle it).
                candidates = (tag.strip() for tag in decoded_output.strip().split(", "))
                tags = list(dict.fromkeys(tag for tag in candidates if tag))
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")

# Image Captioning Tab
with tab3:
    st.header("Image Captioning Extractor")

    image_url = st.text_input("Enter the URL of the image:")

    # There is no button in this tab: any non-empty URL is previewed and
    # captioned whenever this code runs.
    if image_url:
        try:
            st.image(
                image_url,
                caption="Provided Image",
                use_column_width=True,
            )
            predictions = captioner(image_url)
            st.write("**Generated Caption:**")
            # Show the generated text of the first prediction.
            first_prediction = predictions[0]
            st.write(first_prediction['generated_text'])
        except Exception as err:
            st.error(f"An error occurred: {err}")

# YouTube Transcript Tab
with tab4:
    st.header("YouTube Video Transcript Extractor")

    youtube_url = st.text_input("Enter YouTube URL:")
    fetch_requested = st.button("Get Transcript")

    if fetch_requested and not youtube_url:
        st.warning("Please enter a URL.")
    elif fetch_requested:
        transcript = fetch_transcript(youtube_url)
        # fetch_transcript returns a string either way; the result is treated
        # as a failure when it contains "error" (case-insensitive).
        if "error" in transcript.lower():
            st.error(f"An error occurred: {transcript}")
        else:
            st.success("Transcript successfully fetched!")
            st.text_area("Transcript", transcript, height=300)