pi_project_2023

Build error

App Files Community

ASokirka committed on Jun 13, 2024

Commit

03bc94b

verified ·

1 Parent(s): 9663a29

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -154

app.py DELETED Viewed

@@ -1,154 +0,0 @@
-import os
-import re
-import streamlit as st
-import googleapiclient.discovery
-import pandas as pd
-from transformers import pipeline
-import matplotlib.pyplot as plt
-import seaborn as sns
-st.title('Анализатор комментариев :red[YouTube] :sunglasses:')
-# Инициализируем модель Hugging Face для анализа тональности текста
-# Кэшируем ресурс для одной загрузки модели на все сессии
-#@st.cache_resource
-def load_model():
-    """
-    Loads the 'blanchefort/rubert-base-cased-sentiment' model from HuggingFace
-    and saves to cache for consecutive loads.
-    """
-    model = pipeline(
-        "sentiment-analysis",
-        "blanchefort/rubert-base-cased-sentiment")
-    return model
-def extract_video_id(url: str) -> str:
-    """
-    Extracts the video ID from a YouTube video URL.
-    Args:       url (str): The YouTube video URL.
-    Returns:    str: The extracted video ID,
-                or an empty string if the URL is not valid.
-    """
-    pattern = r"(?<=v=)[\w-]+(?=&|\b)"
-    match = re.search(pattern, url)
-    if match:
-        return match.group()
-    else:
-        return ""
-def download_comments(video_id: str) -> pd.DataFrame:
-    """
-    Downloads comments from a YouTube video based on the provided video ID
-    and returns them as a DataFrame.
-    Args: video_id (str): The video ID of the YouTube video.
-    Returns: DataFrame: A DataFrame containing the downloaded comments from the video.
-    """
-    DEV_KEY = os.getenv('API_KEY_YOUTUBE')
-    youtube = googleapiclient.discovery.build("youtube",
-                                              "v3",
-                                              developerKey=DEV_KEY)
-    request = youtube.commentThreads().list(part="snippet",
-                                            videoId=video_id,
-                                            maxResults=100)
-    response = request.execute()
-    comments = []
-    for item in response['items']:
-        comment = item['snippet']['topLevelComment']['snippet']
-        comments.append([comment['authorDisplayName'],
-                        comment['publishedAt'],
-                        comment['updatedAt'],
-                        comment['likeCount'],
-                        comment['textDisplay'],])
-    return pd.DataFrame(comments,
-                        columns=['author',
-                                'published_at',
-                                'updated_at',
-                                'like_count',
-                                'text',])
-def analyze_emotions_in_comments(df: pd.DataFrame) -> tuple:
-    """
-    Takes a DataFrame with comments,
-    processes the emotional sentiment of each comment in the DataFrame
-    Args: dataframe (pandas.DataFrame): DataFrame containing comments to analyze.
-    Returns: tuple: containing the updated DataFrame with the added 'Emotional Sentiment' column
-    and the total count of processed comments.
-    """
-    model = load_model()
-    selected_columns = ['text', 'author', 'published_at']
-    df = df[selected_columns]
-    res_list = []
-    res_list = model(df['text'][:513].to_list())
-    full_df = pd.concat([pd.DataFrame(res_list), df], axis=1)
-    return (full_df, len(res_list))
-def plot_heatmap_from_dataframe(df: pd.DataFrame) -> plt:
-    """
-    Visualizes the data from the input DataFrame and returns a matplotlib plot object.
-    Args: df (DataFrame): The input DataFrame containing the data to be visualized.
-    Returns: plt: A matplotlib plot object showing the visualization of the data.
-    """
-    df['published_at'] = pd.to_datetime(df['published_at'])
-    df['Date'] = df['published_at'].dt.date
-    df['Hour'] = df['published_at'].dt.hour
-    pivot_table = df.pivot_table(index='Hour',
-                                columns='Date',
-                                values='text',
-                                aggfunc='count')
-    plt.figure(figsize=(10, 6))
-    sns.heatmap(pivot_table,
-                cmap='YlGnBu')
-    plt.title('Количество комментариев по часам и датам')
-    plt.xlabel('Дата')
-    plt.ylabel('Час')
-    return plt
-def visualize_data(df: pd.DataFrame):
-    """
-    Visualizes the data from the input DataFrame and returns a matplotlib figure object.
-    Args: df (DataFrame): The input DataFrame containing the data to be visualized.
-    Returns: fig: A matplotlib figure object
-    """
-    data = df['label'].value_counts()
-    fig, ax = plt.subplots()
-    plt.title("Эмоциональная окраска комментариев на YouTube")
-    label = data.index
-    ax.pie(data, labels=label, autopct='%1.1f%%')
-    return fig
-def change_url():
-    st.session_state.start = False
-if "start" not in st.session_state:
-    st.session_state.start = False
-# Получаем id видеоролика из URL для отправки запроса
-url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
-video_id = extract_video_id(url)
-if  video_id != "":
-    if btn_start := st.button('Загрузить комментарии'):
-        st.session_state.start = True
-if st.session_state.start:
-    # Выводим таблицу с результатами на странице
-    comments_df = download_comments(video_id)
-    with st.spinner('Analyzing comments...'):
-        full_df,  num_comments = analyze_emotions_in_comments(comments_df)
-        st.success(f'Готово! Обработано {num_comments} комментариев.')
-    st.write(full_df)
-    st.markdown('***')
-    # Выводим heatmap комментариев по часам и датам
-    st.pyplot(plot_heatmap_from_dataframe(full_df))
-    st.markdown('***')
-    # Выводим круговую диаграмму
-    st.pyplot(visualize_data(full_df))