Spaces:
Sleeping
Sleeping
File size: 3,369 Bytes
ef22d5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import re
from urllib.parse import urlparse, parse_qs

import isodate
import pandas as pd
from googleapiclient.discovery import build

from preprocessText import preprocess
def load_api_keys(raw=None):
    """Parse a comma-separated string of API keys into a de-duplicated list.

    Args:
        raw: Comma-separated keys. When ``None``, the value of the
            ``YOUTUBE_API_KEYS`` environment variable is used (empty if unset).

    Returns:
        A list of non-empty, whitespace-stripped keys in first-seen order.
        Duplicates are dropped so the rotation never issues the same key
        twice per cycle.
    """
    if raw is None:
        raw = os.environ.get('YOUTUBE_API_KEYS', '')
    keys = []
    for candidate in raw.split(','):
        candidate = candidate.strip()
        if candidate and candidate not in keys:
            keys.append(candidate)
    return keys


# SECURITY: API keys must never be committed to source control. The keys that
# were previously hard-coded here are exposed and should be revoked/rotated in
# the Google Cloud console. Provide replacements through the YOUTUBE_API_KEYS
# environment variable (comma-separated), e.g. as a deployment secret.
api_keys = load_api_keys()

# Index of the most recently issued key; advanced by get_next_api_key().
current_key_index = 0
def get_video_id(url):
    """Extract the YouTube video id from a URL.

    Recognised shapes:
      * ``youtu.be/<id>``
      * ``youtube.com/watch?v=<id>`` (also ``www.``, ``m.`` and ``music.`` hosts)
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Host matching is case-insensitive and ignores an explicit port.

    Args:
        url: The URL to inspect.

    Returns:
        The video id as a string, or ``None`` when ``url`` is not a string,
        cannot be parsed, or does not contain a recognisable video id.
    """
    if not isinstance(url, str) or not url.strip():
        return None

    try:
        parsed_url = urlparse(url.strip())
        # ``hostname`` is lower-cased and has any ":port" suffix removed,
        # unlike ``netloc``.
        host = parsed_url.hostname or ''
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. a bad IPv6 literal).
        return None

    if host.startswith('www.'):
        host = host[len('www.'):]

    if host == 'youtu.be':
        # Keep only the first path segment so a trailing slash is not part of the id.
        segments = [part for part in parsed_url.path.split('/') if part]
        return segments[0] if segments else None

    if host in ('youtube.com', 'm.youtube.com', 'music.youtube.com'):
        query_params = parse_qs(parsed_url.query)
        if query_params.get('v'):
            return query_params['v'][0]
        segments = [part for part in parsed_url.path.split('/') if part]
        if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
            return segments[1]

    return None
def get_next_api_key():
    """Advance the module-level rotation index and return the key at the new index.

    The index is incremented (wrapping at the end of ``api_keys``) *before*
    the key is read, so with the initial index of 0 the first call returns
    ``api_keys[1]`` whenever more than one key is configured.

    Returns:
        The API key string at the new rotation position.

    Raises:
        RuntimeError: If ``api_keys`` is empty.
    """
    global current_key_index
    key_count = len(api_keys)
    if key_count == 0:
        # Fail with an actionable message instead of a bare ZeroDivisionError
        # from the modulo below.
        raise RuntimeError("No YouTube API keys are configured")
    current_key_index = (current_key_index + 1) % key_count
    return api_keys[current_key_index]
def _extract_metadata(video):
    """Flatten one ``videos.list`` item into the metadata dict used by this module."""
    snippet = video['snippet']
    # The API can omit individual counters (for example when the owner hides
    # likes or disables comments), so read them leniently and report None
    # rather than discarding the whole video.
    statistics = video.get('statistics', {})
    return {
        'title': snippet['title'],
        'description': snippet['description'],
        'channel_title': snippet['channelTitle'],
        'publish_date': snippet['publishedAt'],
        'duration': video['contentDetails']['duration'],
        'views': statistics.get('viewCount'),
        'likes': statistics.get('likeCount'),
        'comments': statistics.get('commentCount'),
        'category_id': snippet['categoryId'],
        'thumbnail_link': snippet['thumbnails']['default']['url'],
    }


def get_video_metadata(video_id):
    """Fetch snippet, content details and statistics for one video.

    Args:
        video_id: The YouTube video id to look up.

    Returns:
        A dict with the keys ``title``, ``description``, ``channel_title``,
        ``publish_date``, ``duration``, ``views``, ``likes``, ``comments``,
        ``category_id`` and ``thumbnail_link``. ``views``, ``likes`` and
        ``comments`` are ``None`` when the API response does not include
        them. Returns ``None`` when ``video_id`` is empty, the API returns
        no items, or any error occurs (the error is printed, not raised).
    """
    if not video_id:
        # Nothing to look up; avoid spending API quota on a doomed request.
        return None

    try:
        # Get the next API key
        api_key = get_next_api_key()
        # Set up the YouTube Data API client
        youtube = build('youtube', 'v3', developerKey=api_key)
        # Call the API to retrieve video metadata
        response = youtube.videos().list(
            part='snippet,contentDetails,statistics',
            id=video_id,
        ).execute()
        items = response.get('items') or []
        if not items:
            return None
        return _extract_metadata(items[0])
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on None for any failure.
        print("An error occurred:", str(e))
        return None
def get_metadata(url):
    """Build a one-row DataFrame of metadata and derived features for a video URL.

    The row contains every field returned by ``get_video_metadata`` with
    ``duration`` converted from an ISO 8601 string to seconds, plus:

      * ``cleanTitle``: the items returned by ``preprocess(title)`` joined
        with single spaces (assumes ``preprocess`` returns an iterable of
        strings; confirm against preprocessText).
      * ``titleLength``: character length of the raw title.
      * ``descriptionLength``: character length of the raw description.

    Args:
        url: A YouTube video URL.

    Returns:
        A single-row ``pandas.DataFrame`` on success, or the integer ``0``
        when no video id can be extracted or the metadata lookup fails.
    """
    video_id = get_video_id(url)
    if not video_id:
        # No id to look up: skip the API round-trip entirely.
        return 0

    metadata = get_video_metadata(video_id)
    if metadata is None:
        return 0

    # Create a DataFrame from the metadata
    df = pd.DataFrame([metadata])
    df['duration'] = df['duration'].apply(lambda x: isodate.parse_duration(x).total_seconds())
    df['cleanTitle'] = df['title'].apply(lambda title: ' '.join(preprocess(title)))
    df['titleLength'] = df['title'].apply(len)
    df['descriptionLength'] = df['description'].apply(len)
    return df
|