Spaces:
Runtime error
Runtime error
File size: 2,441 Bytes
4b8361a 0030bc6 4b8361a 0030bc6 4b8361a 0030bc6 4b8361a 0030bc6 4b8361a 0030bc6 4b8361a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
import json
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from utils import download_song
import time
def set_env():
    """Export the Spotify API credentials as environment variables.

    Reads ``auth/spotify.json`` from the directory containing this module,
    then copies its ``client_id`` and ``client_secret`` entries into
    ``SPOTIPY_CLIENT_ID`` and ``SPOTIPY_CLIENT_SECRET``. The redirect URI
    ``SPOTIPY_REDIRECT_URI`` is always set to a fixed localhost callback.

    Raises:
        FileNotFoundError: If the credentials file does not exist.
        KeyError: If the JSON lacks ``client_id`` or ``client_secret``.
    """
    credentials_file = Path(__file__).parent / "auth" / "spotify.json"
    config = json.loads(credentials_file.read_text())

    env = os.environ
    env["SPOTIPY_CLIENT_ID"] = config["client_id"]
    env["SPOTIPY_CLIENT_SECRET"] = config["client_secret"]
    env["SPOTIPY_REDIRECT_URI"] = "https://localhost:8080/callback"
# Runs at import time so the Spotify credentials are already in os.environ
# before any function below constructs a Spotify client.
set_env()
def get_song_preview_url(song_name:str, spotify:spotipy.Spotify, artist:str = None) -> str | None:
info = {
"track": song_name
}
if artist is not None:
info["artist"] = artist
query = " ".join(f"{k}: {v}" for k,v in info.items())
results = spotify.search(query,type="track", limit=1)["tracks"]["items"]
valid_results = len(results) > 0 and results[0] is not None and "preview_url" in results[0]
if not valid_results:
return None
song = results[0]
return song["preview_url"]
def patch_missing_songs(
    df: pd.DataFrame,
    after: int = 18418,
) -> pd.DataFrame:
    """Fill in missing ``Sample`` preview URLs by searching Spotify.

    A row is considered missing when its ``Sample`` value is NaN or the
    placeholder string ``"."``. For each such row the ``Title`` and
    ``Artist`` columns are used to search Spotify, and any preview URL found
    is written back into ``Sample``.

    Args:
        df: Table with ``Title``, ``Artist`` and ``Sample`` columns. It is
            modified in place.
        after: Number of missing rows to skip before patching starts, e.g. to
            resume an interrupted run. The default of 18418 preserves the
            offset that used to be hard-coded here; pass ``0`` to process
            every missing row.

    Returns:
        The same ``df`` object, with the patched ``Sample`` values.
    """
    spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

    # Treat the "." placeholder the same as a real NaN when finding gaps.
    samples = df["Sample"].replace(".", np.nan)
    pending = df[samples.isna()].iloc[after:]

    # Collect only the rows for which Spotify actually returned a preview.
    patched_urls = {}
    for idx, row in tqdm(pending.iterrows(), total=len(pending)):
        preview_url = get_song_preview_url(row["Title"], spotify, row["Artist"])
        if preview_url is not None:
            patched_urls[idx] = preview_url

    if patched_urls:
        patch = pd.DataFrame(
            {"Sample": list(patched_urls.values())},
            index=list(patched_urls.keys()),
        )
        # update() aligns on the index, so only the patched rows are touched.
        df.update(patch)
    return df
def download_links_from_backup(backup_file: str, output_dir: str) -> None:
    """Download every song linked in a comma-separated backup file.

    Each line of ``backup_file`` is split on commas and its second field is
    taken as the link. Only links containing ``"https"`` are downloaded.
    Lines with fewer than two fields (for example blank lines) are skipped
    instead of aborting the whole run with an ``IndexError``.

    Args:
        backup_file: Path to the text file holding one record per line.
        output_dir: Destination passed through to ``download_song``.
    """
    links = []
    with open(backup_file) as f:
        for line in f:
            fields = line.split(",")
            if len(fields) < 2:
                # No second column on this line, so there is no link to read.
                continue
            link = fields[1].strip()
            if "https" in link:
                links.append(link)

    for link in tqdm(links, "Songs Downloaded"):
        download_song(link, output_dir)
        # Short pause between downloads to lower the chance of rate limiting.
        time.sleep(5e-3)
|