Spaces:
Sleeping
Sleeping
""" | |
Downloads the MovieLens dataset and saves it as an artifact | |
""" | |
import os | |
import tempfile | |
import zipfile | |
import click | |
import requests | |
import mlflow | |
def load_raw_data(url):
    """Download a MovieLens zip archive and log its ratings file to MLflow.

    Inside a run opened with ``mlflow.start_run()``, the archive at ``url``
    is streamed into a temporary directory as ``ml-20m.zip`` and extracted
    there. ``ml-20m/ratings.csv`` from the extracted tree is then logged
    under the ``ratings-csv-dir`` artifact path. The temporary directory is
    removed before the function returns.

    Args:
        url: HTTP(S) location of the zip archive. The archive is expected
            to unpack into a top-level ``ml-20m`` directory that contains
            ``ratings.csv``.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with a 4xx/5xx status.
        zipfile.BadZipFile: if the downloaded file is not a valid zip.
    """
    with mlflow.start_run():
        # The scratch directory holds both the archive and the extracted
        # dataset; the context manager deletes it once the artifact has been
        # logged, so repeated runs do not pile up copies in the temp dir.
        with tempfile.TemporaryDirectory() as local_dir:
            local_filename = os.path.join(local_dir, "ml-20m.zip")
            print(f"Downloading {url} to {local_filename}")
            # (connect, read) timeouts keep a stalled server from hanging the
            # run forever; the ``with`` releases the connection afterwards.
            with requests.get(url, stream=True, timeout=(10, 60)) as r:
                # Fail here on an HTTP error instead of saving the error page
                # as "ml-20m.zip" and failing later inside zipfile.
                r.raise_for_status()
                with open(local_filename, "wb") as f:
                    # 1 MiB chunks: far fewer iterations than 1 KiB for a
                    # large archive, while still streaming to disk.
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)

            extracted_dir = os.path.join(local_dir, "ml-20m")
            print(f"Extracting {local_filename} into {extracted_dir}")
            with zipfile.ZipFile(local_filename, "r") as zip_ref:
                zip_ref.extractall(local_dir)

            ratings_file = os.path.join(extracted_dir, "ratings.csv")
            print(f"Uploading ratings: {ratings_file}")
            # Must happen before the temporary directory is cleaned up.
            mlflow.log_artifact(ratings_file, "ratings-csv-dir")
if __name__ == "__main__":
    # load_raw_data() requires a URL, so calling it with no argument raised
    # TypeError. Expose it through a small click command instead; the wrapper
    # lives inside the guard so load_raw_data(url) stays a plain function for
    # anyone importing this module.
    @click.command(
        help="Downloads the MovieLens dataset and saves it as an MLflow artifact."
    )
    @click.option(
        "--url",
        default="https://files.grouplens.org/datasets/movielens/ml-20m.zip",
        show_default=True,
        help="Location of the MovieLens 20M zip archive.",
    )
    def _main(url):
        load_raw_data(url)

    _main()