# Spaces: Running
# Download the osv5m/osv5m dataset repository from the Hugging Face Hub into
# ./datasets/osv5m (relative to the current working directory), then extract
# every .zip archive found under that directory next to the archive itself and
# delete the archive once it has been extracted.
import os
import zipfile

from huggingface_hub import snapshot_download

# Define the base directory
base_dir = os.path.join(os.getcwd(), 'datasets')

# Define the specific dataset directory
dataset_dir = os.path.join(base_dir, "osv5m")

# Ensure both directories exist. makedirs creates base_dir on the way, and
# exist_ok=True avoids the race of a separate "exists?" check followed by mkdir.
os.makedirs(dataset_dir, exist_ok=True)

# Download the dataset
snapshot_download(repo_id="osv5m/osv5m", local_dir=dataset_dir, repo_type='dataset')

# Extract zip files and remove them after extraction
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        if not file.endswith(".zip"):
            continue
        zip_path = os.path.join(root, file)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(root)
        # Remove only after the archive has been closed: deleting a file that
        # is still open raises an error on Windows.
        os.remove(zip_path)