Spaces:
Running
Running
File size: 868 Bytes
c4c7cee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import os, zipfile
from huggingface_hub import snapshot_download
# Resolve where the dataset will live: <current working directory>/datasets/osv5m.
base_dir = os.path.join(os.getcwd(), 'datasets')
dataset_dir = os.path.join(base_dir, "osv5m")
# Create both directory levels in a single call. exist_ok=True makes this a
# no-op when they are already present and avoids the check-then-create race of
# testing os.path.exists() before calling os.mkdir().
os.makedirs(dataset_dir, exist_ok=True)
# Fetch the "osv5m/osv5m" dataset repository into dataset_dir.
snapshot_download(
    repo_id="osv5m/osv5m",
    repo_type='dataset',
    local_dir=dataset_dir,
)
# Unpack every .zip archive found under dataset_dir into the directory that
# contains it, deleting each archive once its contents have been extracted.
for current_dir, _subdirs, filenames in os.walk(dataset_dir):
    for filename in filenames:
        if not filename.endswith(".zip"):
            continue
        archive_path = os.path.join(current_dir, filename)
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(current_dir)
        # The archive handle is closed here, so the file can be removed.
        os.remove(archive_path)
|