# File size: 868 Bytes
# c4c7cee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import os, zipfile
from huggingface_hub import snapshot_download

# Resolve where the dataset is stored: <current working directory>/datasets/osv5m
base_dir = os.path.join(os.getcwd(), 'datasets')
dataset_dir = os.path.join(base_dir, "osv5m")

# Create both directory levels in a single call. exist_ok=True keeps the
# script safe to re-run and avoids the check-then-create race of testing
# os.path.exists() before calling os.mkdir().
os.makedirs(dataset_dir, exist_ok=True)

# Download the "osv5m/osv5m" dataset repository from the Hugging Face Hub
# into dataset_dir
snapshot_download(
    repo_id="osv5m/osv5m",
    repo_type='dataset',
    local_dir=dataset_dir,
)

# Walk dataset_dir, extract every .zip archive into the directory that
# contains it, then delete the archive.
for root, _dirs, files in os.walk(dataset_dir):
    for filename in files:
        if not filename.endswith(".zip"):
            continue
        archive_path = os.path.join(root, filename)
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(root)
        # Remove the archive only after the ZipFile handle has been closed:
        # deleting a file that is still open raises PermissionError on Windows.
        os.remove(archive_path)