import boto3
import os
from pathlib import Path
from dotenv import load_dotenv, find_dotenv

# Load environment variables from .env file
load_dotenv(find_dotenv(".env"))


class S3Handler:
    def __init__(self, bucket_name):
        self.bucket_name = bucket_name
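        # Explicit credentials come from the environment; if any are unset,
        # boto3 falls back to its default credential chain (shared config,
        # instance profile, etc.)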
        self.s3 = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=os.getenv("AWS_REGION"),
        )

    def upload_folder(self, source_folder, dest_folder, filenames=None):
        """
        Upload specified files or all files from a local folder to an S3 folder.

        Args:
            source_folder (str): Local source folder path.
            dest_folder (str): Destination folder path in S3.
            filenames (list): List of filenames to upload (relative to source_folder). If None, uploads all files.
        """
        source_folder = Path(source_folder)

        # Use the explicit filename list if given; otherwise walk the whole tree
        files_to_upload = (
            [source_folder / file for file in filenames]
            if filenames is not None
            else list(source_folder.rglob("*"))
        )

        for file_path in files_to_upload:
            if not file_path.is_file():
                # rglob also yields directories; warn only about files that
                # were explicitly requested
                if filenames:
                    print(f"File not found: {file_path}")
                continue

            # Build the key with POSIX separators so it is stable across platforms
            s3_path = f"{dest_folder}/{file_path.relative_to(source_folder).as_posix()}"
            self.s3.upload_file(str(file_path), self.bucket_name, s3_path)
            print(f"Uploaded: {file_path} to {s3_path}")

    def download_folder(self, s3_folder, dest_folder):
        """
        Download all files from an S3 folder to a local folder.

        Args:
            s3_folder (str): Source folder in S3.
            dest_folder (str): Local destination folder path.
        """
        dest_folder = Path(dest_folder).resolve()
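        # list_objects_v2 returns at most 1,000 keys per request; the paginator
        # follows continuation tokens across pages automatically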
        paginator = self.s3.get_paginator("list_objects_v2")

        # Normalize the prefix so sibling prefixes (e.g. "model" vs "model_v2")
        # are not matched accidentally
        prefix = s3_folder.rstrip("/") + "/"
        for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix):
            for obj in page.get("Contents", []):
                s3_path = obj["Key"]
                # Skip folder itself if returned by S3
                if s3_path.endswith("/"):
                    continue

                # Compute relative path and local destination
                relative_path = Path(s3_path[len(s3_folder) :].lstrip("/"))
                local_path = dest_folder / relative_path

                # Create necessary local directories
                local_path.parent.mkdir(parents=True, exist_ok=True)

                # Download file
                self.s3.download_file(self.bucket_name, s3_path, str(local_path))
                print(f"Downloaded: {s3_path} to {local_path}")


# Usage Example
if __name__ == "__main__":
    # Initialize with the target bucket name (the bucket must already exist)
    s3_handler = S3Handler(bucket_name="deep-bucket-s3")

    # Upload all files from a local folder to the same-named S3 prefix
    s3_handler.upload_folder(
        "checkpoints_test",
        "checkpoints_test",
    )
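
    # A hedged sketch: pass `filenames` to upload only selected files.
    # The names below are hypothetical placeholders, so the call is left
    # commented out.
    # s3_handler.upload_folder(
    #     "checkpoints_test",
    #     "checkpoints_test",
    #     filenames=["model.ckpt", "config.yaml"],
    # )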

    # Download example
    s3_handler.download_folder("checkpoints_test", "checkpoints_test")