# Spaces:
# Sleeping
# Sleeping
import boto3 | |
from botocore.exceptions import ClientError, NoCredentialsError | |
import os | |
def upload_folder_to_s3(local_dir, prefix=''):
    """Recursively upload every file under *local_dir* to the S3 bucket
    named by the AWS_BUCKET_NAME environment variable.

    Object keys mirror the path relative to *local_dir*, joined with '/'
    so they are valid S3 keys on every platform (os.path.join would
    produce backslash keys on Windows). For each subdirectory a
    zero-byte "folder marker" key with a trailing '/' is created.

    Args:
        local_dir: Root of the local directory tree to upload.
        prefix: Optional key prefix to prepend inside the bucket.

    Raises:
        ValueError: If AWS_BUCKET_NAME is not set.
        botocore.exceptions.ClientError: On any S3 API failure
            (the previous code swallowed 404s from PutObject as
            "directory already exists", but a 404 there actually means
            the bucket is missing — that is a real error).
    """
    s3_bucket = os.getenv("AWS_BUCKET_NAME")
    if not s3_bucket:
        raise ValueError("AWS_BUCKET_NAME environment variable is not set")
    s3_client = boto3.client('s3')

    def _key_for(path):
        # S3 keys always use '/', regardless of the local os.sep.
        rel = os.path.relpath(path, local_dir).replace(os.sep, '/')
        return f"{prefix}/{rel}" if prefix else rel

    for root, dirs, files in os.walk(local_dir):
        for dirname in dirs:  # renamed: `dir` shadowed the builtin
            # PutObject simply overwrites an existing marker, so no
            # existence check is needed.
            marker = _key_for(os.path.join(root, dirname)) + '/'
            s3_client.put_object(Bucket=s3_bucket, Key=marker)
        for filename in files:
            file_path = os.path.join(root, filename)
            key = _key_for(file_path)
            s3_client.upload_file(file_path, s3_bucket, key)
            print(f"Uploaded: {file_path} -> s3://{s3_bucket}/{key}")
def check_file_exists_in_s3(file_path):
    """Return True if *file_path* exists as an object key in the S3
    bucket named by the AWS_BUCKET_NAME environment variable.

    Uses HeadObject: a 404 error code means the key is absent; any
    other ClientError (403, throttling, ...) is re-raised unchanged.

    Args:
        file_path: Full S3 object key to probe.

    Returns:
        bool: True if the object exists, False if HeadObject returns 404.

    Raises:
        botocore.exceptions.ClientError: For any non-404 failure.
    """
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    s3_client = boto3.client('s3')
    try:
        s3_client.head_object(Bucket=bucket_name, Key=file_path)
        return True
    except ClientError as e:
        if e.response['Error']['Code'] == '404':
            return False
        # Bare raise preserves the original traceback (raise e truncated it).
        raise
def download_files_from_s3(local_folder, file_path_list):
    """Download selected S3 keys into *local_folder*.

    Lists the bucket named by the AWS_BUCKET_NAME environment variable
    and downloads only the objects whose key appears in
    *file_path_list*, recreating each key's directory structure under
    *local_folder*. Failures are reported via print rather than raised
    (best-effort semantics, preserved from the original).

    Args:
        local_folder: Local destination directory.
        file_path_list: Iterable of exact S3 keys to download.
    """
    s3 = boto3.client('s3')
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    folder_prefix = ''
    # Set membership is O(1); scanning a list once per object is O(n*m).
    wanted_keys = set(file_path_list)
    try:
        paginator = s3.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix)
        for page in page_iterator:
            for obj in page.get('Contents', []):
                key = obj['Key']
                if key not in wanted_keys:
                    continue
                local_path = os.path.join(local_folder, key)
                parent = os.path.dirname(local_path)
                # Keys at the bucket root have no parent directory;
                # os.makedirs('') would raise FileNotFoundError.
                if parent:
                    os.makedirs(parent, exist_ok=True)
                try:
                    print(f"Downloading: {key} -> {local_path}")
                    s3.download_file(bucket_name, key, local_path)
                    print(f"Downloaded: {local_path}")
                except Exception as e:
                    print(f"Error downloading {key}: {e}")
    except NoCredentialsError:
        print("No AWS credentials found.")
    except Exception as e:
        print(f"An error occurred: {e}")
def download_folder_from_s3(local_folder, aws_folder_prefix):
    """Download every object under *aws_folder_prefix* from the S3
    bucket named by the AWS_BUCKET_NAME environment variable into
    *local_folder*, preserving the relative key structure.

    Folder-marker keys (those ending with '/') are skipped. The
    previous code skipped every zero-byte object, which silently
    dropped legitimate empty files.

    Args:
        local_folder: Local destination directory (created if missing).
        aws_folder_prefix: Key prefix to mirror locally.

    Raises:
        ValueError: If AWS_BUCKET_NAME is not set.
        botocore.exceptions.ClientError: Re-raised for unexpected
            per-object download failures (AccessDenied and NoSuchKey
            are reported and swallowed, as before).
    """
    s3 = boto3.client('s3')
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    if not bucket_name:
        raise ValueError("AWS_BUCKET_NAME environment variable is not set")
    try:
        # Create the local folder if it doesn't exist
        os.makedirs(local_folder, exist_ok=True)
        paginator = s3.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=aws_folder_prefix)
        for page in page_iterator:
            for obj in page.get('Contents', []):
                key = obj['Key']
                # Directory markers end with '/'; there is nothing to download.
                if key.endswith('/'):
                    continue
                local_path = os.path.join(local_folder, os.path.relpath(key, aws_folder_prefix))
                parent = os.path.dirname(local_path)
                # Guard: os.makedirs('') raises for keys directly under the prefix.
                if parent:
                    os.makedirs(parent, exist_ok=True)
                try:
                    print(f"Downloading: {key} -> {local_path}")
                    s3.download_file(bucket_name, key, local_path)
                    print(f"Downloaded: {local_path}")
                except ClientError as e:
                    code = e.response['Error']['Code']
                    if code == 'AccessDenied':
                        print(f"Permission denied when trying to download {key}: {e}")
                    elif code == 'NoSuchKey':
                        print(f"The object {key} does not exist in the bucket.")
                    else:
                        print(f"An error occurred while downloading {key}: {e}")
                        # Bare raise keeps the original traceback intact.
                        raise
    except Exception as e:
        print(f"An unexpected error occurred : {e}")
def delete_s3_folder(folder_path):
    """Delete every object under *folder_path* in the S3 bucket named
    by the AWS_BUCKET_NAME environment variable.

    The DeleteObjects API accepts at most 1000 keys per request, so
    deletions are issued in batches; the previous code sent the entire
    listing in a single call, which fails for folders with more than
    1000 objects.

    Args:
        folder_path: Key prefix whose objects should be removed.
    """
    bucket_name = os.getenv("AWS_BUCKET_NAME")
    s3_client = boto3.client('s3')
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=folder_path)
        batch = []
        deleted_count = 0

        def _flush():
            # Issue one DeleteObjects call for the accumulated batch.
            nonlocal deleted_count
            if batch:
                s3_client.delete_objects(Bucket=bucket_name, Delete={'Objects': batch})
                deleted_count += len(batch)
                batch.clear()

        for page in page_iterator:
            for obj in page.get('Contents', []):
                key = obj['Key']
                print(f"Deleting: {key}")
                batch.append({'Key': key})
                if len(batch) == 1000:  # hard API limit per DeleteObjects call
                    _flush()
        _flush()
        if deleted_count > 0:
            print(f"Deleted {deleted_count} objects in folder '{folder_path}'")
        else:
            print(f"No objects found in folder '{folder_path}'")
    except ClientError as e:
        print(f"An error occurred: {e}")
def list_s3_objects(prefix=''):
    """Print key, size, last-modified time, ETag, and file extension
    for every object under *prefix* in the S3 bucket named by the
    AWS_BUCKET_NAME environment variable.

    Any ClientError from the listing is reported via print.
    """
    bucket = os.getenv("AWS_BUCKET_NAME")
    client = boto3.client('s3')
    try:
        pages = client.get_paginator('list_objects_v2').paginate(
            Bucket=bucket, Prefix=prefix)
        for page in pages:
            for entry in page.get('Contents', []):
                extension = os.path.splitext(entry['Key'])[-1]
                print(f"Key: {entry['Key']}")
                print(f"Size: {entry['Size']} bytes")
                print(f"Last Modified: {entry['LastModified']}")
                print(f"ETag: {entry['ETag']}")
                print(f"File Extension: {extension}")
                print("---")
    except ClientError as e:
        print(f"An error occurred: {e}")