Using EASI scratch and project buckets

EASI provides a scratch bucket that is available to all users.

  • Scratch means temporary: all files will be deleted after 30 days.
  • Use the scratch bucket to save files between processing runs, or to temporarily share files between projects.

Project buckets are available to selected users as well. A project bucket can exist in another AWS account and be cross-linked to EASI. An EASI admin will assign users to a "project", which enables their access to the bucket. Files in a project bucket are subject to the bucket owner's lifecycle rules, administration, and costs.

Cross-account project buckets may benefit from additional ACL settings. See User Guide/08-cross-account-storage-usage (in your deployment).

Glossary:

  • S3 storage items are called objects. Typically these are files, but an object can be any blob of data.
  • An object's name is its key. A key can be almost any string. Typically a key includes / characters so it resembles a directory path from a regular file system, although S3 itself has no directories (see the sketch below).
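
For example, listing tools emulate directories by grouping keys on a delimiter. A minimal sketch (the bucket name and prefix are placeholders, and it assumes your AWS credentials are configured):

In [ ]:
import boto3

client = boto3.client('s3')

# Placeholder names for illustration. Keys are plain strings; setting
# Delimiter='/' asks S3 to group them as if they were directories.
response = client.list_objects_v2(Bucket='my-bucket', Prefix='my-user-id/', Delimiter='/')
for p in response.get('CommonPrefixes', []):
    print(p['Prefix'])  # each "subdirectory" under the prefix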

There are two AWS interfaces for reading from and writing to a scratch or project bucket. Examples of both are given in this notebook.

  • AWS CLI - command-line program (use in a terminal)
  • boto3 - Python library (use in code)

We show writing first so that you have a test file to use in the reading section.

  • Writing
    • User ID
    • Select a test file
    • Upload a file
  • Reading
    • List objects
    • Read a file directly
    • Copy a file to local

Imports and setup¶

In [4]:
import sys, os
import boto3
from datetime import datetime as dt

# EASI tools
import git
repo = git.Repo('.', search_parent_directories=True).working_tree_dir
if repo not in sys.path: sys.path.append(repo)
from easi_tools import EasiDefaults
In [13]:
client = boto3.client('s3')

easi = EasiDefaults()
bucket = easi.scratch
Successfully found configuration for deployment "chile"
In [14]:
# Optional, for parallel uploads and downloads of large files
# Add a (..., Config=config) parameter to the relevant upload and download functions

# from boto3.s3.transfer import TransferConfig
# config = TransferConfig(
#     multipart_threshold = 1024 * 25,
#     max_concurrency = 10,
#     multipart_chunksize = 1024 * 25,
#     use_threads = True
# )
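#
# For example (a sketch): with the lines above uncommented, pass the config
# to an upload or download call; both upload_file and download_file accept it.
# The key below is hypothetical and the variables are defined in later cells.
# client.upload_file(testfile, bucket, f'{userid}/large-file.bin', Config=config)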

Writing¶

User ID¶

To write to the scratch bucket, the root of the key must be your AWS User ID.

For a project bucket this restriction probably does not apply; any root-key conditions are managed by the bucket owner.
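
If you are unsure whether you can write under a given prefix, a quick probe is to attempt a small write and catch the error. A minimal sketch; the prefix is a placeholder (for the scratch bucket, use your User ID), and client and bucket come from the setup cells:

In [ ]:
from botocore.exceptions import ClientError

# Hypothetical probe: try writing an empty object under a candidate prefix.
# 'some-prefix' is a placeholder root key.
try:
    client.put_object(Bucket=bucket, Key='some-prefix/permission-test.txt', Body=b'')
    print('Write access OK.')
except ClientError as e:
    print('No write access:', e.response['Error']['Code'])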

In [15]:
%%bash

userid=`aws sts get-caller-identity --query 'UserId' | sed 's/["]//g'`
echo $userid
AROAT2ES654NBY3M4X7WD:jhodge
In [16]:
userid = boto3.client('sts').get_caller_identity()['UserId']
print(userid)
AROAT2ES654NBY3M4X7WD:jhodge

Select a test file¶

For use in this notebook.

In [17]:
testfile = '/home/jovyan/test-file.txt'
In [18]:
%%bash -s "$testfile"
 
testfile=$1
touch $testfile
ls -l $testfile
-rw-r--r-- 1 jovyan users 0 May 24 15:28 /home/jovyan/test-file.txt

Upload a file¶

In [19]:
%%bash -s "$bucket" "$userid" "$testfile"

bucket=$1
userid=$2
testfile=$3

aws s3 cp ${testfile} s3://${bucket}/${userid}/
upload: ../../../test-file.txt to s3://easido-prod-user-scratch/AROAT2ES654NBY3M4X7WD:jhodge/test-file.txt
In [20]:
target = testfile.split('/')[-1]
try:
    print(f'upload: {testfile} to s3://{bucket}/{userid}/{target}')
    r = client.upload_file(testfile, bucket, f'{userid}/{target}')
    print('Success.')
except Exception as e:
    print(e)
    print('Failed.')
upload: /home/jovyan/test-file.txt to s3://easido-prod-user-scratch/AROAT2ES654NBY3M4X7WD:jhodge/test-file.txt
Success.

Reading¶

List objects¶

The boto3 list_objects_v2 client method returns at most 1000 keys per call. Two options are shown here.

  1. Basic use of list_objects_v2
  2. Paginated list objects, for potentially >1000 keys
In [21]:
%%bash -s "$bucket" "$userid"

bucket=$1
userid=$2

aws s3 ls s3://${bucket}/${userid}/
2023-05-24 15:28:45          0 test-file.txt
In [22]:
# Basic use of list_objects_v2

response = client.list_objects_v2(Bucket=bucket, Prefix=f'{userid}/')

# from pprint import pprint
# pprint(response)

# List each key with its last modified time stamp
if 'Contents' in response:
    for c in response['Contents']:
        key = c['Key']
        lastmodified = c['LastModified'].strftime('%Y-%m-%d %H:%M:%S')
        size = c['Size']
        print(f'{lastmodified}\t{size} {key}')
2023-05-24 15:28:45	0 AROAT2ES654NBY3M4X7WD:jhodge/test-file.txt
In [23]:
# Paginated list objects, for potentially >1000 keys

paginator = client.get_paginator('list_objects_v2')
page_iterator = paginator.paginate(Bucket=bucket, Prefix=f'{userid}/')

for response in page_iterator:
    if 'Contents' in response:
        for c in response['Contents']:
            key = c['Key']
            lastmodified = c['LastModified'].strftime('%Y-%m-%d %H:%M:%S')
            size = c['Size']
            print(f'{lastmodified}\t{size} {key}')
2023-05-24 15:28:45	0 AROAT2ES654NBY3M4X7WD:jhodge/test-file.txt

Read a file directly¶

Many data reading packages can read a file from an s3://bucket/key path into memory. Examples include:

  • rasterio and rioxarray
  • gdal

For packages that cannot read from an S3 path, first copy the file to your home directory or to a temporary directory (for example, on dask workers), then read it with a normal file path.
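
For example, a minimal sketch with rasterio, assuming the object is a GeoTIFF and your AWS credentials are available; the key here is hypothetical:

In [ ]:
import rasterio

# rasterio can open s3:// paths directly (via GDAL's /vsis3/ handler).
# 'example.tif' is a hypothetical key for illustration.
with rasterio.open(f's3://{bucket}/{userid}/example.tif') as src:
    print(src.profile)  # metadata
    data = src.read(1)  # first band as a numpy array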

Copy a file to local¶

In [24]:
%%bash -s "$bucket" "$userid" "$testfile"

bucket=$1
userid=$2
testfile=$3

source=`basename $testfile`
aws s3 cp s3://${bucket}/${userid}/${source} ${testfile}
ls -l $testfile
download: s3://easido-prod-user-scratch/AROAT2ES654NBY3M4X7WD:jhodge/test-file.txt to ../../../test-file.txt
-rw-r--r-- 1 jovyan users 0 May 24 15:28 /home/jovyan/test-file.txt
In [25]:
source = testfile.split('/')[-1]
try:
    print(f'download: s3://{bucket}/{userid}/{source} to {testfile}')
    r = client.download_file(bucket, f'{userid}/{source}', testfile)
    print('Success.')
except Exception as e:
    print(e)
    print('Failed.')
download: s3://easido-prod-user-scratch/AROAT2ES654NBY3M4X7WD:jhodge/test-file.txt to /home/jovyan/test-file.txt
Success.