Improve the aws deep glacier restore scripts
This commit is contained in:
parent
0ecad79655
commit
56e4ad3386
|
|
@ -52,11 +52,6 @@ error() {
|
|||
printf "${BOLD}${RED}$1${NORMAL}\n"
|
||||
}
|
||||
|
||||
# Prints a bold red "Aborting..." notice through error() and then terminates
# the script with exit status 1.
abort() {
  local -r notice="\nAborting...\n"
  error "$notice"
  exit 1
}
|
||||
|
||||
set -e
|
||||
|
||||
bucket="$1"
|
||||
|
|
@ -68,4 +63,24 @@ if [[ $bucket == "" || $path == "" || $output_file == "" ]]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
aws s3api list-objects-v2 --bucket $bucket --prefix $path --query "Contents[?StorageClass=='DEEP_ARCHIVE']" --output text | LC_ALL=C awk '{print substr($0, index($0, $2))}' | awk '{NF-=3};3' > "$output_file"
|
||||
# .Key gives us just the object paths. If you want the other metadata then remove that from the query.
#
# The aws call is captured on its own, without a trailing pipeline, so that the
# status we check really is the aws exit status. Piping straight into tr would
# report the status of tr instead, and the failure branch could never run.
# The "|| error=$?" form also keeps "set -e" from exiting before we can print
# a helpful message.
error=0
listing="$(aws s3api list-objects-v2 --bucket "$bucket" --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text)" || error=$?

if [[ $error -ne 0 ]]; then
  error "Error: failed to run the aws command. Aborting."
  exit 1
fi

# The text output separates keys with tabs; put one key per line and strip any
# carriage returns.
items="$(tr '\t' '\n' <<<"$listing" | tr -d '\r')"
|
||||
if [[ $items == "None" ]]; then
|
||||
error "Didn't find any files. Check that your bucket name and path is correct."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Split the newline separated key list into an array so the keys can be counted.
mapfile -t lines_array <<< "$items"
item_count="${#lines_array[@]}"

# printf instead of echo so that a list starting with something like "-n" or
# "-e" is written verbatim rather than being treated as an echo option.
printf '%s\n' "$items" > "$output_file"

# Values are passed as arguments rather than embedded in the format string so
# that a "%" or backslash in the output file name is printed as-is.
printf 'Number of items: %s%s%s\n' "${BOLD}${YELLOW}" "$item_count" "${NORMAL}"
printf 'Wrote file list to %s%s%s\n' "${BOLD}${YELLOW}" "$output_file" "${NORMAL}"
|
||||
|
|
|
|||
|
|
@ -1,28 +1,44 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Restores all objects recursively from a specific bucket path. If you want to
|
||||
# restore objects from an rclone crypt (encrypted remote), then you'll need to
|
||||
# do some manual steps first. See the `# Rclone Crypt` section for details.
|
||||
#
|
||||
# Restores all files/folders inside a particular bucket path for the next 7 days. This uses the bulk retrieval tier:
|
||||
# You can set how long restore files are available for download and the AWS
|
||||
# retrieval tier. The defaults are 7 days and the bulk tier respectively.
|
||||
#
|
||||
# Available tiers: bulk, standard, and expedited.
|
||||
#
|
||||
# Bulk retrievals are the lowest-cost retrieval option when restoring objects
|
||||
# from S3 Glacier Deep Archive. They typically finish within 48 hours for
|
||||
# objects stored in the S3 Glacier Deep Archive storage class or S3
|
||||
# Intelligent-Tiering Deep Archive tier.
|
||||
#
|
||||
# If you need faster access then use the `Expedited` or `Standard` tiers.
|
||||
# If you need faster access then use the `expedited` or `standard` tiers.
|
||||
#
|
||||
# Example usage:
|
||||
#
|
||||
# aws-restore-deep-glacier-folder my-deep-glacier-bucket path/to/images restored_images
|
||||
# aws-restore-deep-glacier-folder my-deep-glacier-bucket path/to/images restored_images 14 expedited
|
||||
#
|
||||
# This will create a run.sh script in a folder called "restored_images". Run that to restore all files inside the `path/to/images` folder inside the my-deep-glacier bucket.
|
||||
# This will create a run.sh script in a folder called "restored_images". Run
|
||||
# that to restore all files inside the `path/to/images` folder from the
|
||||
# my-deep-glacier bucket. Restored objects will be available for 14 days and
|
||||
# retrieved using the expedited tier.
|
||||
#
|
||||
# After you run the generated script, you have to wait for AWS to make the files available for download. You can check the status of a file with:
|
||||
# After you run the generated script, you have to wait for AWS to make the
|
||||
# files available for download. You can check the status of a file with:
|
||||
#
|
||||
# aws s3api head-object --bucket my-deep-glacier --key path/to/images/photo1.jpg
|
||||
# aws s3api head-object --bucket my-deep-glacier-bucket --key "path/to/images/photo1.jpg" --query "{Restore:Restore, StorageClass:StorageClass}"
|
||||
#
|
||||
# (obviously change the bucket and path to suit your needs).
|
||||
#
|
||||
# Once the files are restored you can download them on the S3 website or better yet use RcloneBrowser. I'm sure there's also a way to do it over cli too, I just haven't checked.
|
||||
# Or use the aws-see-restore-status script.
|
||||
# You know it's ready when ongoing-request is false and there's a date. If that
|
||||
# field is null then the file isn't being restored.
|
||||
#
|
||||
# Once the files are restored you can download them on the S3 website or better
|
||||
# yet use RcloneBrowser. I'm sure there's also a way to do it over cli too, I
|
||||
# just haven't checked.
|
||||
#
|
||||
# You'll need the aws cli tools for this script. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
|
||||
# Once installed, open a new shell and verify that you can run the `aws` command.
|
||||
|
|
@ -44,6 +60,67 @@
|
|||
# You can enable the UTF-8 locale with:
|
||||
#
|
||||
# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
|
||||
|
||||
##########################
|
||||
# Rclone Crypt
|
||||
##########################
|
||||
#
|
||||
# To restore an rclone crypt, you need to first find the encrypted name that
|
||||
# maps to the parent folder or the file you want to restore. To do this you
|
||||
# need to use rclone. There are two ways to go about this.
|
||||
#
|
||||
# 1. The simple way is to use `cryptdecode` to convert your object path to its
|
||||
# encrypted form.
|
||||
#
|
||||
# For example, say you have an rclone crypt called `s3-deep-glacier-encrypted`
|
||||
# that is stored in S3 at `my-deep-glacier-bucket:encrypted/` You have a folder
|
||||
# called `dev/fonts` that you want to restore. To get its path, run the following
|
||||
# command:
|
||||
#
|
||||
# rclone cryptdecode --reverse s3-deep-glacier-encrypted: dev/fonts
|
||||
#
|
||||
# This will give you the encrypted path, e.g. "44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik"
|
||||
#
|
||||
# You can now restore this by running:
|
||||
#
|
||||
# aws-restore-deep-glacier-folder my-deep-glacier-bucket 44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik restore_dev_fonts
|
||||
#
|
||||
# You should be able to simply download the dev/fonts folder after it's
|
||||
# restored. The easiest way is using rclone browser because it'll decrypt them
|
||||
# for you. Alternatively you can download the encrypted files using whatever
|
||||
# method you want and then decrypt them locally with rclone.
|
||||
#
|
||||
# 2. You can also get the encrypted names by enabling the 'show_mapping' option
|
||||
# in the rclone remote config. This will log the encrypted names of folders and
|
||||
# files with the original name in the same log line. This makes it easy to
|
||||
# parse the output.
|
||||
#
|
||||
# To enable the option, edit your rclone config, edit the remote you want to
|
||||
# restore from, edit the advanced config and set `show_mapping` to true.
|
||||
#
|
||||
# Now you can list the directories and files with rclone and get the mapping
|
||||
# output on stderr. e.g. let's capture all folders and files in a txt file:
|
||||
#
|
||||
# rclone lsf s3-deep-glacier-encrypted: -R &> keys.txt
|
||||
#
|
||||
# If your rclone config has password protection then you'll be prompted for it
|
||||
# but won't see the output since it's being written to the file. Just paste it
|
||||
# and hit enter.
|
||||
#
|
||||
# Now you have a listing of all objects and the encrypted keys that they map
|
||||
# to. If you want to scope the output to a specific path in the crypt then add
|
||||
# it after the remote name, e.g. `s3-deep-glacier-encrypted:dev/fonts`
|
||||
#
|
||||
# If you scope it like that then be aware that the output won't contain the
|
||||
# mapping for the parent path, i.e. `dev/fonts`, but you can get that using
|
||||
# `cryptdecode` (see above) or with some non-recursive outputs of the parent
|
||||
# parts using `lsd`, e.g.
|
||||
#
|
||||
# // First call will include the dev/ key
|
||||
# rclone lsd s3-deep-glacier-encrypted:
|
||||
#
|
||||
# // Second call has the fonts key
|
||||
# rclone lsd s3-deep-glacier-encrypted:dev
|
||||
#
|
||||
|
||||
if which tput >/dev/null 2>&1; then
|
||||
|
|
@ -73,57 +150,177 @@ error() {
|
|||
printf "${BOLD}${RED}$1${NORMAL}\n"
|
||||
}
|
||||
|
||||
# Prints a bold red "Aborting..." notice through error() and then terminates
# the script with exit status 1.
abort() {
  local -r notice="\nAborting...\n"
  error "$notice"
  exit 1
}
|
||||
|
||||
set -e
|
||||
|
||||
bucket="$1"
|
||||
path="$2"
|
||||
temp_dir="$3"
|
||||
number_of_objects_per_file=100
|
||||
days_available=7
|
||||
restore_tier="Bulk" # Can also be "Standard" or "Expedited"
|
||||
restore_tier="bulk" # Can also be "standard" or "expedited"
|
||||
|
||||
if [[ $bucket == "" || $path == "" || $temp_dir == "" ]]; then
|
||||
error "Usage: aws-restore-deep-glacier-folder <bucket-name> <path-in-bucket> <local-temp-dir>"
|
||||
error "Usage: aws-restore-deep-glacier-folder <bucket-name> <path-in-bucket> <local-temp-dir> <optional: days available> <optional: restore tier>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
printf "Restoring ${BOLD}${GREEN}$bucket:$path${NORMAL} with local temp folder ${BOLD}${GREEN}$temp_dir${NORMAL}\n"
|
||||
# Optional 4th argument: how many days the restored copies stay available.
# It is spliced into the JSON restore request, so reject anything that is not
# a positive whole number up front instead of letting every request fail later.
if [[ ${4:-} != "" ]]; then
  days_available="$4"
fi
if [[ ! $days_available =~ ^[1-9][0-9]*$ ]]; then
  error "Days available is invalid. It must be a positive whole number."
  exit 1
fi

# Optional 5th argument: the restore tier.
if [[ ${5:-} != "" ]]; then
  restore_tier="$5"
fi

# Normalize the tier; lowercase it, validate it, then capitalize the first
# character. The pattern is anchored so the whole value must be one of the
# three tiers (a word-boundary match would also accept e.g. "bulk foo").
restore_tier="${restore_tier,,}"
tier_pattern='^(bulk|standard|expedited)$'
if [[ ! $restore_tier =~ $tier_pattern ]]; then
  error "Restore tier is invalid. Accepted values is \"bulk\", \"standard\" and \"expedited\""
  exit 1
fi
restore_tier="${restore_tier^}"
|
||||
|
||||
printf "Restoring ${BOLD}${YELLOW}$bucket:$path${NORMAL} for ${BOLD}${YELLOW}$days_available${NORMAL} days using the ${BOLD}${YELLOW}\"$restore_tier\"${NORMAL} restore tier.\nSaving the restoration script in ${BOLD}${YELLOW}$temp_dir${NORMAL}\n"
|
||||
|
||||
mkdir -p "$temp_dir"
|
||||
pushd "$temp_dir" &>/dev/null
|
||||
|
||||
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix $path --query "Contents[?StorageClass=='DEEP_ARCHIVE']" --output text)"
|
||||
# .Key gives us just the object paths. If you want the other metadata then remove that from the query.
#
# The aws call is captured on its own, without a trailing pipeline, so that the
# status we check really is the aws exit status. Piping straight into tr would
# report the status of tr instead, and the failure branch could never run.
# The "|| error=$?" form also keeps "set -e" from exiting before we can print
# a helpful message.
error=0
listing="$(aws s3api list-objects-v2 --bucket "$bucket" --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text)" || error=$?

if [[ $error -ne 0 ]]; then
  error "Error: failed to run the aws command. Aborting."
  exit 1
fi

# The text output separates keys with tabs; put one key per line and strip any
# carriage returns.
items="$(tr '\t' '\n' <<<"$listing" | tr -d '\r')"
|
||||
|
||||
if [[ $items == "None" ]]; then
|
||||
error "Didn't find any files. Check that your bucket name and path is correct."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Format the items list.
|
||||
output="$(echo "$items" | LC_ALL=C awk '{print substr($0, index($0, $2))}' | awk '{NF-=3};3')"
|
||||
|
||||
mapfile -t lines_array <<< "$output"
|
||||
num_items="${#lines_array[@]}"
|
||||
|
||||
printf "Number of items to restore: ${BOLD}${YELLOW}$num_items${NORMAL}\n"
|
||||
printf "${BOLD}${RED}Proceed?\n> ${NORMAL}"
|
||||
read -e proceed
|
||||
if [[ $proceed == "1" || $proceed == "y" || $proceed == "Y" || $proceed == "yes" || $proceed == "YES" ]]; then
|
||||
echo "$output" > all_objects_list.txt
|
||||
mapfile -t lines_array <<< "$items"
|
||||
item_count="${#lines_array[@]}"
|
||||
|
||||
# Generate the main script that will kick off the restoration.
|
||||
printf "while read x; do\n printf \"aws s3api restore-object --restore-request '{\\\\\"Days\\\\\":$days_available,\\\\\"GlacierJobParameters\\\\\":{\\\\\"Tier\\\\\":\\\\\"$restore_tier\\\\\"}}' --bucket $bucket --key \\\\\"\$x\\\\\"\\\\n\"\n aws s3api restore-object --restore-request \"{\\\\\"Days\\\\\":$days_available,\\\\\"GlacierJobParameters\\\\\":{\\\\\"Tier\\\\\":\\\\\"$restore_tier\\\\\"}}\" --bucket $bucket --key \"\$x\"\ndone < all_objects_list.txt\nprintf \"\\\\nDone! You can now delete this folder.\\\\nYour files are currently being restored. The time it takes to restore can be found in the AWS docs - just look for the $restore_tier restore tier, which is what you used.\\\\nOnce restored, download the files from the S3 site or better yet use RCloneBrowser.\\\\n\"\n" > run.sh
|
||||
printf "Number of items to restore: ${BOLD}${YELLOW}$item_count${NORMAL}\n"
|
||||
printf "${BOLD}${RED}Create the restore script?\n> ${NORMAL}"
|
||||
|
||||
read -e proceed
|
||||
if [[ $proceed == "1" || $proceed == "y" || $proceed == "Y" || $proceed == "yes" || $proceed == "YES" ]]; then
|
||||
echo "$items" > all_objects_list.txt
|
||||
|
||||
RUN_TEMPLATE=$(cat <<EOF
|
||||
if which tput >/dev/null 2>&1; then
|
||||
ncolors=\$(tput colors)
|
||||
fi
|
||||
if [ -t 1 ] && [ -n "\$ncolors" ] && [ "\$ncolors" -ge 8 ]; then
|
||||
RED="\$(tput setaf 1)"
|
||||
GREEN="\$(tput setaf 2)"
|
||||
YELLOW="\$(tput setaf 3)"
|
||||
BLUE="\$(tput setaf 4)"
|
||||
MAGENTA="\$(tput setaf 5)"
|
||||
CYAN="\$(tput setaf 6)"
|
||||
BOLD="\$(tput bold)"
|
||||
NORMAL="\$(tput sgr0)"
|
||||
else
|
||||
RED=""
|
||||
GREEN=""
|
||||
YELLOW=""
|
||||
BLUE=""
|
||||
MAGENTA=""
|
||||
CYAN=""
|
||||
BOLD=""
|
||||
NORMAL=""
|
||||
fi
|
||||
|
||||
# Open an output file.
|
||||
exec 3>>output.txt
|
||||
|
||||
fail_count=0
|
||||
failed_filename="failed_keys_\$(printf '%%04x' \$((RANDOM * RANDOM))).txt"
|
||||
|
||||
before_sleep_count=0
|
||||
sleep_every_n_requests=25
|
||||
sleep_duration=0.2
|
||||
|
||||
printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n"
|
||||
printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n" >&3
|
||||
|
||||
printf "\${BOLD}NOTE: Request failures will be saved to \${YELLOW}\$failed_filename\${NORMAL}\${BOLD} as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n"
|
||||
printf "NOTE: Request failures will be saved to \$failed_filename as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n" >&3
|
||||
|
||||
index=1
|
||||
while read key; do
|
||||
printf "* [\$index/$item_count] \${BOLD}\$key\${NORMAL}\\\n"
|
||||
printf "* [\$index/$item_count] \$key\\\n" >&3
|
||||
err=\$(
|
||||
aws s3api restore-object \\
|
||||
--bucket mcampagnaro-deep-glacier \\
|
||||
--key \\"\$key\\" \\
|
||||
--restore-request '{\\"Days\\":$days_available,\\"GlacierJobParameters\\":{\\"Tier\\":\\"$restore_tier\\"}}' \\
|
||||
2>&1 >/dev/null
|
||||
)
|
||||
index=\$((index + 1))
|
||||
before_sleep_count=\$((before_sleep_count + 1))
|
||||
|
||||
# strip newlines
|
||||
err="\${err//[$'\\\t\\\r\\\n']}"
|
||||
|
||||
if [[ \$err != "" ]]; then
|
||||
if ! grep -qE 'RestoreAlreadyInProgress|ObjectAlreadyInActiveTierError' <<<"\$err"; then
|
||||
printf "\${BOLD}\${RED}FAILED! \$err\${NORMAL}"
|
||||
printf "FAILED! \$err\" >&3
|
||||
|
||||
# Save the failure to a file now in case the script exits prematurely.
|
||||
fail_count=\$((fail_count + 1))
|
||||
printf "%%s\\\n" "\$key" >> \$failed_filename
|
||||
else
|
||||
if grep -qE 'RestoreAlreadyInProgress' <<<"\$err"; then
|
||||
printf "\${BOLD}\${YELLOW}SKIPPING! File restore is already in progress.\${NORMAL}"
|
||||
printf "SKIPPING! File restore is already in progress." >&3
|
||||
else
|
||||
printf "\${BOLD}\${YELLOW}SKIPPING! File is already restored. You can now download it.\${NORMAL}"
|
||||
printf "SKIPPING! File is already restored. You can now download it." >&3
|
||||
fi
|
||||
fi
|
||||
else
|
||||
printf "\${BOLD}\${GREEN}SUCCESS!\${NORMAL}"
|
||||
printf "SUCCESS!" >&3
|
||||
fi
|
||||
printf "\\\n\\\n"
|
||||
printf "\\\n\\\n" >&3
|
||||
|
||||
if [[ \$before_sleep_count -eq sleep_every_n_requests ]]; then
|
||||
printf "SLEEPING...\\\n\\\n"
|
||||
printf "SLEEPING...\\\n\\\n" >&3
|
||||
sleep \$sleep_duration
|
||||
before_sleep_count=0
|
||||
fi
|
||||
|
||||
done < all_objects_list.txt
|
||||
|
||||
printf "\${BOLD}\${GREEN}Done!\${NORMAL}\\\n\\\n"
|
||||
printf "Done!\\\n\\\n" >&3
|
||||
|
||||
# Summarize the run. The failure list lives in the file named by failed_filename, and -gt compares the count numerically.
if [[ \$fail_count -gt 0 ]]; then
  printf "\${BOLD}\${RED}There were \$fail_count failures!\\\nSee \${NORMAL}\${BOLD}\$failed_filename\${RED} for the list. You can replace the contents of \${NORMAL}\${BOLD}all_objects_list.txt\${RED} with the list of failures and re-run this script to process them.\${NORMAL}\\\n\\\n"
  printf "There were \$fail_count failures!\\\nSee \$failed_filename for the list. You can replace the contents of all_objects_list.txt with the list of failures and re-run this script to process them.\\\n\\\n" >&3
else
  printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n"
  printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n" >&3
fi
|
||||
|
||||
printf "(Note: the time it takes to restore an object can be found in the AWS docs - just look for the $restore_tier restore tier, which is what you used.\\\nOnce restored, download the files from the S3 site or better yet use RCloneBrowser.\\\n"
|
||||
printf "You can check the status of a file using the aws-see-restore-status script)\\\n"
|
||||
|
||||
exec 3>&-
|
||||
|
||||
EOF
|
||||
)
|
||||
|
||||
printf "$RUN_TEMPLATE" > run.sh
|
||||
|
||||
chmod +x run.sh
|
||||
|
||||
printf "${BOLD}You can now run ${GREEN}$temp_dir/run.sh${NORMAL}${BOLD} to start the restoration process.\n"
|
||||
|
|
|
|||
72
dotfiles/bin/aws-see-restore-status
Normal file
72
dotfiles/bin/aws-see-restore-status
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Shows you the status of an object restore job.
|
||||
#
|
||||
# e.g. aws-see-restore-status my-deep-glacier-bucket object/path.png
|
||||
#
|
||||
# You know it's ready when ongoing-request is false and there's a date. If that field is null then the file isn't being restored.
|
||||
#
|
||||
# You'll need the aws cli tools. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
|
||||
#
|
||||
# If you see an error along the lines of "'charmap' codec can't encode
|
||||
# character '\u200e' in position 42: character maps to <undefined>" then that
|
||||
# means a filename has a Unicode codepoint and the dumb aws Python code is
|
||||
# trying to read it using your system's locale, which is very likely not set to
|
||||
# use the Windows UTF-8 beta feature. This is an ongoing issue in this tool
|
||||
# that goes back to 2013!!! There's no way to fix it using environment
|
||||
# variables, at least nothing worked for me. The fix provided by the devs is
|
||||
# heavy handed: you change your system locale to use UTF-8... This has
|
||||
# consequences though like breaking legacy apps that don't have Unicode support
|
||||
# and I'm sure other weird things will happen, such as file corruption. Anyway,
|
||||
# if you're getting this charmap error then I suggest changing your system
|
||||
# locale, run this again, then switch back to your previous locale. If you
|
||||
# don't get the canonical file name then you won't be able to restore it.
|
||||
#
|
||||
# You can enable the UTF-8 locale with:
|
||||
#
|
||||
# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
|
||||
#
|
||||
|
||||
# Terminal color setup. Defines RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, BOLD
# and NORMAL. They hold tput escape sequences when stdout is a terminal that
# reports at least 8 colors, and are empty strings otherwise so that messages
# stay clean when output is piped or redirected.

# Start from a known value so an inherited "ncolors" environment variable
# cannot switch colors on when tput is unavailable.
ncolors=""
# command -v is the shell builtin way to test for a tool; "which" is an
# external program that is not always installed.
if command -v tput >/dev/null 2>&1; then
  # tput complains on stderr when TERM is unset; that is not an error for us.
  ncolors="$(tput colors 2>/dev/null)"
fi

# Only compare numerically once we know tput gave us a number.
if [[ -t 1 && $ncolors =~ ^[0-9]+$ ]] && (( ncolors >= 8 )); then
  RED="$(tput setaf 1)"
  GREEN="$(tput setaf 2)"
  YELLOW="$(tput setaf 3)"
  BLUE="$(tput setaf 4)"
  MAGENTA="$(tput setaf 5)"
  CYAN="$(tput setaf 6)"
  BOLD="$(tput bold)"
  NORMAL="$(tput sgr0)"
else
  RED=""
  GREEN=""
  YELLOW=""
  BLUE=""
  MAGENTA=""
  CYAN=""
  BOLD=""
  NORMAL=""
fi
|
||||
|
||||
# Prints a message in bold red followed by a newline.
#
# Arguments:
#   $1 - the message. Backslash escapes such as \n are interpreted (callers
#        rely on this), but the text is never used as the printf format, so a
#        literal "%" in the message is printed as-is instead of being treated
#        as a conversion specifier.
error() {
  printf '%s%b%s\n' "${BOLD}${RED}" "$1" "${NORMAL}"
}
|
||||
|
||||
# Prints a bold red "Aborting..." notice through error() and then terminates
# the script with exit status 1.
abort() {
  local -r notice="\nAborting...\n"
  error "$notice"
  exit 1
}
|
||||
|
||||
set -e
|
||||
|
||||
# Positional arguments: the bucket name and the key (path) of the object whose
# restore status should be shown. Both are required.
bucket="$1"
path="$2"

if [[ $bucket == "" || $path == "" ]]; then
  error "Usage: aws-see-restore-status <bucket-name> <path-in-bucket>"
  exit 1
fi

# Print only the Restore and StorageClass fields of the object's metadata.
# Both arguments are quoted so that unusual input is passed through as a
# single word instead of being split or glob-expanded by the shell.
aws s3api head-object --bucket "$bucket" --key "$path" --query "{Restore:Restore, StorageClass:StorageClass}" --output json
|
||||
Loading…
Reference in New Issue
Block a user