Improve the aws deep glacier restore scripts

Michael Campagnaro 2025-12-29 22:39:57 -05:00
parent 0ecad79655
commit 56e4ad3386
3 changed files with 317 additions and 33 deletions


@@ -52,11 +52,6 @@ error() {
printf "${BOLD}${RED}$1${NORMAL}\n"
}
set -e
bucket="$1"
@@ -68,4 +63,24 @@ if [[ $bucket == "" || $path == "" || $output_file == "" ]]; then
exit 1
fi
# .Key gives us just the object paths. If you want the other metadata then remove that from the query.
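# With --output text the matching keys come back tab-separated on a single
# line, which is why they get split onto separate lines below.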
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text | tr '\t' '\n' | tr -d '\r')"
error=$?
if [[ ! $error -eq 0 ]]; then
error "Error: failed to run the aws command. Aborting."
exit 1
fi
if [[ $items == "None" ]]; then
error "Didn't find any files. Check that your bucket name and path is correct."
exit 1
fi
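# Split the newline-separated list into an array so the items can be counted.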
mapfile -t lines_array <<< "$items"
item_count="${#lines_array[@]}"
echo "$items" > "$output_file"
printf "Number of items: ${BOLD}${YELLOW}$item_count${NORMAL}\n"
printf "Wrote file list to ${BOLD}${YELLOW}$output_file${NORMAL}\n"


@@ -1,28 +1,44 @@
#!/usr/bin/env bash
# Restores all objects recursively from a specific bucket path. If you want to
# restore objects from an rclone crypt (encrypted remote), then you'll need to
# do some manual steps first. See the `# Rclone Crypt` section for details.
#
# You can set how long restored files are available for download and the AWS
# retrieval tier. The defaults are 7 days and the bulk tier respectively.
#
# Available tiers: bulk, standard, and expedited.
#
# Bulk retrievals are the lowest-cost retrieval option when restoring objects
# from S3 Glacier Deep Archive. They typically finish within 48 hours for
# objects stored in the S3 Glacier Deep Archive storage class or S3
# Intelligent-Tiering Deep Archive tier.
#
# If you need faster access then use the `expedited` or `standard` tiers.
#
# Example usage:
#
# aws-restore-deep-glacier-folder my-deep-glacier-bucket path/to/images restored_images 14 expedited
#
# This will create a run.sh script in a folder called "restored_images". Run
# that to restore all files inside the `path/to/images` folder from the
# my-deep-glacier bucket. Restored objects will be available for 14 days and
# retrieved using the expedited tier.
#
# After you run the generated script, you have to wait for AWS to make the
# files available for download. You can check the status of a file with:
#
# aws s3api head-object --bucket my-deep-glacier-bucket --key "path/to/images/photo1.jpg" --query "{Restore:Restore, StorageClass:StorageClass}"
#
# (obviously change the bucket and path to suit your needs).
#
# Or use the aws-see-restore-status script.
# You know it's ready when ongoing-request is false and there's a date. If that
# field is null then the file isn't being restored.
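#
# For illustration, a restored object's response looks roughly like this (the
# expiry-date is just an example):
#
#     {
#         "Restore": "ongoing-request=\"false\", expiry-date=\"Fri, 10 Jan 2025 00:00:00 GMT\"",
#         "StorageClass": "DEEP_ARCHIVE"
#     }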
#
# Once the files are restored you can download them on the S3 website or better
# yet use RcloneBrowser. I'm sure there's a way to do it over the CLI too, I
# just haven't checked.
#
# You'll need the aws cli tools for this script. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
# Once installed, open a new shell and verify that you can run the `aws` command.
@@ -44,6 +60,67 @@
# You can enable the UTF-8 locale with:
#
# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
##########################
# Rclone Crypt
##########################
#
# To restore an rclone crypt, you need to first find the encrypted name that
# maps to the parent folder or the file you want to restore. To do this you
# need to use rclone. There are two ways to go about this.
#
# 1. The simple way is to use `cryptdecode` to convert your object path to its
# encrypted form.
#
# For example, say you have an rclone crypt called `s3-deep-glacier-encrypted`
# that is stored in S3 at `my-deep-glacier-bucket:encrypted/`. You have a folder
# called `dev/fonts` that you want to restore. To get its encrypted path, run
# the following command:
#
# rclone cryptdecode --reverse s3-deep-glacier-encrypted: dev/fonts
#
# This will give you the encrypted path, e.g. "44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik"
#
# You can now restore this by running:
#
# aws-restore-deep-glacier-folder my-deep-glacier-bucket 44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik restore_dev_fonts
#
# You should be able to simply download the dev/fonts folder after it's
# restored. The easiest way is using RcloneBrowser because it'll decrypt them
# for you. Alternatively you can download the encrypted files using whatever
# method you want and then decrypt them locally with rclone.
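#
# For example (hypothetical remote name and paths): if you downloaded the
# encrypted files to `restored_files/`, you could point a new crypt remote
# called `local-decrypt` at that folder (configured with the same passwords
# as the S3 crypt) and then copy the decrypted contents out of it:
#
# rclone copy local-decrypt: restored_files_decrypted/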
#
# 2. You can also get the encrypted names by enabling the 'show_mapping' option
# in the rclone remote config. This will log the encrypted names of folders and
# files with the original name in the same log line. This makes it easy to
# parse the output.
#
# To enable the option, edit your rclone config, edit the remote you want to
# restore from, edit the advanced config and set `show_mapping` to true.
#
# Now you can list the directories and files with rclone and get the mapping
# output on stderr. e.g. let's capture all folders and files in a txt file:
#
# rclone lsf s3-deep-glacier-encrypted: -R &> keys.txt
#
# If your rclone config has password protection then you'll be prompted for it,
# but you won't see the prompt since it's being redirected to the file. Just
# paste the password and hit enter.
#
# Now you have a listing of all objects and the encrypted keys that they map
# to. If you want to scope the output to a specific path in the crypt then add
# it after the remote name, e.g. `s3-deep-glacier-encrypted:dev/fonts`
#
# If you scope it like that then be aware that the output won't contain the
# mapping for the parent path, i.e. `dev/fonts`, but you can get that using
# `cryptdecode` (see above) or by listing the parent parts non-recursively
# with `lsd`, e.g.
#
# // First call will include the dev/ key
# rclone lsd s3-deep-glacier-encrypted:
#
# // Second call has the fonts key
# rclone lsd s3-deep-glacier-encrypted:dev
#
if which tput >/dev/null 2>&1; then
@@ -73,57 +150,177 @@ error() {
printf "${BOLD}${RED}$1${NORMAL}\n"
}
set -e
bucket="$1"
path="$2"
temp_dir="$3"
number_of_objects_per_file=100
days_available=7
restore_tier="Bulk" # Can also be "Standard" or "Expedited" restore_tier="bulk" # Can also be "standard" or "expedited"
if [[ $bucket == "" || $path == "" || $temp_dir == "" ]]; then
error "Usage: aws-restore-deep-glacier-folder <bucket-name> <path-in-bucket> <local-temp-dir> <optional: days available> <optional: restore tier>"
exit 1
fi
printf "Restoring ${BOLD}${GREEN}$bucket:$path${NORMAL} with local temp folder ${BOLD}${GREEN}$temp_dir${NORMAL}\n" # Get the days available.
if [[ $4 != "" ]]; then
days_available=$4
fi
# Get the restore tier.
if [[ $5 != "" ]]; then
restore_tier="$5"
fi
if ! grep -qiE '^(bulk|standard|expedited)$' <<<"$restore_tier"; then
error "Restore tier is invalid. Accepted values is \"bulk\", \"standard\" and \"expedited\""
exit 1
fi
# Normalize the tier; lowercase it then capitalize the first character.
restore_tier="${restore_tier,,}"
restore_tier="${restore_tier^}"
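# e.g. "EXPEDITED" -> "expedited" -> "Expedited". The restore-object API
# expects the capitalized form ("Bulk", "Standard" or "Expedited") for the
# Tier value.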
printf "Restoring ${BOLD}${YELLOW}$bucket:$path${NORMAL} for ${BOLD}${YELLOW}$days_available${NORMAL} days using the ${BOLD}${YELLOW}\"$restore_tier\"${NORMAL} restore tier.\nSaving the restoration script in ${BOLD}${YELLOW}$temp_dir${NORMAL}\n"
mkdir -p "$temp_dir"
pushd "$temp_dir" &>/dev/null
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix $path --query "Contents[?StorageClass=='DEEP_ARCHIVE']" --output text)" # .Key gives us just the object paths. If you want the other metadata then remove that from the query.
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text | tr '\t' '\n' | tr -d '\r')"
error=$?
if [[ ! $error -eq 0 ]]; then
error "Error: failed to run the aws command. Aborting."
exit 1
fi
if [[ $items == "None" ]]; then
error "Didn't find any files. Check that your bucket name and path are correct."
exit 1
fi
mapfile -t lines_array <<< "$items"
item_count="${#lines_array[@]}"
# Generate the main script that will kick off the restoration.
printf "Number of items to restore: ${BOLD}${YELLOW}$item_count${NORMAL}\n"
printf "${BOLD}${RED}Create the restore script?\n> ${NORMAL}"
printf "Number of items to restore: ${BOLD}${YELLOW}$num_items${NORMAL}\n"
printf "${BOLD}${RED}Proceed?\n> ${NORMAL}"
read -e proceed
if [[ $proceed == "1" || $proceed == "y" || $proceed == "Y" || $proceed == "yes" || $proceed == "YES" ]]; then
echo "$output" > all_objects_list.txt echo "$items" > all_objects_list.txt
RUN_TEMPLATE=$(cat <<EOF
if which tput >/dev/null 2>&1; then
ncolors=\$(tput colors)
fi
if [ -t 1 ] && [ -n "\$ncolors" ] && [ "\$ncolors" -ge 8 ]; then
RED="\$(tput setaf 1)"
GREEN="\$(tput setaf 2)"
YELLOW="\$(tput setaf 3)"
BLUE="\$(tput setaf 4)"
MAGENTA="\$(tput setaf 5)"
CYAN="\$(tput setaf 6)"
BOLD="\$(tput bold)"
NORMAL="\$(tput sgr0)"
else
RED=""
GREEN=""
YELLOW=""
BLUE=""
MAGENTA=""
CYAN=""
BOLD=""
NORMAL=""
fi
# Open an output file.
exec 3>>output.txt
fail_count=0
failed_filename="failed_keys_\$(printf '%%04x' \$((RANDOM * RANDOM))).txt"
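# Sleep briefly every sleep_every_n_requests restore calls so we don't hammer
# the S3 API with an uninterrupted burst of requests.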
before_sleep_count=0
sleep_every_n_requests=25
sleep_duration=0.2
printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n"
printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n" >&3
printf "\${BOLD}NOTE: Request failures will be saved to \${YELLOW}\$failed_filename\${NORMAL}\${BOLD} as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n"
printf "NOTE: Request failures will be saved to \$failed_filename as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n" >&3
index=1
while IFS= read -r key; do
printf "* [\$index/$item_count] \${BOLD}\$key\${NORMAL}\\\n"
printf "* [\$index/$item_count] \$key\\\n" >&3
err=\$(
aws s3api restore-object \\
--bucket $bucket \\
--key \\"\$key\\" \\
--restore-request '{\\"Days\\":$days_available,\\"GlacierJobParameters\\":{\\"Tier\\":\\"$restore_tier\\"}}' \\
2>&1 >/dev/null
)
index=\$((index + 1))
before_sleep_count=\$((before_sleep_count + 1))
# Strip tabs, carriage returns and newlines from the error message.
err="\${err//[$'\\\t\\\r\\\n']}"
if [[ \$err != "" ]]; then
if ! grep -qE 'RestoreAlreadyInProgress|ObjectAlreadyInActiveTierError' <<<"\$err"; then
printf "\${BOLD}\${RED}FAILED! \$err\${NORMAL}"
printf "FAILED! \$err\" >&3
# Save the failure to a file now in case the script exits prematurely.
fail_count=\$((fail_count + 1))
printf "%%s\\\n" "\$key" >> \$failed_filename
else
if grep -qE 'RestoreAlreadyInProgress' <<<"\$err"; then
printf "\${BOLD}\${YELLOW}SKIPPING! File restore is already in progress.\${NORMAL}"
printf "SKIPPING! File restore is already in progress." >&3
else
printf "\${BOLD}\${YELLOW}SKIPPING! File is already restored. You can now download it.\${NORMAL}"
printf "SKIPPING! File is already restored. You can now download it." >&3
fi
fi
else
printf "\${BOLD}\${GREEN}SUCCESS!\${NORMAL}"
printf "SUCCESS!" >&3
fi
printf "\\\n\\\n"
printf "\\\n\\\n" >&3
if [[ \$before_sleep_count -eq \$sleep_every_n_requests ]]; then
printf "SLEEPING...\\\n\\\n"
printf "SLEEPING...\\\n\\\n" >&3
sleep \$sleep_duration
before_sleep_count=0
fi
done < all_objects_list.txt
printf "\${BOLD}\${GREEN}Done!\${NORMAL}\\\n\\\n"
printf "Done!\\\n\\\n" >&3
if [[ \$fail_count -gt 0 ]]; then
printf "\${BOLD}\${RED}There were \$fail_count failures!\\\nSee \${NORMAL}\${BOLD}\$filename\${RED} for the list. You can replace the contents of \${NORMAL}\${BOLD}all_objects_list.txt\${RED} with the list of failures and re-run this script to process them.\${NORMAL}\\\n\\\n"
printf "There were \$fail_count failures!\\\nSee \$filename for the list. You can replace the contents of all_objects_list.txt with the list of failures and re-run this script to process them.\\\n\\\n" >&3
else
printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n"
printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n" >&3
fi
printf "(Note: the time it takes to restore an object can be found in the AWS docs - just look for the $restore_tier restore tier, which is what you used.\\\nOnce restored, download the files from the S3 site or better yet use RCloneBrowser.\\\n"
printf "You can check the status of a file using the aws-see-restore-status script)\\\n"
exec 3>&-
EOF
)
printf "$RUN_TEMPLATE" > run.sh
chmod +x run.sh
printf "${BOLD}You can now run ${GREEN}$temp_dir/run.sh${NORMAL}${BOLD} to start the restoration process.\n" printf "${BOLD}You can now run ${GREEN}$temp_dir/run.sh${NORMAL}${BOLD} to start the restoration process.\n"


@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Shows you the status of an object restore job.
#
# e.g. aws-see-restore-status my-deep-glacier-bucket object/path.png
#
# You know it's ready when ongoing-request is false and there's a date. If that field is null then the file isn't being restored.
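#
# For illustration, while the restore is still in progress the response looks
# roughly like this:
#
#     {
#         "Restore": "ongoing-request=\"true\"",
#         "StorageClass": "DEEP_ARCHIVE"
#     }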
#
# You'll need the aws cli tools. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
#
# If you see an error along the lines of "'charmap' codec can't encode
# character '\u200e' in position 42: character maps to <undefined>" then that
# means a filename has a Unicode codepoint and the dumb aws Python code is
# trying to read it using your system's locale, which is very likely not set to
# use the Windows UTF-8 beta feature. This is an ongoing issue in this tool
# that goes back to 2013!!! There's no way to fix it using environment
# variables, at least nothing worked for me. The fix provided by the devs is
# heavy handed: you change your system locale to use UTF-8... This has
# consequences though like breaking legacy apps that don't have Unicode support
# and I'm sure other weird things will happen, such as file corruption. Anyway,
# if you're getting this charmap error then I suggest changing your system
# locale, run this again, then switch back to your previous locale. If you
# don't get the canonical file name then you won't be able to restore it.
#
# You can enable the UTF-8 locale with:
#
# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
#
if which tput >/dev/null 2>&1; then
ncolors=$(tput colors)
fi
if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
RED="$(tput setaf 1)"
GREEN="$(tput setaf 2)"
YELLOW="$(tput setaf 3)"
BLUE="$(tput setaf 4)"
MAGENTA="$(tput setaf 5)"
CYAN="$(tput setaf 6)"
BOLD="$(tput bold)"
NORMAL="$(tput sgr0)"
else
RED=""
GREEN=""
YELLOW=""
BLUE=""
MAGENTA=""
CYAN=""
BOLD=""
NORMAL=""
fi
error() {
printf "${BOLD}${RED}$1${NORMAL}\n"
}
abort() {
error "\nAborting...\n"
exit 1
}
set -e
bucket="$1"
path="$2"
if [[ $bucket == "" || $path == "" ]]; then
error "Usage: aws-see-restore-status <bucket-name> <path-in-bucket>"
exit 1
fi
aws s3api head-object --bucket $bucket --key "$path" --query "{Restore:Restore, StorageClass:StorageClass}" --output json