Improve the aws deep glacier restore scripts

Michael Campagnaro 2025-12-29 22:39:57 -05:00
parent 0ecad79655
commit 56e4ad3386
3 changed files with 317 additions and 33 deletions


@@ -52,11 +52,6 @@ error() {
printf "${BOLD}${RED}$1${NORMAL}\n"
}
set -e
bucket="$1"
@@ -68,4 +63,24 @@ if [[ $bucket == "" || $path == "" || $output_file == "" ]]; then
exit 1
fi
# .Key gives us just the object paths. If you want the other metadata then remove that from the query.
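# With --output text the matching keys come back tab-separated on a single
# line, which is why they get split onto separate lines below.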
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text | tr '\t' '\n' | tr -d '\r')"
error=$?
if [[ ! $error -eq 0 ]]; then
error "Error: failed to run the aws command. Aborting."
exit 1
fi
if [[ $items == "None" ]]; then
error "Didn't find any files. Check that your bucket name and path is correct."
exit 1
fi
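# Split the newline-separated list into an array so the items can be counted.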
mapfile -t lines_array <<< "$items"
item_count="${#lines_array[@]}"
echo "$items" > "$output_file"
printf "Number of items: ${BOLD}${YELLOW}$item_count${NORMAL}\n"
printf "Wrote file list to ${BOLD}${YELLOW}$output_file${NORMAL}\n"


@@ -1,28 +1,44 @@
#!/usr/bin/env bash
# Restores all objects recursively from a specific bucket path. If you want to
# restore objects from an rclone crypt (encrypted remote), then you'll need to
# do some manual steps first. See the `# Rclone Crypt` section for details.
#
# You can set how long restored files are available for download and the AWS
# retrieval tier. The defaults are 7 days and the bulk tier respectively.
#
# Available tiers: bulk, standard, and expedited.
#
# Bulk retrievals are the lowest-cost retrieval option when restoring objects
# from S3 Glacier Deep Archive. They typically finish within 48 hours for
# objects stored in the S3 Glacier Deep Archive storage class or S3
# Intelligent-Tiering Deep Archive tier.
#
# If you need faster access then use the `expedited` or `standard` tiers.
#
# Example usage:
#
# aws-restore-deep-glacier-folder my-deep-glacier-bucket path/to/images restored_images 14 expedited
#
# This will create a run.sh script in a folder called "restored_images". Run
# that to restore all files inside the `path/to/images` folder from the
# my-deep-glacier bucket. Restored objects will be available for 14 days and
# retrieved using the expedited tier.
#
# After you run the generated script, you have to wait for AWS to make the
# files available for download. You can check the status of a file with:
#
# aws s3api head-object --bucket my-deep-glacier-bucket --key "path/to/images/photo1.jpg" --query "{Restore:Restore, StorageClass:StorageClass}"
#
# (obviously change the bucket and path to suit your needs).
#
# Or use the aws-see-restore-status script.
# You know it's ready when ongoing-request is false and there's a date. If that
# field is null then the file isn't being restored.
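#
# For illustration, a restored object's response looks roughly like this (the
# expiry-date is just an example):
#
#     {
#         "Restore": "ongoing-request=\"false\", expiry-date=\"Fri, 10 Jan 2025 00:00:00 GMT\"",
#         "StorageClass": "DEEP_ARCHIVE"
#     }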
#
# Once the files are restored you can download them on the S3 website or better
# yet use RcloneBrowser. I'm sure there's a way to do it over the CLI too, I
# just haven't checked.
#
# You'll need the aws cli tools for this script. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
# Once installed, open a new shell and verify that you can run the `aws` command.
@@ -44,6 +60,67 @@
# You can enable the UTF-8 locale with:
#
# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
##########################
# Rclone Crypt
##########################
#
# To restore an rclone crypt, you need to first find the encrypted name that
# maps to the parent folder or the file you want to restore. To do this you
# need to use rclone. There are two ways to go about this.
#
# 1. The simple way is to use `cryptdecode` to convert your object path to its
# encrypted form.
#
# For example, say you have an rclone crypt called `s3-deep-glacier-encrypted`
# that is stored in S3 at `my-deep-glacier-bucket:encrypted/`. You have a folder
# called `dev/fonts` that you want to restore. To get its encrypted path, run
# the following command:
#
# rclone cryptdecode --reverse s3-deep-glacier-encrypted: dev/fonts
#
# This will give you the encrypted path, e.g. "44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik"
#
# You can now restore this by running:
#
# aws-restore-deep-glacier-folder my-deep-glacier-bucket 44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik restore_dev_fonts
#
# You should be able to simply download the dev/fonts folder after it's
# restored. The easiest way is using RcloneBrowser because it'll decrypt them
# for you. Alternatively you can download the encrypted files using whatever
# method you want and then decrypt them locally with rclone.
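#
# For example (hypothetical remote name and paths): if you downloaded the
# encrypted files to `restored_files/`, you could point a new crypt remote
# called `local-decrypt` at that folder (configured with the same passwords
# as the S3 crypt) and then copy the decrypted contents out of it:
#
# rclone copy local-decrypt: restored_files_decrypted/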
#
# 2. You can also get the encrypted names by enabling the 'show_mapping' option
# in the rclone remote config. This will log the encrypted names of folders and
# files with the original name in the same log line. This makes it easy to
# parse the output.
#
# To enable the option, edit your rclone config, edit the remote you want to
# restore from, edit the advanced config and set `show_mapping` to true.
#
# Now you can list the directories and files with rclone and get the mapping
# output on stderr. e.g. let's capture all folders and files in a txt file:
#
# rclone lsf s3-deep-glacier-encrypted: -R &> keys.txt
#
# If your rclone config has password protection then you'll be prompted for it,
# but you won't see the prompt since it's being redirected to the file. Just
# paste the password and hit enter.
#
# Now you have a listing of all objects and the encrypted keys that they map
# to. If you want to scope the output to a specific path in the crypt then add
# it after the remote name, e.g. `s3-deep-glacier-encrypted:dev/fonts`
#
# If you scope it like that then be aware that the output won't contain the
# mapping for the parent path, i.e. `dev/fonts`, but you can get that using
# `cryptdecode` (see above) or by listing the parent parts non-recursively
# with `lsd`, e.g.
#
# // First call will include the dev/ key
# rclone lsd s3-deep-glacier-encrypted:
#
# // Second call has the fonts key
# rclone lsd s3-deep-glacier-encrypted:dev
#
if which tput >/dev/null 2>&1; then
@@ -73,57 +150,177 @@ error() {
printf "${BOLD}${RED}$1${NORMAL}\n"
}
set -e
bucket="$1"
path="$2"
temp_dir="$3"
number_of_objects_per_file=100
days_available=7
restore_tier="Bulk" # Can also be "Standard" or "Expedited" restore_tier="bulk" # Can also be "standard" or "expedited"
if [[ $bucket == "" || $path == "" || $temp_dir == "" ]]; then
error "Usage: aws-restore-deep-glacier-folder <bucket-name> <path-in-bucket> <local-temp-dir> <optional: days available> <optional: restore tier>"
exit 1
fi
printf "Restoring ${BOLD}${GREEN}$bucket:$path${NORMAL} with local temp folder ${BOLD}${GREEN}$temp_dir${NORMAL}\n" # Get the days available.
if [[ $4 != "" ]]; then
days_available=$4
fi
# Get the restore tier.
if [[ $5 != "" ]]; then
restore_tier="$5"
fi
if ! grep -qiE '^(bulk|standard|expedited)$' <<<"$restore_tier"; then
error "Restore tier is invalid. Accepted values is \"bulk\", \"standard\" and \"expedited\""
exit 1
fi
# Normalize the tier; lowercase it then capitalize the first character.
restore_tier="${restore_tier,,}"
restore_tier="${restore_tier^}"
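# e.g. "EXPEDITED" -> "expedited" -> "Expedited". The restore-object API
# expects the capitalized form ("Bulk", "Standard" or "Expedited") for the
# Tier value.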
printf "Restoring ${BOLD}${YELLOW}$bucket:$path${NORMAL} for ${BOLD}${YELLOW}$days_available${NORMAL} days using the ${BOLD}${YELLOW}\"$restore_tier\"${NORMAL} restore tier.\nSaving the restoration script in ${BOLD}${YELLOW}$temp_dir${NORMAL}\n"
mkdir -p "$temp_dir"
pushd "$temp_dir" &>/dev/null
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix $path --query "Contents[?StorageClass=='DEEP_ARCHIVE']" --output text)" # .Key gives us just the object paths. If you want the other metadata then remove that from the query.
items="$(aws s3api list-objects-v2 --bucket $bucket --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text | tr '\t' '\n' | tr -d '\r')"
error=$?
if [[ ! $error -eq 0 ]]; then
error "Error: failed to run the aws command. Aborting."
exit 1
fi
if [[ $items == "None" ]]; then
error "Didn't find any files. Check that your bucket name and path are correct."
exit 1
fi
mapfile -t lines_array <<< "$items"
item_count="${#lines_array[@]}"
# Generate the main script that will kick off the restoration.
printf "Number of items to restore: ${BOLD}${YELLOW}$item_count${NORMAL}\n"
printf "${BOLD}${RED}Create the restore script?\n> ${NORMAL}"
printf "Number of items to restore: ${BOLD}${YELLOW}$num_items${NORMAL}\n"
printf "${BOLD}${RED}Proceed?\n> ${NORMAL}"
read -e proceed
if [[ $proceed == "1" || $proceed == "y" || $proceed == "Y" || $proceed == "yes" || $proceed == "YES" ]]; then
echo "$output" > all_objects_list.txt echo "$items" > all_objects_list.txt
RUN_TEMPLATE=$(cat <<EOF
if which tput >/dev/null 2>&1; then
ncolors=\$(tput colors)
fi
if [ -t 1 ] && [ -n "\$ncolors" ] && [ "\$ncolors" -ge 8 ]; then
RED="\$(tput setaf 1)"
GREEN="\$(tput setaf 2)"
YELLOW="\$(tput setaf 3)"
BLUE="\$(tput setaf 4)"
MAGENTA="\$(tput setaf 5)"
CYAN="\$(tput setaf 6)"
BOLD="\$(tput bold)"
NORMAL="\$(tput sgr0)"
else
RED=""
GREEN=""
YELLOW=""
BLUE=""
MAGENTA=""
CYAN=""
BOLD=""
NORMAL=""
fi
# Open an output file.
exec 3>>output.txt
fail_count=0
failed_filename="failed_keys_\$(printf '%%04x' \$((RANDOM * RANDOM))).txt"
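# Sleep briefly every sleep_every_n_requests restore calls so we don't hammer
# the S3 API with an uninterrupted burst of requests.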
before_sleep_count=0
sleep_every_n_requests=25
sleep_duration=0.2
printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n"
printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n" >&3
printf "\${BOLD}NOTE: Request failures will be saved to \${YELLOW}\$failed_filename\${NORMAL}\${BOLD} as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n"
printf "NOTE: Request failures will be saved to \$failed_filename as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n" >&3
index=1
while IFS= read -r key; do
printf "* [\$index/$item_count] \${BOLD}\$key\${NORMAL}\\\n"
printf "* [\$index/$item_count] \$key\\\n" >&3
err=\$(
aws s3api restore-object \\
--bucket $bucket \\
--key \\"\$key\\" \\
--restore-request '{\\"Days\\":$days_available,\\"GlacierJobParameters\\":{\\"Tier\\":\\"$restore_tier\\"}}' \\
2>&1 >/dev/null
)
index=\$((index + 1))
before_sleep_count=\$((before_sleep_count + 1))
# Strip tabs, carriage returns and newlines from the error message.
err="\${err//[$'\\\t\\\r\\\n']}"
if [[ \$err != "" ]]; then
if ! grep -qE 'RestoreAlreadyInProgress|ObjectAlreadyInActiveTierError' <<<"\$err"; then
printf "\${BOLD}\${RED}FAILED! \$err\${NORMAL}"
printf "FAILED! \$err\" >&3
# Save the failure to a file now in case the script exits prematurely.
fail_count=\$((fail_count + 1))
printf "%%s\\\n" "\$key" >> \$failed_filename
else
if grep -qE 'RestoreAlreadyInProgress' <<<"\$err"; then
printf "\${BOLD}\${YELLOW}SKIPPING! File restore is already in progress.\${NORMAL}"
printf "SKIPPING! File restore is already in progress." >&3
else
printf "\${BOLD}\${YELLOW}SKIPPING! File is already restored. You can now download it.\${NORMAL}"
printf "SKIPPING! File is already restored. You can now download it." >&3
fi
fi
else
printf "\${BOLD}\${GREEN}SUCCESS!\${NORMAL}"
printf "SUCCESS!" >&3
fi
printf "\\\n\\\n"
printf "\\\n\\\n" >&3
if [[ \$before_sleep_count -eq \$sleep_every_n_requests ]]; then
printf "SLEEPING...\\\n\\\n"
printf "SLEEPING...\\\n\\\n" >&3
sleep \$sleep_duration
before_sleep_count=0
fi
done < all_objects_list.txt
printf "\${BOLD}\${GREEN}Done!\${NORMAL}\\\n\\\n"
printf "Done!\\\n\\\n" >&3
if [[ \$fail_count -gt 0 ]]; then
printf "\${BOLD}\${RED}There were \$fail_count failures!\\\nSee \${NORMAL}\${BOLD}\$filename\${RED} for the list. You can replace the contents of \${NORMAL}\${BOLD}all_objects_list.txt\${RED} with the list of failures and re-run this script to process them.\${NORMAL}\\\n\\\n"
printf "There were \$fail_count failures!\\\nSee \$filename for the list. You can replace the contents of all_objects_list.txt with the list of failures and re-run this script to process them.\\\n\\\n" >&3
else
printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n"
printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n" >&3
fi
printf "(Note: the time it takes to restore an object can be found in the AWS docs - just look for the $restore_tier restore tier, which is what you used.\\\nOnce restored, download the files from the S3 site or better yet use RCloneBrowser.\\\n"
printf "You can check the status of a file using the aws-see-restore-status script)\\\n"
exec 3>&-
EOF
)
printf "$RUN_TEMPLATE" > run.sh
chmod +x run.sh
printf "${BOLD}You can now run ${GREEN}$temp_dir/run.sh${NORMAL}${BOLD} to start the restoration process.\n" printf "${BOLD}You can now run ${GREEN}$temp_dir/run.sh${NORMAL}${BOLD} to start the restoration process.\n"


@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Shows you the status of an object restore job.
#
# e.g. aws-see-restore-status my-deep-glacier-bucket object/path.png
#
# You know it's ready when ongoing-request is false and there's a date. If that field is null then the file isn't being restored.
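#
# For illustration, while the restore is still in progress the response looks
# roughly like this:
#
#     {
#         "Restore": "ongoing-request=\"true\"",
#         "StorageClass": "DEEP_ARCHIVE"
#     }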
#
# You'll need the aws cli tools. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
#
# If you see an error along the lines of "'charmap' codec can't encode
# character '\u200e' in position 42: character maps to <undefined>" then that
# means a filename has a Unicode codepoint and the dumb aws Python code is
# trying to read it using your system's locale, which is very likely not set to
# use the Windows UTF-8 beta feature. This is an ongoing issue in this tool
# that goes back to 2013!!! There's no way to fix it using environment
# variables, at least nothing worked for me. The fix provided by the devs is
# heavy handed: you change your system locale to use UTF-8... This has
# consequences though like breaking legacy apps that don't have Unicode support
# and I'm sure other weird things will happen, such as file corruption. Anyway,
# if you're getting this charmap error then I suggest changing your system
# locale, run this again, then switch back to your previous locale. If you
# don't get the canonical file name then you won't be able to restore it.
#
# You can enable the UTF-8 locale with:
#
# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
#
if which tput >/dev/null 2>&1; then
ncolors=$(tput colors)
fi
if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
RED="$(tput setaf 1)"
GREEN="$(tput setaf 2)"
YELLOW="$(tput setaf 3)"
BLUE="$(tput setaf 4)"
MAGENTA="$(tput setaf 5)"
CYAN="$(tput setaf 6)"
BOLD="$(tput bold)"
NORMAL="$(tput sgr0)"
else
RED=""
GREEN=""
YELLOW=""
BLUE=""
MAGENTA=""
CYAN=""
BOLD=""
NORMAL=""
fi
error() {
printf "${BOLD}${RED}$1${NORMAL}\n"
}
abort() {
error "\nAborting...\n"
exit 1
}
set -e
bucket="$1"
path="$2"
if [[ $bucket == "" || $path == "" ]]; then
error "Usage: aws-see-restore-status <bucket-name> <path-in-bucket>"
exit 1
fi
aws s3api head-object --bucket $bucket --key "$path" --query "{Restore:Restore, StorageClass:StorageClass}" --output json