diff --git a/dotfiles/bin/aws-list-deep-glacier-files b/dotfiles/bin/aws-list-deep-glacier-files
index 0b8b8c7..c64b526 100644
--- a/dotfiles/bin/aws-list-deep-glacier-files
+++ b/dotfiles/bin/aws-list-deep-glacier-files
@@ -52,11 +52,6 @@ error() {
 	printf "${BOLD}${RED}$1${NORMAL}\n"
 }
 
-abort() {
-	error "\nAborting...\n"
-	exit 1
-}
-
 set -e
 
 bucket="$1"
@@ -68,4 +63,24 @@ if [[ $bucket == "" || $path == "" || $output_file == "" ]]; then
 	exit 1
 fi
 
-aws s3api list-objects-v2 --bucket $bucket --prefix $path --query "Contents[?StorageClass=='DEEP_ARCHIVE']" --output text | LC_ALL=C awk '{print substr($0, index($0, $2))}' | awk '{NF-=3};3' > "$output_file"
+# .Key gives us just the object paths. If you want the other metadata then remove .Key from the query.
+items="$(aws s3api list-objects-v2 --bucket "$bucket" --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text)"
+
+error=$?
+if [[ ! $error -eq 0 ]]; then
+	error "Error: failed to run the aws command. Aborting."
+	exit 1
+fi
+
+# Convert the tab-separated keys to one key per line and strip any carriage returns.
+items="$(tr '\t' '\n' <<<"$items" | tr -d '\r')"
+if [[ $items == "None" ]]; then
+	error "Didn't find any files. Check that your bucket name and path are correct."
+	exit 1
+fi
+
+mapfile -t lines_array <<< "$items"
+item_count="${#lines_array[@]}"
+
+echo "$items" > "$output_file"
+
+printf "Number of items: ${BOLD}${YELLOW}$item_count${NORMAL}\n"
+printf "Wrote file list to ${BOLD}${YELLOW}$output_file${NORMAL}\n"
diff --git a/dotfiles/bin/aws-restore-deep-glacier-folder b/dotfiles/bin/aws-restore-deep-glacier-folder
index 3bf507e..8668ad5 100644
--- a/dotfiles/bin/aws-restore-deep-glacier-folder
+++ b/dotfiles/bin/aws-restore-deep-glacier-folder
@@ -1,28 +1,44 @@
 #!/usr/bin/env bash
+# Restores all objects recursively from a specific bucket path. If you want to
+# restore objects from an rclone crypt (encrypted remote), then you'll need to
+# do some manual steps first. See the `# Rclone Crypt` section for details.
 #
-# Restores all files/folders inside a particular bucket path for the next 7 days. This uses the bulk retrieval tier:
+# You can set how long restored files are available for download and the AWS
+# retrieval tier. The defaults are 7 days and the bulk tier, respectively.
+#
+# Available tiers: bulk, standard, and expedited.
 #
 # Bulk retrievals are the lowest-cost retrieval option when restoring objects
 # from S3 Glacier Deep Archive. They typically finish within 48 hours for
 # objects stored in the S3 Glacier Deep Archive storage class or S3
 # Intelligent-Tiering Deep Archive tier.
 #
-# If you need faster access then use the `Expedited` or `Standard` tiers.
+# If you need faster access then use the `expedited` or `standard` tiers.
 #
 # Example usage:
 #
-# aws-restore-deep-glacier-folder my-deep-glacier-bucket path/to/images restored_images
+# aws-restore-deep-glacier-folder my-deep-glacier-bucket path/to/images restored_images 14 expedited
 #
-# This will create a run.sh script in a folder called "restored_images". Run that to restore all files inside the `path/to/images` folder inside the my-deep-glacier bucket.
+# This will create a run.sh script in a folder called "restored_images". Run
+# that to restore all files inside the `path/to/images` folder from the
+# my-deep-glacier bucket. Restored objects will be available for 14 days and
+# retrieved using the expedited tier.
 #
-# After you run the generated script, you have to wait for AWS to make the files available for download. You can check the status of a file with:
+# After you run the generated script, you have to wait for AWS to make the
+# files available for download. You can check the status of a file with:
 #
-# aws s3api head-object --bucket my-deep-glacier --key path/to/images/photo1.jpg
+# aws s3api head-object --bucket my-deep-glacier-bucket --key "path/to/images/photo1.jpg" --query "{Restore:Restore, StorageClass:StorageClass}"
 #
 # (obviously change the bucket and path to suit your needs).
 #
-# Once the files are restored you can download them on the S3 website or better yet use RcloneBrowser. I'm sure there's also a way to do it over cli too, I just haven't checked.
+# Or use the aws-see-restore-status script.
+# You know it's ready when ongoing-request is false and there's a date. If that
+# field is null then the file isn't being restored.
+#
+# Once the files are restored you can download them on the S3 website or better
+# yet use RcloneBrowser. I'm sure there's also a way to do it over cli too, I
+# just haven't checked.
 #
 # You'll need the aws cli tools for this script. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
 # Once installed, open a new shell and verify that you can run the `aws` command.
@@ -44,6 +60,67 @@
 # You can enable the UTF-8 locale with:
 #
 # win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
+
+##########################
+# Rclone Crypt
+##########################
+#
+# To restore an rclone crypt, you need to first find the encrypted name that
+# maps to the parent folder or the file you want to restore. To do this you
+# need to use rclone. There are two ways to go about this.
+#
+# 1. The simple way is to use `cryptdecode` to convert your object path to its
+# encrypted form.
+#
+# For example, say you have an rclone crypt called `s3-deep-glacier-encrypted`
+# that is stored in S3 at `my-deep-glacier-bucket:encrypted/`. You have a folder
+# called `dev/fonts` that you want to restore. To get its encrypted path, run
+# the following command:
+#
+# rclone cryptdecode --reverse s3-deep-glacier-encrypted: dev/fonts
+#
+# This will give you the encrypted path, e.g. "44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik"
+#
+# You can now restore this by running:
+#
+# aws-restore-deep-glacier-folder my-deep-glacier-bucket 44ildo3grlk44jmfr96nb5r56o/oatuh75ej3l4re96nvq2qbj8ik restore_dev_fonts
+#
+# You should be able to simply download the dev/fonts folder after it's
+# restored. The easiest way is using RcloneBrowser because it'll decrypt the
+# files for you. Alternatively you can download the encrypted files using
+# whatever method you want and then decrypt them locally with rclone.
+#
+# 2. You can also get the encrypted names by enabling the 'show_mapping' option
+# in the rclone remote config. This will log the encrypted names of folders and
+# files with the original name in the same log line. This makes it easy to
+# parse the output.
+#
+# To enable the option, edit your rclone config: edit the remote you want to
+# restore from, go into the advanced config, and set `show_mapping` to true.
+#
+# Now you can list the directories and files with rclone and get the mapping
+# output on stderr. For example, let's capture all folders and files in a txt
+# file:
+#
+# rclone lsf s3-deep-glacier-encrypted: -R &> keys.txt
+#
+# If your rclone config has password protection then you'll be prompted for it,
+# but you won't see the prompt since it's being redirected to the file. Just
+# type the password and hit enter.
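+#
+# For example, to find the encrypted name that maps to a specific folder, you
+# can grep the captured output for its plain name (a minimal sketch; the exact
+# log line format depends on your rclone version, but both names end up on the
+# same line):
+#
+# grep "dev/fonts" keys.txt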
+#
+# Now you have a listing of all objects and the encrypted keys that they map
+# to. If you want to scope the output to a specific path in the crypt then add
+# it after the remote name, e.g. `s3-deep-glacier-encrypted:dev/fonts`
+#
+# If you scope it like that then be aware that the output won't contain the
+# mapping for the parent path, i.e. `dev/fonts`, but you can get that using
+# `cryptdecode` (see above) or by listing the parent parts non-recursively
+# with `lsd`, e.g.
+#
+# // First call will include the dev/ key
+# rclone lsd s3-deep-glacier-encrypted:
+#
+# // Second call has the fonts key
+# rclone lsd s3-deep-glacier-encrypted:dev
 #
 
 if which tput >/dev/null 2>&1; then
@@ -73,57 +150,177 @@ error() {
 	printf "${BOLD}${RED}$1${NORMAL}\n"
 }
 
-abort() {
-	error "\nAborting...\n"
-	exit 1
-}
-
 set -e
 
 bucket="$1"
 path="$2"
 temp_dir="$3"
-number_of_objects_per_file=100
 days_available=7
-restore_tier="Bulk" # Can also be "Standard" or "Expedited"
+restore_tier="bulk" # Can also be "standard" or "expedited"
 
 if [[ $bucket == "" || $path == "" || $temp_dir == "" ]]; then
-	error "Usage: aws-restore-deep-glacier-folder <bucket> <path> <temp_dir>"
+	error "Usage: aws-restore-deep-glacier-folder <bucket> <path> <temp_dir> [days_available] [restore_tier]"
 	exit 1
 fi
 
-printf "Restoring ${BOLD}${GREEN}$bucket:$path${NORMAL} with local temp folder ${BOLD}${GREEN}$temp_dir${NORMAL}\n"
+# Get the days available.
+if [[ $4 != "" ]]; then
+	days_available=$4
+fi
+
+# Get the restore tier.
+if [[ $5 != "" ]]; then
+	restore_tier="$5"
+fi
+if ! grep -qiE '^(bulk|standard|expedited)$' <<<"$restore_tier"; then
+	error "Restore tier is invalid. Accepted values are \"bulk\", \"standard\" and \"expedited\""
+	exit 1
+fi
+
+# Normalize the tier; lowercase it then capitalize the first character.
+restore_tier="${restore_tier,,}"
+restore_tier="${restore_tier^}"
+
+printf "Restoring ${BOLD}${YELLOW}$bucket:$path${NORMAL} for ${BOLD}${YELLOW}$days_available${NORMAL} days using the ${BOLD}${YELLOW}\"$restore_tier\"${NORMAL} restore tier.\nSaving the restoration script in ${BOLD}${YELLOW}$temp_dir${NORMAL}\n"
 
 mkdir -p "$temp_dir"
 pushd "$temp_dir" &>/dev/null
 
-items="$(aws s3api list-objects-v2 --bucket $bucket --prefix $path --query "Contents[?StorageClass=='DEEP_ARCHIVE']" --output text)"
+# .Key gives us just the object paths. If you want the other metadata then remove .Key from the query.
+items="$(aws s3api list-objects-v2 --bucket "$bucket" --prefix "$path" --query "Contents[?StorageClass=='DEEP_ARCHIVE'].Key" --output text)"
 
 error=$?
 if [[ ! $error -eq 0 ]]; then
 	error "Error: failed to run the aws command. Aborting."
 	exit 1
 fi
-
 if [[ $items == "None" ]]; then
 	error "Didn't find any files. Check that your bucket name and path is correct."
 	exit 1
 fi
 
-# Format the items list.
-output="$(echo "$items" | LC_ALL=C awk '{print substr($0, index($0, $2))}' | awk '{NF-=3};3')"
-mapfile -t lines_array <<< "$output"
-num_items="${#lines_array[@]}"
+# Convert the tab-separated keys to one key per line and strip any carriage returns.
+items="$(tr '\t' '\n' <<<"$items" | tr -d '\r')"
+
+mapfile -t lines_array <<< "$items"
+item_count="${#lines_array[@]}"
+
+# Generate the main script that will kick off the restoration.
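+# For each key in all_objects_list.txt, the generated run.sh issues one restore
+# request per object, roughly like this (a sketch; <key> stands for one line of
+# that file and the days/tier values come from the arguments handled above):
+#
+# aws s3api restore-object --bucket <bucket> --key "<key>" --restore-request '{"Days":7,"GlacierJobParameters":{"Tier":"Bulk"}}'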
+printf "Number of items to restore: ${BOLD}${YELLOW}$item_count${NORMAL}\n" +printf "${BOLD}${RED}Create the restore script?\n> ${NORMAL}" -printf "Number of items to restore: ${BOLD}${YELLOW}$num_items${NORMAL}\n" -printf "${BOLD}${RED}Proceed?\n> ${NORMAL}" read -e proceed if [[ $proceed == "1" || $proceed == "y" || $proceed == "Y" || $proceed == "yes" || $proceed == "YES" ]]; then - echo "$output" > all_objects_list.txt + echo "$items" > all_objects_list.txt + + RUN_TEMPLATE=$(cat </dev/null 2>&1; then + ncolors=\$(tput colors) +fi +if [ -t 1 ] && [ -n "\$ncolors" ] && [ "\$ncolors" -ge 8 ]; then + RED="\$(tput setaf 1)" + GREEN="\$(tput setaf 2)" + YELLOW="\$(tput setaf 3)" + BLUE="\$(tput setaf 4)" + MAGENTA="\$(tput setaf 5)" + CYAN="\$(tput setaf 6)" + BOLD="\$(tput bold)" + NORMAL="\$(tput sgr0)" +else + RED="" + GREEN="" + YELLOW="" + BLUE="" + MAGENTA="" + CYAN="" + BOLD="" + NORMAL="" +fi + +# Open an output file. +exec 3>>output.txt + +fail_count=0 +failed_filename="failed_keys_\$(printf '%%04x' \$((RANDOM * RANDOM))).txt" + +before_sleep_count=0 +sleep_every_n_requests=25 +sleep_duration=0.2 + +printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n" +printf "Files are being restored for $days_available days using the $restore_tier tier\\\n\\\n" >&3 + +printf "\${BOLD}NOTE: Request failures will be saved to \${YELLOW}\$failed_filename\${NORMAL}\${BOLD} as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n" +printf "NOTE: Request failures will be saved to \$failed_filename as they happen. If this script terminates prematurely then check this file for failures.\\\n\\\n" >&3 + +index=1 +while read key; do + printf "* [\$index/$item_count] \${BOLD}\$key\${NORMAL}\\\n" + printf "* [\$index/$item_count] \$key\\\n" >&3 + err=\$( + aws s3api restore-object \\ + --bucket mcampagnaro-deep-glacier \\ + --key \\"\$key\\" \\ + --restore-request '{\\"Days\\":$days_available,\\"GlacierJobParameters\\":{\\"Tier\\":\\"$restore_tier\\"}}' \\ + 2>&1 >/dev/null + ) + index=\$((index + 1)) + before_sleep_count=\$((before_sleep_count + 1)) + + # strip newlines + err="\${err//[$'\\\t\\\r\\\n']}" + + if [[ \$err != "" ]]; then + if ! grep -qE 'RestoreAlreadyInProgress|ObjectAlreadyInActiveTierError' <<<"\$err"; then + printf "\${BOLD}\${RED}FAILED! \$err\${NORMAL}" + printf "FAILED! \$err\" >&3 + + # Save the failure to a file now in case the script exits prematurely. + fail_count=\$((fail_count + 1)) + printf "%%s\\\n" "\$key" >> \$failed_filename + else + if grep -qE 'RestoreAlreadyInProgress' <<<"\$err"; then + printf "\${BOLD}\${YELLOW}SKIPPING! File restore is already in progress.\${NORMAL}" + printf "SKIPPING! File restore is already in progress." >&3 + else + printf "\${BOLD}\${YELLOW}SKIPPING! File is already restored. You can now download it.\${NORMAL}" + printf "SKIPPING! File is already restored. You can now download it." >&3 + fi + fi + else + printf "\${BOLD}\${GREEN}SUCCESS!\${NORMAL}" + printf "SUCCESS!" >&3 + fi + printf "\\\n\\\n" + printf "\\\n\\\n" >&3 + + if [[ \$before_sleep_count -eq sleep_every_n_requests ]]; then + printf "SLEEPING...\\\n\\\n" + printf "SLEEPING...\\\n\\\n" >&3 + sleep \$sleep_duration + before_sleep_count=0 + fi + +done < all_objects_list.txt + +printf "\${BOLD}\${GREEN}Done!\${NORMAL}\\\n\\\n" +printf "Done!\\\n\\\n" >&3 + +if [[ \$fail_count > 0 ]]; then + printf "\${BOLD}\${RED}There were \$fail_count failures!\\\nSee \${NORMAL}\${BOLD}\$filename\${RED} for the list. 
+    printf "There were \$fail_count failures!\\\nSee \$failed_filename for the list. You can replace the contents of all_objects_list.txt with the list of failures and re-run this script to process them.\\\n\\\n" >&3
+else
+    printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n"
+    printf "There were no failures. All the files are being restored. You can now delete this folder.\\\n\\\n" >&3
+fi
+
+printf "(Note: the time it takes to restore an object can be found in the AWS docs - just look for the $restore_tier restore tier, which is what you used.\\\nOnce restored, download the files from the S3 site or better yet use RcloneBrowser.\\\n"
+printf "You can check the status of a file using the aws-see-restore-status script)\\\n"
+
+exec 3>&-
+
+EOF
+)
+
+	printf "$RUN_TEMPLATE" > run.sh
 
-	# Generate the main script that will kick off the restoration.
-	printf "while read x; do\n printf \"aws s3api restore-object --restore-request '{\\\\\"Days\\\\\":$days_available,\\\\\"GlacierJobParameters\\\\\":{\\\\\"Tier\\\\\":\\\\\"$restore_tier\\\\\"}}' --bucket $bucket --key \\\\\"\$x\\\\\"\\\\n\"\n aws s3api restore-object --restore-request \"{\\\\\"Days\\\\\":$days_available,\\\\\"GlacierJobParameters\\\\\":{\\\\\"Tier\\\\\":\\\\\"$restore_tier\\\\\"}}\" --bucket $bucket --key \"\$x\"\ndone < all_objects_list.txt\nprintf \"\\\\nDone! You can now delete this folder.\\\\nYour files are currently being restored. The time it takes to restore can be found in the AWS docs - just look for the $restore_tier restore tier, which is what you used.\\\\nOnce restored, download the files from the S3 site or better yet use RCloneBrowser.\\\\n\"\n" > run.sh
 	chmod +x run.sh
 
 	printf "${BOLD}You can now run ${GREEN}$temp_dir/run.sh${NORMAL}${BOLD} to start the restoration process.\n"
diff --git a/dotfiles/bin/aws-see-restore-status b/dotfiles/bin/aws-see-restore-status
new file mode 100644
index 0000000..5059c33
--- /dev/null
+++ b/dotfiles/bin/aws-see-restore-status
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+# Shows you the status of an object restore job.
+#
+# e.g. aws-see-restore-status my-deep-glacier-bucket object/path.png
+#
+# You know it's ready when ongoing-request is false and there's a date. If that field is null then the file isn't being restored.
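+#
+# For reference, while a restore is still running the Restore field looks
+# roughly like this (illustrative values):
+#
+# ongoing-request="true"
+#
+# and once the object is ready to download it looks like:
+#
+# ongoing-request="false", expiry-date="Fri, 23 Dec 2022 00:00:00 GMT"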
+#
+# You'll need the aws cli tools. Download them from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
+#
+# If you see an error along the lines of "'charmap' codec can't encode
+# character '\u200e' in position 42: character maps to <undefined>" then that
+# means a filename has a Unicode codepoint and the dumb aws Python code is
+# trying to read it using your system's locale, which is very likely not set to
+# use the Windows UTF-8 beta feature. This is an ongoing issue in this tool
+# that goes back to 2013!!! There's no way to fix it using environment
+# variables, at least nothing worked for me. The fix provided by the devs is
+# heavy-handed: you change your system locale to use UTF-8... This has
+# consequences though, like breaking legacy apps that don't have Unicode
+# support, and I'm sure other weird things will happen, such as file
+# corruption. Anyway, if you're getting this charmap error then I suggest
+# changing your system locale, running this again, then switching back to your
+# previous locale. If you don't get the canonical file name then you won't be
+# able to restore it.
+#
+# You can enable the UTF-8 locale with:
+#
+# win+r -> intl.cpl -> Administrative tab -> Change system locale -> Beta: Use Unicode UTF-8 box.
+#
+
+if which tput >/dev/null 2>&1; then
+    ncolors=$(tput colors)
+fi
+if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
+    RED="$(tput setaf 1)"
+    GREEN="$(tput setaf 2)"
+    YELLOW="$(tput setaf 3)"
+    BLUE="$(tput setaf 4)"
+    MAGENTA="$(tput setaf 5)"
+    CYAN="$(tput setaf 6)"
+    BOLD="$(tput bold)"
+    NORMAL="$(tput sgr0)"
+else
+    RED=""
+    GREEN=""
+    YELLOW=""
+    BLUE=""
+    MAGENTA=""
+    CYAN=""
+    BOLD=""
+    NORMAL=""
+fi
+
+error() {
+    printf "${BOLD}${RED}$1${NORMAL}\n"
+}
+
+abort() {
+    error "\nAborting...\n"
+    exit 1
+}
+
+set -e
+
+bucket="$1"
+path="$2"
+
+if [[ $bucket == "" || $path == "" ]]; then
+    error "Usage: aws-see-restore-status <bucket> <object_path>"
+    exit 1
+fi
+
+aws s3api head-object --bucket "$bucket" --key "$path" --query "{Restore:Restore, StorageClass:StorageClass}" --output json
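+
+# Tip: once the restore has finished you should also be able to download the
+# object with the aws cli instead of the S3 website/RcloneBrowser, e.g. (an
+# untested sketch; s3 cp tends to skip objects whose storage class is still
+# GLACIER/DEEP_ARCHIVE unless you pass --force-glacier-transfer):
+#
+# aws s3 cp "s3://my-deep-glacier-bucket/object/path.png" . --force-glacier-transfer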