diff --git a/aliases b/aliases index 2941683..5503ce7 100644 --- a/aliases +++ b/aliases @@ -278,6 +278,8 @@ custom_grep() { local include_list=("$@") local include_arg="" if [[ $include_list != "" ]]; then + # We're looping like this instead of for var in "$@", because that way of looping is affecting + # my shell environment. Very strange! for i in "${include_list[@]}"; do include_arg+="--include=\*${i} " done @@ -511,18 +513,8 @@ download_twitch_chat() { fi } -# Download Twitch videos, both VODs and live streams. Pass a Twitch account URL to download a live stream. -# The live stream filename will not contain the stream title, so you'll need to modify it afterwards. -# -# If you want to download subcriber-only vids then first extract your Twitch -# cookies to a file (can use cookies.txt add-on from Lennon Hill) and then pass it as an option, -# using the full path to the cookies file, e.g. -# `tw-1080p60 --cookies /c//twitch_cookies.txt` -# -# To extract a portion of a video, you have to first download the entire file and then use the -# `trim-video` or `compress-video-and-trim` scripts. -# -download_twitch_vid() { +# Copy pasta of download_twitch_vid with a final pass to transcribe the audio using whisper.cpp +download_twitch_vid_and_transcribe() { local format="$1" local shortname="$2" local compress="$3" @@ -533,7 +525,7 @@ download_twitch_vid() { if [[ $url == "" ]]; then error "Usage: $0 " - return + exit 1 fi # We use yt-dlp to get the filename and then use streamlink to download it (the latter is a lot faster). @@ -569,7 +561,7 @@ download_twitch_vid() { if [[ $make_folder == "1" ]]; then make_vid_dir_and_cd_into $url "" $opts if [[ $? -ne 0 ]]; then - return + exit 1 fi fi @@ -607,6 +599,113 @@ download_twitch_vid() { fi else error "Error: Failed to download '$url'" + exit 1 + fi + + transcribe-video "$filename" jon base small + + if [[ $make_folder == "1" ]]; then + cd .. + fi +} + +# Download Twitch videos, both VODs and live streams. 
Pass a Twitch account URL to download a live stream. +# The live stream filename will not contain the stream title, so you'll need to modify it afterwards. +# +# If you want to download subscriber-only vids then first extract your Twitch +# cookies to a file (can use cookies.txt add-on from Lennon Hill) and then pass it as an option, +# using the full path to the cookies file, e.g. +# `tw-1080p60 --cookies /c//twitch_cookies.txt` +# +# To extract a portion of a video, you have to first download the entire file and then use the +# `trim-video` or `compress-video-and-trim` scripts. +# +download_twitch_vid() { + local format="$1" + local shortname="$2" + local compress="$3" + local make_folder="$4" + local url="$5" + shift 5 + local opts="$@" + + if [[ $url == "" ]]; then + error "Usage: $0 " + exit 1 + fi + + # We use yt-dlp to get the filename and then use streamlink to download it (the latter is a lot faster). + # It's a two step process because streamlink cannot pass the formatted filename to ffmpeg. + # We fall back to yt-dlp when it's a subscriber VOD because we don't have an easy way to access it with streamlink. + + local subscriber_vod=0 + local split_opts=($opts) + if [[ ${split_opts[0]} == "--cookies" ]]; then + subscriber_vod=1 + printf "${BOLD}Subscriber VOD. Will use yt-dlp to download.${NORMAL}\n" + fi + + if [[ $compress -eq 0 ]]; then + printf "${BOLD}Downloading Twitch vid with no compression.${NORMAL}\n" + else + printf "${BOLD}Downloading Twitch vid with compression.${NORMAL}\n" + fi + + local yt_dlp_format="" + local streamlink_format="" + + if [[ $format == "" ]]; then + # Twitch only supplies pre-merged mp4s so we can ask for the best format and not worry about anything else. + printf "${BOLD}No format given; using best available.${NORMAL}\n" + yt_dlp_format="b" + streamlink_format="best" + else + yt_dlp_format="$format" + streamlink_format="$format" + fi + + if [[ $make_folder == "1" ]]; then + make_vid_dir_and_cd_into $url "" $opts + if [[ $? 
-ne 0 ]]; then + exit 1 + fi + fi + + if [[ $shortname -eq 0 ]]; then + local name_format="%(upload_date>%Y-%m-%d)s-%(title)s-tw-%(id)s" + else + local name_format="%(upload_date>%Y-%m-%d)s-shortname-tw-%(id)s" + fi + + # Download Twitch chat transcript + actually_download_twitch_chat $url "$(yt-dlp.exe --get-filename -o "$name_format" $opts $url)" + + # Get the video filename. + local filename=$(yt-dlp.exe --get-filename -o "$name_format.%(ext)s" $opts $url) + + # Download + if [[ $subscriber_vod -eq 0 ]]; then + local cmd="streamlink.exe --twitch-low-latency --twitch-disable-ads --twitch-disable-hosting --force --force-progress $opts $url $streamlink_format -O | ffmpeg -i pipe:0 -c copy \"$filename\"" + else + local cmd="yt-dlp.exe -f $yt_dlp_format -o \"$filename\" $opts $url" + fi + + eval $cmd # Need to eval in order to preserve the quotes wrapping the filename format string. + + error=$? + if [[ $error -eq 0 ]]; then + if [[ $compress -eq 1 ]]; then + local temp_name="temp_${RANDOM}" + # 0=cpu, 1=gpu + compress-video "$filename" "$temp_name" 0 + extension="${filename##*.}" + mv "$filename" "orig_$filename" + mv $temp_name.$extension "$filename" + printf "${BOLD}Make sure to delete the original video file${NORMAL}\n" + fi + else + error "Error: Failed to download '$url'" + exit 1 fi if [[ $make_folder == "1" ]]; then @@ -815,6 +914,7 @@ alias yt-and-hflip='download_youtube_vid_and_hflip "137+140"' # 1080p # Twitch Vid DL alias tw='download_twitch_vid "" $SHORTNAME_OFF $COMPRESSION_OFF' +alias twt='download_twitch_vid_and_transcribe "" $SHORTNAME_OFF $COMPRESSION_OFF' alias tw-compressed='download_twitch_vid "" $SHORTNAME_OFF $COMPRESSION_ON' alias tw-shortname='download_twitch_vid "" $SHORTNAME_ON $COMPRESSION_OFF' alias tw-shortname-compressed='download_twitch_vid "" $SHORTNAME_ON $COMPRESSION_ON' @@ -856,7 +956,7 @@ alias vimeo-compressed='download_vimeo_vid "Original" $SHORTNAME_OFF $COMPRESSIO alias ig-download-and-hflip='download_instagram_vid_and_hflip ' 
# Twitter Vid DL -alias twt='download_twitter_vid "" ' +alias twitter='download_twitter_vid "" ' # Misc alias download-mp4='download_mp4' diff --git a/bin/transcribe-audio b/bin/transcribe-audio index ad9f92d..20fa6fc 100644 --- a/bin/transcribe-audio +++ b/bin/transcribe-audio @@ -23,6 +23,23 @@ else NORMAL="" fi +# Will return a symlink path in its expanded form. If the path's root is the +# home directory symbol "~" then it'll be replaced by the full home path. +expand_path() { + local ret="$1" + + IFS="/" read -ra parts <<< "$ret" + if [[ "${parts[0]}" == "~" ]]; then + ret="$HOME" + for ((i=1; i < ${#parts[@]}; i++)) + do + ret="$ret/${parts[$i]}" + done + fi + ret=$(readlink -m "$ret") + echo $ret +} + input_wav="$1" output_name_without_ext="$2" model="$3" @@ -49,6 +66,14 @@ output_name="$output_name_without_ext.${model}" printf "\n${YELLOW}${BOLD}Transcribing $input_wav | model: $model | threads: $threads | output: $output_name ${NORMAL}\n" -whisper.exe --threads ${threads} -m $JELLYPIXEL_OPENSOURCE_DEV/whisper.cpp/models/ggml-${model}.en.bin -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors +whisper_fullname="$(expand_path $(which whisper.exe))" +whisper_path="$(dirname $whisper_fullname)" +models_path="$whisper_path/models" -printf "${GREEN}${BOLD}Done transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n" +whisper.exe --threads ${threads} -m "$models_path/ggml-${model}.en.bin" -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors +error=$? 
+if [[ error -eq 0 ]]; then + printf "${GREEN}${BOLD}Done transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n" +else + printf "${GREEN}${BOLD}Error while transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n" +fi diff --git a/bin/transcribe-video-with-model b/bin/transcribe-video similarity index 73% rename from bin/transcribe-video-with-model rename to bin/transcribe-video index f61f583..e426b2d 100644 --- a/bin/transcribe-video-with-model +++ b/bin/transcribe-video @@ -25,11 +25,11 @@ fi input_video="$1" output_name_without_ext="$2" -model="$3" -threads=$4 +shift 2 +models="$@" -if [[ $input_video == "" || $output_name_without_ext == "" || $model == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" +if [[ $input_video == "" || $output_name_without_ext == "" || $models == "" ]]; then + printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" exit 1 fi @@ -45,11 +45,13 @@ fi extract-16bit-wav-from-video "$input_video" "$wav_name" if [[ $? == 1 ]]; then exit 1; fi -transcribe-audio "$wav_name" "$output_name_without_ext" "$model" $threads -if [[ $? == 1 ]]; then - printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}" - exit 1 -fi +for model in "$@"; do + transcribe-audio "$wav_name" "$output_name_without_ext" "${model}" + if [[ $? 
== 1 ]]; then + printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}" + exit 1 + fi +done rm "$wav_name" diff --git a/bin/transcribe-video-base b/bin/transcribe-video-base index 3f225c3..11e283c 100644 --- a/bin/transcribe-video-base +++ b/bin/transcribe-video-base @@ -1,4 +1,2 @@ #!/usr/bin/env bash - -transcribe-video-with-model "$1" "$2" "base" "$3" - +transcribe-video "$1" "$2" "base" diff --git a/bin/transcribe-video-batch b/bin/transcribe-video-batch deleted file mode 100644 index f2927a6..0000000 --- a/bin/transcribe-video-batch +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -# I was originally just using three calls to transcribe-video-with-model but I want to reuse the same audio input, so this -# is mostly a copy pasta of that file. - -if which tput >/dev/null 2>&1; then - ncolors=$(tput colors) -fi -if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then - RED="$(tput setaf 1)" - GREEN="$(tput setaf 2)" - YELLOW="$(tput setaf 3)" - BLUE="$(tput setaf 4)" - MAGENTA="$(tput setaf 5)" - CYAN="$(tput setaf 6)" - BOLD="$(tput bold)" - NORMAL="$(tput sgr0)" -else - RED="" - GREEN="" - YELLOW="" - BLUE="" - MAGENTA="" - CYAN="" - BOLD="" - NORMAL="" -fi - -input_video="$1" -output_name_without_ext="$2" -threads=$3 - -if [[ $input_video == "" || $output_name_without_ext == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" - exit 1 -fi - -wav_name="${output_name_without_ext}_audio_${RANDOM}.wav" - -extract-16bit-wav-from-video "$input_video" "$wav_name" -if [[ $? == 1 ]]; then exit 1; fi - -# -# Tiny model first to have something quickly banged out. base and small have similar output quality. Neither are perfect. -# - -transcribe-audio "$wav_name" "$output_name_without_ext" "tiny" $threads -if [[ $? 
== 1 ]]; then - printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}" - exit 1 -fi - -transcribe-audio "$wav_name" "$output_name_without_ext" "base" $threads -if [[ $? == 1 ]]; then - printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}" - exit 1 -fi - -transcribe-audio "$wav_name" "$output_name_without_ext" "small" $threads -if [[ $? == 1 ]]; then - printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}" - exit 1 -fi - -rm "$wav_name" diff --git a/bin/transcribe-video-medium b/bin/transcribe-video-medium index 7554053..c6fe616 100644 --- a/bin/transcribe-video-medium +++ b/bin/transcribe-video-medium @@ -1,4 +1,2 @@ #!/usr/bin/env bash - -transcribe-video-with-model "$1" "$2" "medium" "$3" - +transcribe-video "$1" "$2" "medium" diff --git a/bin/transcribe-video-small b/bin/transcribe-video-small index 5d040da..4f434a8 100644 --- a/bin/transcribe-video-small +++ b/bin/transcribe-video-small @@ -1,4 +1,2 @@ #!/usr/bin/env bash - -transcribe-video-with-model "$1" "$2" "small" "$3" - +transcribe-video "$1" "$2" "small" diff --git a/bin/transcribe-video-tiny b/bin/transcribe-video-tiny index 550ad72..8a1a1ac 100644 --- a/bin/transcribe-video-tiny +++ b/bin/transcribe-video-tiny @@ -1,4 +1,2 @@ #!/usr/bin/env bash - -transcribe-video-with-model "$1" "$2" "tiny" "$3" - +transcribe-video "$1" "$2" "tiny"