Improve transcribe api and add alias to transcribe twitch stream downloads
This commit is contained in:
parent
8ff9c338b5
commit
e5e8f309e5
130
aliases
130
aliases
|
@ -278,6 +278,8 @@ custom_grep() {
|
||||||
local include_list=("$@")
|
local include_list=("$@")
|
||||||
local include_arg=""
|
local include_arg=""
|
||||||
if [[ $include_list != "" ]]; then
|
if [[ $include_list != "" ]]; then
|
||||||
|
# We're looping like this instead of for var in "$@", because that way of looping is affecting
|
||||||
|
# my shell environment. Very strange!
|
||||||
for i in "${include_list[@]}"; do
|
for i in "${include_list[@]}"; do
|
||||||
include_arg+="--include=\*${i} "
|
include_arg+="--include=\*${i} "
|
||||||
done
|
done
|
||||||
|
@ -511,18 +513,8 @@ download_twitch_chat() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Download Twitch videos, both VODs and live streams. Pass a Twitch account URL to download a live stream.
|
# Copy pasta of download_twitch_vid with a final pass to transcribe the audio using whisper.cpp
|
||||||
# The live stream filename will not contain the stream title, so you'll need to modify it afterwards.
|
download_twitch_vid_and_transcribe() {
|
||||||
#
|
|
||||||
# If you want to download subscriber-only vids then first extract your Twitch
|
|
||||||
# cookies to a file (can use cookies.txt add-on from Lennon Hill) and then pass it as an option,
|
|
||||||
# using the full path to the cookies file, e.g.
|
|
||||||
# `tw-1080p60 <url> --cookies /c/<cookie_path>/twitch_cookies.txt`
|
|
||||||
#
|
|
||||||
# To extract a portion of a video, you have to first download the entire file and then use the
|
|
||||||
# `trim-video` or `compress-video-and-trim` scripts.
|
|
||||||
#
|
|
||||||
download_twitch_vid() {
|
|
||||||
local format="$1"
|
local format="$1"
|
||||||
local shortname="$2"
|
local shortname="$2"
|
||||||
local compress="$3"
|
local compress="$3"
|
||||||
|
@ -533,7 +525,7 @@ download_twitch_vid() {
|
||||||
|
|
||||||
if [[ $url == "" ]]; then
|
if [[ $url == "" ]]; then
|
||||||
error "Usage: $0 <make folder?> <url> <optional args>"
|
error "Usage: $0 <make folder?> <url> <optional args>"
|
||||||
return
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# We use yt-dlp to get the filename and then use streamlink to download it (the latter is a lot faster).
|
# We use yt-dlp to get the filename and then use streamlink to download it (the latter is a lot faster).
|
||||||
|
@ -569,7 +561,7 @@ download_twitch_vid() {
|
||||||
if [[ $make_folder == "1" ]]; then
|
if [[ $make_folder == "1" ]]; then
|
||||||
make_vid_dir_and_cd_into $url "" $opts
|
make_vid_dir_and_cd_into $url "" $opts
|
||||||
if [[ $? -ne 0 ]]; then
|
if [[ $? -ne 0 ]]; then
|
||||||
return
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -607,6 +599,113 @@ download_twitch_vid() {
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
error "Error: Failed to download '$url'"
|
error "Error: Failed to download '$url'"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
transcribe-video "$filename" jon base small
|
||||||
|
|
||||||
|
if [[ $make_folder == "1" ]]; then
|
||||||
|
cd ..
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Download Twitch videos, both VODs and live streams. Pass a Twitch account URL to download a live stream.
|
||||||
|
# The live stream filename will not contain the stream title, so you'll need to modify it afterwards.
|
||||||
|
#
|
||||||
|
# If you want to download subscriber-only vids then first extract your Twitch
|
||||||
|
# cookies to a file (can use cookies.txt add-on from Lennon Hill) and then pass it as an option,
|
||||||
|
# using the full path to the cookies file, e.g.
|
||||||
|
# `tw-1080p60 <url> --cookies /c/<cookie_path>/twitch_cookies.txt`
|
||||||
|
#
|
||||||
|
# To extract a portion of a video, you have to first download the entire file and then use the
|
||||||
|
# `trim-video` or `compress-video-and-trim` scripts.
|
||||||
|
#
|
||||||
|
download_twitch_vid() {
|
||||||
|
local format="$1"
|
||||||
|
local shortname="$2"
|
||||||
|
local compress="$3"
|
||||||
|
local make_folder="$4"
|
||||||
|
local url="$5"
|
||||||
|
shift 5
|
||||||
|
local opts="$@"
|
||||||
|
|
||||||
|
if [[ $url == "" ]]; then
|
||||||
|
error "Usage: $0 <make folder?> <url> <optional args>"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# We use yt-dlp to get the filename and then use streamlink to download it (the latter is a lot faster).
|
||||||
|
# It's a two step process because streamlink cannot pass the formatted filename to ffmpeg.
|
||||||
|
# We fallback to yt-dlp when it's a subscriber VOD because we don't have an easy way to access it with streamlink.
|
||||||
|
|
||||||
|
local subscriber_vod=0
|
||||||
|
local split_opts=($opts)
|
||||||
|
if [[ ${split_opts[0]} == "--cookies" ]]; then
|
||||||
|
subscriber_vod=1
|
||||||
|
printf "${BOLD}Subscriber VOD. Will use yt-dlp to download.${NORMAL}\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $compress -eq 0 ]]; then
|
||||||
|
printf "${BOLD}Downloading Twitch vid with no compression.${NORMAL}\n"
|
||||||
|
else
|
||||||
|
printf "${BOLD}Downloading Twitch vid with compression.${NORMAL}\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
local yt_dlp_format=""
|
||||||
|
local streamlink_format=""
|
||||||
|
|
||||||
|
if [[ $format == "" ]]; then
|
||||||
|
# Twitch only supplies pre-merged mp4s so we can ask for the best format and not worry about anything else.
|
||||||
|
printf "${BOLD}No format given; using best available.${NORMAL}\n"
|
||||||
|
yt_dlp_format="b"
|
||||||
|
streamlink_format="best"
|
||||||
|
else
|
||||||
|
yt_dlp_format="$format"
|
||||||
|
streamlink_format="$format"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $make_folder == "1" ]]; then
|
||||||
|
make_vid_dir_and_cd_into $url "" $opts
|
||||||
|
if [[ $? -ne 0 ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $shortname -eq 0 ]]; then
|
||||||
|
local name_format="%(upload_date>%Y-%m-%d)s-%(title)s-tw-%(id)s"
|
||||||
|
else
|
||||||
|
local name_format="%(upload_date>%Y-%m-%d)s-shortname-tw-%(id)s"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Download Twitch chat transcript
|
||||||
|
actually_download_twitch_chat $url "$(yt-dlp.exe --get-filename -o "$name_format" $opts $url)"
|
||||||
|
|
||||||
|
# Get the video filename.
|
||||||
|
local filename=$(yt-dlp.exe --get-filename -o "$name_format.%(ext)s" $opts $url)
|
||||||
|
|
||||||
|
# Download
|
||||||
|
if [[ $subscriber_vod -eq 0 ]]; then
|
||||||
|
local cmd="streamlink.exe --twitch-low-latency --twitch-disable-ads --twitch-disable-hosting --force --force-progress $opts $url $streamlink_format -O | ffmpeg -i pipe:0 -c copy \"$filename\""
|
||||||
|
else
|
||||||
|
local cmd="yt-dlp.exe -f $yt_dlp_format -o \"$filename\" $opts $url"
|
||||||
|
fi
|
||||||
|
|
||||||
|
eval $cmd # Need to eval in order to preserve the quotes wrapping the filename format string.
|
||||||
|
|
||||||
|
error=$?
|
||||||
|
if [[ $error -eq 0 ]]; then
|
||||||
|
if [[ $compress -eq 1 ]]; then
|
||||||
|
local temp_name="temp_${RANDOM}"
|
||||||
|
# 0=cpu, 1=gpu
|
||||||
|
compress-video "$filename" "$temp_name" 0
|
||||||
|
extension="${filename##*.}"
|
||||||
|
mv "$filename" "orig_$filename"
|
||||||
|
mv $temp_name.$extension "$filename"
|
||||||
|
printf "${BOLD}Make sure to delete the original video file${NORMAL}\n"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
error "Error: Failed to download '$url'"
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $make_folder == "1" ]]; then
|
if [[ $make_folder == "1" ]]; then
|
||||||
|
@ -815,6 +914,7 @@ alias yt-and-hflip='download_youtube_vid_and_hflip "137+140"' # 1080p
|
||||||
|
|
||||||
# Twitch Vid DL
|
# Twitch Vid DL
|
||||||
alias tw='download_twitch_vid "" $SHORTNAME_OFF $COMPRESSION_OFF'
|
alias tw='download_twitch_vid "" $SHORTNAME_OFF $COMPRESSION_OFF'
|
||||||
|
alias twt='download_twitch_vid_and_transcribe "" $SHORTNAME_OFF $COMPRESSION_OFF'
|
||||||
alias tw-compressed='download_twitch_vid "" $SHORTNAME_OFF $COMPRESSION_ON'
|
alias tw-compressed='download_twitch_vid "" $SHORTNAME_OFF $COMPRESSION_ON'
|
||||||
alias tw-shortname='download_twitch_vid "" $SHORTNAME_ON $COMPRESSION_OFF'
|
alias tw-shortname='download_twitch_vid "" $SHORTNAME_ON $COMPRESSION_OFF'
|
||||||
alias tw-shortname-compressed='download_twitch_vid "" $SHORTNAME_ON $COMPRESSION_ON'
|
alias tw-shortname-compressed='download_twitch_vid "" $SHORTNAME_ON $COMPRESSION_ON'
|
||||||
|
@ -856,7 +956,7 @@ alias vimeo-compressed='download_vimeo_vid "Original" $SHORTNAME_OFF $COMPRESSIO
|
||||||
alias ig-download-and-hflip='download_instagram_vid_and_hflip '
|
alias ig-download-and-hflip='download_instagram_vid_and_hflip '
|
||||||
|
|
||||||
# Twitter Vid DL
|
# Twitter Vid DL
|
||||||
alias twt='download_twitter_vid "" '
|
alias twitter='download_twitter_vid "" '
|
||||||
|
|
||||||
# Misc
|
# Misc
|
||||||
alias download-mp4='download_mp4'
|
alias download-mp4='download_mp4'
|
||||||
|
|
|
@ -23,6 +23,23 @@ else
|
||||||
NORMAL=""
|
NORMAL=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Will return a symlink path in its expanded form. If the path's root is the
|
||||||
|
# home directory symbol "~" then it'll be replaced by the full home path.
|
||||||
|
expand_path() {
    # Arguments: $1 - path to expand; may begin with a literal "~" component.
    # Outputs:   the canonicalized path on stdout (via `readlink -m`, which
    #            does not require the path components to exist).
    local ret="$1"

    # Only a bare "~" or a leading "~/" is treated as the home directory;
    # forms such as "~user/..." are passed through untouched.
    if [[ "$ret" == "~" || "$ret" == "~/"* ]]; then
        ret="${HOME}${ret:1}"
    fi

    ret=$(readlink -m -- "$ret")

    # Quoted so that paths containing runs of spaces or glob characters are
    # emitted verbatim instead of being word-split and re-joined.
    printf '%s\n' "$ret"
}
|
||||||
|
|
||||||
input_wav="$1"
|
input_wav="$1"
|
||||||
output_name_without_ext="$2"
|
output_name_without_ext="$2"
|
||||||
model="$3"
|
model="$3"
|
||||||
|
@ -49,6 +66,14 @@ output_name="$output_name_without_ext.${model}"
|
||||||
|
|
||||||
printf "\n${YELLOW}${BOLD}Transcribing $input_wav | model: $model | threads: $threads | output: $output_name ${NORMAL}\n"
|
printf "\n${YELLOW}${BOLD}Transcribing $input_wav | model: $model | threads: $threads | output: $output_name ${NORMAL}\n"
|
||||||
|
|
||||||
whisper.exe --threads ${threads} -m $JELLYPIXEL_OPENSOURCE_DEV/whisper.cpp/models/ggml-${model}.en.bin -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors
|
whisper_fullname="$(expand_path $(which whisper.exe))"
|
||||||
|
whisper_path="$(dirname $whisper_fullname)"
|
||||||
|
models_path="$whisper_path/models"
|
||||||
|
|
||||||
printf "${GREEN}${BOLD}Done transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n"
|
# Run whisper with the model file located next to the whisper executable,
# writing both .txt and .srt outputs.
whisper.exe --threads ${threads} -m "$models_path/ggml-${model}.en.bin" -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors
error=$?

if [[ $error -eq 0 ]]; then
    printf "${GREEN}${BOLD}Done transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n"
else
    # Report failures in red and exit non-zero so that a caller checking the
    # exit status (e.g. `if [[ $? == 1 ]]`) can actually detect the failure.
    printf "${RED}${BOLD}Error while transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n"
    exit 1
fi
|
||||||
|
|
|
@ -25,11 +25,11 @@ fi
|
||||||
|
|
||||||
input_video="$1"
|
input_video="$1"
|
||||||
output_name_without_ext="$2"
|
output_name_without_ext="$2"
|
||||||
model="$3"
|
shift 2
|
||||||
threads=$4
|
models="$@"
|
||||||
|
|
||||||
if [[ $input_video == "" || $output_name_without_ext == "" || $model == "" ]]; then
|
if [[ $input_video == "" || $output_name_without_ext == "" || $models == "" ]]; then
|
||||||
printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
|
printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <list of model names to use>${NORMAL}\n"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -45,11 +45,13 @@ fi
|
||||||
extract-16bit-wav-from-video "$input_video" "$wav_name"
|
extract-16bit-wav-from-video "$input_video" "$wav_name"
|
||||||
if [[ $? == 1 ]]; then exit 1; fi
|
if [[ $? == 1 ]]; then exit 1; fi
|
||||||
|
|
||||||
transcribe-audio "$wav_name" "$output_name_without_ext" "$model" $threads
|
for model in "$@"; do
|
||||||
if [[ $? == 1 ]]; then
|
transcribe-audio "$wav_name" "$output_name_without_ext" "${model}"
|
||||||
printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
|
if [[ $? == 1 ]]; then
|
||||||
exit 1
|
printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
|
||||||
fi
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
rm "$wav_name"
|
rm "$wav_name"
|
||||||
|
|
|
@ -1,4 +1,2 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
transcribe-video "$1" "$2" "base"
|
||||||
transcribe-video-with-model "$1" "$2" "base" "$3"
|
|
||||||
|
|
||||||
|
|
|
@ -1,65 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
# I was originally just using three calls to transcribe-video-with-model but I want to reuse the same audio input, so this
|
|
||||||
# is mostly a copy pasta of that file.
|
|
||||||
|
|
||||||
if which tput >/dev/null 2>&1; then
|
|
||||||
ncolors=$(tput colors)
|
|
||||||
fi
|
|
||||||
if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
|
|
||||||
RED="$(tput setaf 1)"
|
|
||||||
GREEN="$(tput setaf 2)"
|
|
||||||
YELLOW="$(tput setaf 3)"
|
|
||||||
BLUE="$(tput setaf 4)"
|
|
||||||
MAGENTA="$(tput setaf 5)"
|
|
||||||
CYAN="$(tput setaf 6)"
|
|
||||||
BOLD="$(tput bold)"
|
|
||||||
NORMAL="$(tput sgr0)"
|
|
||||||
else
|
|
||||||
RED=""
|
|
||||||
GREEN=""
|
|
||||||
YELLOW=""
|
|
||||||
BLUE=""
|
|
||||||
MAGENTA=""
|
|
||||||
CYAN=""
|
|
||||||
BOLD=""
|
|
||||||
NORMAL=""
|
|
||||||
fi
|
|
||||||
|
|
||||||
input_video="$1"
|
|
||||||
output_name_without_ext="$2"
|
|
||||||
threads=$3
|
|
||||||
|
|
||||||
if [[ $input_video == "" || $output_name_without_ext == "" ]]; then
|
|
||||||
printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <optional: thread count>${NORMAL}\n"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
wav_name="${output_name_without_ext}_audio_${RANDOM}.wav"
|
|
||||||
|
|
||||||
extract-16bit-wav-from-video "$input_video" "$wav_name"
|
|
||||||
if [[ $? == 1 ]]; then exit 1; fi
|
|
||||||
|
|
||||||
#
|
|
||||||
# Tiny model first to have something quickly banged out. base and small have similar output quality. Neither are perfect.
|
|
||||||
#
|
|
||||||
|
|
||||||
transcribe-audio "$wav_name" "$output_name_without_ext" "tiny" $threads
|
|
||||||
if [[ $? == 1 ]]; then
|
|
||||||
printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
transcribe-audio "$wav_name" "$output_name_without_ext" "base" $threads
|
|
||||||
if [[ $? == 1 ]]; then
|
|
||||||
printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
transcribe-audio "$wav_name" "$output_name_without_ext" "small" $threads
|
|
||||||
if [[ $? == 1 ]]; then
|
|
||||||
printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
rm "$wav_name"
|
|
|
@ -1,4 +1,2 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
transcribe-video "$1" "$2" "medium"
|
||||||
transcribe-video-with-model "$1" "$2" "medium" "$3"
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,2 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
transcribe-video "$1" "$2" "small"
|
||||||
transcribe-video-with-model "$1" "$2" "small" "$3"
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,2 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
transcribe-video "$1" "$2" "tiny"
|
||||||
transcribe-video-with-model "$1" "$2" "tiny" "$3"
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user