From ba9a77686719d37412b10a79bcf2264b078dfb72 Mon Sep 17 00:00:00 2001 From: Michael Campagnaro Date: Mon, 7 Aug 2023 17:22:07 -0400 Subject: [PATCH] Support a time range in the transcribe video scripts --- .aliases | 25 ++++++++++--- dotfiles/bin/compress-video-with-crf | 2 +- dotfiles/bin/convert-video-avi-to-mp4 | 2 +- dotfiles/bin/convert-video-flv-to-mp4 | 2 +- dotfiles/bin/convert-video-mkv-to-mp4 | 2 +- dotfiles/bin/extract-audio-from-video | 22 +++++++++-- .../extract-audio-from-video-and-transcode | 10 ++++- dotfiles/bin/fix-audio-in-one-channel | 2 +- dotfiles/bin/normalize-video-volume | 2 +- dotfiles/bin/remove-audio-from-video | 2 +- dotfiles/bin/transcribe-video | 10 +++-- dotfiles/bin/transcribe-video-base | 35 ++++++++++++++++++ dotfiles/bin/transcribe-video-medium | 35 ++++++++++++++++++ dotfiles/bin/transcribe-video-small | 35 ++++++++++++++++++ dotfiles/bin/transcribe-video-tiny | 37 ++++++++++++++++++- 15 files changed, 201 insertions(+), 22 deletions(-) diff --git a/.aliases b/.aliases index b21a6ce..d9badd6 100644 --- a/.aliases +++ b/.aliases @@ -354,36 +354,51 @@ function my_transcribe_video() { file="$1" output="$2" include_small=$3 + start_time="$4" + end_time="$5" + if [[ $file == "" ]]; then - error "Usage: " + error "Usage: " return fi if [[ $output == "" ]]; then output="${1%.*}" # just use the input name without the extension. fi + + if [[ $start_time == "" ]]; then start_time="0"; fi + if [[ $end_time == "" ]]; then end_time="0"; fi + # Tiny is fast and semi-accurate, so whatever. # Base is pretty good overall. It has good punctuation inserting and # catches most words. Small and medium can do better word detection at # times, but suffer from bad punctuation. Medium is particularly bad and # not adding commas and periods. if [[ $include_small -eq 1 ]]; then - transcribe-video "$file" "$output" tiny base small + transcribe-video "$file" "$output" $start_time $end_time tiny base small else - transcribe-video "$file" "$output" tiny base + transcribe-video "$file" "$output" $start_time $end_time tiny base fi } function my_transcribe_video_all_models() { file="$1" output="$2" + start_time="$3" + end_time="$4" + if [[ $file == "" ]]; then - error "Usage: " + error "Usage: " return fi + if [[ $output == "" ]]; then output="${1%.*}" # just use the input name without the extension. fi - transcribe-video "$file" "$output" tiny base small medium + + if [[ $start_time == "" ]]; then start_time="0"; fi + if [[ $end_time == "" ]]; then end_time="0"; fi + + transcribe-video "$file" "$output" $start_time $end_time tiny base small medium } diff --git a/dotfiles/bin/compress-video-with-crf b/dotfiles/bin/compress-video-with-crf index 407b9c4..75b4914 100644 --- a/dotfiles/bin/compress-video-with-crf +++ b/dotfiles/bin/compress-video-with-crf @@ -26,7 +26,7 @@ fi use_gpu=0 if [[ "$#" < 3 || "$#" > 6 ]]; then - printf "${BOLD}${RED}Usage: $0 \n\nIf you want to encode a range of CRF values then use -1 as the crf value.${NORMAL}\n" + printf "${BOLD}${RED}Usage: compress-video-with-crf \n\nIf you want to encode a range of CRF values then use -1 as the crf value.${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/convert-video-avi-to-mp4 b/dotfiles/bin/convert-video-avi-to-mp4 index 6905681..d2461df 100644 --- a/dotfiles/bin/convert-video-avi-to-mp4 +++ b/dotfiles/bin/convert-video-avi-to-mp4 @@ -29,7 +29,7 @@ else fi if [[ $1 == "" || $2 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: convert-video-avi-to-mp4 ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/convert-video-flv-to-mp4 b/dotfiles/bin/convert-video-flv-to-mp4 index b54b949..84b51e4 100644 --- a/dotfiles/bin/convert-video-flv-to-mp4 +++ b/dotfiles/bin/convert-video-flv-to-mp4 @@ -27,7 +27,7 @@ else fi if [[ $1 == "" || $2 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: convert-video-flv-to-mp4 ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/convert-video-mkv-to-mp4 b/dotfiles/bin/convert-video-mkv-to-mp4 index 77863d4..72dccc8 100644 --- a/dotfiles/bin/convert-video-mkv-to-mp4 +++ b/dotfiles/bin/convert-video-mkv-to-mp4 @@ -27,7 +27,7 @@ else fi if [[ $1 == "" || $2 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: convert-video-mkv-to-mp4 ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/extract-audio-from-video b/dotfiles/bin/extract-audio-from-video index e57a0ec..dd9a1ca 100644 --- a/dotfiles/bin/extract-audio-from-video +++ b/dotfiles/bin/extract-audio-from-video @@ -30,9 +30,11 @@ fi filename=$(basename -- "$1") format="$2" +start_time="$3" +end_time="$4" if [[ $1 == "" || $2 == "" ]]; then - printf "${BOLD}${RED}Usage: extract-audio-from-video ${NORMAL}\n" + printf "${BOLD}${RED}Usage: extract-audio-from-video ${NORMAL}\n" exit 1 fi @@ -40,9 +42,23 @@ extension="${filename##*.}" filename="${filename%.*}" output_name="$filename.$format" -printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | output: $output_name${NORMAL}\n" +timing_args="" +if [[ $start_time != "" ]]; then + timing_args="-ss $start_time " +fi +if [[ $end_time != "" ]]; then + if [[ $start_time == "0" && $end_time == "0" ]]; then + # We treat a start and end with 0 values as no op. + timing_args="" + elif [[ $end_time != "0" ]]; then + # Handle having a start time but end time is set to 0, can just ignore it and it'll use the remainder of the video. + timing_args+="-to $end_time" + fi +fi -ffmpeg -y -stats -loglevel level+error -i "$filename.$extension" -vn -acodec copy "$output_name" +printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | output: $output_name | start: $start_time | end: $end_time${NORMAL}\n" + +ffmpeg -y -stats -loglevel level+error $timing_args -i "$filename.$extension" -vn -acodec copy "$output_name" printf "\n${GREEN}${BOLD}Done extracting audio from $filename.$extension | output name '$output_name'${NORMAL}\n\n" diff --git a/dotfiles/bin/extract-audio-from-video-and-transcode b/dotfiles/bin/extract-audio-from-video-and-transcode index ee29f6a..a634295 100644 --- a/dotfiles/bin/extract-audio-from-video-and-transcode +++ b/dotfiles/bin/extract-audio-from-video-and-transcode @@ -37,7 +37,7 @@ output_base=$(basename -- "$output_name") output_extension="${output_base##*.}" if [[ $filename == "" || $output_extension == "" || $output_extension == $output_base ]]; then - printf "${BOLD}${RED}Usage: extract-audio-from-video-and-transcode ${NORMAL}\n" + printf "${BOLD}${RED}Usage: extract-audio-from-video-and-transcode ${NORMAL}\n" exit 1 fi @@ -54,7 +54,13 @@ if [[ $start_time != "" ]]; then timing_args="-ss $start_time " fi if [[ $end_time != "" ]]; then - timing_args+="-to $end_time" + if [[ $start_time == "0" && $end_time == "0" ]]; then + # We treat a start and end with 0 values as no op. + timing_args="" + elif [[ $end_time != "0" ]]; then + # Handle having a start time but end time is set to 0, can just ignore it and it'll use the remainder of the video. + timing_args+="-to $end_time" + fi fi printf "\n${YELLOW}${BOLD}Extracting audio from '$filename' | output: '$output_name' | sample rate: $sample_rate | channels: $channel_count | start: $start_time | end: $end_time${NORMAL}\n" diff --git a/dotfiles/bin/fix-audio-in-one-channel b/dotfiles/bin/fix-audio-in-one-channel index 6912b1d..613afc1 100644 --- a/dotfiles/bin/fix-audio-in-one-channel +++ b/dotfiles/bin/fix-audio-in-one-channel @@ -27,7 +27,7 @@ else fi if [[ $1 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: fix-audio-in-one-channel ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/normalize-video-volume b/dotfiles/bin/normalize-video-volume index d5168fa..b5e00fa 100644 --- a/dotfiles/bin/normalize-video-volume +++ b/dotfiles/bin/normalize-video-volume @@ -31,7 +31,7 @@ else fi if [[ $1 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: normalize-video-volume ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/remove-audio-from-video b/dotfiles/bin/remove-audio-from-video index d20446b..d405f49 100644 --- a/dotfiles/bin/remove-audio-from-video +++ b/dotfiles/bin/remove-audio-from-video @@ -24,7 +24,7 @@ else fi if [[ $1 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: remove-audio-from-video ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/transcribe-video b/dotfiles/bin/transcribe-video index 1a8a0ed..735789a 100644 --- a/dotfiles/bin/transcribe-video +++ b/dotfiles/bin/transcribe-video @@ -25,11 +25,13 @@ fi input_video="$1" output_name_without_ext="$2" -shift 2 +start_time="$3" +end_time="$4" +shift 4 models=("$@") -if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then - printf "${BOLD}${RED}Usage: transcribe-video ${NORMAL}\n" +if [[ $input_video == "" || $output_name_without_ext == "" || $start_time == "" || $end_time == "" || ${#models[@]} -eq 0 ]]; then + printf "${BOLD}${RED}Usage: transcribe-video ${NORMAL}\n" exit 1 fi @@ -45,7 +47,7 @@ fi channel_count=1 sample_rate=16000 -extract-audio-from-video-and-transcode "$input_video" "$wav_name" $sample_rate $channel_count +extract-audio-from-video-and-transcode "$input_video" "$wav_name" $sample_rate $channel_count $start_time $end_time if [[ $? == 1 ]]; then exit 1; fi for model in "$@"; do diff --git a/dotfiles/bin/transcribe-video-base b/dotfiles/bin/transcribe-video-base index 11e283c..0e25678 100644 --- a/dotfiles/bin/transcribe-video-base +++ b/dotfiles/bin/transcribe-video-base @@ -1,2 +1,37 @@ #!/usr/bin/env bash + +if which tput >/dev/null 2>&1; then + ncolors=$(tput colors) +fi +if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then + RED="$(tput setaf 1)" + GREEN="$(tput setaf 2)" + YELLOW="$(tput setaf 3)" + BLUE="$(tput setaf 4)" + MAGENTA="$(tput setaf 5)" + CYAN="$(tput setaf 6)" + BOLD="$(tput bold)" + NORMAL="$(tput sgr0)" +else + RED="" + GREEN="" + YELLOW="" + BLUE="" + MAGENTA="" + CYAN="" + BOLD="" + NORMAL="" +fi + +if [[ $2 == "" ]]; then + printf "${BOLD}${RED}Usage: transcribe-video-base ${NORMAL}\n" + exit 1 +fi + +start_time="$3" +end_time="$4" + +if [[ $start_time == "" ]]; then start_time="0"; fi +if [[ $end_time == "" ]]; then end_time="0"; fi + transcribe-video "$1" "$2" "base" diff --git a/dotfiles/bin/transcribe-video-medium b/dotfiles/bin/transcribe-video-medium index c6fe616..cdc831a 100644 --- a/dotfiles/bin/transcribe-video-medium +++ b/dotfiles/bin/transcribe-video-medium @@ -1,2 +1,37 @@ #!/usr/bin/env bash + +if which tput >/dev/null 2>&1; then + ncolors=$(tput colors) +fi +if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then + RED="$(tput setaf 1)" + GREEN="$(tput setaf 2)" + YELLOW="$(tput setaf 3)" + BLUE="$(tput setaf 4)" + MAGENTA="$(tput setaf 5)" + CYAN="$(tput setaf 6)" + BOLD="$(tput bold)" + NORMAL="$(tput sgr0)" +else + RED="" + GREEN="" + YELLOW="" + BLUE="" + MAGENTA="" + CYAN="" + BOLD="" + NORMAL="" +fi + +if [[ $2 == "" ]]; then + printf "${BOLD}${RED}Usage: transcribe-video-medium ${NORMAL}\n" + exit 1 +fi + +start_time="$3" +end_time="$4" + +if [[ $start_time == "" ]]; then start_time="0"; fi +if [[ $end_time == "" ]]; then end_time="0"; fi + transcribe-video "$1" "$2" "medium" diff --git a/dotfiles/bin/transcribe-video-small b/dotfiles/bin/transcribe-video-small index 4f434a8..75dee38 100644 --- a/dotfiles/bin/transcribe-video-small +++ b/dotfiles/bin/transcribe-video-small @@ -1,2 +1,37 @@ #!/usr/bin/env bash + +if which tput >/dev/null 2>&1; then + ncolors=$(tput colors) +fi +if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then + RED="$(tput setaf 1)" + GREEN="$(tput setaf 2)" + YELLOW="$(tput setaf 3)" + BLUE="$(tput setaf 4)" + MAGENTA="$(tput setaf 5)" + CYAN="$(tput setaf 6)" + BOLD="$(tput bold)" + NORMAL="$(tput sgr0)" +else + RED="" + GREEN="" + YELLOW="" + BLUE="" + MAGENTA="" + CYAN="" + BOLD="" + NORMAL="" +fi + +if [[ $2 == "" ]]; then + printf "${BOLD}${RED}Usage: transcribe-video-small ${NORMAL}\n" + exit 1 +fi + +start_time="$3" +end_time="$4" + +if [[ $start_time == "" ]]; then start_time="0"; fi +if [[ $end_time == "" ]]; then end_time="0"; fi + transcribe-video "$1" "$2" "small" diff --git a/dotfiles/bin/transcribe-video-tiny b/dotfiles/bin/transcribe-video-tiny index 8a1a1ac..796715a 100644 --- a/dotfiles/bin/transcribe-video-tiny +++ b/dotfiles/bin/transcribe-video-tiny @@ -1,2 +1,37 @@ #!/usr/bin/env bash -transcribe-video "$1" "$2" "tiny" + +if which tput >/dev/null 2>&1; then + ncolors=$(tput colors) +fi +if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then + RED="$(tput setaf 1)" + GREEN="$(tput setaf 2)" + YELLOW="$(tput setaf 3)" + BLUE="$(tput setaf 4)" + MAGENTA="$(tput setaf 5)" + CYAN="$(tput setaf 6)" + BOLD="$(tput bold)" + NORMAL="$(tput sgr0)" +else + RED="" + GREEN="" + YELLOW="" + BLUE="" + MAGENTA="" + CYAN="" + BOLD="" + NORMAL="" +fi + +if [[ $2 == "" ]]; then + printf "${BOLD}${RED}Usage: transcribe-video-tiny ${NORMAL}\n" + exit 1 +fi + +start_time="$3" +end_time="$4" + +if [[ $start_time == "" ]]; then start_time="0"; fi +if [[ $end_time == "" ]]; then end_time="0"; fi + +transcribe-video "$1" "$2" $start_time $end_time "tiny"