Improve transcribe API and add alias to transcribe Twitch stream downloads

2023-06-04 11:57:32 -04:00
parent 8ff9c338b5
commit e5e8f309e5
8 changed files with 157 additions and 103 deletions
--- a/bin/transcribe-audio
+++ b/bin/transcribe-audio
@@ -23,6 +23,23 @@ else
    NORMAL=""
 fi

+# Prints the canonical, symlink-resolved form of the path given in $1. A
+# leading "~" path component is replaced by $HOME before resolving.
+expand_path() {
+    local ret="$1"
+
+    # Only a bare "~" component is expanded; "~user/..." is passed through.
+    if [[ "$ret" == "~" || "$ret" == "~/"* ]]; then
+        ret="$HOME${ret:1}"
+    fi
+
+    # -m canonicalizes even when path components do not exist (GNU readlink).
+    ret=$(readlink -m -- "$ret")
+
+    # Quoted so that whitespace runs and glob characters in the path survive.
+    printf '%s\n' "$ret"
+}
+
 input_wav="$1"
 output_name_without_ext="$2"
 model="$3"
@@ -49,6 +66,14 @@ output_name="$output_name_without_ext.${model}"

 printf "\n${YELLOW}${BOLD}Transcribing $input_wav | model: $model | threads: $threads | output: $output_name ${NORMAL}\n"

-whisper.exe --threads ${threads} -m $JELLYPIXEL_OPENSOURCE_DEV/whisper.cpp/models/ggml-${model}.en.bin -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors
+whisper_fullname="$(expand_path "$(command -v whisper.exe)")"  # location of whisper.exe with symlinks resolved
+whisper_path="$(dirname -- "$whisper_fullname")"
+models_path="$whisper_path/models"  # model files are looked up in a "models" directory beside the binary

-printf "${GREEN}${BOLD}Done transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n"
+# A whisper failure must exit 1: transcribe-video-with-model keeps the wav only when it sees status 1.
+if whisper.exe --threads ${threads} -m "$models_path/ggml-${model}.en.bin" -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors; then
+    printf "${GREEN}${BOLD}Done transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n"
+else
+    printf "${RED}${BOLD}Error while transcribing $input_wav | model: $model | threads: $threads | output: $output_name${NORMAL}\n"
+    exit 1
+fi
--- a/bin/transcribe-video-with-model
+++ b/bin/transcribe-video-with-model
@@ -25,11 +25,11 @@ fi

 input_video="$1"
 output_name_without_ext="$2"
-model="$3"
-threads=$4
+shift 2  # drop the two fixed arguments so "$@" holds only the model names
+models="$*"  # flat string copy of the model names; the usage check tests it for emptiness

-if [[ $input_video == "" || $output_name_without_ext == "" || $model == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
+if [[ -z "$input_video" || -z "$output_name_without_ext" || -z "$models" ]]; then  # every argument is mandatory
+    printf "${BOLD}${RED}Usage: $0 <input video> <output name without extension> <list of model names to use>${NORMAL}\n"
    exit 1
 fi

@@ -45,11 +45,13 @@ fi
 extract-16bit-wav-from-video "$input_video" "$wav_name"
 if [[ $? == 1 ]]; then exit 1; fi

-transcribe-audio "$wav_name" "$output_name_without_ext" "$model" $threads
-if [[ $? == 1 ]]; then
-    printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
-    exit 1
-fi
+# Transcribe the same extracted audio once per requested model, stopping at the first failure.
+for model in "$@"; do
+    if ! transcribe-audio "$wav_name" "$output_name_without_ext" "${model}"; then  # any non-zero status, not just 1
+        printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
+        exit 1
+    fi
+done

 rm "$wav_name"

--- a/bin/transcribe-video-base
+++ b/bin/transcribe-video-base
@@ -1,4 +1,2 @@
 #!/usr/bin/env bash
-
-transcribe-video-with-model "$1" "$2" "base" "$3"
-
+transcribe-video "$1" "$2" "base"  # fixes the model to "base"; a third argument (formerly the thread count) is ignored
--- a/bin/transcribe-video-batch
+++ b/bin/transcribe-video-batch
@@ -1,65 +0,0 @@
-#!/usr/bin/env bash
-
-# I was originally just using three calls to transcribe-video-with-model but I want to reuse the same audio input, so this
-# is mostly a copy pasta of that file.
-
-if which tput >/dev/null 2>&1; then
-    ncolors=$(tput colors)
-fi
-if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
-    RED="$(tput setaf 1)"
-    GREEN="$(tput setaf 2)"
-    YELLOW="$(tput setaf 3)"
-    BLUE="$(tput setaf 4)"
-    MAGENTA="$(tput setaf 5)"
-    CYAN="$(tput setaf 6)"
-    BOLD="$(tput bold)"
-    NORMAL="$(tput sgr0)"
-else
-    RED=""
-    GREEN=""
-    YELLOW=""
-    BLUE=""
-    MAGENTA=""
-    CYAN=""
-    BOLD=""
-    NORMAL=""
-fi
-
-input_video="$1"
-output_name_without_ext="$2"
-threads=$3
-
-if [[ $input_video == "" || $output_name_without_ext == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <optional: thread count>${NORMAL}\n"
-    exit 1
-fi
-
-wav_name="${output_name_without_ext}_audio_${RANDOM}.wav"
-
-extract-16bit-wav-from-video "$input_video" "$wav_name"
-if [[ $? == 1 ]]; then exit 1; fi
-
-#
-# Tiny model first to have something quickly banged out. base and small have similar output quality. Neither are perfect.
-#
-
-transcribe-audio "$wav_name" "$output_name_without_ext" "tiny" $threads
-if [[ $? == 1 ]]; then
-    printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
-    exit 1
-fi
-
-transcribe-audio "$wav_name" "$output_name_without_ext" "base" $threads
-if [[ $? == 1 ]]; then
-    printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
-    exit 1
-fi
-
-transcribe-audio "$wav_name" "$output_name_without_ext" "small" $threads
-if [[ $? == 1 ]]; then
-    printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
-    exit 1
-fi
-
-rm "$wav_name"
--- a/bin/transcribe-video-medium
+++ b/bin/transcribe-video-medium
@@ -1,4 +1,2 @@
 #!/usr/bin/env bash
-
-transcribe-video-with-model "$1" "$2" "medium" "$3"
-
+transcribe-video "$1" "$2" "medium"  # fixes the model to "medium"; a third argument (formerly the thread count) is ignored
--- a/bin/transcribe-video-small
+++ b/bin/transcribe-video-small
@@ -1,4 +1,2 @@
 #!/usr/bin/env bash
-
-transcribe-video-with-model "$1" "$2" "small" "$3"
-
+transcribe-video "$1" "$2" "small"  # fixes the model to "small"; a third argument (formerly the thread count) is ignored
--- a/bin/transcribe-video-tiny
+++ b/bin/transcribe-video-tiny
@@ -1,4 +1,2 @@
 #!/usr/bin/env bash
-
-transcribe-video-with-model "$1" "$2" "tiny" "$3"
-
+transcribe-video "$1" "$2" "tiny"  # fixes the model to "tiny"; a third argument (formerly the thread count) is ignored