More transcription script improvements

2023-06-05 17:12:16 -04:00
parent e979d1094c
commit dad6be889f
3 changed files with 28 additions and 9 deletions
--- a/bin/transcribe-audio
+++ b/bin/transcribe-audio
@@ -1,5 +1,12 @@
 #!/usr/bin/env bash

+# The base model is pretty good overall. It inserts punctuation well and catches most words.
+# Tiny is fast and often has correct grammar, but it misses a lot of words, especially when the
+# source is quiet or muffled.
+#
+# Small and medium models can do better word detection at times, but suffer from a lack of punctuation.
+# Medium is particularly bad and often omits periods and commas.
+
 if which tput >/dev/null 2>&1; then
    ncolors=$(tput colors)
 fi
--- a/bin/transcribe-video
+++ b/bin/transcribe-video
@@ -26,14 +26,15 @@ fi
 input_video="$1"
 output_name_without_ext="$2"
 shift 2
-models="$@"
+models=("$@")

-if [[ $input_video == "" || $output_name_without_ext == "" || $models == "" ]]; then
+if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then
    printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <list of model names to use>${NORMAL}\n"
    exit 1
 fi

-wav_name="${output_name_without_ext}_audio_${RANDOM}"
+model_csv=$(IFS=_ ; echo "${models[*]}")
+wav_name="${output_name_without_ext}_${model_csv}_${RANDOM}"

 # Add extension if not provided.
 input_basename=$(basename -- "$wav_name")