More transcription script improvements

2023-06-05 17:12:16 -04:00
parent e979d1094c
commit dad6be889f
3 changed files with 28 additions and 9 deletions
--- a/23
+++ b/23
@@ -425,6 +425,8 @@ download_youtube_vid() {
    fi

    if [[ $make_folder == "1" ]]; then cd ..; fi
+
+    printf "${BOLD}Finished downloading ${YELLOW}$filename${NORMAL}\n"
 }

 download_youtube_playlist() {
@@ -463,7 +465,9 @@ download_youtube_playlist() {
    # Removing any trailing subtitle files
    rm *.vtt *.srt 2>/dev/null

-    cd ..
+    if [[ $dir_name == "1" ]]; then cd ..; fi
+
+    printf "${BOLD}Finished downloading the playlist\n${NORMAL}"
 }

 # Download Twitch chat transcript
@@ -478,7 +482,8 @@ actually_download_twitch_chat() {
    else
        error "Video doesn't have a chat transcript."
    fi
-    printf "\n"
+
+    printf "${BOLD}Finished downloading ${YELLOW}$filename${NORMAL}\n"
 }

 download_twitch_chat() {
@@ -499,11 +504,9 @@ download_twitch_chat() {
        fi
    fi

-    actually_download_twitch_chat $url "$(yt-dlp.exe --get-filename -o "%(upload_date>%Y-%m-%d)s-%(title)s-tw-%(id)s" $opts $url)"
+    actually_download_twitch_chat $url "$(yt-dlp.exe --get-filename -o "%(upload_date>%Y-%m-%d)s-%(title)s-tw-%(id)s.chat" $opts $url)"

-    if [[ $make_folder == "1" ]]; then
-        cd ..
-    fi
+    if [[ $make_folder == "1" ]]; then cd ..; fi
 }

 # Download Twitch videos, both VODs and live streams. Pass a Twitch account URL to download a live stream.
@@ -618,6 +621,8 @@ download_twitch_vid() {
    fi

    if [[ $make_folder == "1" ]]; then cd ..; fi
+
+    printf "${BOLD}Finished downloading ${YELLOW}$filename${NORMAL}\n"
 }

 # Download Vimeo videos.
@@ -690,6 +695,8 @@ download_vimeo_vid() {
    fi

    if [[ $make_folder == "1" ]]; then cd ..; fi
+
+    printf "${BOLD}Finished downloading ${YELLOW}$filename${NORMAL}\n"
 }

 # Download Twitter videos.
@@ -739,6 +746,8 @@ download_twitter_vid() {
    fi

    if [[ $make_folder == "1" ]]; then cd ..; fi
+
+    printf "${BOLD}Finished downloading ${YELLOW}$filename${NORMAL}\n"
 }

 # Download Instagram videos.
@@ -792,6 +801,8 @@ download_instagram_vid() {
    fi

    if [[ $make_folder == "1" ]]; then cd ..; fi
+
+    printf "${BOLD}Finished downloading ${YELLOW}$filename${NORMAL}\n"
 }

 # Download MP4 video.
--- a/bin/transcribe-audio
+++ b/bin/transcribe-audio
@@ -1,5 +1,12 @@
 #!/usr/bin/env bash

+# The base model is pretty good overall. It inserts punctuation well and catches most words.
+# Tiny is fast and often produces correct grammar, but it misses a lot of words, especially when
+# the source is quiet or muffled.
+#
+# The small and medium models sometimes detect words better, but they suffer from a lack of punctuation.
+# Medium is particularly bad and often omits periods and commas.
+
 if which tput >/dev/null 2>&1; then
    ncolors=$(tput colors)
 fi
--- a/bin/transcribe-video
+++ b/bin/transcribe-video
@@ -26,14 +26,15 @@ fi
 input_video="$1"
 output_name_without_ext="$2"
 shift 2
-models="$@"
+models=("$@")

-if [[ $input_video == "" || $output_name_without_ext == "" || $models == "" ]]; then
+if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then
    printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <list of model names to use>${NORMAL}\n"
    exit 1
 fi

-wav_name="${output_name_without_ext}_audio_${RANDOM}"
+model_csv=$(IFS=_ ; echo "${models[*]}")
+wav_name="${output_name_without_ext}_${model_csv}_${RANDOM}"

 # Add extension if not provided.
 input_basename=$(basename -- "$wav_name")