dotfiles/dotfiles/bin/transcribe-video

#!/usr/bin/env bash

# Terminal colour setup.
# Defines RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, BOLD and NORMAL. They hold
# terminal escape sequences when stdout is a terminal that reports at least
# 8 colours, and are empty strings otherwise so they can always be embedded
# in messages.
ncolors=""
if command -v tput >/dev/null 2>&1; then
    # tput fails when TERM is unset or unknown; treat that as "no colours"
    # instead of leaking its error message.
    ncolors=$(tput colors 2>/dev/null) || ncolors=""
fi
# Only accept a plain decimal number; 10# keeps a leading zero from being
# read as octal.
if [[ -t 1 && "$ncolors" =~ ^[0-9]+$ ]] && (( 10#$ncolors >= 8 )); then
    RED="$(tput setaf 1)"
    GREEN="$(tput setaf 2)"
    YELLOW="$(tput setaf 3)"
    BLUE="$(tput setaf 4)"
    MAGENTA="$(tput setaf 5)"
    CYAN="$(tput setaf 6)"
    BOLD="$(tput bold)"
    NORMAL="$(tput sgr0)"
else
    RED=""
    GREEN=""
    YELLOW=""
    BLUE=""
    MAGENTA=""
    CYAN=""
    BOLD=""
    NORMAL=""
fi

# Positional arguments: the first four are fixed, everything after them is
# the list of model names.
input_video=$1
output_name_without_ext=$2
start_time=$3
end_time=$4
# Drop the four fixed arguments so that "$@" holds only the model names.
shift 4
models=("$@")

# Show the usage text and stop unless every fixed argument is non-empty and
# at least one model name was supplied.
if [[ -z $input_video || -z $output_name_without_ext || -z $start_time || -z $end_time ]] || (( ${#models[@]} == 0 )); then
    printf "${BOLD}${RED}Usage: transcribe-video <input.mp4> <output name without extension> <start time HH:MM:SS, use 0 for start> <end time HH:MM:SS, use 0 for no value> <list of model names to use>${NORMAL}\n"
    exit 1
fi

# Build the name of the intermediate WAV file from the output name, the
# model names joined with "_", and $RANDOM to make name clashes between runs
# less likely.
# printf '%s' is used for the join so a model name such as "-n" is not taken
# as an echo option.
model_csv=$(IFS=_; printf '%s' "${models[*]}")
wav_name="${output_name_without_ext}_${model_csv}_${RANDOM}"

# Add extension if not provided.
# The test must look at the value of the name: comparing the bare word
# "input_extension" with "wav" is always true.
if [[ "$wav_name" != *.wav ]]; then
    wav_name="${wav_name}.wav"
fi

# Audio parameters handed to the extraction helper, in the order
# <sample rate> <channel count>.
channel_count=1
sample_rate=16000
# Stop on ANY failure of the helper, not just exit status 1: other statuses
# (e.g. 127 when the helper is not on PATH) must not fall through to the
# transcription step. Arguments are quoted so each is passed as one word.
if ! extract-audio-from-video-and-transcode "$input_video" "$wav_name" "$sample_rate" "$channel_count" "$start_time" "$end_time"; then
    exit 1
fi

# Run transcribe-audio once per requested model on the intermediate WAV file.
for model in "${models[@]}"; do
    # Treat every non-zero exit status as a failure, not only status 1.
    if ! transcribe-audio "$wav_name" "$output_name_without_ext" "$model"; then
        # The WAV file is deliberately left on disk here. The file name is
        # passed as a printf argument, not as part of the format string, so
        # a "%" or backslash in the name cannot corrupt the message.
        printf '%sSaving the audio file "%s" in case you want to reuse it for debugging.\n%s' "${RED}${BOLD}" "$wav_name" "${NORMAL}"
        exit 1
    fi
done

# Every model succeeded, so the intermediate WAV file is no longer needed.
# "--" keeps a name starting with "-" from being parsed as an option.
rm -- "$wav_name"