dotfiles/bin/transcribe-video

66 lines
1.6 KiB
Bash

#!/usr/bin/env bash
if which tput >/dev/null 2>&1; then
ncolors=$(tput colors)
fi
if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
RED="$(tput setaf 1)"
GREEN="$(tput setaf 2)"
YELLOW="$(tput setaf 3)"
BLUE="$(tput setaf 4)"
MAGENTA="$(tput setaf 5)"
CYAN="$(tput setaf 6)"
BOLD="$(tput bold)"
NORMAL="$(tput sgr0)"
else
RED=""
GREEN=""
YELLOW=""
BLUE=""
MAGENTA=""
CYAN=""
BOLD=""
NORMAL=""
fi
input_video="$1"
output_name_without_ext="$2"
shift 2
models=("$@")
if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then
printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <list of model names to use>${NORMAL}\n"
exit 1
fi
model_csv=$(IFS=_ ; echo "${models[*]}")
wav_name="${output_name_without_ext}_${model_csv}_${RANDOM}"
# Add extension if not provided.
input_basename=$(basename -- "$wav_name")
input_extension="${input_basename##*.}"
if [[ input_extension != "wav" ]]; then
wav_name="${wav_name}.wav"
fi
extract-16bit-wav-from-video "$input_video" "$wav_name"
if [[ $? == 1 ]]; then exit 1; fi
for model in "$@"; do
# Tweak thread count based on model size.
thread_count=4
if [[ $model == "medium" ]]; then
thread_count=8
fi
transcribe-audio "$wav_name" "$output_name_without_ext" "${model}" $thread_count
if [[ $? == 1 ]]; then
printf "${RED}${BOLD}Saving the audio file \"$wav_name\" in case you want to reuse it for debugging.\n${NORMAL}"
exit 1
fi
done
rm "$wav_name"