dotfiles/bin/transcribe-audio

87 lines
2.6 KiB
Bash

#!/usr/bin/env bash
# The base model is pretty good overall. It inserts punctuation well and catches most words.
# Tiny is fast and often has correct grammar, but it misses a lot of words, especially when the
# source isn't loud or is muffled.
#
# Small and medium models can do better word detection at times, but suffer from a lack of punctuation.
# Medium is particularly bad and often excludes periods and commas.
# Terminal colour setup.
#
# Populates RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, BOLD and NORMAL with tput
# escape sequences when stdout is a terminal that reports at least 8 colours.
# Otherwise every variable is set to the empty string so messages print plain.

# Start from a known value so a stray "ncolors" inherited from the environment
# cannot influence the numeric test below.
ncolors=""
if command -v tput >/dev/null 2>&1; then
  # A failing tput (e.g. unusable TERM) is treated as "no colour support";
  # its diagnostic is silenced because the plain-text fallback handles it.
  ncolors=$(tput colors 2>/dev/null) || ncolors=""
fi

# Require a non-negative integer before comparing, so odd tput output can
# never make the numeric test itself print an error.
if [[ -t 1 && "$ncolors" =~ ^[0-9]+$ ]] && [ "$ncolors" -ge 8 ]; then
  RED="$(tput setaf 1)"
  GREEN="$(tput setaf 2)"
  YELLOW="$(tput setaf 3)"
  BLUE="$(tput setaf 4)"
  MAGENTA="$(tput setaf 5)"
  CYAN="$(tput setaf 6)"
  BOLD="$(tput bold)"
  NORMAL="$(tput sgr0)"
else
  RED=""
  GREEN=""
  YELLOW=""
  BLUE=""
  MAGENTA=""
  CYAN=""
  BOLD=""
  NORMAL=""
fi
# Print the canonical, absolute form of a path.
#
# A leading "~" path component is replaced with $HOME, then the result is run
# through `readlink -m`, which resolves symlinks and normalises the path
# without requiring every component to exist.
#
# Arguments: $1 - path to expand
# Outputs:   the expanded path on stdout, followed by a newline
# Returns:   non-zero (and prints nothing) if readlink fails
expand_path() {
  local path="$1"
  local resolved

  # Only a bare "~" or a "~/" prefix is expanded; "~user/..." is left as-is.
  if [[ "$path" == "~" || "$path" == "~/"* ]]; then
    path="${HOME}${path:1}"
  fi

  # Declared separately from the assignment so readlink's status is not masked.
  resolved=$(readlink -m -- "$path") || return

  # printf with a quoted argument keeps spaces and glob characters intact.
  printf '%s\n' "$resolved"
}
# Main: transcribe a WAV file with whisper.exe.
#
# Arguments:
#   $1 - input WAV file
#   $2 - output name without extension (".<model>" is appended to it)
#   $3 - model name; the model file is looked up as
#        <directory of whisper.exe>/models/ggml-<model>.en.bin
#   $4 - optional thread count
# Exit status: 1 on usage errors or when whisper.exe is not on PATH; whisper.exe's
# own non-zero status when transcription fails.
#
# Messages are written to stdout because the colour detection earlier in this
# file is based on stdout being a terminal.
input_wav="$1"
output_name_without_ext="$2"
model="$3"

# 4 seems to be the sweet spot for most models, except medium might be faster with 8.
default_thread_count=4
threads="${4:-$default_thread_count}"

if [[ -z "$input_wav" || -z "$output_name_without_ext" || -z "$model" ]]; then
  printf '%sUsage: %s <input.wav> <output name without extension> <model name> <optional: thread count>%s\n' \
    "${BOLD}${RED}" "$0" "${NORMAL}"
  exit 1
fi

if [[ ! -f "$input_wav" ]]; then
  printf '%sInput file "%s" doesn'\''t exist!\n%s' "${RED}${BOLD}" "$input_wav" "${NORMAL}"
  exit 1
fi

output_name="$output_name_without_ext.${model}"

# Shared tail of the progress messages. User-supplied values are always passed
# to printf as arguments, never as part of the format, so a "%" or "\" in a
# file name cannot corrupt the output.
summary="$input_wav | model: $model | threads: $threads | output: $output_name"

printf '\n%sTranscribing %s %s\n' "${YELLOW}${BOLD}" "$summary" "${NORMAL}"

# The models directory is located relative to the real whisper.exe binary, so
# resolve it through expand_path in case the PATH entry is a symlink.
whisper_bin="$(command -v whisper.exe)"
if [[ -z "$whisper_bin" ]]; then
  printf '%swhisper.exe was not found on PATH!%s\n' "${RED}${BOLD}" "${NORMAL}"
  exit 1
fi
whisper_fullname="$(expand_path "$whisper_bin")"
whisper_path="$(dirname -- "$whisper_fullname")"
models_path="$whisper_path/models"

whisper.exe --threads "$threads" -m "$models_path/ggml-${model}.en.bin" -otxt -osrt -f "$input_wav" -of "$output_name" --print-colors
error=$?

if (( error == 0 )); then
  printf '%sDone transcribing %s%s\n' "${GREEN}${BOLD}" "$summary" "${NORMAL}"
else
  # Failures are shown in red and propagated so callers can detect them.
  printf '%sError while transcribing %s%s\n' "${RED}${BOLD}" "$summary" "${NORMAL}"
  exit "$error"
fi