#!/usr/bin/env bash
#
# Transcribes an audio file with whisper.exe, requesting text and SRT output
# (-otxt -osrt).
#
# Usage: <script> <input_wav> <output_name_without_ext> <model> [threads]
#
#   input_wav                Audio file to transcribe; must be an existing file.
#   output_name_without_ext  Output base name. ".<model>" is appended before the
#                            name is passed to whisper.exe via -of.
#   model                    Model name. The model file is looked up as
#                            <directory of whisper.exe>/models/ggml-<model>.en.bin
#   threads                  Optional thread count (defaults to 4).
#
# Exit status:
#   0      transcription succeeded
#   1      bad usage, missing input file, or whisper.exe not found on PATH
#   other  the exit status of whisper.exe when it fails
#
# Notes on model choice:
# The base model is pretty good overall. It inserts punctuation well and
# catches most words.
# Tiny is fast and often has correct grammar, but it misses a lot of words,
# especially when the source isn't loud or is muffled.
#
# Small and medium models can do better word detection at times, but suffer
# from a lack of punctuation. Medium is particularly bad and often excludes
# periods and commas.

# Enable colours only when stdout is a terminal that reports at least 8
# colours. All status messages below are written to stdout so that this check
# matches the stream the colour codes are actually sent to.
ncolors=""
if command -v tput >/dev/null 2>&1; then
  ncolors=$(tput colors 2>/dev/null)
fi

if [[ -t 1 && -n "$ncolors" && "$ncolors" -ge 8 ]]; then
  RED="$(tput setaf 1)"
  GREEN="$(tput setaf 2)"
  YELLOW="$(tput setaf 3)"
  BLUE="$(tput setaf 4)"
  MAGENTA="$(tput setaf 5)"
  CYAN="$(tput setaf 6)"
  BOLD="$(tput bold)"
  NORMAL="$(tput sgr0)"
else
  RED=""
  GREEN=""
  YELLOW=""
  BLUE=""
  MAGENTA=""
  CYAN=""
  BOLD=""
  NORMAL=""
fi

# Prints the canonical form of a path with symlinks resolved.
# A leading "~" path component is replaced by $HOME first, because a tilde
# that arrives inside a variable is not expanded by the shell.
# Arguments: $1 - path to expand (does not need to exist; readlink -m is used)
# Outputs:   the resolved path on stdout
# Returns:   the exit status of readlink
expand_path() {
  local path="$1"

  if [[ "$path" == "~" ]]; then
    path="$HOME"
  elif [[ "$path" == "~/"* ]]; then
    # Drop the leading "~/" (two characters) and re-root the rest at $HOME.
    path="$HOME/${path:2}"
  fi

  readlink -m -- "$path"
}

input_wav="${1:-}"
output_name_without_ext="${2:-}"
model="${3:-}"

# 4 seems to be the sweet spot for most models, except medium might be faster
# with 8.
default_thread_count=4
threads="${4:-$default_thread_count}"

if [[ -z "$input_wav" || -z "$output_name_without_ext" || -z "$model" ]]; then
  printf '%s\n' "${BOLD}${RED}Usage: $0 <input_wav> <output_name_without_ext> <model> [threads]${NORMAL}"
  exit 1
fi

if [[ ! -f "$input_wav" ]]; then
  printf '%s\n' "${RED}${BOLD}Input file \"$input_wav\" doesn't exist!${NORMAL}"
  exit 1
fi

output_name="$output_name_without_ext.${model}"

# Summary of this run, reused by the progress and result messages.
run_summary="$input_wav | model: $model | threads: $threads | output: $output_name"

printf '\n%s\n' "${YELLOW}${BOLD}Transcribing $run_summary ${NORMAL}"

# The models directory is expected next to the real whisper.exe binary, so
# resolve the executable found on PATH through any symlinks first.
whisper_bin="$(command -v whisper.exe)"
if [[ -z "$whisper_bin" ]]; then
  printf '%s\n' "${RED}${BOLD}whisper.exe was not found in PATH!${NORMAL}"
  exit 1
fi

whisper_fullname="$(expand_path "$whisper_bin")"
whisper_path="$(dirname -- "$whisper_fullname")"
models_path="$whisper_path/models"

whisper.exe --threads "$threads" \
  -m "$models_path/ggml-${model}.en.bin" \
  -otxt -osrt \
  -f "$input_wav" \
  -of "$output_name" \
  --print-colors
error=$?

if (( error == 0 )); then
  printf '%s\n' "${GREEN}${BOLD}Done transcribing $run_summary${NORMAL}"
else
  printf '%s\n' "${RED}${BOLD}Error while transcribing $run_summary${NORMAL}"
fi

# Propagate the whisper.exe status so callers can detect a failed run.
exit "$error"