From fb2773ca197ca4c761559fb2ffabc04227652748 Mon Sep 17 00:00:00 2001 From: Michael Campagnaro Date: Sat, 5 Aug 2023 17:45:37 -0400 Subject: [PATCH] Improve audio extraction --- dotfiles/bin/extract-16bit-wav-from-video | 51 --------------- dotfiles/bin/extract-audio-from-video | 29 ++++----- .../extract-audio-from-video-and-transcode | 65 +++++++++++++++++++ dotfiles/bin/transcribe-audio | 2 +- dotfiles/bin/transcribe-video | 6 +- dotfiles/bin/trim-audio | 5 +- 6 files changed, 82 insertions(+), 76 deletions(-) delete mode 100644 dotfiles/bin/extract-16bit-wav-from-video create mode 100644 dotfiles/bin/extract-audio-from-video-and-transcode diff --git a/dotfiles/bin/extract-16bit-wav-from-video b/dotfiles/bin/extract-16bit-wav-from-video deleted file mode 100644 index b92d68f..0000000 --- a/dotfiles/bin/extract-16bit-wav-from-video +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash - -if which tput >/dev/null 2>&1; then - ncolors=$(tput colors) -fi -if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then - RED="$(tput setaf 1)" - GREEN="$(tput setaf 2)" - YELLOW="$(tput setaf 3)" - BLUE="$(tput setaf 4)" - MAGENTA="$(tput setaf 5)" - CYAN="$(tput setaf 6)" - BOLD="$(tput bold)" - NORMAL="$(tput sgr0)" -else - RED="" - GREEN="" - YELLOW="" - BLUE="" - MAGENTA="" - CYAN="" - BOLD="" - NORMAL="" -fi - -input="$1" -output_name="$2" - -if [[ $input == "" || $output_name == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" - exit 1 -fi - -if [[ ! -f "$input" ]]; then - printf "${RED}${BOLD}Error: failed to extract audio. Video file \"$input\" doesn't exist.\n${NORMAL}" - exit 1 -fi - -# Add extension if not provided. -output_basename=$(basename -- "$output_name") -output_extension="${output_basename##*.}" -if [[ $output_extension != "wav" ]]; then - output_name="${output_name}.wav" -fi - -printf "\n${YELLOW}${BOLD}Extracting 16-bit WAV from $input | output: $output_name${NORMAL}\n" - -# -ac 1 mixes audio to a single channel. -ffmpeg -i "$input" -ar 16000 -ac 1 -c:a pcm_s16le "$output_name" - -printf "${GREEN}${BOLD}Done extracting 16-bit WAV from $input | output: $output_name${NORMAL}\n" diff --git a/dotfiles/bin/extract-audio-from-video b/dotfiles/bin/extract-audio-from-video index ae4b27f..e57a0ec 100644 --- a/dotfiles/bin/extract-audio-from-video +++ b/dotfiles/bin/extract-audio-from-video @@ -1,5 +1,10 @@ #!/usr/bin/env bash +# Extracts audio from a video. It expects you to know what audio codecs the video container has, e.g. +# it's an mp4 video with aac and m4a audio. Just set the format to the appropriate extension. +# If you want to convert to a different format or you want to change the bit rate, channels, trim the audio, etc. +# then use extract-audio-from-video-and-transcode + if which tput >/dev/null 2>&1; then ncolors=$(tput colors) fi @@ -23,13 +28,11 @@ else NORMAL="" fi -transcode="$1" -filename=$(basename -- "$2") -format="$3" -bitrate="$4" +filename=$(basename -- "$1") +format="$2" -if [[ $1 == "" || $2 == "" || $3 == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" +if [[ $1 == "" || $2 == "" ]]; then + printf "${BOLD}${RED}Usage: extract-audio-from-video ${NORMAL}\n" exit 1 fi @@ -37,19 +40,9 @@ extension="${filename##*.}" filename="${filename%.*}" output_name="$filename.$format" -if [[ $bitrate == "" ]]; then - bitrate="64" -fi +printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | output: $output_name${NORMAL}\n" -printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | bitrate: ${bitrate}k | output: $output_name${NORMAL}\n" - -if [[ $transcode == "1" ]]; then - # Transcode audio - ffmpeg -i "$filename.$extension" -b:a ${bitrate}k -ac 2 -ar 44100 -map a "$output_name" -else - # Grab the audio stream from the video. - ffmpeg -i "$filename.$extension" -vn -acodec copy "$output_name" -fi +ffmpeg -y -stats -loglevel level+error -i "$filename.$extension" -vn -acodec copy "$output_name" printf "\n${GREEN}${BOLD}Done extracting audio from $filename.$extension | output name '$output_name'${NORMAL}\n\n" diff --git a/dotfiles/bin/extract-audio-from-video-and-transcode b/dotfiles/bin/extract-audio-from-video-and-transcode new file mode 100644 index 0000000..ee29f6a --- /dev/null +++ b/dotfiles/bin/extract-audio-from-video-and-transcode @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# Extracts audio from a video and transcodes it based on the supplied params. If you just want the audio +# from the video as-is then use extract-audio-from-video + +if which tput >/dev/null 2>&1; then + ncolors=$(tput colors) +fi +if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then + RED="$(tput setaf 1)" + GREEN="$(tput setaf 2)" + YELLOW="$(tput setaf 3)" + BLUE="$(tput setaf 4)" + MAGENTA="$(tput setaf 5)" + CYAN="$(tput setaf 6)" + BOLD="$(tput bold)" + NORMAL="$(tput sgr0)" +else + RED="" + GREEN="" + YELLOW="" + BLUE="" + MAGENTA="" + CYAN="" + BOLD="" + NORMAL="" +fi + +filename="$1" +output_name="$2" +sample_rate="$3" +channel_count="$4" +start_time="$5" +end_time="$6" + +output_base=$(basename -- "$output_name") +output_extension="${output_base##*.}" + +if [[ $filename == "" || $output_extension == "" || $output_extension == $output_base ]]; then + printf "${BOLD}${RED}Usage: extract-audio-from-video-and-transcode ${NORMAL}\n" + exit 1 +fi + +if [[ $sample_rate == "" ]]; then + sample_rate="44100" +fi + +if [[ $channel_count == "" ]]; then + channel_count="2" +fi + +timing_args="" +if [[ $start_time != "" ]]; then + timing_args="-ss $start_time " +fi +if [[ $end_time != "" ]]; then + timing_args+="-to $end_time" +fi + +printf "\n${YELLOW}${BOLD}Extracting audio from '$filename' | output: '$output_name' | sample rate: $sample_rate | channels: $channel_count | start: $start_time | end: $end_time${NORMAL}\n" + +ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -i "$filename" -ar $sample_rate -ac $channel_count -map a "$output_name" + +printf "\n${GREEN}${BOLD}Done extracting audio from '$filename' | output '$output_name'${NORMAL}\n\n" + diff --git a/dotfiles/bin/transcribe-audio b/dotfiles/bin/transcribe-audio index bed2e3f..9ad7dc9 100644 --- a/dotfiles/bin/transcribe-audio +++ b/dotfiles/bin/transcribe-audio @@ -53,7 +53,7 @@ model="$3" threads=$4 if [[ $input_wav == "" || $output_name_without_ext == "" || $model == "" ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: transcribe-audio ${NORMAL}\n" exit 1 fi diff --git a/dotfiles/bin/transcribe-video b/dotfiles/bin/transcribe-video index 6641e11..1a8a0ed 100644 --- a/dotfiles/bin/transcribe-video +++ b/dotfiles/bin/transcribe-video @@ -29,7 +29,7 @@ shift 2 models=("$@") if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then - printf "${BOLD}${RED}Usage: $0 ${NORMAL}\n" + printf "${BOLD}${RED}Usage: transcribe-video ${NORMAL}\n" exit 1 fi @@ -43,7 +43,9 @@ if [[ input_extension != "wav" ]]; then wav_name="${wav_name}.wav" fi -extract-16bit-wav-from-video "$input_video" "$wav_name" +channel_count=1 +sample_rate=16000 +extract-audio-from-video-and-transcode "$input_video" "$wav_name" $sample_rate $channel_count if [[ $? == 1 ]]; then exit 1; fi for model in "$@"; do diff --git a/dotfiles/bin/trim-audio b/dotfiles/bin/trim-audio index f2de52d..8441e74 100644 --- a/dotfiles/bin/trim-audio +++ b/dotfiles/bin/trim-audio @@ -42,10 +42,7 @@ timing_args="-ss $start_time -to $end_time" printf "\n${YELLOW}${BOLD}Trimming '$filename.$extension' | output: $output | start: $start_time | end: $end_time${NORMAL}\n" -# You might have issues if the file has multiple video streams or embedded subtitles. The -map 0 arg is typically given -# when copying a video stream, but I'm not sure if it's appropriate to use here. -# Trying out async to keep video and audio synced. -ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -async 1 -i "$filename.$extension" -c copy "$output" +ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -i "$filename.$extension" -c copy "$output" printf "\n${GREEN}${BOLD}Finished trimming${NORMAL}\n\n"