Improve audio extraction

2023-08-05 17:45:37 -04:00 · 2023-08-05 17:45:37 -04:00 · fb2773ca19
commit fb2773ca19
parent 62fa34ca20
6 changed files with 82 additions and 76 deletions
--- a/dotfiles/bin/extract-16bit-wav-from-video
+++ b/dotfiles/bin/extract-16bit-wav-from-video
@ -1,51 +0,0 @@
-#!/usr/bin/env bash
-
-if which tput >/dev/null 2>&1; then
-    ncolors=$(tput colors)
-fi
-if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
-    RED="$(tput setaf 1)"
-    GREEN="$(tput setaf 2)"
-    YELLOW="$(tput setaf 3)"
-    BLUE="$(tput setaf 4)"
-    MAGENTA="$(tput setaf 5)"
-    CYAN="$(tput setaf 6)"
-    BOLD="$(tput bold)"
-    NORMAL="$(tput sgr0)"
-else
-    RED=""
-    GREEN=""
-    YELLOW=""
-    BLUE=""
-    MAGENTA=""
-    CYAN=""
-    BOLD=""
-    NORMAL=""
-fi
-
-input="$1"
-output_name="$2"
-
-if [[ $input == "" || $output_name == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input video> <wav output name>${NORMAL}\n"
-    exit 1
-fi
-
-if [[ ! -f "$input" ]]; then
-    printf "${RED}${BOLD}Error: failed to extract audio. Video file \"$input\" doesn't exist.\n${NORMAL}"
-    exit 1
-fi
-
-# Add extension if not provided.
-output_basename=$(basename -- "$output_name")
-output_extension="${output_basename##*.}"
-if [[ $output_extension != "wav" ]]; then
-    output_name="${output_name}.wav"
-fi
-
-printf "\n${YELLOW}${BOLD}Extracting 16-bit WAV from $input | output: $output_name${NORMAL}\n"
-
-# -ac 1 mixes audio to a single channel.
-ffmpeg -i "$input" -ar 16000 -ac 1 -c:a pcm_s16le "$output_name"
-
-printf "${GREEN}${BOLD}Done extracting 16-bit WAV from $input | output: $output_name${NORMAL}\n"
--- a/dotfiles/bin/extract-audio-from-video
+++ b/dotfiles/bin/extract-audio-from-video
@ -1,5 +1,10 @@
 #!/usr/bin/env bash

+# Extracts the audio stream from a video as-is (no re-encoding). It expects you to know what audio codec the video
+# container has, e.g. it's an mp4 video with aac audio. Just set the format to an appropriate extension (aac, m4a, etc).
+# If you want to convert to a different format or you want to change the sample rate, channels, trim the audio, etc.
+# then use extract-audio-from-video-and-transcode
+
 if which tput >/dev/null 2>&1; then
    ncolors=$(tput colors)
 fi
@ -23,13 +28,11 @@ else
    NORMAL=""
 fi

-transcode="$1"
-filename=$(basename -- "$2")
-format="$3"
-bitrate="$4"
+filename=$(basename -- "$1")
+format="$2"

-if [[ $1 == "" || $2 == "" || $3 == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <transcode? 0|1 (needed when container doesn't contain format)> <filename> <format (mp3, m4a, aac, etc)> <optional: bitrate. Uses 64k when not specified, 0 = variable (e.g. 0, 64, 128, etc)>${NORMAL}\n"
+if [[ $1 == "" || $2 == "" ]]; then
+    printf "${BOLD}${RED}Usage: extract-audio-from-video <filename> <format (mp3, m4a, aac, etc)>${NORMAL}\n"
    exit 1
 fi

@ -37,19 +40,9 @@ extension="${filename##*.}"
 filename="${filename%.*}"
 output_name="$filename.$format"

-if [[ $bitrate == "" ]]; then
-    bitrate="64"
-fi
+printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | output: $output_name${NORMAL}\n"

-printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | bitrate: ${bitrate}k | output: $output_name${NORMAL}\n"
-
-if [[ $transcode == "1" ]]; then
-    # Transcode audio
-    ffmpeg -i "$filename.$extension" -b:a ${bitrate}k -ac 2 -ar 44100 -map a "$output_name"
-else
-    # Grab the audio stream from the video.
-    ffmpeg -i "$filename.$extension" -vn -acodec copy "$output_name"
-fi
+ffmpeg -y -stats -loglevel level+error -i "$filename.$extension" -vn -acodec copy "$output_name"

 printf "\n${GREEN}${BOLD}Done extracting audio from $filename.$extension | output name '$output_name'${NORMAL}\n\n"

--- a/dotfiles/bin/extract-audio-from-video-and-transcode
+++ b/dotfiles/bin/extract-audio-from-video-and-transcode
@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Extracts audio from a video and transcodes it based on the supplied params. If you just want the audio
+# from the video as-is then use extract-audio-from-video
+# Args: <filename> <output name w/ extension> [sample rate=44100] [channel count=2] [start HH:MM:SS] [end HH:MM:SS]
+# Exits 1 on bad usage or when ffmpeg fails, so callers that test for exit status 1 can stop early.
+
+if command -v tput >/dev/null 2>&1; then
+    ncolors=$(tput colors)
+fi
+if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
+    RED="$(tput setaf 1)"
+    GREEN="$(tput setaf 2)"
+    YELLOW="$(tput setaf 3)"
+    BLUE="$(tput setaf 4)"
+    MAGENTA="$(tput setaf 5)"
+    CYAN="$(tput setaf 6)"
+    BOLD="$(tput bold)"
+    NORMAL="$(tput sgr0)"
+else
+    RED=""
+    GREEN=""
+    YELLOW=""
+    BLUE=""
+    MAGENTA=""
+    CYAN=""
+    BOLD=""
+    NORMAL=""
+fi
+
+filename="$1"
+output_name="$2"
+# Missing or empty optional args fall back to the documented defaults.
+sample_rate="${3:-44100}"
+channel_count="${4:-2}"
+start_time="$5"
+end_time="$6"
+
+# Require an output name that carries an extension (e.g. out.wav); a bare name is rejected below.
+output_base=$(basename -- "$output_name")
+output_extension="${output_base##*.}"
+
+if [[ $filename == "" || $output_extension == "" || $output_extension == "$output_base" ]]; then
+    printf "${BOLD}${RED}Usage: extract-audio-from-video-and-transcode <filename> <output name w/ extension> <optional: sample rate. Defaults to 44100> <optional: channel count. Defaults to 2> <optional: start time HH:MM:SS> <optional: end time HH:MM:SS>${NORMAL}\n"
+    exit 1
+fi
+
+# Collect the trim options in an array so every option and value reaches ffmpeg as its own word.
+timing_args=()
+if [[ $start_time != "" ]]; then
+    timing_args+=(-ss "$start_time")
+fi
+if [[ $end_time != "" ]]; then
+    timing_args+=(-to "$end_time")
+fi
+
+printf '\n%s\n' "${YELLOW}${BOLD}Extracting audio from '$filename' | output: '$output_name' | sample rate: $sample_rate | channels: $channel_count | start: $start_time | end: $end_time${NORMAL}"
+
+if ! ffmpeg -y -stats -loglevel level+error "${timing_args[@]}" -accurate_seek -i "$filename" -ar "$sample_rate" -ac "$channel_count" -map a "$output_name"; then
+    printf '%s\n' "${BOLD}${RED}Error: failed to extract audio from '$filename'.${NORMAL}" >&2
+    exit 1
+fi
+
+printf '\n%s\n\n' "${GREEN}${BOLD}Done extracting audio from '$filename' | output '$output_name'${NORMAL}"
+
--- a/dotfiles/bin/transcribe-audio
+++ b/dotfiles/bin/transcribe-audio
@ -53,7 +53,7 @@ model="$3"
 threads=$4

 if [[ $input_wav == "" || $output_name_without_ext == "" || $model == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
+    printf "${BOLD}${RED}Usage: transcribe-audio <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
    exit 1
 fi

--- a/dotfiles/bin/transcribe-video
+++ b/dotfiles/bin/transcribe-video
@ -29,7 +29,7 @@ shift 2
 models=("$@")

 if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input.mp4> <output name without extension> <list of model names to use>${NORMAL}\n"
+    printf "${BOLD}${RED}Usage: transcribe-video <input.mp4> <output name without extension> <list of model names to use>${NORMAL}\n"
    exit 1
 fi

@ -43,7 +43,9 @@ if [[ input_extension != "wav" ]]; then
    wav_name="${wav_name}.wav"
 fi

-extract-16bit-wav-from-video "$input_video" "$wav_name"
+channel_count=1
+sample_rate=16000
+extract-audio-from-video-and-transcode "$input_video" "$wav_name" $sample_rate $channel_count
 if [[ $? == 1 ]]; then exit 1; fi

 for model in "$@"; do
--- a/dotfiles/bin/trim-audio
+++ b/dotfiles/bin/trim-audio
@ -42,10 +42,7 @@ timing_args="-ss $start_time -to $end_time"

 printf "\n${YELLOW}${BOLD}Trimming '$filename.$extension' | output: $output | start: $start_time | end: $end_time${NORMAL}\n"

-# You might have issues if the file has multiple video streams or embedded subtitles. The -map 0 arg is typically given
-# when copying a video stream, but I'm not sure if it's appropriate to use here.
-# Trying out async to keep video and audio synced.
-ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -async 1 -i "$filename.$extension" -c copy "$output"
+ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -i "$filename.$extension" -c copy "$output"

 printf "\n${GREEN}${BOLD}Finished trimming${NORMAL}\n\n"