Improve audio extraction

2023-08-05 17:45:37 -04:00
parent 62fa34ca20
commit fb2773ca19
6 changed files with 82 additions and 76 deletions
--- a/dotfiles/bin/extract-16bit-wav-from-video
+++ b/dotfiles/bin/extract-16bit-wav-from-video
@@ -1,51 +0,0 @@
 #!/usr/bin/env bash
 # Extracts the audio of a video into a WAV file using ffmpeg.
 #
 # Arguments:
 #   $1 - input video path (must be an existing regular file)
 #   $2 - output name; ".wav" is appended when its extension is not "wav"
 #
 # Exits with status 1 when an argument is missing or the input file does not exist.
 # NOTE(review): the ffmpeg exit status is not checked, so the "Done" message is printed and the
 # script's status comes from the final printf even when ffmpeg fails.

 # Only query the terminal's color count when tput is available.
 if which tput >/dev/null 2>&1; then
    ncolors=$(tput colors)
 fi
 # Use color escape sequences only when stdout is a terminal reporting at least 8 colors;
 # otherwise every color variable is left empty.
 if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
    RED="$(tput setaf 1)"
    GREEN="$(tput setaf 2)"
    YELLOW="$(tput setaf 3)"
    BLUE="$(tput setaf 4)"
    MAGENTA="$(tput setaf 5)"
    CYAN="$(tput setaf 6)"
    BOLD="$(tput bold)"
    NORMAL="$(tput sgr0)"
 else
    RED=""
    GREEN=""
    YELLOW=""
    BLUE=""
    MAGENTA=""
    CYAN=""
    BOLD=""
    NORMAL=""
 fi
 input="$1"
 output_name="$2"
 # Both positional arguments are required.
 if [[ $input == "" || $output_name == "" ]]; then
    printf "${BOLD}${RED}Usage: $0 <input video> <wav output name>${NORMAL}\n"
    exit 1
 fi
 if [[ ! -f "$input" ]]; then
    printf "${RED}${BOLD}Error: failed to extract audio. Video file \"$input\" doesn't exist.\n${NORMAL}"
    exit 1
 fi
 # Add extension if not provided.
 # The extension is whatever follows the last "." in the output's base name; a name without
 # any "." yields the whole base name, which also triggers the ".wav" suffix below.
 output_basename=$(basename -- "$output_name")
 output_extension="${output_basename##*.}"
 if [[ $output_extension != "wav" ]]; then
    output_name="${output_name}.wav"
 fi
 printf "\n${YELLOW}${BOLD}Extracting 16-bit WAV from $input | output: $output_name${NORMAL}\n"
 # -ac 1 mixes audio to a single channel.
 # -ar 16000 sets the output sample rate and -c:a pcm_s16le selects the signed 16-bit little-endian PCM codec.
 ffmpeg -i "$input" -ar 16000 -ac 1 -c:a pcm_s16le "$output_name"
 printf "${GREEN}${BOLD}Done extracting 16-bit WAV from $input | output: $output_name${NORMAL}\n"
--- a/dotfiles/bin/extract-audio-from-video
+++ b/dotfiles/bin/extract-audio-from-video
@@ -1,5 +1,10 @@
 #!/usr/bin/env bash
 # Extracts audio from a video. It expects you to know what audio codecs the video container has, e.g.
 # it's an mp4 video with aac and m4a audio. Just set the format to the appropriate extension.
 # If you want to convert to a different format or you want to change the bit rate, channels, trim the audio, etc.
 # then use extract-audio-from-video-and-transcode
 if which tput >/dev/null 2>&1; then
    ncolors=$(tput colors)
 fi
@@ -23,13 +28,11 @@ else
    NORMAL=""
 fi
-transcode="$1"
+filename=$(basename -- "$1")
-filename=$(basename -- "$2")
+format="$2"
 format="$3"
 bitrate="$4"
-if [[ $1 == "" || $2 == "" || $3 == "" ]]; then
+if [[ $1 == "" || $2 == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <transcode? 0|1 (needed when container doesn't contain format)> <filename> <format (mp3, m4a, aac, etc)> <optional: bitrate. Uses 64k when not specified, 0 = variable (e.g. 0, 64, 128, etc)>${NORMAL}\n"
+    printf "${BOLD}${RED}Usage: extract-audio-from-video <filename> <format (mp3, m4a, aac, etc)>${NORMAL}\n"
    exit 1
 fi
@@ -37,19 +40,9 @@ extension="${filename##*.}"
 filename="${filename%.*}"
 output_name="$filename.$format"
-if [[ $bitrate == "" ]]; then
+printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | output: $output_name${NORMAL}\n"
    bitrate="64"
 fi
-printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | bitrate: ${bitrate}k | output: $output_name${NORMAL}\n"
+ffmpeg -y -stats -loglevel level+error -i "$filename.$extension" -vn -acodec copy "$output_name"
 if [[ $transcode == "1" ]]; then
    # Transcode audio
    ffmpeg -i "$filename.$extension" -b:a ${bitrate}k -ac 2 -ar 44100 -map a "$output_name"
 else
    # Grab the audio stream from the video.
    ffmpeg -i "$filename.$extension" -vn -acodec copy "$output_name"
 fi
 printf "\n${GREEN}${BOLD}Done extracting audio from $filename.$extension | output name '$output_name'${NORMAL}\n\n"
--- a/dotfiles/bin/extract-audio-from-video-and-transcode
+++ b/dotfiles/bin/extract-audio-from-video-and-transcode
@@ -0,0 +1,65 @@
 #!/usr/bin/env bash
 # Extracts audio from a video and transcodes it based on the supplied params. If you just want the audio
 # from the video as-is then use extract-audio-from-video
 #
 # Arguments:
 #   $1 - input video file name (required)
 #   $2 - output name, must include an extension (required)
 #   $3 - sample rate (optional, defaults to 44100)
 #   $4 - channel count (optional, defaults to 2)
 #   $5 - start time HH:MM:SS (optional)
 #   $6 - end time HH:MM:SS (optional)
 #
 # Exit status: 0 on success, 1 on bad usage or when ffmpeg fails, so callers can test $?.

 # Only query the terminal's color count when tput is available.
 ncolors=""
 if command -v tput >/dev/null 2>&1; then
    ncolors=$(tput colors)
 fi
 # Use color escape sequences only when stdout is a terminal reporting at least 8 colors.
 if [ -t 1 ] && [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ]; then
    RED="$(tput setaf 1)"
    GREEN="$(tput setaf 2)"
    YELLOW="$(tput setaf 3)"
    BOLD="$(tput bold)"
    NORMAL="$(tput sgr0)"
 else
    RED=""
    GREEN=""
    YELLOW=""
    BOLD=""
    NORMAL=""
 fi
 filename="$1"
 output_name="$2"
 sample_rate="${3:-44100}"
 channel_count="${4:-2}"
 start_time="$5"
 end_time="$6"
 # The extension is whatever follows the last "." of the output's base name. When there is no "."
 # the expansion returns the whole base name, which is how a missing extension is detected.
 output_base=$(basename -- "$output_name")
 output_extension="${output_base##*.}"
 # The right-hand side is quoted so the base name is compared literally instead of as a glob pattern.
 if [[ -z "$filename" || -z "$output_extension" || "$output_extension" == "$output_base" ]]; then
    printf "%s%sUsage: extract-audio-from-video-and-transcode <filename> <output name w/ extension> <optional: sample rate. Defaults to 44100> <optional: channel count. Defaults to 2> <optional: start time HH:MM:SS> <optional: end time HH:MM:SS>%s\n" "$BOLD" "$RED" "$NORMAL"
    exit 1
 fi
 # Collect the optional seek arguments in an array so each value stays a single word.
 timing_args=()
 if [[ -n "$start_time" ]]; then
    timing_args+=(-ss "$start_time")
 fi
 if [[ -n "$end_time" ]]; then
    timing_args+=(-to "$end_time")
 fi
 # User-supplied values are passed as printf arguments (never as part of the format string) so a
 # "%" or backslash in a file name cannot corrupt the message.
 printf "\n%s%sExtracting audio from '%s' | output: '%s' | sample rate: %s | channels: %s | start: %s | end: %s%s\n" \
    "$YELLOW" "$BOLD" "$filename" "$output_name" "$sample_rate" "$channel_count" "$start_time" "$end_time" "$NORMAL"
 # Report failure and exit non-zero when ffmpeg fails, instead of always printing "Done" and exiting 0.
 if ! ffmpeg -y -stats -loglevel level+error "${timing_args[@]}" -accurate_seek -i "$filename" -ar "$sample_rate" -ac "$channel_count" -map a "$output_name"; then
    printf "\n%s%sError: failed to extract audio from '%s'%s\n\n" "$RED" "$BOLD" "$filename" "$NORMAL"
    exit 1
 fi
 printf "\n%s%sDone extracting audio from '%s' | output '%s'%s\n\n" "$GREEN" "$BOLD" "$filename" "$output_name" "$NORMAL"
--- a/dotfiles/bin/transcribe-audio
+++ b/dotfiles/bin/transcribe-audio
@@ -53,7 +53,7 @@ model="$3"
 threads=$4
 if [[ $input_wav == "" || $output_name_without_ext == "" || $model == "" ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
+    printf "${BOLD}${RED}Usage: transcribe-audio <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
    exit 1
 fi
--- a/dotfiles/bin/transcribe-video
+++ b/dotfiles/bin/transcribe-video
@@ -29,7 +29,7 @@ shift 2
 models=("$@")
 if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then
-    printf "${BOLD}${RED}Usage: $0 <input.mp4> <output name without extension> <list of model names to use>${NORMAL}\n"
+    printf "${BOLD}${RED}Usage: transcribe-video <input.mp4> <output name without extension> <list of model names to use>${NORMAL}\n"
    exit 1
 fi
@@ -43,7 +43,9 @@ if [[ input_extension != "wav" ]]; then
    wav_name="${wav_name}.wav"
 fi
-extract-16bit-wav-from-video "$input_video" "$wav_name"
+channel_count=1
 sample_rate=16000
 extract-audio-from-video-and-transcode "$input_video" "$wav_name" $sample_rate $channel_count
 if [[ $? == 1 ]]; then exit 1; fi
 for model in "$@"; do
--- a/dotfiles/bin/trim-audio
+++ b/dotfiles/bin/trim-audio
@@ -42,10 +42,7 @@ timing_args="-ss $start_time -to $end_time"
 printf "\n${YELLOW}${BOLD}Trimming '$filename.$extension' | output: $output | start: $start_time | end: $end_time${NORMAL}\n"
-# You might have issues if the file has multiple video streams or embedded subtitles. The -map 0 arg is typically given
+ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -i "$filename.$extension" -c copy "$output"
 # when copying a video stream, but I'm not sure if it's appropriate to use here.
 # Trying out async to keep video and audio synced.
 ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -async 1 -i "$filename.$extension" -c copy "$output"
 printf "\n${GREEN}${BOLD}Finished trimming${NORMAL}\n\n"