Improve audio extraction

This commit is contained in:
Michael Campagnaro 2023-08-05 17:45:37 -04:00
parent 62fa34ca20
commit fb2773ca19
6 changed files with 82 additions and 76 deletions

View File

@ -1,51 +0,0 @@
#!/usr/bin/env bash
#
# Extracts the audio track of a video into a WAV file using ffmpeg with
# a 16000 Hz sample rate, a single channel and the pcm_s16le codec.
#
# Arguments:
#   $1 - input video file (must exist)
#   $2 - output name; ".wav" is appended when the name doesn't end in it
#
# Exit status:
#   1 when arguments are missing, the input file doesn't exist, or ffmpeg
#   fails. Callers test for exactly 1, so ffmpeg's own status is collapsed
#   to that value.

# Only use colour codes when stdout is a terminal that reports >= 8 colours.
ncolors=""
if command -v tput >/dev/null 2>&1; then
  ncolors=$(tput colors)
fi

if [[ -t 1 && -n "$ncolors" && "$ncolors" -ge 8 ]]; then
  RED="$(tput setaf 1)"
  GREEN="$(tput setaf 2)"
  YELLOW="$(tput setaf 3)"
  BLUE="$(tput setaf 4)"
  MAGENTA="$(tput setaf 5)"
  CYAN="$(tput setaf 6)"
  BOLD="$(tput bold)"
  NORMAL="$(tput sgr0)"
else
  RED=""
  GREEN=""
  YELLOW=""
  BLUE=""
  MAGENTA=""
  CYAN=""
  BOLD=""
  NORMAL=""
fi

input="$1"
output_name="$2"

# User-supplied text is always passed as a printf argument, never as part of
# the format string, so a '%' or backslash in a path is printed literally.
if [[ -z "$input" || -z "$output_name" ]]; then
  printf '%s\n' "${BOLD}${RED}Usage: $0 <input video> <wav output name>${NORMAL}"
  exit 1
fi

if [[ ! -f "$input" ]]; then
  printf '%s\n%s' \
    "${RED}${BOLD}Error: failed to extract audio. Video file \"$input\" doesn't exist." \
    "${NORMAL}"
  exit 1
fi

# Add extension if not provided.
output_basename=$(basename -- "$output_name")
output_extension="${output_basename##*.}"
if [[ "$output_extension" != "wav" ]]; then
  output_name="${output_name}.wav"
fi

printf '\n%s\n' "${YELLOW}${BOLD}Extracting 16-bit WAV from $input | output: $output_name${NORMAL}"

# -ac 1 mixes audio to a single channel.
# Stop on failure instead of reporting success for a file that wasn't written.
if ! ffmpeg -i "$input" -ar 16000 -ac 1 -c:a pcm_s16le "$output_name"; then
  printf '%s\n' "${RED}${BOLD}Error: ffmpeg failed to extract 16-bit WAV from $input${NORMAL}"
  exit 1
fi

printf '%s\n' "${GREEN}${BOLD}Done extracting 16-bit WAV from $input | output: $output_name${NORMAL}"

View File

@ -1,5 +1,10 @@
#!/usr/bin/env bash
# Extracts the audio stream from a video as-is (the stream is copied, not re-encoded). It expects you to know
# which audio codec the video container holds, e.g. an mp4 video with aac or m4a audio. Just set the format to
# the matching extension. If you want to convert to a different format, or you want to change the sample rate
# or channels, trim the audio, etc., then use extract-audio-from-video-and-transcode instead.
if which tput >/dev/null 2>&1; then
ncolors=$(tput colors)
fi
@ -23,13 +28,11 @@ else
NORMAL=""
fi
transcode="$1"
filename=$(basename -- "$2")
format="$3"
bitrate="$4"
filename=$(basename -- "$1")
format="$2"
if [[ $1 == "" || $2 == "" || $3 == "" ]]; then
printf "${BOLD}${RED}Usage: $0 <transcode? 0|1 (needed when container doesn't contain format)> <filename> <format (mp3, m4a, aac, etc)> <optional: bitrate. Uses 64k when not specified, 0 = variable (e.g. 0, 64, 128, etc)>${NORMAL}\n"
if [[ $1 == "" || $2 == "" ]]; then
printf "${BOLD}${RED}Usage: extract-audio-from-video <filename> <format (mp3, m4a, aac, etc)>${NORMAL}\n"
exit 1
fi
@ -37,19 +40,9 @@ extension="${filename##*.}"
filename="${filename%.*}"
output_name="$filename.$format"
if [[ $bitrate == "" ]]; then
bitrate="64"
fi
printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | output: $output_name${NORMAL}\n"
printf "\n${YELLOW}${BOLD}Extracting audio from $filename.$extension | bitrate: ${bitrate}k | output: $output_name${NORMAL}\n"
if [[ $transcode == "1" ]]; then
# Transcode audio
ffmpeg -i "$filename.$extension" -b:a ${bitrate}k -ac 2 -ar 44100 -map a "$output_name"
else
# Grab the audio stream from the video.
ffmpeg -i "$filename.$extension" -vn -acodec copy "$output_name"
fi
ffmpeg -y -stats -loglevel level+error -i "$filename.$extension" -vn -acodec copy "$output_name"
printf "\n${GREEN}${BOLD}Done extracting audio from $filename.$extension | output name '$output_name'${NORMAL}\n\n"

View File

@ -0,0 +1,65 @@
#!/usr/bin/env bash
#
# Extracts audio from a video and transcodes it based on the supplied params.
# If you just want the audio from the video as-is then use
# extract-audio-from-video.
#
# Arguments:
#   $1 - input video
#   $2 - output name, including an extension
#   $3 - optional: sample rate, defaults to 44100
#   $4 - optional: channel count, defaults to 2
#   $5 - optional: start time (HH:MM:SS)
#   $6 - optional: end time (HH:MM:SS)
#
# Exit status:
#   1 when the input or the output extension is missing, or when ffmpeg
#   fails. Callers test for exactly 1, so ffmpeg's own status is collapsed
#   to that value.

# Only use colour codes when stdout is a terminal that reports >= 8 colours.
ncolors=""
if command -v tput >/dev/null 2>&1; then
  ncolors=$(tput colors)
fi

if [[ -t 1 && -n "$ncolors" && "$ncolors" -ge 8 ]]; then
  RED="$(tput setaf 1)"
  GREEN="$(tput setaf 2)"
  YELLOW="$(tput setaf 3)"
  BLUE="$(tput setaf 4)"
  MAGENTA="$(tput setaf 5)"
  CYAN="$(tput setaf 6)"
  BOLD="$(tput bold)"
  NORMAL="$(tput sgr0)"
else
  RED=""
  GREEN=""
  YELLOW=""
  BLUE=""
  MAGENTA=""
  CYAN=""
  BOLD=""
  NORMAL=""
fi

filename="$1"
output_name="$2"
sample_rate="${3:-44100}"
channel_count="${4:-2}"
start_time="$5"
end_time="$6"

output_base=$(basename -- "$output_name")
output_extension="${output_base##*.}"

# When the name has no dot, the "##*." expansion leaves it untouched, so an
# extension equal to the whole base name means no extension was given. The
# right-hand side is quoted to force a literal comparison, not a glob match.
if [[ -z "$filename" || -z "$output_extension" || "$output_extension" == "$output_base" ]]; then
  printf '%s\n' "${BOLD}${RED}Usage: extract-audio-from-video-and-transcode <filename> <output name w/ extension> <optional: sample rate. Defaults to 44100> <optional: channel count. Defaults to 2> <optional: start time HH:MM:SS> <optional: end time HH:MM:SS>${NORMAL}"
  exit 1
fi

# Collect the optional trim flags in an array so each value reaches ffmpeg as
# exactly one argument, with no word splitting or globbing.
timing_args=()
if [[ -n "$start_time" ]]; then
  timing_args+=(-ss "$start_time")
fi
if [[ -n "$end_time" ]]; then
  timing_args+=(-to "$end_time")
fi

# User-supplied text is always passed as a printf argument, never as part of
# the format string, so a '%' or backslash in a path is printed literally.
printf '\n%s\n' "${YELLOW}${BOLD}Extracting audio from '$filename' | output: '$output_name' | sample rate: $sample_rate | channels: $channel_count | start: $start_time | end: $end_time${NORMAL}"

# The trim flags stay before -i, as in the original command line.
# Stop on failure instead of reporting success for a file that wasn't written.
if ! ffmpeg -y -stats -loglevel level+error "${timing_args[@]}" -accurate_seek \
  -i "$filename" -ar "$sample_rate" -ac "$channel_count" -map a "$output_name"; then
  printf '\n%s\n\n' "${RED}${BOLD}Error: ffmpeg failed to extract audio from '$filename'${NORMAL}"
  exit 1
fi

printf '\n%s\n\n' "${GREEN}${BOLD}Done extracting audio from '$filename' | output '$output_name'${NORMAL}"

View File

@ -53,7 +53,7 @@ model="$3"
threads=$4
if [[ $input_wav == "" || $output_name_without_ext == "" || $model == "" ]]; then
printf "${BOLD}${RED}Usage: $0 <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
printf "${BOLD}${RED}Usage: transcribe-audio <input.wav> <output name without extension> <model name> <optional: thread count>${NORMAL}\n"
exit 1
fi

View File

@ -29,7 +29,7 @@ shift 2
models=("$@")
if [[ $input_video == "" || $output_name_without_ext == "" || ${#models[@]} -eq 0 ]]; then
printf "${BOLD}${RED}Usage: $0 <input.mp4> <output name without extension> <list of model names to use>${NORMAL}\n"
printf "${BOLD}${RED}Usage: transcribe-video <input.mp4> <output name without extension> <list of model names to use>${NORMAL}\n"
exit 1
fi
@ -43,7 +43,9 @@ if [[ input_extension != "wav" ]]; then
wav_name="${wav_name}.wav"
fi
extract-16bit-wav-from-video "$input_video" "$wav_name"
channel_count=1
sample_rate=16000
extract-audio-from-video-and-transcode "$input_video" "$wav_name" $sample_rate $channel_count
if [[ $? == 1 ]]; then exit 1; fi
for model in "$@"; do

View File

@ -42,10 +42,7 @@ timing_args="-ss $start_time -to $end_time"
printf "\n${YELLOW}${BOLD}Trimming '$filename.$extension' | output: $output | start: $start_time | end: $end_time${NORMAL}\n"
# You might have issues if the file has multiple video streams or embedded subtitles. The -map 0 arg is typically given
# when copying a video stream, but I'm not sure if it's appropriate to use here.
# Trying out async to keep video and audio synced.
ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -async 1 -i "$filename.$extension" -c copy "$output"
ffmpeg -y -stats -loglevel level+error $timing_args -accurate_seek -i "$filename.$extension" -c copy "$output"
printf "\n${GREEN}${BOLD}Finished trimming${NORMAL}\n\n"