Sideband/sbapp/pydub/effects.py

342 lines
11 KiB
Python
Raw Normal View History

2024-06-03 19:19:24 -06:00
import sys
import math
import array
from .utils import (
db_to_float,
ratio_to_db,
register_pydub_effect,
make_chunks,
audioop,
get_min_max_value
)
from .silence import split_on_silence
from .exceptions import TooManyMissingFrames, InvalidDuration
if sys.version_info >= (3, 0):
xrange = range
@register_pydub_effect
def apply_mono_filter_to_each_channel(seg, filter_fn):
n_channels = seg.channels
channel_segs = seg.split_to_mono()
channel_segs = [filter_fn(channel_seg) for channel_seg in channel_segs]
out_data = seg.get_array_of_samples()
for channel_i, channel_seg in enumerate(channel_segs):
for sample_i, sample in enumerate(channel_seg.get_array_of_samples()):
index = (sample_i * n_channels) + channel_i
out_data[index] = sample
return seg._spawn(out_data)
@register_pydub_effect
def normalize(seg, headroom=0.1):
"""
headroom is how close to the maximum volume to boost the signal up to (specified in dB)
"""
peak_sample_val = seg.max
# if the max is 0, this audio segment is silent, and can't be normalized
if peak_sample_val == 0:
return seg
target_peak = seg.max_possible_amplitude * db_to_float(-headroom)
needed_boost = ratio_to_db(target_peak / peak_sample_val)
return seg.apply_gain(needed_boost)
@register_pydub_effect
def speedup(seg, playback_speed=1.5, chunk_size=150, crossfade=25):
# we will keep audio in 150ms chunks since one waveform at 20Hz is 50ms long
# (20 Hz is the lowest frequency audible to humans)
# portion of AUDIO TO KEEP. if playback speed is 1.25 we keep 80% (0.8) and
# discard 20% (0.2)
atk = 1.0 / playback_speed
if playback_speed < 2.0:
# throwing out more than half the audio - keep 50ms chunks
ms_to_remove_per_chunk = int(chunk_size * (1 - atk) / atk)
else:
# throwing out less than half the audio - throw out 50ms chunks
ms_to_remove_per_chunk = int(chunk_size)
chunk_size = int(atk * chunk_size / (1 - atk))
# the crossfade cannot be longer than the amount of audio we're removing
crossfade = min(crossfade, ms_to_remove_per_chunk - 1)
# DEBUG
#print("chunk: {0}, rm: {1}".format(chunk_size, ms_to_remove_per_chunk))
chunks = make_chunks(seg, chunk_size + ms_to_remove_per_chunk)
if len(chunks) < 2:
raise Exception("Could not speed up AudioSegment, it was too short {2:0.2f}s for the current settings:\n{0}ms chunks at {1:0.1f}x speedup".format(
chunk_size, playback_speed, seg.duration_seconds))
# we'll actually truncate a bit less than we calculated to make up for the
# crossfade between chunks
ms_to_remove_per_chunk -= crossfade
# we don't want to truncate the last chunk since it is not guaranteed to be
# the full chunk length
last_chunk = chunks[-1]
chunks = [chunk[:-ms_to_remove_per_chunk] for chunk in chunks[:-1]]
out = chunks[0]
for chunk in chunks[1:]:
out = out.append(chunk, crossfade=crossfade)
out += last_chunk
return out
@register_pydub_effect
def strip_silence(seg, silence_len=1000, silence_thresh=-16, padding=100):
if padding > silence_len:
raise InvalidDuration("padding cannot be longer than silence_len")
chunks = split_on_silence(seg, silence_len, silence_thresh, padding)
crossfade = padding / 2
if not len(chunks):
return seg[0:0]
seg = chunks[0]
for chunk in chunks[1:]:
seg = seg.append(chunk, crossfade=crossfade)
return seg
@register_pydub_effect
def compress_dynamic_range(seg, threshold=-20.0, ratio=4.0, attack=5.0, release=50.0):
"""
Keyword Arguments:
threshold - default: -20.0
Threshold in dBFS. default of -20.0 means -20dB relative to the
maximum possible volume. 0dBFS is the maximum possible value so
all values for this argument sould be negative.
ratio - default: 4.0
Compression ratio. Audio louder than the threshold will be
reduced to 1/ratio the volume. A ratio of 4.0 is equivalent to
a setting of 4:1 in a pro-audio compressor like the Waves C1.
attack - default: 5.0
Attack in milliseconds. How long it should take for the compressor
to kick in once the audio has exceeded the threshold.
release - default: 50.0
Release in milliseconds. How long it should take for the compressor
to stop compressing after the audio has falled below the threshold.
For an overview of Dynamic Range Compression, and more detailed explanation
of the related terminology, see:
http://en.wikipedia.org/wiki/Dynamic_range_compression
"""
thresh_rms = seg.max_possible_amplitude * db_to_float(threshold)
look_frames = int(seg.frame_count(ms=attack))
def rms_at(frame_i):
return seg.get_sample_slice(frame_i - look_frames, frame_i).rms
def db_over_threshold(rms):
if rms == 0: return 0.0
db = ratio_to_db(rms / thresh_rms)
return max(db, 0)
output = []
# amount to reduce the volume of the audio by (in dB)
attenuation = 0.0
attack_frames = seg.frame_count(ms=attack)
release_frames = seg.frame_count(ms=release)
for i in xrange(int(seg.frame_count())):
rms_now = rms_at(i)
# with a ratio of 4.0 this means the volume will exceed the threshold by
# 1/4 the amount (of dB) that it would otherwise
max_attenuation = (1 - (1.0 / ratio)) * db_over_threshold(rms_now)
attenuation_inc = max_attenuation / attack_frames
attenuation_dec = max_attenuation / release_frames
if rms_now > thresh_rms and attenuation <= max_attenuation:
attenuation += attenuation_inc
attenuation = min(attenuation, max_attenuation)
else:
attenuation -= attenuation_dec
attenuation = max(attenuation, 0)
frame = seg.get_frame(i)
if attenuation != 0.0:
frame = audioop.mul(frame,
seg.sample_width,
db_to_float(-attenuation))
output.append(frame)
return seg._spawn(data=b''.join(output))
# Invert the phase of the signal.
@register_pydub_effect
def invert_phase(seg, channels=(1, 1)):
"""
channels- specifies which channel (left or right) to reverse the phase of.
Note that mono AudioSegments will become stereo.
"""
if channels == (1, 1):
inverted = audioop.mul(seg._data, seg.sample_width, -1.0)
return seg._spawn(data=inverted)
else:
if seg.channels == 2:
left, right = seg.split_to_mono()
else:
raise Exception("Can't implicitly convert an AudioSegment with " + str(seg.channels) + " channels to stereo.")
if channels == (1, 0):
left = left.invert_phase()
else:
right = right.invert_phase()
return seg.from_mono_audiosegments(left, right)
# High and low pass filters based on implementation found on Stack Overflow:
# http://stackoverflow.com/questions/13882038/implementing-simple-high-and-low-pass-filters-in-c
@register_pydub_effect
def low_pass_filter(seg, cutoff):
"""
cutoff - Frequency (in Hz) where higher frequency signal will begin to
be reduced by 6dB per octave (doubling in frequency) above this point
"""
RC = 1.0 / (cutoff * 2 * math.pi)
dt = 1.0 / seg.frame_rate
alpha = dt / (RC + dt)
original = seg.get_array_of_samples()
filteredArray = array.array(seg.array_type, original)
frame_count = int(seg.frame_count())
last_val = [0] * seg.channels
for i in range(seg.channels):
last_val[i] = filteredArray[i] = original[i]
for i in range(1, frame_count):
for j in range(seg.channels):
offset = (i * seg.channels) + j
last_val[j] = last_val[j] + (alpha * (original[offset] - last_val[j]))
filteredArray[offset] = int(last_val[j])
return seg._spawn(data=filteredArray)
@register_pydub_effect
def high_pass_filter(seg, cutoff):
"""
cutoff - Frequency (in Hz) where lower frequency signal will begin to
be reduced by 6dB per octave (doubling in frequency) below this point
"""
RC = 1.0 / (cutoff * 2 * math.pi)
dt = 1.0 / seg.frame_rate
alpha = RC / (RC + dt)
minval, maxval = get_min_max_value(seg.sample_width * 8)
original = seg.get_array_of_samples()
filteredArray = array.array(seg.array_type, original)
frame_count = int(seg.frame_count())
last_val = [0] * seg.channels
for i in range(seg.channels):
last_val[i] = filteredArray[i] = original[i]
for i in range(1, frame_count):
for j in range(seg.channels):
offset = (i * seg.channels) + j
offset_minus_1 = ((i-1) * seg.channels) + j
last_val[j] = alpha * (last_val[j] + original[offset] - original[offset_minus_1])
filteredArray[offset] = int(min(max(last_val[j], minval), maxval))
return seg._spawn(data=filteredArray)
@register_pydub_effect
def pan(seg, pan_amount):
"""
pan_amount should be between -1.0 (100% left) and +1.0 (100% right)
When pan_amount == 0.0 the left/right balance is not changed.
Panning does not alter the *perceived* loundness, but since loudness
is decreasing on one side, the other side needs to get louder to
compensate. When panned hard left, the left channel will be 3dB louder.
"""
if not -1.0 <= pan_amount <= 1.0:
raise ValueError("pan_amount should be between -1.0 (100% left) and +1.0 (100% right)")
max_boost_db = ratio_to_db(2.0)
boost_db = abs(pan_amount) * max_boost_db
boost_factor = db_to_float(boost_db)
reduce_factor = db_to_float(max_boost_db) - boost_factor
reduce_db = ratio_to_db(reduce_factor)
# Cut boost in half (max boost== 3dB) - in reality 2 speakers
# do not sum to a full 6 dB.
boost_db = boost_db / 2.0
if pan_amount < 0:
return seg.apply_gain_stereo(boost_db, reduce_db)
else:
return seg.apply_gain_stereo(reduce_db, boost_db)
@register_pydub_effect
def apply_gain_stereo(seg, left_gain=0.0, right_gain=0.0):
"""
left_gain - amount of gain to apply to the left channel (in dB)
right_gain - amount of gain to apply to the right channel (in dB)
note: mono audio segments will be converted to stereo
"""
if seg.channels == 1:
left = right = seg
elif seg.channels == 2:
left, right = seg.split_to_mono()
l_mult_factor = db_to_float(left_gain)
r_mult_factor = db_to_float(right_gain)
left_data = audioop.mul(left._data, left.sample_width, l_mult_factor)
left_data = audioop.tostereo(left_data, left.sample_width, 1, 0)
right_data = audioop.mul(right._data, right.sample_width, r_mult_factor)
right_data = audioop.tostereo(right_data, right.sample_width, 0, 1)
output = audioop.add(left_data, right_data, seg.sample_width)
return seg._spawn(data=output,
overrides={'channels': 2,
'frame_width': 2 * seg.sample_width})