import sys
import math
import array

from .utils import (
    db_to_float,
    ratio_to_db,
    register_pydub_effect,
    make_chunks,
    audioop,
    get_min_max_value
)
from .silence import split_on_silence
from .exceptions import TooManyMissingFrames, InvalidDuration

if sys.version_info >= (3, 0):
    xrange = range


@register_pydub_effect
def apply_mono_filter_to_each_channel(seg, filter_fn):
    n_channels = seg.channels

    channel_segs = seg.split_to_mono()
    channel_segs = [filter_fn(channel_seg) for channel_seg in channel_segs]

    out_data = seg.get_array_of_samples()
    for channel_i, channel_seg in enumerate(channel_segs):
        for sample_i, sample in enumerate(channel_seg.get_array_of_samples()):
            index = (sample_i * n_channels) + channel_i
            out_data[index] = sample

    return seg._spawn(out_data)
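
# Usage sketch (illustrative only; the segment name `song` is hypothetical): since
# @register_pydub_effect also exposes these functions as AudioSegment methods, a stereo
# segment can be filtered one channel at a time and re-interleaved as above, e.g.:
#
#     filtered = song.apply_mono_filter_to_each_channel(
#         lambda ch: ch.low_pass_filter(3000))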


@register_pydub_effect
def normalize(seg, headroom=0.1):
    """
    headroom is how close to the maximum volume to boost the signal up to (specified in dB)
    """
    peak_sample_val = seg.max

    # if the max is 0, this audio segment is silent, and can't be normalized
    if peak_sample_val == 0:
        return seg

    target_peak = seg.max_possible_amplitude * db_to_float(-headroom)

    needed_boost = ratio_to_db(target_peak / peak_sample_val)
    return seg.apply_gain(needed_boost)
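
# Worked example (illustrative, values assumed): with the default headroom of 0.1 dB,
# target_peak = max_possible_amplitude * 10 ** (-0.1 / 20) ~= 0.9886 * max_possible_amplitude,
# so a 16-bit segment (max_possible_amplitude 32768) whose peak sample is 16384 receives
# roughly +5.9 dB of gain. Minimal usage sketch with a hypothetical segment `quiet`:
#
#     louder = quiet.normalize(headroom=0.1)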


@register_pydub_effect
def speedup(seg, playback_speed=1.5, chunk_size=150, crossfade=25):
    # we will keep audio in 150ms chunks since one waveform at 20Hz is 50ms long
    # (20 Hz is the lowest frequency audible to humans)

    # portion of AUDIO TO KEEP. if playback speed is 1.25 we keep 80% (0.8) and
    # discard 20% (0.2)
    atk = 1.0 / playback_speed

    if playback_speed < 2.0:
        # throwing out less than half the audio - keep chunk_size ms chunks
        # and remove a proportionally smaller amount from each one
        ms_to_remove_per_chunk = int(chunk_size * (1 - atk) / atk)
    else:
        # throwing out at least half the audio - throw out chunk_size ms per chunk
        # and keep a proportionally smaller chunk
        ms_to_remove_per_chunk = int(chunk_size)
        chunk_size = int(atk * chunk_size / (1 - atk))

    # the crossfade cannot be longer than the amount of audio we're removing
    crossfade = min(crossfade, ms_to_remove_per_chunk - 1)

    # DEBUG
    # print("chunk: {0}, rm: {1}".format(chunk_size, ms_to_remove_per_chunk))

    chunks = make_chunks(seg, chunk_size + ms_to_remove_per_chunk)
    if len(chunks) < 2:
        raise Exception("Could not speed up AudioSegment, it was too short {2:0.2f}s for the current settings:\n{0}ms chunks at {1:0.1f}x speedup".format(
            chunk_size, playback_speed, seg.duration_seconds))

    # we'll actually truncate a bit less than we calculated to make up for the
    # crossfade between chunks
    ms_to_remove_per_chunk -= crossfade

    # we don't want to truncate the last chunk since it is not guaranteed to be
    # the full chunk length
    last_chunk = chunks[-1]
    chunks = [chunk[:-ms_to_remove_per_chunk] for chunk in chunks[:-1]]

    out = chunks[0]
    for chunk in chunks[1:]:
        out = out.append(chunk, crossfade=crossfade)

    out += last_chunk
    return out
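
# Worked example (illustrative): at the default playback_speed=1.5, atk = 1/1.5 ~= 0.667,
# so each 150 ms chunk has int(150 * (1 - 0.667) / 0.667) = 75 ms marked for removal;
# after reserving 25 ms for the crossfade, 50 ms is actually cut from every chunk except
# the last. Minimal usage sketch with a hypothetical segment `speech`:
#
#     faster = speech.speedup(playback_speed=1.5)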


@register_pydub_effect
def strip_silence(seg, silence_len=1000, silence_thresh=-16, padding=100):
    if padding > silence_len:
        raise InvalidDuration("padding cannot be longer than silence_len")

    chunks = split_on_silence(seg, silence_len, silence_thresh, padding)
    crossfade = padding / 2

    if not len(chunks):
        return seg[0:0]

    seg = chunks[0]
    for chunk in chunks[1:]:
        seg = seg.append(chunk, crossfade=crossfade)

    return seg
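
# Usage sketch (parameter values are assumptions for illustration, not recommendations):
# drop pauses of 1 second or longer that fall below -40 dBFS, keeping 100 ms of padding
# around the remaining audio, from a hypothetical segment `recording`:
#
#     trimmed = recording.strip_silence(silence_len=1000, silence_thresh=-40, padding=100)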


@register_pydub_effect
def compress_dynamic_range(seg, threshold=-20.0, ratio=4.0, attack=5.0, release=50.0):
    """
    Keyword Arguments:

        threshold - default: -20.0
            Threshold in dBFS. default of -20.0 means -20dB relative to the
            maximum possible volume. 0dBFS is the maximum possible value so
            all values for this argument should be negative.

        ratio - default: 4.0
            Compression ratio. Audio louder than the threshold will be
            reduced to 1/ratio the volume. A ratio of 4.0 is equivalent to
            a setting of 4:1 in a pro-audio compressor like the Waves C1.

        attack - default: 5.0
            Attack in milliseconds. How long it should take for the compressor
            to kick in once the audio has exceeded the threshold.

        release - default: 50.0
            Release in milliseconds. How long it should take for the compressor
            to stop compressing after the audio has fallen below the threshold.

    For an overview of Dynamic Range Compression, and more detailed explanation
    of the related terminology, see:

        http://en.wikipedia.org/wiki/Dynamic_range_compression
    """
    thresh_rms = seg.max_possible_amplitude * db_to_float(threshold)

    look_frames = int(seg.frame_count(ms=attack))

    def rms_at(frame_i):
        return seg.get_sample_slice(frame_i - look_frames, frame_i).rms

    def db_over_threshold(rms):
        if rms == 0: return 0.0
        db = ratio_to_db(rms / thresh_rms)
        return max(db, 0)

    output = []

    # amount to reduce the volume of the audio by (in dB)
    attenuation = 0.0

    attack_frames = seg.frame_count(ms=attack)
    release_frames = seg.frame_count(ms=release)
    for i in xrange(int(seg.frame_count())):
        rms_now = rms_at(i)

        # with a ratio of 4.0 this means the volume will exceed the threshold by
        # 1/4 the amount (of dB) that it would otherwise
        max_attenuation = (1 - (1.0 / ratio)) * db_over_threshold(rms_now)

        attenuation_inc = max_attenuation / attack_frames
        attenuation_dec = max_attenuation / release_frames

        if rms_now > thresh_rms and attenuation <= max_attenuation:
            attenuation += attenuation_inc
            attenuation = min(attenuation, max_attenuation)
        else:
            attenuation -= attenuation_dec
            attenuation = max(attenuation, 0)

        frame = seg.get_frame(i)
        if attenuation != 0.0:
            frame = audioop.mul(frame,
                                seg.sample_width,
                                db_to_float(-attenuation))

        output.append(frame)

    return seg._spawn(data=b''.join(output))
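
# Worked example (illustrative): with ratio=4.0, a frame whose RMS is 8 dB over the
# threshold gets max_attenuation = (1 - 1/4) * 8 = 6 dB, so once the attack has fully
# ramped up the output sits only 2 dB over the threshold (8 dB in, 2 dB out, i.e. 4:1).
# Minimal usage sketch with a hypothetical segment `mix`:
#
#     compressed = mix.compress_dynamic_range(threshold=-20.0, ratio=4.0)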


# Invert the phase of the signal.
@register_pydub_effect
def invert_phase(seg, channels=(1, 1)):
    """
    channels - specifies which channels (left, right) to reverse the phase of;
    the default (1, 1) inverts the whole segment.
    Note that inverting only one channel requires a stereo AudioSegment.
    """
    if channels == (1, 1):
        inverted = audioop.mul(seg._data, seg.sample_width, -1.0)
        return seg._spawn(data=inverted)
    else:
        if seg.channels == 2:
            left, right = seg.split_to_mono()
        else:
            raise Exception("Can't implicitly convert an AudioSegment with " + str(seg.channels) + " channels to stereo.")

        if channels == (1, 0):
            left = left.invert_phase()
        else:
            right = right.invert_phase()

        return seg.from_mono_audiosegments(left, right)
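
# Usage sketch (segment names are hypothetical): invert the whole signal, or only the
# left channel of a stereo segment (handy when checking for phase-cancellation issues):
#
#     flipped = sound.invert_phase()                     # all channels
#     left_flipped = stereo_sound.invert_phase(channels=(1, 0))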


# High and low pass filters based on implementation found on Stack Overflow:
# http://stackoverflow.com/questions/13882038/implementing-simple-high-and-low-pass-filters-in-c
@register_pydub_effect
def low_pass_filter(seg, cutoff):
    """
    cutoff - Frequency (in Hz) where higher frequency signal will begin to
        be reduced by 6dB per octave (doubling in frequency) above this point
    """
    RC = 1.0 / (cutoff * 2 * math.pi)
    dt = 1.0 / seg.frame_rate

    alpha = dt / (RC + dt)

    original = seg.get_array_of_samples()
    filteredArray = array.array(seg.array_type, original)

    frame_count = int(seg.frame_count())

    last_val = [0] * seg.channels
    for i in range(seg.channels):
        last_val[i] = filteredArray[i] = original[i]

    for i in range(1, frame_count):
        for j in range(seg.channels):
            offset = (i * seg.channels) + j
            last_val[j] = last_val[j] + (alpha * (original[offset] - last_val[j]))
            filteredArray[offset] = int(last_val[j])

    return seg._spawn(data=filteredArray)
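
# Worked example (illustrative): this is a single-pole RC low-pass. For cutoff=1000 Hz
# and a 44100 Hz frame rate, RC = 1 / (2 * pi * 1000) ~= 1.59e-4 s and dt ~= 2.27e-5 s,
# so alpha = dt / (RC + dt) ~= 0.125: each output sample moves 12.5% of the way from the
# previous output toward the current input sample, smoothing out fast (high-frequency)
# changes.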


@register_pydub_effect
def high_pass_filter(seg, cutoff):
    """
    cutoff - Frequency (in Hz) where lower frequency signal will begin to
        be reduced by 6dB per octave (doubling in frequency) below this point
    """
    RC = 1.0 / (cutoff * 2 * math.pi)
    dt = 1.0 / seg.frame_rate

    alpha = RC / (RC + dt)

    minval, maxval = get_min_max_value(seg.sample_width * 8)

    original = seg.get_array_of_samples()
    filteredArray = array.array(seg.array_type, original)

    frame_count = int(seg.frame_count())

    last_val = [0] * seg.channels
    for i in range(seg.channels):
        last_val[i] = filteredArray[i] = original[i]

    for i in range(1, frame_count):
        for j in range(seg.channels):
            offset = (i * seg.channels) + j
            offset_minus_1 = ((i - 1) * seg.channels) + j

            last_val[j] = alpha * (last_val[j] + original[offset] - original[offset_minus_1])
            filteredArray[offset] = int(min(max(last_val[j], minval), maxval))

    return seg._spawn(data=filteredArray)
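
# Worked example (illustrative): same RC constant, but alpha = RC / (RC + dt). For
# cutoff=1000 Hz at 44100 Hz, alpha ~= 0.875: each output sample is 87.5% of (the
# previous output plus the change between consecutive input samples), which passes fast
# changes and suppresses slow-moving, low-frequency content. Usage sketch with a
# hypothetical segment `hum`:
#
#     cleaned = hum.high_pass_filter(120)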


@register_pydub_effect
def pan(seg, pan_amount):
    """
    pan_amount should be between -1.0 (100% left) and +1.0 (100% right)

    When pan_amount == 0.0 the left/right balance is not changed.

    Panning does not alter the *perceived* loudness, but since loudness
    is decreasing on one side, the other side needs to get louder to
    compensate. When panned hard left, the left channel will be 3dB louder.
    """
    if not -1.0 <= pan_amount <= 1.0:
        raise ValueError("pan_amount should be between -1.0 (100% left) and +1.0 (100% right)")

    max_boost_db = ratio_to_db(2.0)
    boost_db = abs(pan_amount) * max_boost_db

    boost_factor = db_to_float(boost_db)
    reduce_factor = db_to_float(max_boost_db) - boost_factor

    reduce_db = ratio_to_db(reduce_factor)

    # Cut boost in half (max boost == 3dB) - in reality 2 speakers
    # do not sum to a full 6 dB.
    boost_db = boost_db / 2.0

    if pan_amount < 0:
        return seg.apply_gain_stereo(boost_db, reduce_db)
    else:
        return seg.apply_gain_stereo(reduce_db, boost_db)
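
# Worked example (illustrative): pan_amount=0.5 (halfway right) gives boost_db ~= 3.01 dB
# (half of ratio_to_db(2.0) ~= 6.02 dB), boost_factor ~= 1.414 and reduce_factor ~= 0.586,
# so the left channel is reduced by about 4.6 dB while the right channel is boosted by
# about 1.5 dB (boost_db / 2). Usage sketch with a hypothetical segment `voice`:
#
#     panned = voice.pan(0.5)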


@register_pydub_effect
def apply_gain_stereo(seg, left_gain=0.0, right_gain=0.0):
    """
    left_gain - amount of gain to apply to the left channel (in dB)
    right_gain - amount of gain to apply to the right channel (in dB)

    note: mono audio segments will be converted to stereo
    """
    if seg.channels == 1:
        left = right = seg
    elif seg.channels == 2:
        left, right = seg.split_to_mono()

    l_mult_factor = db_to_float(left_gain)
    r_mult_factor = db_to_float(right_gain)

    left_data = audioop.mul(left._data, left.sample_width, l_mult_factor)
    left_data = audioop.tostereo(left_data, left.sample_width, 1, 0)

    right_data = audioop.mul(right._data, right.sample_width, r_mult_factor)
    right_data = audioop.tostereo(right_data, right.sample_width, 0, 1)

    output = audioop.add(left_data, right_data, seg.sample_width)

    return seg._spawn(data=output,
                      overrides={'channels': 2,
                                 'frame_width': 2 * seg.sample_width})
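
# Usage sketch (the segment name `song` is hypothetical): drop the left channel by 6 dB
# while leaving the right channel untouched; a mono input is first duplicated to both
# channels as noted in the docstring above:
#
#     rebalanced = song.apply_gain_stereo(-6.0, 0.0)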