riffusion-inference/riffusion/util/audio_util.py

"""
Audio utility functions.
"""

import io
import typing as T

import numpy as np
import pydub
from scipy.io import wavfile


def audio_from_waveform(
    samples: np.ndarray, sample_rate: int, normalize: bool = False
) -> pydub.AudioSegment:
    """
    Convert a numpy array of samples of a waveform to an audio segment.

    The input array is never modified; scaling and conversion operate on copies.

    Args:
        samples: (channels, samples) array
        sample_rate: Sample rate passed through to the WAV writer
        normalize: If True, scale the waveform so that its peak absolute value
            maps to the int16 maximum. Silent or empty input is left unscaled.

    Returns:
        The audio segment read back from an in-memory 16-bit WAV encoding.
    """
    int16_info = np.iinfo(np.int16)

    # Normalize volume to fit in int16
    if normalize:
        peak = np.max(np.abs(samples)) if samples.size else 0
        # An all-zero (or empty) waveform has no peak to scale to; dividing by it
        # would fill the array with NaN, so leave the samples as they are.
        if peak > 0:
            # Multiply out of place: an in-place `*=` would overwrite the caller's
            # array and fails outright for integer-typed input.
            samples = samples * (int16_info.max / peak)

    # Transpose and convert to int16, saturating instead of wrapping around for
    # values that do not fit in the int16 range
    samples = samples.transpose(1, 0)
    samples = np.clip(samples, int16_info.min, int16_info.max).astype(np.int16)

    # Write to the bytes of a WAV file
    wav_bytes = io.BytesIO()
    wavfile.write(wav_bytes, sample_rate, samples)
    wav_bytes.seek(0)

    # Read into pydub
    return pydub.AudioSegment.from_wav(wav_bytes)


def apply_filters(segment: pydub.AudioSegment, compression: bool = False) -> pydub.AudioSegment:
    """
    Apply post-processing filters to the audio segment to level its loudness and,
    optionally, compress its dynamic range.

    Args:
        segment: The audio segment to process.
        compression: If True, first normalize the segment, bring it to -10 dBFS and
            run dynamic range compression before the final leveling. Disabled by
            default because it is slow.

    Returns:
        The segment gained to -12 dBFS and then peak-normalized with 0.1 dB of
        headroom. A silent segment is returned unchanged.
    """
    # TODO(hayk): Come up with a principled strategy for these filters and experiment end-to-end.
    # TODO(hayk): Is this going to make audio unbalanced between sequential clips?

    # A silent segment reports a loudness of -inf dBFS, which would turn every
    # `target - segment.dBFS` gain below into an infinite gain. There is nothing
    # to level in that case, so pass the segment through untouched.
    if segment.dBFS == float("-inf"):
        return segment

    if compression:
        segment = pydub.effects.normalize(
            segment,
            headroom=0.1,
        )

        segment = segment.apply_gain(-10 - segment.dBFS)

        # TODO(hayk): This is quite slow, ~1.7 seconds on a beefy CPU
        segment = pydub.effects.compress_dynamic_range(
            segment,
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0,
        )

    # Bring the average loudness to the target level before the final peak normalize
    desired_db = -12
    segment = segment.apply_gain(desired_db - segment.dBFS)

    segment = pydub.effects.normalize(
        segment,
        headroom=0.1,
    )

    return segment


def stitch_segments(
    segments: T.Sequence[pydub.AudioSegment], crossfade_s: float
) -> pydub.AudioSegment:
    """
    Join audio segments end to end in the given order, crossfading at every seam.

    Args:
        segments: Segments to join; the first one starts the result.
        crossfade_s: Crossfade duration in seconds, truncated to whole milliseconds.

    Returns:
        The single segment produced by appending each segment to the running result.
    """
    fade_ms = int(crossfade_s * 1000)

    # Start from the first segment and fold each following one onto the end
    stitched = segments[0]
    for position in range(1, len(segments)):
        stitched = stitched.append(segments[position], crossfade=fade_ms)

    return stitched


def overlay_segments(segments: T.Sequence[pydub.AudioSegment]) -> pydub.AudioSegment:
    """
    Mix a sequence of audio segments together by overlaying each one onto the
    running result, starting from the first segment.

    Args:
        segments: Segments to mix; must contain at least one.

    Returns:
        The first segment with every following segment overlaid on it in order.
    """
    assert len(segments) > 0

    # The first segment is the base layer; the rest are mixed in one at a time
    layers = iter(segments)
    mixed = next(layers, None)
    for layer in layers:
        mixed = mixed.overlay(layer)

    return mixed
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00			`"""`
			`Audio utility functions.`
			`"""`

			`import io`
Audio to audio handles interpolation within it Kill the separate page. Topic: audio_to_audio_interpolation 2023-01-14 12:31:33 -07:00			`import typing as T`
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00
			`import numpy as np`
			`import pydub`
			`from scipy.io import wavfile`


			`def audio_from_waveform(`
			`samples: np.ndarray, sample_rate: int, normalize: bool = False`
			`) -> pydub.AudioSegment:`
			`"""`
			`Convert a numpy array of samples of a waveform to an audio segment.`
Audio splitter Topic: audio_splitter 2023-01-04 21:43:44 -07:00
			`Args:`
			`samples: (channels, samples) array`
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00			`"""`
			`# Normalize volume to fit in int16`
			`if normalize:`
			`samples *= np.iinfo(np.int16).max / np.max(np.abs(samples))`

			`# Transpose and convert to int16`
			`samples = samples.transpose(1, 0)`
			`samples = samples.astype(np.int16)`

			`# Write to the bytes of a WAV file`
			`wav_bytes = io.BytesIO()`
			`wavfile.write(wav_bytes, sample_rate, samples)`
			`wav_bytes.seek(0)`

			`# Read into pydub`
			`return pydub.AudioSegment.from_wav(wav_bytes)`


Disable compression by default, too slow Topic: disable_compression 2022-12-27 08:44:39 -07:00			`def apply_filters(segment: pydub.AudioSegment, compression: bool = False) -> pydub.AudioSegment:`
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00			`"""`
			`Apply post-processing filters to the audio segment to compress it and`
			`keep at a -10 dBFS level.`
			`"""`
			`# TODO(hayk): Come up with a principled strategy for these filters and experiment end-to-end.`
			`# TODO(hayk): Is this going to make audio unbalanced between sequential clips?`

Disable compression by default, too slow Topic: disable_compression 2022-12-27 08:44:39 -07:00			`if compression:`
			`segment = pydub.effects.normalize(`
			`segment,`
			`headroom=0.1,`
			`)`
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00
Disable compression by default, too slow Topic: disable_compression 2022-12-27 08:44:39 -07:00			`segment = segment.apply_gain(-10 - segment.dBFS)`
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00
Disable compression by default, too slow Topic: disable_compression 2022-12-27 08:44:39 -07:00			`# TODO(hayk): This is quite slow, ~1.7 seconds on a beefy CPU`
			`segment = pydub.effects.compress_dynamic_range(`
			`segment,`
			`threshold=-20.0,`
			`ratio=4.0,`
			`attack=5.0,`
			`release=50.0,`
			`)`
Pull out basic utilities into util package Topic: clean_rewrite 2022-12-26 18:15:05 -07:00
			`desired_db = -12`
			`segment = segment.apply_gain(desired_db - segment.dBFS)`

			`segment = pydub.effects.normalize(`
			`segment,`
			`headroom=0.1,`
			`)`

			`return segment`
Audio to audio handles interpolation within it Kill the separate page. Topic: audio_to_audio_interpolation 2023-01-14 12:31:33 -07:00

			`def stitch_segments(`
			`segments: T.Sequence[pydub.AudioSegment], crossfade_s: float`
			`) -> pydub.AudioSegment:`
			`"""`
			`Stitch together a sequence of audio segments with a crossfade between each segment.`
			`"""`
			`crossfade_ms = int(crossfade_s * 1000)`
			`combined_segment = segments[0]`
			`for segment in segments[1:]:`
			`combined_segment = combined_segment.append(segment, crossfade=crossfade_ms)`
			`return combined_segment`
Audio download buttons and proper extension handling across the app * Add buttons that download audio segments with the proper name, and display the name * Add a helper that displays the audio bar and the download button * Create a sidebar widget helper for choosing the output extension * Use this extension widget in all pages to dicate output types * Add a streamlit session state counter object to help with reruns * Improve UI in various places with small fixes Topic: audio_download_extensions_ui 2023-01-14 14:59:36 -07:00

			`def overlay_segments(segments: T.Sequence[pydub.AudioSegment]) -> pydub.AudioSegment:`
			`"""`
			`Overlay a sequence of audio segments on top of each other.`
			`"""`
			`assert len(segments) > 0`
			`output: pydub.AudioSegment = None`
			`for segment in segments:`
			`if output is None:`
			`output = segment`
			`else:`
			`output = output.overlay(segment)`
			`return output`