Disable compression by default, too slow

Topic: disable_compression
2022-12-27 15:44:39 +00:00 · 2022-12-27 15:44:39 +00:00 · f7288f8cd3
parent 266af6b3e0
commit f7288f8cd3
4 changed files with 23 additions and 17 deletions
--- a/README.md
+++ b/README.md
@@ -108,9 +108,9 @@ Execute:
 python -m riffusion.cli image-to-audio --image spectrogram_image.png --audio clip.wav
 ```
-## Streamlit playground
+## Riffusion Playground
-Riffusion also has a streamlit app for interactive use and exploration.
+Riffusion also has a [streamlit](https://streamlit.io/) app for interactive use and exploration.
 This app is called the Riffusion Playground.
 Run with:
--- a/riffusion/server.py
+++ b/riffusion/server.py
@@ -155,8 +155,9 @@ def compute_request(
    )
    # Reconstruct audio from the image
-    # TODO(hayk): It may help performance to cache this object
+    # TODO(hayk): It may help performance a bit to cache this object
    converter = SpectrogramImageConverter(params=params, device=str(pipeline.device))
    segment = converter.audio_from_spectrogram_image(
        image,
        apply_filters=True,
--- a/riffusion/spectrogram_converter.py
+++ b/riffusion/spectrogram_converter.py
@@ -155,7 +155,10 @@ class SpectrogramConverter:
        # Optionally apply post-processing filters
        if apply_filters:
-            segment = audio_util.apply_filters(segment)
+            segment = audio_util.apply_filters(
+                segment,
+                compression=False,
+            )
        return segment
--- a/riffusion/util/audio_util.py
+++ b/riffusion/util/audio_util.py
@@ -32,7 +32,7 @@ def audio_from_waveform(
    return pydub.AudioSegment.from_wav(wav_bytes)
-def apply_filters(segment: pydub.AudioSegment) -> pydub.AudioSegment:
+def apply_filters(segment: pydub.AudioSegment, compression: bool = False) -> pydub.AudioSegment:
    """
    Apply post-processing filters to the audio segment to compress it and
    keep at a -10 dBFS level.
@@ -40,20 +40,22 @@ def apply_filters(segment: pydub.AudioSegment) -> pydub.AudioSegment:
    # TODO(hayk): Come up with a principled strategy for these filters and experiment end-to-end.
    # TODO(hayk): Is this going to make audio unbalanced between sequential clips?
-    segment = pydub.effects.normalize(
+    if compression:
-        segment,
+        segment = pydub.effects.normalize(
-        headroom=0.1,
+            segment,
-    )
+            headroom=0.1,
+        )
-    segment = segment.apply_gain(-10 - segment.dBFS)
+        segment = segment.apply_gain(-10 - segment.dBFS)
-    segment = pydub.effects.compress_dynamic_range(
+        # TODO(hayk): This is quite slow, ~1.7 seconds on a beefy CPU
-        segment,
+        segment = pydub.effects.compress_dynamic_range(
-        threshold=-20.0,
+            segment,
-        ratio=4.0,
+            threshold=-20.0,
-        attack=5.0,
+            ratio=4.0,
-        release=50.0,
+            attack=5.0,
-    )
+            release=50.0,
+        )
    desired_db = -12
    segment = segment.apply_gain(desired_db - segment.dBFS)