From f7288f8cd3ffc8ed09bf1ff69a0d21d022b1781e Mon Sep 17 00:00:00 2001
From: Hayk Martiros <hayk.mart@gmail.com>
Date: Tue, 27 Dec 2022 15:44:39 +0000
Subject: [PATCH] Disable compression by default, too slow

Topic: disable_compression
---
 README.md                          |  4 ++--
 riffusion/server.py                |  3 ++-
 riffusion/spectrogram_converter.py |  5 ++++-
 riffusion/util/audio_util.py       | 28 +++++++++++++++-------------
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 67cb31a..853d619 100644
--- a/README.md
+++ b/README.md
@@ -108,9 +108,9 @@ Execute:
 python -m riffusion.cli image-to-audio --image spectrogram_image.png --audio clip.wav
 ```
 
-## Streamlit playground
+## Riffusion Playground
 
-Riffusion also has a streamlit app for interactive use and exploration.
+Riffusion also has a [Streamlit](https://streamlit.io/) app for interactive use and exploration.
 This app is called the Riffusion Playground.
 
 Run with:
diff --git a/riffusion/server.py b/riffusion/server.py
index 42ad7b7..aad1ca0 100644
--- a/riffusion/server.py
+++ b/riffusion/server.py
@@ -155,8 +155,9 @@ def compute_request(
     )
 
     # Reconstruct audio from the image
-    # TODO(hayk): It may help performance to cache this object
+    # TODO(hayk): It may help performance a bit to cache this object
     converter = SpectrogramImageConverter(params=params, device=str(pipeline.device))
+
     segment = converter.audio_from_spectrogram_image(
         image,
         apply_filters=True,
diff --git a/riffusion/spectrogram_converter.py b/riffusion/spectrogram_converter.py
index a1c5e00..9fbfc65 100644
--- a/riffusion/spectrogram_converter.py
+++ b/riffusion/spectrogram_converter.py
@@ -155,7 +155,10 @@ class SpectrogramConverter:
 
         # Optionally apply post-processing filters
         if apply_filters:
-            segment = audio_util.apply_filters(segment)
+            segment = audio_util.apply_filters(
+                segment,
+                compression=False,
+            )
 
         return segment
 
diff --git a/riffusion/util/audio_util.py b/riffusion/util/audio_util.py
index 251b5b8..aa02533 100644
--- a/riffusion/util/audio_util.py
+++ b/riffusion/util/audio_util.py
@@ -32,7 +32,7 @@ def audio_from_waveform(
     return pydub.AudioSegment.from_wav(wav_bytes)
 
 
-def apply_filters(segment: pydub.AudioSegment) -> pydub.AudioSegment:
+def apply_filters(segment: pydub.AudioSegment, compression: bool = False) -> pydub.AudioSegment:
     """
     Apply post-processing filters to the audio segment to compress it and
     keep at a -10 dBFS level.
@@ -40,20 +40,22 @@ def apply_filters(segment: pydub.AudioSegment) -> pydub.AudioSegment:
     # TODO(hayk): Come up with a principled strategy for these filters and experiment end-to-end.
     # TODO(hayk): Is this going to make audio unbalanced between sequential clips?
 
-    segment = pydub.effects.normalize(
-        segment,
-        headroom=0.1,
-    )
+    if compression:
+        segment = pydub.effects.normalize(
+            segment,
+            headroom=0.1,
+        )
 
-    segment = segment.apply_gain(-10 - segment.dBFS)
+        segment = segment.apply_gain(-10 - segment.dBFS)
 
-    segment = pydub.effects.compress_dynamic_range(
-        segment,
-        threshold=-20.0,
-        ratio=4.0,
-        attack=5.0,
-        release=50.0,
-    )
+        # TODO(hayk): compress_dynamic_range is quite slow, ~1.7 seconds on a beefy CPU
+        segment = pydub.effects.compress_dynamic_range(
+            segment,
+            threshold=-20.0,
+            ratio=4.0,
+            attack=5.0,
+            release=50.0,
+        )
 
     desired_db = -12
     segment = segment.apply_gain(desired_db - segment.dBFS)