diff --git a/riffusion/streamlit/pages/audio_to_audio.py b/riffusion/streamlit/pages/audio_to_audio.py
index d9e61e1..f9dffcf 100644
--- a/riffusion/streamlit/pages/audio_to_audio.py
+++ b/riffusion/streamlit/pages/audio_to_audio.py
@@ -20,6 +20,21 @@ def render_audio_to_audio() -> None:
         """
     )
 
+    with st.expander("Help", False):
+        st.write(
+            """
+            This tool allows you to upload an audio file of arbitrary length and modify it with
+            a text prompt. It does this by sweeping over the audio in overlapping clips, doing
+            img2img style transfer with riffusion, then stitching the clips back together with
+            cross fading to eliminate seams.
+
+            Try a denoising strength of 0.4 for light modification and 0.55 for heavier
+            modification. The best denoising strength depends on how different the prompt is
+            from the source audio. You can play with the seed to get infinite variations.
+            Currently the same seed is used for all clips along the track.
+            """
+        )
+
     device = streamlit_util.select_device(st.sidebar)
 
     audio_file = st.file_uploader(
diff --git a/riffusion/streamlit/pages/image_to_audio.py b/riffusion/streamlit/pages/image_to_audio.py
index cfa0814..6736a93 100644
--- a/riffusion/streamlit/pages/image_to_audio.py
+++ b/riffusion/streamlit/pages/image_to_audio.py
@@ -18,6 +18,16 @@ def render_image_to_audio() -> None:
         """
     )
 
+    with st.expander("Help", False):
+        st.write(
+            """
+            This tool takes an existing spectrogram image and reconstructs it into an audio
+            waveform. It also displays the EXIF metadata stored inside the image, which can
+            contain the parameters used to create the spectrogram image. If the image contains
+            no EXIF metadata, default parameters are assumed.
+            """
+        )
+
     device = streamlit_util.select_device(st.sidebar)
 
     image_file = st.file_uploader(
diff --git a/riffusion/streamlit/pages/sample_clips.py b/riffusion/streamlit/pages/sample_clips.py
index a80eef9..5fd01ea 100644
--- a/riffusion/streamlit/pages/sample_clips.py
+++ b/riffusion/streamlit/pages/sample_clips.py
@@ -17,6 +17,15 @@ def render_sample_clips() -> None:
         """
     )
 
+    with st.expander("Help", False):
+        st.write(
+            """
+            This tool allows uploading an audio file and randomly sampling short clips
+            from it. It's useful for generating a large number of short clips from a single
+            audio file. Outputs can be saved to a given directory with a given audio extension.
+            """
+        )
+
     audio_file = st.file_uploader(
         "Upload a file",
         type=["wav", "mp3", "ogg"],
diff --git a/riffusion/streamlit/pages/split_audio.py b/riffusion/streamlit/pages/split_audio.py
index e0979f7..c4c5241 100644
--- a/riffusion/streamlit/pages/split_audio.py
+++ b/riffusion/streamlit/pages/split_audio.py
@@ -15,6 +15,19 @@ def render_split_audio() -> None:
         """
     )
 
+    with st.expander("Help", False):
+        st.write(
+            """
+            This tool allows uploading an audio file of arbitrary length and splitting it into
+            stems of vocals, drums, bass, and other. It does this using a deep network that
+            sweeps over the audio in clips, extracts the stems, and then cross fades the clips
+            back together to construct the full-length stems. It's particularly useful in
+            combination with audio_to_audio, for example to split out and preserve the vocals
+            while modifying the rest of the track with a prompt, or to pull out the drums to
+            add back later in a DAW.
+            """
+        )
+
     device = streamlit_util.select_device(st.sidebar)
 
     audio_file = st.file_uploader(
diff --git a/riffusion/streamlit/pages/text_to_audio.py b/riffusion/streamlit/pages/text_to_audio.py
index 16cfa71..48ccbbf 100644
--- a/riffusion/streamlit/pages/text_to_audio.py
+++ b/riffusion/streamlit/pages/text_to_audio.py
@@ -12,11 +12,20 @@ def render_text_to_audio() -> None:
     st.subheader(":pencil2: Text to Audio")
     st.write(
         """
-        Generate audio from text prompts. \nRuns the model directly without a seed image or
-        interpolation.
+        Generate audio from text prompts.
         """
     )
 
+    with st.expander("Help", False):
+        st.write(
+            """
+            This tool runs riffusion in its simplest text-to-image form to generate an audio
+            clip from a text prompt. There is no seed image or interpolation here. This mode
+            allows more diversity and creativity than using a seed image, but it also gives
+            you less control over the output. Play with the seed to get infinite variations.
+            """
+        )
+
     device = streamlit_util.select_device(st.sidebar)
 
     prompt = st.text_input("Prompt")
diff --git a/riffusion/streamlit/pages/text_to_audio_batch.py b/riffusion/streamlit/pages/text_to_audio_batch.py
index ee2da65..8763011 100644
--- a/riffusion/streamlit/pages/text_to_audio_batch.py
+++ b/riffusion/streamlit/pages/text_to_audio_batch.py
@@ -38,11 +38,20 @@ def render_text_to_audio_batch() -> None:
     st.subheader(":scroll: Text to Audio Batch")
     st.write(
         """
-        Generate audio in batch from a JSON file of text prompts. \nThe input
-        file contains a global params block and a list of entries with positive and negative
-        prompts.
+        Generate audio in batch from a JSON file of text prompts.
         """
     )
+
+    with st.expander("Help", False):
+        st.write(
+            """
+            This tool is a batch form of text_to_audio, where the inputs are read from a JSON
+            file. The input file contains a global params block and a list of entries with
+            positive and negative prompts. It's useful for automating a larger set of
+            generations. See the example inputs below for the format of the file.
+            """
+        )
+
     device = streamlit_util.select_device(st.sidebar)
 
     # Upload a JSON file