parent
7325bfa143
commit
6b02e98e35
|
@ -20,6 +20,21 @@ def render_audio_to_audio() -> None:
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with st.expander("Help", False):
|
||||||
|
st.write(
|
||||||
|
"""
|
||||||
|
This tool allows you to upload an audio file of arbitrary length and modify it with
|
||||||
|
a text prompt. It does this by sweeping over the audio in overlapping clips, doing
|
||||||
|
img2img style transfer with riffusion, then stitching the clips back together with
|
||||||
|
cross fading to eliminate seams.
|
||||||
|
|
||||||
|
Try a denoising strength of 0.4 for light modification and 0.55 for more heavy
|
||||||
|
modification. The best specific denoising depends on how different the prompt is
|
||||||
|
from the source audio. You can play with the seed to get infinite variations.
|
||||||
|
Currently the same seed is used for all clips along the track.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
device = streamlit_util.select_device(st.sidebar)
|
device = streamlit_util.select_device(st.sidebar)
|
||||||
|
|
||||||
audio_file = st.file_uploader(
|
audio_file = st.file_uploader(
|
||||||
|
|
|
@ -18,6 +18,16 @@ def render_image_to_audio() -> None:
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with st.expander("Help", False):
|
||||||
|
st.write(
|
||||||
|
"""
|
||||||
|
This tool takes an existing spectrogram image and reconstructs it into an audio
|
||||||
|
waveform. It also displays the EXIF metadata stored inside the image, which can
|
||||||
|
contain the parameters used to create the spectrogram image. If no EXIF is contained,
|
||||||
|
assumes default parameters.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
device = streamlit_util.select_device(st.sidebar)
|
device = streamlit_util.select_device(st.sidebar)
|
||||||
|
|
||||||
image_file = st.file_uploader(
|
image_file = st.file_uploader(
|
||||||
|
|
|
@ -17,6 +17,15 @@ def render_sample_clips() -> None:
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with st.expander("Help", False):
|
||||||
|
st.write(
|
||||||
|
"""
|
||||||
|
This tool simply allows uploading an audio file and randomly sampling short clips
|
||||||
|
from it. It's useful for generating a large number of short clips from a single
|
||||||
|
audio file. Outputs can be saved to a given directory with a given audio extension.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
audio_file = st.file_uploader(
|
audio_file = st.file_uploader(
|
||||||
"Upload a file",
|
"Upload a file",
|
||||||
type=["wav", "mp3", "ogg"],
|
type=["wav", "mp3", "ogg"],
|
||||||
|
|
|
@ -15,6 +15,19 @@ def render_split_audio() -> None:
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with st.expander("Help", False):
|
||||||
|
st.write(
|
||||||
|
"""
|
||||||
|
This tool allows uploading an audio file of arbitrary length and splits it into
|
||||||
|
stems of vocals, drums, bass, and other. It does this using a deep network that
|
||||||
|
sweeps over the audio in clips, extracts the stems, and then cross fades the clips
|
||||||
|
back together to construct the full length stems. It's particularly useful in
|
||||||
|
combination with audio_to_audio, for example to split and preserve vocals while
|
||||||
|
modifying the rest of the track with a prompt. Or, to pull out drums to add later
|
||||||
|
in a DAW.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
device = streamlit_util.select_device(st.sidebar)
|
device = streamlit_util.select_device(st.sidebar)
|
||||||
|
|
||||||
audio_file = st.file_uploader(
|
audio_file = st.file_uploader(
|
||||||
|
|
|
@ -12,8 +12,17 @@ def render_text_to_audio() -> None:
|
||||||
st.subheader(":pencil2: Text to Audio")
|
st.subheader(":pencil2: Text to Audio")
|
||||||
st.write(
|
st.write(
|
||||||
"""
|
"""
|
||||||
Generate audio from text prompts. \nRuns the model directly without a seed image or
|
Generate audio from text prompts.
|
||||||
interpolation.
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
with st.expander("Help", False):
|
||||||
|
st.write(
|
||||||
|
"""
|
||||||
|
This tool runs riffusion in the simplest text to image form to generate an audio
|
||||||
|
clip from a text prompt. There is no seed image or interpolation here. This mode
|
||||||
|
allows more diversity and creativity than when using a seed image, but it also
|
||||||
|
leads to having less control. Play with the seed to get infinite variations.
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -38,11 +38,20 @@ def render_text_to_audio_batch() -> None:
|
||||||
st.subheader(":scroll: Text to Audio Batch")
|
st.subheader(":scroll: Text to Audio Batch")
|
||||||
st.write(
|
st.write(
|
||||||
"""
|
"""
|
||||||
Generate audio in batch from a JSON file of text prompts. \nThe input
|
Generate audio in batch from a JSON file of text prompts.
|
||||||
file contains a global params block and a list of entries with positive and negative
|
|
||||||
prompts.
|
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with st.expander("Help", False):
|
||||||
|
st.write(
|
||||||
|
"""
|
||||||
|
This tool is a batch form of text_to_audio, where the inputs are read in from a JSON
|
||||||
|
file. The input file contains a global params block and a list of entries with positive
|
||||||
|
and negative prompts. It's useful for automating a larger set of generations. See the
|
||||||
|
example inputs below for the format of the file.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
device = streamlit_util.select_device(st.sidebar)
|
device = streamlit_util.select_device(st.sidebar)
|
||||||
|
|
||||||
# Upload a JSON file
|
# Upload a JSON file
|
||||||
|
|
Loading…
Reference in New Issue