# riffusion-inference/riffusion/streamlit/pages/text_to_audio.py

import typing as T

import streamlit as st

from riffusion.spectrogram_params import SpectrogramParams
from riffusion.streamlit import util as streamlit_util


def render_text_to_audio() -> None:
    """
    Render the "Text to Audio" Streamlit page.

    Draws the page header and a collapsed help section, reads the prompt and
    negative prompt from the main area and the generation settings (device,
    seed, inference steps, width, guidance) from the sidebar. Once a prompt has
    been entered, it calls streamlit_util.run_txt2img, displays the returned
    image, and plays the mp3 bytes produced from that image by
    streamlit_util.audio_bytes_from_spectrogram_image.
    """
    # Page-level configuration is issued before any other Streamlit command.
    st.set_page_config(layout="wide", page_icon="🎸")

    st.subheader(":pencil2: Text to Audio")
    st.write(
        """
    Generate audio from text prompts.
    """
    )

    help_section = st.expander("Help", expanded=False)
    with help_section:
        st.write(
            """
            This tool runs riffusion in the simplest text to image form to generate an audio
            clip from a text prompt. There is no seed image or interpolation here. This mode
            allows more diversity and creativity than when using a seed image, but it also
            leads to having less control. Play with the seed to get infinite variations.
            """
        )

    # Device picker lives in the sidebar, above the generation settings.
    sidebar = st.sidebar
    target_device = streamlit_util.select_device(sidebar)

    prompt_text = st.text_input("Prompt")
    negative_prompt_text = st.text_input("Negative prompt")

    settings_panel = sidebar.expander("Text to Audio Params", expanded=True)
    with settings_panel:
        seed_value = T.cast(int, st.number_input("Seed", value=42))
        step_count = T.cast(int, st.number_input("Inference steps", value=50))
        image_width = T.cast(int, st.number_input("Width", value=512))
        guidance_scale = st.number_input(
            "Guidance",
            value=7.0,
            help="How much the model listens to the text prompt",
        )

    # Nothing to generate until the user has typed a prompt.
    if not prompt_text:
        st.info("Enter a prompt")
        return

    # The image height is fixed at 512; only the width is user-configurable.
    spectrogram_image = streamlit_util.run_txt2img(
        prompt=prompt_text,
        num_inference_steps=step_count,
        guidance=guidance_scale,
        negative_prompt=negative_prompt_text,
        seed=seed_value,
        width=image_width,
        height=512,
        device=target_device,
    )
    st.image(spectrogram_image)

    # TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained
    spectrogram_params = SpectrogramParams(min_frequency=0, max_frequency=10000)

    # Convert the generated image to mp3 bytes and hand them to the audio player.
    st.audio(
        streamlit_util.audio_bytes_from_spectrogram_image(
            image=spectrogram_image,
            params=spectrogram_params,
            device=target_device,
            output_format="mp3",
        )
    )


# Render the page when this file is executed as a script; importing the module
# only defines render_text_to_audio without drawing anything.
if __name__ == "__main__":
    render_text_to_audio()
Improve text to audio Topic: streamlit_app 2022-12-26 22:03:30 -07:00			`import typing as T`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00
			`import streamlit as st`

			`from riffusion.spectrogram_params import SpectrogramParams`
			`from riffusion.streamlit import util as streamlit_util`


			`def render_text_to_audio() -> None:`
Add several streamlit demo pages Topic: streamlit_app 2022-12-27 01:25:19 -07:00			`st.set_page_config(layout="wide", page_icon="🎸")`

			`st.subheader(":pencil2: Text to Audio")`
			`st.write(`
			`"""`
Help text for app Topic: app_help_text 2023-01-06 10:19:38 -07:00			`Generate audio from text prompts.`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00			`"""`
Add several streamlit demo pages Topic: streamlit_app 2022-12-27 01:25:19 -07:00			`)`
Add batch text to audio processing Topic: streamlit_app 2022-12-26 22:32:42 -07:00
Help text for app Topic: app_help_text 2023-01-06 10:19:38 -07:00			`with st.expander("Help", False):`
			`st.write(`
			`"""`
			`This tool runs riffusion in the simplest text to image form to generate an audio`
			`clip from a text prompt. There is no seed image or interpolation here. This mode`
			`allows more diversity and creativity than when using a seed image, but it also`
			`leads to having less control. Play with the seed to get infinite variations.`
			`"""`
			`)`

Add batch text to audio processing Topic: streamlit_app 2022-12-26 22:32:42 -07:00			`device = streamlit_util.select_device(st.sidebar)`

Add several streamlit demo pages Topic: streamlit_app 2022-12-27 01:25:19 -07:00			`prompt = st.text_input("Prompt")`
			`negative_prompt = st.text_input("Negative prompt")`

Add batch text to audio processing Topic: streamlit_app 2022-12-26 22:32:42 -07:00			`with st.sidebar.expander("Text to Audio Params", expanded=True):`
			`seed = T.cast(int, st.number_input("Seed", value=42))`
			`num_inference_steps = T.cast(int, st.number_input("Inference steps", value=50))`
			`width = T.cast(int, st.number_input("Width", value=512))`
			`guidance = st.number_input(`
			`"Guidance", value=7.0, help="How much the model listens to the text prompt"`
			`)`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00
Improve text to audio Topic: streamlit_app 2022-12-26 22:03:30 -07:00			`if not prompt:`
			`st.info("Enter a prompt")`
			`return`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00
Improve text to audio Topic: streamlit_app 2022-12-26 22:03:30 -07:00			`image = streamlit_util.run_txt2img(`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00			`prompt=prompt,`
			`num_inference_steps=num_inference_steps,`
			`guidance=guidance,`
			`negative_prompt=negative_prompt,`
			`seed=seed,`
			`width=width,`
Add several streamlit demo pages Topic: streamlit_app 2022-12-27 01:25:19 -07:00			`height=512,`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00			`device=device,`
			`)`
Add several streamlit demo pages Topic: streamlit_app 2022-12-27 01:25:19 -07:00
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00			`st.image(image)`

			`# TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained`
			`params = SpectrogramParams(`
			`min_frequency=0,`
			`max_frequency=10000,`
			`)`

Improve text to audio Topic: streamlit_app 2022-12-26 22:03:30 -07:00			`audio_bytes = streamlit_util.audio_bytes_from_spectrogram_image(`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00			`image=image,`
			`params=params,`
			`device=device,`
Improve text to audio Topic: streamlit_app 2022-12-26 22:03:30 -07:00			`output_format="mp3",`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00			`)`
Improve text to audio Topic: streamlit_app 2022-12-26 22:03:30 -07:00			`st.audio(audio_bytes)`
Streamlit app for interactive use of the model Topic: streamlit_app 2022-12-26 21:01:27 -07:00

			`if __name__ == "__main__":`
			`render_text_to_audio()`