riffusion-inference/riffusion/streamlit/tasks/text_to_audio_batch.py


import json
import typing as T
from pathlib import Path

import streamlit as st

from riffusion.spectrogram_params import SpectrogramParams
from riffusion.streamlit import util as streamlit_util

# Example input json file to process in batch
EXAMPLE_INPUT = """
{
    "params": {
        "checkpoint": "riffusion/riffusion-model-v1",
        "scheduler": "DPMSolverMultistepScheduler",
        "num_inference_steps": 50,
        "guidance": 7.0,
        "width": 512
    },
    "entries": [
        {
            "prompt": "Church bells",
            "seed": 42
        },
        {
            "prompt": "electronic beats",
            "negative_prompt": "drums",
            "seed": 100
        },
        {
            "prompt": "classical violin concerto",
            "seed": 4
        }
    ]
}
"""


def render() -> None:
    st.subheader("📜 Text to Audio Batch")
    st.write(
        """
        Generate audio in batch from a JSON file of text prompts.
        """
    )

    with st.expander("Help", expanded=False):
        st.write(
            """
            This tool is the batch form of text_to_audio, reading its inputs from a JSON
            file. The input file contains a global params block and a list of entries with
            positive and negative prompts. It's useful for automating a larger set of
            generations. See the example input below for the format of the file.
            """
        )

    device = streamlit_util.select_device(st.sidebar)

    # Upload a JSON file
    json_file = st.file_uploader(
        "JSON file",
        type=["json"],
        label_visibility="collapsed",
    )

    # Handle the null case
    if json_file is None:
        st.info("Upload a JSON file containing params and prompts")
        with st.expander("Example inputs.json", expanded=False):
            st.code(EXAMPLE_INPUT)
        return

    # Read the file in and display it
    data = json.loads(json_file.read())
    with st.expander("Input Data", expanded=False):
        st.json(data)

    # Params can either be a list or a single entry
    if isinstance(data["params"], list):
        param_sets = data["params"]
    else:
        param_sets = [data["params"]]

    entries = data["entries"]

    show_images = st.sidebar.checkbox("Show Images", True)

    num_seeds = st.sidebar.number_input(
        "Num Seeds",
        value=1,
        min_value=1,
        max_value=10,
        help="When > 1, increments the seed and runs multiple generations for each entry",
    )

    # Optionally specify an output directory
    output_dir = st.sidebar.text_input("Output Directory", "")
    output_path: T.Optional[Path] = None
    if output_dir:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

    # Write title cards for each param set
    title_cols = st.columns(len(param_sets))
    for i, params in enumerate(param_sets):
        col = title_cols[i]
        if "name" not in params:
            params["name"] = f"params[{i}]"
        col.write(f"## Param Set {i}")
        col.json(params)

    for entry_i, entry in enumerate(entries):
        st.write("---")
        print(entry)

        prompt = entry["prompt"]
        negative_prompt = entry.get("negative_prompt", None)
        base_seed = entry.get("seed", 42)

        text = f"##### ({base_seed}) {prompt}"
        if negative_prompt:
            text += f" \n**Negative**: {negative_prompt}"
        st.write(text)
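
        # Run each entry once per seed, counting up from the entry's base seed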
        for seed in range(base_seed, base_seed + num_seeds):
            cols = st.columns(len(param_sets))
            for i, params in enumerate(param_sets):
                col = cols[i]
                col.write(params["name"])
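
                # Generate a spectrogram image with the diffusion model. Height is fixed
                # at 512 to match the model; width sets the number of time frames, so a
                # wider image yields a longer audio clip.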
                image = streamlit_util.run_txt2img(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    seed=seed,
                    num_inference_steps=params.get("num_inference_steps", 50),
                    guidance=params.get("guidance", 7.0),
                    width=params.get("width", 512),
                    checkpoint=params.get("checkpoint", streamlit_util.DEFAULT_CHECKPOINT),
                    scheduler=params.get("scheduler", streamlit_util.SCHEDULER_OPTIONS[0]),
                    height=512,
                    device=device,
                )

                if show_images:
                    col.image(image)

                # TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained
                p_spectrogram = SpectrogramParams(
                    min_frequency=0,
                    max_frequency=10000,
                )
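
                # Reconstruct audio from the spectrogram image and encode it as mp3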
                output_format = "mp3"
                audio_bytes = streamlit_util.audio_bytes_from_spectrogram_image(
                    image=image,
                    params=p_spectrogram,
                    device=device,
                    output_format=output_format,
                )
                col.audio(audio_bytes)
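
                # Save outputs to disk, recording the paths back into the entry dict.
                # Note: filenames include the param set index but not the seed, so runs
                # with Num Seeds > 1 overwrite earlier files for the same entry.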
                if output_path:
                    prompt_slug = entry["prompt"].replace(" ", "_")
                    negative_prompt_slug = entry.get("negative_prompt", "").replace(" ", "_")

                    image_path = (
                        output_path / f"image_{i}_{prompt_slug}_neg_{negative_prompt_slug}.jpg"
                    )
                    image.save(image_path, format="JPEG")
                    entry["image_path"] = str(image_path)

                    audio_path = (
                        output_path
                        / f"audio_{i}_{prompt_slug}_neg_{negative_prompt_slug}.{output_format}"
                    )
                    audio_path.write_bytes(audio_bytes.getbuffer())
                    entry["audio_path"] = str(audio_path)

    if output_path:
        output_json_path = output_path / "index.json"
        output_json_path.write_text(json.dumps(data, indent=4))
        st.info(f"Output written to {str(output_path)}")
    else:
        st.info("Enter output directory in sidebar to save to disk")