# riffusion-inference/riffusion/server.py

"""
Flask server that serves the riffusion model as an API.
"""

import dataclasses
import io
import json
import logging
import time
import typing as T
from pathlib import Path

import dacite
import flask
import PIL
from flask_cors import CORS

from riffusion.datatypes import InferenceInput, InferenceOutput
from riffusion.riffusion_pipeline import RiffusionPipeline
from riffusion.spectrogram_image_converter import SpectrogramImageConverter
from riffusion.spectrogram_params import SpectrogramParams
from riffusion.util import base64_util

# Flask app with CORS
# CORS(app) is applied with no options, so flask_cors' defaults govern every route.
app = flask.Flask(__name__)
CORS(app)

# Log at the INFO level to both the console and disk.
# Order matters: basicConfig() only installs its default console handler (which
# writes to stderr) when the root logger has no handlers yet, so it must run
# before the FileHandler is attached.
logging.basicConfig(level=logging.INFO)
# "server.log" is a relative path, so the file lands in the process's working directory.
logging.getLogger().addHandler(logging.FileHandler("server.log"))

# Global variable for the model pipeline.
# Stays None until run_app() loads a checkpoint; run_inference() reads it per request.
PIPELINE: T.Optional[RiffusionPipeline] = None

# Where built-in seed images are stored: the "seed_images" directory next to the
# directory that contains this file (two levels up from the resolved file path).
SEED_IMAGES_DIR = Path(Path(__file__).resolve().parent.parent, "seed_images")


def run_app(
    *,
    checkpoint: str = "riffusion/riffusion-model-v1",
    no_traced_unet: bool = False,
    device: str = "cuda",
    host: str = "127.0.0.1",
    port: int = 3013,
    debug: bool = False,
    ssl_certificate: T.Optional[str] = None,
    ssl_key: T.Optional[str] = None,
):
    """
    Run a flask API that serves the given riffusion model checkpoint.

    Args:
        checkpoint: Checkpoint identifier handed to RiffusionPipeline.load_checkpoint
        no_traced_unet: When True, the pipeline is loaded with use_traced_unet=False
        device: Device string handed to the pipeline loader
        host: Host the flask server binds to
        port: Port the flask server listens on
        debug: Passed through to flask's debug flag
        ssl_certificate: SSL certificate; when set, the server is started with an
            ssl_context of (ssl_certificate, ssl_key)
        ssl_key: SSL key; required whenever ssl_certificate is set

    Raises:
        ValueError: If ssl_certificate is given without ssl_key
    """
    # Validate the arguments before the (slow) checkpoint load so that a bad
    # invocation fails immediately. A real exception is used instead of `assert`
    # because assertions are stripped when Python runs with -O.
    if ssl_certificate and ssl_key is None:
        raise ValueError("ssl_key is required when ssl_certificate is provided")

    # Initialize the model
    global PIPELINE
    PIPELINE = RiffusionPipeline.load_checkpoint(
        checkpoint=checkpoint,
        use_traced_unet=not no_traced_unet,
        device=device,
    )

    # threaded=False keeps the server handling one request at a time
    server_kwargs: T.Dict[str, T.Any] = {
        "debug": debug,
        "threaded": False,
        "host": host,
        "port": port,
    }

    if ssl_certificate:
        server_kwargs["ssl_context"] = (ssl_certificate, ssl_key)

    app.run(**server_kwargs)


@app.route("/run_inference/", methods=["POST"])
def run_inference():
    """
    Execute the riffusion model as an API.

    Inputs:
        Serialized JSON of the InferenceInput dataclass

    Returns:
        Serialized JSON of the InferenceOutput dataclass on success, or a
        (message, status) tuple: 400 when the payload is not valid JSON, is not
        a JSON object, or does not match InferenceInput; 500 when the model
        pipeline has not been loaded.
    """
    start_time = time.time()

    # Parse the payload as JSON. A malformed body is the client's fault, so it
    # is answered with a 400 rather than escaping as an unhandled exception.
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
    try:
        json_data = json.loads(flask.request.data)
    except ValueError as exception:
        logging.info("Rejected malformed JSON payload: %s", exception)
        return f"Invalid JSON payload: {exception}", 400

    # Log the request
    logging.info(json_data)

    # dacite needs a mapping; valid JSON can also be a list, string, number...
    if not isinstance(json_data, dict):
        return "Request payload must be a JSON object", 400

    # Parse an InferenceInput dataclass from the payload
    try:
        inputs = dacite.from_dict(InferenceInput, json_data)
    except (
        dacite.exceptions.WrongTypeError,
        dacite.exceptions.MissingValueError,
    ) as exception:
        logging.info("Rejected inference input: %s", exception)
        return str(exception), 400

    # PIPELINE is only populated by run_app(); fail clearly if it never ran
    if PIPELINE is None:
        logging.error("run_inference called before the model pipeline was loaded")
        return "Model pipeline is not initialized", 500

    response = compute_request(
        inputs=inputs,
        seed_images_dir=SEED_IMAGES_DIR,
        pipeline=PIPELINE,
    )

    # Log the total time
    logging.info(f"Request took {time.time() - start_time:.2f} s")

    return response


def _load_seed_image(
    seed_images_dir: T.Union[str, Path],
    image_id: str,
) -> "T.Optional[PIL.Image.Image]":
    """
    Load `<seed_images_dir>/<image_id>.png` as an RGB image.

    Returns None when the ID does not name an existing file located inside
    seed_images_dir.
    """
    try:
        root = Path(seed_images_dir).resolve()
        image_path = Path(root, f"{image_id}.png").resolve()
    except (OSError, ValueError):
        # e.g. an embedded null byte in the ID
        return None

    # The ID comes from the request payload. Without this containment check an
    # ID such as "../x" or an absolute path would read files outside the seed
    # image directory.
    if root not in image_path.parents or not image_path.is_file():
        return None

    # convert() returns a new, fully loaded image, so the file handle opened by
    # PIL can be closed as soon as the conversion is done.
    with PIL.Image.open(str(image_path)) as image_file:
        return image_file.convert("RGB")


def compute_request(
    inputs: InferenceInput,
    pipeline: RiffusionPipeline,
    seed_images_dir: T.Union[str, Path],
) -> T.Union[str, T.Tuple[str, int]]:
    """
    Does all the heavy lifting of the request.

    Args:
        inputs: The input dataclass
        pipeline: The riffusion model pipeline
        seed_images_dir: The directory where seed images are stored

    Returns:
        The InferenceOutput dataclass serialized as a JSON string, or a
        (message, 400) tuple when the seed or mask image ID is invalid.
    """
    # Load the seed image by ID
    init_image = _load_seed_image(seed_images_dir, inputs.seed_image_id)
    if init_image is None:
        return f"Invalid seed image: {inputs.seed_image_id}", 400

    # Load the mask image by ID (optional; mask images share the seed image directory)
    mask_image: T.Optional[PIL.Image.Image] = None
    if inputs.mask_image_id:
        mask_image = _load_seed_image(seed_images_dir, inputs.mask_image_id)
        if mask_image is None:
            return f"Invalid mask image: {inputs.mask_image_id}", 400

    # Execute the model to get the spectrogram image
    image = pipeline.riffuse(
        inputs,
        init_image=init_image,
        mask_image=mask_image,
    )

    # TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained
    params = SpectrogramParams(
        min_frequency=0,
        max_frequency=10000,
    )

    # Reconstruct audio from the image
    # TODO(hayk): It may help performance a bit to cache this object
    converter = SpectrogramImageConverter(params=params, device=str(pipeline.device))

    segment = converter.audio_from_spectrogram_image(
        image,
        apply_filters=True,
    )

    # Export audio to MP3 bytes; rewind so the encoder reads from the start
    mp3_bytes = io.BytesIO()
    segment.export(mp3_bytes, format="mp3")
    mp3_bytes.seek(0)

    # Export image to JPEG bytes, carrying over the EXIF data of the generated image
    image_bytes = io.BytesIO()
    image.save(image_bytes, exif=image.getexif(), format="JPEG")
    image_bytes.seek(0)

    # Assemble the output dataclass, embedding both payloads as base64 data URIs
    output = InferenceOutput(
        image="data:image/jpeg;base64," + base64_util.encode(image_bytes),
        audio="data:audio/mpeg;base64," + base64_util.encode(mp3_bytes),
        duration_s=segment.duration_seconds,
    )

    return json.dumps(dataclasses.asdict(output))


if __name__ == "__main__":
    # Script entry point: hand run_app to argh, which dispatches it as the command
    from argh import dispatch_command

    dispatch_command(run_app)
revup virtual diff target 2fc9b8d37961cc8cab5579b8c58df1af79371553 d98efa55fe6e64c65f3344a22cad8db1111240ff 450a9b6fad158cfefa12df4dc3d17880de8d0263 a7b41d90bd843e23fa2c430e1d17e1bf4d69c718 2023-01-16 19:17:23 -07:00			`"""`
			`Flask server that serves the riffusion model as an API.`
			`"""`

			`import dataclasses`
			`import io`
			`import json`
			`import logging`
			`import time`
			`import typing as T`
			`from pathlib import Path`

			`import dacite`
			`import flask`
			`import PIL`
			`from flask_cors import CORS`

			`from riffusion.datatypes import InferenceInput, InferenceOutput`
			`from riffusion.riffusion_pipeline import RiffusionPipeline`
			`from riffusion.spectrogram_image_converter import SpectrogramImageConverter`
			`from riffusion.spectrogram_params import SpectrogramParams`
			`from riffusion.util import base64_util`

			`# Flask app with CORS`
			`app = flask.Flask(__name__)`
			`CORS(app)`

			`# Log at the INFO level to both stdout and disk`
			`logging.basicConfig(level=logging.INFO)`
			`logging.getLogger().addHandler(logging.FileHandler("server.log"))`

			`# Global variable for the model pipeline`
			`PIPELINE: T.Optional[RiffusionPipeline] = None`

			`# Where built-in seed images are stored`
			`SEED_IMAGES_DIR = Path(Path(__file__).resolve().parent.parent, "seed_images")`


			`def run_app(`
			`*,`
			`checkpoint: str = "riffusion/riffusion-model-v1",`
			`no_traced_unet: bool = False,`
			`device: str = "cuda",`
			`host: str = "127.0.0.1",`
			`port: int = 3013,`
			`debug: bool = False,`
			`ssl_certificate: T.Optional[str] = None,`
			`ssl_key: T.Optional[str] = None,`
			`):`
			`"""`
			`Run a flask API that serves the given riffusion model checkpoint.`
			`"""`
			`# Initialize the model`
			`global PIPELINE`
			`PIPELINE = RiffusionPipeline.load_checkpoint(`
			`checkpoint=checkpoint,`
			`use_traced_unet=not no_traced_unet,`
			`device=device,`
			`)`

			`args = dict(`
			`debug=debug,`
			`threaded=False,`
			`host=host,`
			`port=port,`
			`)`

			`if ssl_certificate:`
			`assert ssl_key is not None`
			`args["ssl_context"] = (ssl_certificate, ssl_key)`

			`app.run(**args) # type: ignore`


			`@app.route("/run_inference/", methods=["POST"])`
			`def run_inference():`
			`"""`
			`Execute the riffusion model as an API.`

			`Inputs:`
			`Serialized JSON of the InferenceInput dataclass`

			`Returns:`
			`Serialized JSON of the InferenceOutput dataclass`
			`"""`
			`start_time = time.time()`

			`# Parse the payload as JSON`
			`json_data = json.loads(flask.request.data)`

			`# Log the request`
			`logging.info(json_data)`

			`# Parse an InferenceInput dataclass from the payload`
			`try:`
			`inputs = dacite.from_dict(InferenceInput, json_data)`
			`except dacite.exceptions.WrongTypeError as exception:`
			`logging.info(json_data)`
			`return str(exception), 400`
			`except dacite.exceptions.MissingValueError as exception:`
			`logging.info(json_data)`
			`return str(exception), 400`

			`response = compute_request(`
			`inputs=inputs,`
			`seed_images_dir=SEED_IMAGES_DIR,`
			`pipeline=PIPELINE,`
			`)`

			`# Log the total time`
			`logging.info(f"Request took {time.time() - start_time:.2f} s")`

			`return response`


			`def compute_request(`
			`inputs: InferenceInput,`
			`pipeline: RiffusionPipeline,`
			`seed_images_dir: str,`
			`) -> T.Union[str, T.Tuple[str, int]]:`
			`"""`
			`Does all the heavy lifting of the request.`

			`Args:`
			`inputs: The input dataclass`
			`pipeline: The riffusion model pipeline`
			`seed_images_dir: The directory where seed images are stored`
			`"""`
			`# Load the seed image by ID`
			`init_image_path = Path(seed_images_dir, f"{inputs.seed_image_id}.png")`

			`if not init_image_path.is_file():`
			`return f"Invalid seed image: {inputs.seed_image_id}", 400`
			`init_image = PIL.Image.open(str(init_image_path)).convert("RGB")`

			`# Load the mask image by ID`
			`mask_image: T.Optional[PIL.Image.Image] = None`
			`if inputs.mask_image_id:`
			`mask_image_path = Path(seed_images_dir, f"{inputs.mask_image_id}.png")`
			`if not mask_image_path.is_file():`
			`return f"Invalid mask image: {inputs.mask_image_id}", 400`
			`mask_image = PIL.Image.open(str(mask_image_path)).convert("RGB")`

			`# Execute the model to get the spectrogram image`
			`image = pipeline.riffuse(`
			`inputs,`
			`init_image=init_image,`
			`mask_image=mask_image,`
			`)`

			`# TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained`
			`params = SpectrogramParams(`
			`min_frequency=0,`
			`max_frequency=10000,`
			`)`

			`# Reconstruct audio from the image`
			`# TODO(hayk): It may help performance a bit to cache this object`
			`converter = SpectrogramImageConverter(params=params, device=str(pipeline.device))`

			`segment = converter.audio_from_spectrogram_image(`
			`image,`
			`apply_filters=True,`
			`)`

			`# Export audio to MP3 bytes`
			`mp3_bytes = io.BytesIO()`
			`segment.export(mp3_bytes, format="mp3")`
			`mp3_bytes.seek(0)`

			`# Export image to JPEG bytes`
			`image_bytes = io.BytesIO()`
			`image.save(image_bytes, exif=image.getexif(), format="JPEG")`
			`image_bytes.seek(0)`

			`# Assemble the output dataclass`
			`output = InferenceOutput(`
			`image="data:image/jpeg;base64," + base64_util.encode(image_bytes),`
			`audio="data:audio/mpeg;base64," + base64_util.encode(mp3_bytes),`
			`duration_s=segment.duration_seconds,`
			`)`

			`return json.dumps(dataclasses.asdict(output))`


			`if __name__ == "__main__":`
			`import argh`

			`argh.dispatch_command(run_app)`