Batch fix

And support 20kHz in the image-to-audio playground UI
2023-02-07 02:14:59 +00:00 · 2023-02-07 02:14:59 +00:00 · 0e6d7436f4
parent 38cce7ab00
commit 0e6d7436f4
2 changed files with 22 additions and 11 deletions
--- a/riffusion/cli.py
+++ b/riffusion/cli.py
@@ -213,15 +213,19 @@ def sample_clips_batch(
    mono: bool = False,
    extension: str = "mp3",
    num_threads: T.Optional[int] = None,
    glob: str = "*",
    limit: int = -1,
    seed: int = -1,
 ):
    """
    Sample short clips from a directory of audio files, multi-threaded.
    """
-    audio_paths = list(Path(audio_dir).glob("*"))
+    audio_paths = list(Path(audio_dir).glob(glob))
    audio_paths.sort()
    # Exclude json
    audio_paths = [p for p in audio_paths if p.suffix != ".json"]
    if limit > 0:
        audio_paths = audio_paths[:limit]
@@ -242,26 +246,24 @@ def sample_clips_batch(
        segment_duration_ms = int(segment.duration_seconds * 1000)
        for i in range(num_clips_per_file):
-            clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
+            try:
                clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
            except ValueError:
                continue
            clip = segment[clip_start_ms : clip_start_ms + duration_ms]
            clip_name = (
-                f"{audio_path.stem}_{i}"
+                f"{audio_path.stem}_{i}_"
-                "start_{clip_start_ms}_ms_duration_{duration_ms}_ms.{extension}"
+                f"start_{clip_start_ms}_ms_dur_{duration_ms}_ms.{extension}"
            )
            clip.export(output_path / clip_name, format=extension)
    pool = ThreadPool(processes=num_threads)
    with tqdm.tqdm(total=len(audio_paths)) as pbar:
        for result in pool.imap_unordered(process_one, audio_paths):
            # process_one(audio_path)
            pbar.update()
    # with tqdm.tqdm(total=len(audio_paths)) as pbar:
    #     for i, _ in enumerate(pool.imap_unordered(process_one, audio_paths)):
    #         pass
    # pbar.update()
 if __name__ == "__main__":
    argh.dispatch_commands(
--- a/riffusion/streamlit/pages/image_to_audio.py
+++ b/riffusion/streamlit/pages/image_to_audio.py
@@ -32,6 +32,8 @@ def render_image_to_audio() -> None:
    device = streamlit_util.select_device(st.sidebar)
    extension = streamlit_util.select_audio_extension(st.sidebar)
    use_20k = st.sidebar.checkbox("Use 20kHz", value=False)
    image_file = st.file_uploader(
        "Upload a file",
        type=streamlit_util.IMAGE_EXTENSIONS,
@@ -52,7 +54,14 @@ def render_image_to_audio() -> None:
        params = SpectrogramParams.from_exif(exif=image.getexif())
    except KeyError:
        st.info("Could not find spectrogram parameters in exif data. Using defaults.")
-        params = SpectrogramParams()
+        if use_20k:
            params = SpectrogramParams(
                min_frequency=10,
                max_frequency=20000,
                stereo=True,
            )
        else:
            params = SpectrogramParams()
    with st.expander("Spectrogram Parameters", expanded=False):
        st.json(dataclasses.asdict(params))