diff --git a/riffusion/cli.py b/riffusion/cli.py index fc83071..2baa31e 100644 --- a/riffusion/cli.py +++ b/riffusion/cli.py @@ -213,15 +213,19 @@ def sample_clips_batch( mono: bool = False, extension: str = "mp3", num_threads: T.Optional[int] = None, + glob: str = "*", limit: int = -1, seed: int = -1, ): """ Sample short clips from a directory of audio files, multi-threaded. """ - audio_paths = list(Path(audio_dir).glob("*")) + audio_paths = list(Path(audio_dir).glob(glob)) audio_paths.sort() + # Exclude json + audio_paths = [p for p in audio_paths if p.suffix != ".json"] + if limit > 0: audio_paths = audio_paths[:limit] @@ -242,26 +246,24 @@ def sample_clips_batch( segment_duration_ms = int(segment.duration_seconds * 1000) for i in range(num_clips_per_file): - clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms) + try: + clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms) + except ValueError: + continue + clip = segment[clip_start_ms : clip_start_ms + duration_ms] clip_name = ( - f"{audio_path.stem}_{i}" - "start_{clip_start_ms}_ms_duration_{duration_ms}_ms.{extension}" + f"{audio_path.stem}_{i}_" + f"start_{clip_start_ms}_ms_dur_{duration_ms}_ms.{extension}" ) clip.export(output_path / clip_name, format=extension) pool = ThreadPool(processes=num_threads) with tqdm.tqdm(total=len(audio_paths)) as pbar: for result in pool.imap_unordered(process_one, audio_paths): - # process_one(audio_path) pbar.update() - # with tqdm.tqdm(total=len(audio_paths)) as pbar: - # for i, _ in enumerate(pool.imap_unordered(process_one, audio_paths)): - # pass - # pbar.update() - if __name__ == "__main__": argh.dispatch_commands( diff --git a/riffusion/streamlit/pages/image_to_audio.py b/riffusion/streamlit/pages/image_to_audio.py index d6ba406..f330cc6 100644 --- a/riffusion/streamlit/pages/image_to_audio.py +++ b/riffusion/streamlit/pages/image_to_audio.py @@ -32,6 +32,8 @@ def render_image_to_audio() -> None: device = streamlit_util.select_device(st.sidebar) extension = streamlit_util.select_audio_extension(st.sidebar) + use_20k = st.sidebar.checkbox("Use 20kHz", value=False) + image_file = st.file_uploader( "Upload a file", type=streamlit_util.IMAGE_EXTENSIONS, @@ -52,7 +54,14 @@ def render_image_to_audio() -> None: params = SpectrogramParams.from_exif(exif=image.getexif()) except KeyError: st.info("Could not find spectrogram parameters in exif data. Using defaults.") - params = SpectrogramParams() + if use_20k: + params = SpectrogramParams( + min_frequency=10, + max_frequency=20000, + stereo=True, + ) + else: + params = SpectrogramParams() with st.expander("Spectrogram Parameters", expanded=False): st.json(dataclasses.asdict(params))