Batch fix

Also support 20kHz in the image-to-audio playground UI
This commit is contained in:
Hayk Martiros 2023-02-07 02:14:59 +00:00
parent 38cce7ab00
commit 0e6d7436f4
2 changed files with 22 additions and 11 deletions

View File

@ -213,15 +213,19 @@ def sample_clips_batch(
mono: bool = False, mono: bool = False,
extension: str = "mp3", extension: str = "mp3",
num_threads: T.Optional[int] = None, num_threads: T.Optional[int] = None,
glob: str = "*",
limit: int = -1, limit: int = -1,
seed: int = -1, seed: int = -1,
): ):
""" """
Sample short clips from a directory of audio files, multi-threaded. Sample short clips from a directory of audio files, multi-threaded.
""" """
audio_paths = list(Path(audio_dir).glob("*")) audio_paths = list(Path(audio_dir).glob(glob))
audio_paths.sort() audio_paths.sort()
# Exclude json
audio_paths = [p for p in audio_paths if p.suffix != ".json"]
if limit > 0: if limit > 0:
audio_paths = audio_paths[:limit] audio_paths = audio_paths[:limit]
@ -242,26 +246,24 @@ def sample_clips_batch(
segment_duration_ms = int(segment.duration_seconds * 1000) segment_duration_ms = int(segment.duration_seconds * 1000)
for i in range(num_clips_per_file): for i in range(num_clips_per_file):
clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms) try:
clip_start_ms = np.random.randint(0, segment_duration_ms - duration_ms)
except ValueError:
continue
clip = segment[clip_start_ms : clip_start_ms + duration_ms] clip = segment[clip_start_ms : clip_start_ms + duration_ms]
clip_name = ( clip_name = (
f"{audio_path.stem}_{i}" f"{audio_path.stem}_{i}_"
"start_{clip_start_ms}_ms_duration_{duration_ms}_ms.{extension}" f"start_{clip_start_ms}_ms_dur_{duration_ms}_ms.{extension}"
) )
clip.export(output_path / clip_name, format=extension) clip.export(output_path / clip_name, format=extension)
pool = ThreadPool(processes=num_threads) pool = ThreadPool(processes=num_threads)
with tqdm.tqdm(total=len(audio_paths)) as pbar: with tqdm.tqdm(total=len(audio_paths)) as pbar:
for result in pool.imap_unordered(process_one, audio_paths): for result in pool.imap_unordered(process_one, audio_paths):
# process_one(audio_path)
pbar.update() pbar.update()
# with tqdm.tqdm(total=len(audio_paths)) as pbar:
# for i, _ in enumerate(pool.imap_unordered(process_one, audio_paths)):
# pass
# pbar.update()
if __name__ == "__main__": if __name__ == "__main__":
argh.dispatch_commands( argh.dispatch_commands(

View File

@ -32,6 +32,8 @@ def render_image_to_audio() -> None:
device = streamlit_util.select_device(st.sidebar) device = streamlit_util.select_device(st.sidebar)
extension = streamlit_util.select_audio_extension(st.sidebar) extension = streamlit_util.select_audio_extension(st.sidebar)
use_20k = st.sidebar.checkbox("Use 20kHz", value=False)
image_file = st.file_uploader( image_file = st.file_uploader(
"Upload a file", "Upload a file",
type=streamlit_util.IMAGE_EXTENSIONS, type=streamlit_util.IMAGE_EXTENSIONS,
@ -52,7 +54,14 @@ def render_image_to_audio() -> None:
params = SpectrogramParams.from_exif(exif=image.getexif()) params = SpectrogramParams.from_exif(exif=image.getexif())
except KeyError: except KeyError:
st.info("Could not find spectrogram parameters in exif data. Using defaults.") st.info("Could not find spectrogram parameters in exif data. Using defaults.")
params = SpectrogramParams() if use_20k:
params = SpectrogramParams(
min_frequency=10,
max_frequency=20000,
stereo=True,
)
else:
params = SpectrogramParams()
with st.expander("Spectrogram Parameters", expanded=False): with st.expander("Spectrogram Parameters", expanded=False):
st.json(dataclasses.asdict(params)) st.json(dataclasses.asdict(params))