# Source code for recipes.cad1.task2.baseline.audio_manager
"""A utility class for managing audio files."""
from __future__ import annotations
import logging
import warnings
from pathlib import Path
import numpy as np
import pyloudnorm as pyln
from scipy.io import wavfile
logger = logging.getLogger(__name__)
# [docs] (documentation-link marker left over from the rendered source page)
class AudioManager:
    """A utility class for managing audio files.

    Waveforms are queued in memory with ``add_audios_to_save`` and written to
    ``output_audio_path`` as 16-bit PCM WAV files by ``save_audios``. The class
    also wraps a ``pyloudnorm`` meter for measuring and normalising loudness.
    """

    def __init__(
        self,
        sample_rate: int = 44100,
        output_audio_path: str | Path = "",
        soft_clip: bool = False,
    ):
        """Initialize the AudioManager instance.

        Args:
            sample_rate (int): Sample rate passed to the loudness meter and
                written into every saved WAV file.
            output_audio_path (str | Path): Directory that receives the saved
                files. It is created, including parents, if missing.
            soft_clip (bool): If True, ``clip_audio`` applies ``tanh`` to the
                signal before hard clipping.
        """
        self.audios_to_save: dict[str, np.ndarray] = {}
        self.sample_rate = sample_rate
        self.soft_clip = soft_clip
        self.output_audio_path = Path(output_audio_path)
        self.output_audio_path.mkdir(exist_ok=True, parents=True)
        self.level_meter = pyln.Meter(self.sample_rate)

    def add_audios_to_save(self, file_name: str, waveform: np.ndarray) -> None:
        """Add a waveform to the list of audios to save.

        A copy of the waveform is stored, so later changes to the caller's
        array do not affect what gets written. Re-using a ``file_name``
        replaces the previously queued waveform.

        Args:
            file_name (str): The name of the track (without extension).
            waveform (np.ndarray): The track to save.
        """
        self.audios_to_save[file_name] = waveform.copy()

    def save_audios(self) -> None:
        """Write every queued waveform to ``self.output_audio_path``.

        Each entry is saved as ``<file_name>.wav``. The queue is not emptied
        afterwards.
        """
        for file_name, waveform in self.audios_to_save.items():
            self._save_audio(file_name, waveform)

    def _save_audio(self, file_name: str, waveform: np.ndarray) -> None:
        """Save one waveform as ``<file_name>.wav`` in the output directory.

        It always saves in Int16 format, using ``self.sample_rate``. The
        signal is clipped with ``clip_audio`` first, and a warning is logged
        when any sample had to be clipped.

        Args:
            file_name (str): The name of the track (without extension).
            waveform (np.ndarray): The track to save.
        """
        # A leading axis of length 2 is taken to be the channel axis and moved
        # last, giving the (samples, channels) layout written to the file.
        waveform = waveform.T if waveform.shape[0] == 2 else waveform

        n_clipped, waveform = self.clip_audio(waveform)
        if n_clipped > 0:
            logger.warning(
                f"Writing {self.output_audio_path / file_name}: {n_clipped} "
                "samples clipped"
            )

        # A sample of exactly +1.0 scales to 32768, one past the int16 maximum;
        # casting that directly overflows to -32768. Clamp to the int16 range
        # before the cast so positive full scale is stored as 32767.
        pcm = np.clip(32768.0 * waveform, -32768, 32767).astype(np.int16)
        wavfile.write(
            self.output_audio_path / f"{file_name}.wav",
            self.sample_rate,
            pcm,
        )

    def clip_audio(
        self, signal: np.ndarray, min_val: float = -1, max_val: float = 1
    ) -> tuple[int, np.ndarray]:
        """Clip a signal to the given range.

        When ``self.soft_clip`` is True the signal is first passed through
        ``tanh``; the hard clip to ``[min_val, max_val]`` is applied either way.

        Args:
            signal (np.ndarray): The signal to clip.
            min_val (float): The minimum value to clip to. Defaults to -1.
            max_val (float): The maximum value to clip to. Defaults to 1.

        Returns:
            Tuple[int, np.ndarray]: Number of samples clipped and the clipped
            signal.
        """
        if self.soft_clip:
            signal = np.tanh(signal)

        # Count against the requested bounds (not a fixed +/-1.0) so the
        # reported number matches what np.clip actually changes.
        out_of_range = (signal < min_val) | (signal > max_val)
        n_clipped = np.count_nonzero(out_of_range)
        return int(n_clipped), np.clip(signal, min_val, max_val)

    def get_lufs_level(self, signal: np.ndarray) -> float:
        """Get the LUFS level of the signal.

        Args:
            signal (np.ndarray): The signal to get the LUFS level of.

        Returns:
            float: The integrated loudness reported by the meter.
        """
        return self.level_meter.integrated_loudness(signal)

    def scale_to_lufs(self, signal: np.ndarray, target_lufs: float) -> np.ndarray:
        """Scale the signal to the given LUFS level.

        Args:
            signal (np.ndarray): The signal to scale.
            target_lufs (float): The target LUFS level.

        Returns:
            np.ndarray: The scaled signal. Note that the normalised array is
            transposed (``.T``) before being returned.
        """
        current_lufs = self.get_lufs_level(signal)
        # record=True captures any warnings raised during normalisation
        # instead of emitting them; the captured list is discarded.
        with warnings.catch_warnings(record=True):
            scaled_signal = pyln.normalize.loudness(signal, current_lufs, target_lufs).T
        return scaled_signal