# Source code for recipes.cad1.task2.baseline.audio_manager

"""A utility class for managing audio files."""

from __future__ import annotations

import logging
import warnings
from pathlib import Path

import numpy as np
import pyloudnorm as pyln
from scipy.io import wavfile

logger = logging.getLogger(__name__)



[docs]
class AudioManager:
    """Collect waveforms in memory and write them to disk as 16-bit WAV files.

    Also offers loudness helpers (LUFS measurement and normalisation) backed
    by a ``pyloudnorm`` meter created for the configured sample rate.

    Attributes:
        audios_to_save (dict[str, np.ndarray]): Waveforms queued for saving,
            keyed by file name (without the ``.wav`` extension).
        sample_rate (int): Sample rate used for the loudness meter and
            written into every WAV file.
        soft_clip (bool): If True, ``clip_audio`` applies ``tanh`` to the
            signal before the hard clip.
        output_audio_path (Path): Directory that receives the WAV files.
        level_meter (pyln.Meter): Loudness meter used by ``get_lufs_level``.
    """

    def __init__(
        self,
        sample_rate: int = 44100,
        output_audio_path: str | Path = "",
        soft_clip: bool = False,
    ):
        """Initialize the AudioManager instance.

        The output directory (and any missing parents) is created as a side
        effect of construction.

        Args:
            sample_rate (int): The sample rate of the audio. Defaults to 44100.
            output_audio_path (str | Path): The directory to save the audios
                to. Defaults to the current directory.
            soft_clip (bool): Whether to apply ``tanh`` soft clipping before
                the hard clip. Defaults to False.
        """
        self.audios_to_save: dict[str, np.ndarray] = {}
        self.sample_rate = sample_rate
        self.soft_clip = soft_clip
        self.output_audio_path = Path(output_audio_path)
        self.output_audio_path.mkdir(exist_ok=True, parents=True)
        self.level_meter = pyln.Meter(self.sample_rate)

    def add_audios_to_save(self, file_name: str, waveform: np.ndarray) -> None:
        """Add a waveform to the list of audios to save.

        A copy of the waveform is stored, so later changes to the caller's
        array do not affect what gets written. Adding the same ``file_name``
        twice replaces the earlier entry.

        Args:
            file_name (str): The name of the track (without extension).
            waveform (np.ndarray): The track to save.
        """
        self.audios_to_save[file_name] = waveform.copy()

    def save_audios(self) -> None:
        """Save every queued audio into ``self.output_audio_path``.

        Each entry is written as ``<file_name>.wav``. The queue is not
        emptied afterwards.
        """
        for file_name, waveform in self.audios_to_save.items():
            self._save_audio(file_name, waveform)

    def _save_audio(self, file_name: str, waveform: np.ndarray) -> None:
        """Save one waveform as ``<file_name>.wav`` in the output directory.

        It always saves in Int16 format, using ``self.sample_rate``. The
        signal is clipped with ``clip_audio`` first, and a warning is logged
        when any sample had to be clipped.

        Args:
            file_name (str): The name of the track (without extension).
            waveform (np.ndarray): The track to save.
        """
        # An array whose first axis has length 2 is transposed before writing
        # (for a 1-D array ``.T`` is a no-op).
        if waveform.shape[0] == 2:
            waveform = waveform.T

        n_clipped, waveform = self.clip_audio(waveform)

        output_file = self.output_audio_path / f"{file_name}.wav"
        if n_clipped > 0:
            logger.warning("Writing %s: %d samples clipped", output_file, n_clipped)

        # A full-scale sample of +1.0 scales to 32768, one above the int16
        # maximum; saturate before casting so it does not wrap to -32768.
        int16_range = np.iinfo(np.int16)
        pcm = np.clip(
            32768.0 * waveform, int16_range.min, int16_range.max
        ).astype(np.int16)

        wavfile.write(output_file, self.sample_rate, pcm)

    def clip_audio(
        self, signal: np.ndarray, min_val: float = -1, max_val: float = 1
    ) -> tuple[int, np.ndarray]:
        """Clip a signal to the given range.

        When ``self.soft_clip`` is set, ``tanh`` is applied first and the
        clip count refers to the soft-clipped signal.

        Args:
            signal (np.ndarray): The signal to clip.
            min_val (float): The minimum value to clip to. Defaults to -1.
            max_val (float): The maximum value to clip to. Defaults to 1.

        Returns:
            Tuple[int, np.ndarray]: Number of samples clipped and the clipped signal.
        """
        if self.soft_clip:
            signal = np.tanh(signal)
        # Count against the requested bounds so the number matches what
        # np.clip actually changes, also for non-default ranges.
        out_of_range = (signal < min_val) | (signal > max_val)
        n_clipped = int(np.count_nonzero(out_of_range))
        return n_clipped, np.clip(signal, min_val, max_val)

    def get_lufs_level(self, signal: np.ndarray) -> float:
        """Get the LUFS level of the signal.

        Args:
            signal (np.ndarray): The signal to get the LUFS level of, in the
                layout expected by ``pyln.Meter.integrated_loudness``.

        Returns:
            float: The integrated loudness reported by ``self.level_meter``.
        """
        return self.level_meter.integrated_loudness(signal)

    def scale_to_lufs(self, signal: np.ndarray, target_lufs: float) -> np.ndarray:
        """Scale the signal to the given LUFS level.

        Args:
            signal (np.ndarray): The signal to scale.
            target_lufs (float): The target LUFS level.

        Returns:
            np.ndarray: The scaled signal. Note that it is returned
                transposed (``.T``) relative to the normalised signal.
        """
        current_lufs = self.get_lufs_level(signal)
        # record=True captures any warnings raised during normalisation
        # instead of emitting them; the captured list is discarded.
        with warnings.catch_warnings(record=True):
            scaled_signal = pyln.normalize.loudness(signal, current_lufs, target_lufs).T
        return scaled_signal