Source code for recipes.cad1.task2.baseline.baseline_utils
"""Utility functions for the baseline model."""
from __future__ import annotations
# pylint: disable=import-error
import json
import logging
import warnings
from pathlib import Path
import librosa
import numpy as np
import pandas as pd
from omegaconf import DictConfig
from clarity.utils.audiogram import Listener
logger = logging.getLogger(__name__)
def read_mp3(
    file_path: str | Path, sample_rate: float | None = None
) -> tuple[np.ndarray, float]:
"""Read a MP3 file and return its signal.
Args:
file_path (str, Path): The path to the mp3 file.
sample_rate (int): The sampling frequency of the mp3 file.
Returns:
signal (np.ndarray): The signal of the mp3 file.
sample_rate (int): The sampling frequency of the mp3 file.
"""
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            signal, returned_sample_rate = librosa.load(
                str(file_path),
                sr=sample_rate,
                mono=False,
                res_type="soxr_hq",
                dtype=np.float32,
            )
    except Exception as error:
        raise ValueError(f"Error reading {file_path}") from error

    if signal.ndim == 1:
        # If mono, duplicate to stereo
        signal = np.stack([signal, signal], axis=0)

    # Peak normalization for cases when the signal has
    # absolute values greater than 1
    max_abs = np.max(np.abs(signal))
    if max_abs > 1:
        signal = signal / max_abs

    return signal, returned_sample_rate
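
The following is an illustrative sketch, not part of the module: it shows how read_mp3 might be called. The file name and the 44100 Hz target rate are assumptions chosen for the example.

example_signal, example_fs = read_mp3("example.mp3", sample_rate=44100)  # placeholder file
# Channels-first stereo: mono files are duplicated, so the shape is (2, n_samples).
print(example_signal.shape, example_fs)
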
def load_hrtf(config: DictConfig) -> dict:
"""Load the HRTF file.
Args:
config (DictConfig): A dictionary-like object containing various configuration
parameters for the evaluation. This includes the path to the HRTF files.
Returns:
hrtf_data (dict): A dictionary containing the HRTF data for the dataset.
"""
with open(config.path.hrtf_file, encoding="utf-8") as fp:
hrtf_data = json.load(fp)
return hrtf_data[config.evaluate.split]
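
A minimal sketch of driving load_hrtf from an OmegaConf config; the file path and split value below are placeholders, not paths taken from the recipe.

from omegaconf import OmegaConf

# Hypothetical configuration: only the field names mirror what load_hrtf reads.
example_cfg = OmegaConf.create(
    {"path": {"hrtf_file": "metadata/hrtf.json"}, "evaluate": {"split": "valid"}}
)
hrtf_for_split = load_hrtf(example_cfg)
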
def load_listeners_and_scenes(
    config: DictConfig,
) -> tuple[dict, dict[str, Listener], dict]:
"""Load listener and scene data
Args:
config (DictConfig): A dictionary-like object containing various configuration
parameters for the evaluation. This includes the path to the scenes file,
the path to the listeners train file, and the path to the listeners valid
file.
Returns:
Tuple[dict, dict, dict]: A tuple containing the scene data, the listener data
and the pair scenes-listeners.
"""
    # Load scene data
    with open(config.path.scenes_file, encoding="utf-8") as fp:
        df_scenes = pd.read_json(fp, orient="index")

    # Load listener audiograms and keep only the scenes for the requested split
    listeners = Listener.load_listener_dict(config.path.listeners_file)
    if config.evaluate.split in ["train", "valid", "test"]:
        scenes = df_scenes[
            df_scenes["split"] == config.evaluate.split
        ].to_dict("index")
    else:
        raise ValueError(f"Unknown split {config.evaluate.split}")

    with open(config.path.scenes_listeners_file, encoding="utf-8") as fp:
        scenes_listeners = json.load(fp)
    scenes_listeners = {
        k: v for k, v in scenes_listeners.items() if k in scenes
    }

    return scenes, listeners, scenes_listeners
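
A sketch of how load_listeners_and_scenes might be configured; the field names follow the function above, but every path value here is a placeholder.

from omegaconf import OmegaConf

example_cfg = OmegaConf.create(
    {
        "path": {
            "scenes_file": "metadata/scenes.json",  # placeholder path
            "listeners_file": "metadata/listeners.json",  # placeholder path
            "scenes_listeners_file": "metadata/scenes_listeners.json",  # placeholder path
        },
        "evaluate": {"split": "valid"},
    }
)
scenes, listeners, scenes_listeners = load_listeners_and_scenes(example_cfg)
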
def make_scene_listener_list(scenes_listeners: dict, small_test: bool = False) -> list:
    """Make the list of scene-listener pairings to process.

    Args:
        scenes_listeners (dict): Mapping from scene ID to a list of listener IDs.
        small_test (bool): If True, keep only 1 in 15 of the pairings.

    Returns:
        list: A list of (scene, listener) tuples.
    """
    scene_listener_pairs = [
        (scene, listener)
        for scene in scenes_listeners
        for listener in scenes_listeners[scene]
    ]
    # Can define a standard 'small_test' with just 1/15 of the data
    if small_test:
        scene_listener_pairs = scene_listener_pairs[::15]
    return scene_listener_pairs
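
A small, purely illustrative example of the pairing logic; the scene and listener IDs below are made up.

demo_scenes_listeners = {"S1": ["L01", "L02"], "S2": ["L03"]}
pairs = make_scene_listener_list(demo_scenes_listeners)
# pairs == [("S1", "L01"), ("S1", "L02"), ("S2", "L03")]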