Source code for recipes.cad1.task2.baseline.baseline_utils

"""Utility functions for the baseline model."""

from __future__ import annotations

# pylint: disable=import-error
import json
import logging
import warnings
from pathlib import Path

import librosa
import numpy as np
import pandas as pd
from omegaconf import DictConfig

from clarity.utils.audiogram import Listener

logger = logging.getLogger(__name__)


def read_mp3(
    file_path: str | Path, sample_rate: float | None = None
) -> tuple[np.ndarray, float]:
    """Read an MP3 file and return its signal and sampling rate.

    The file is decoded with ``librosa.load`` using ``mono=False``. A
    one-dimensional (mono) result is duplicated into two identical channels,
    and the signal is peak-normalised only when its absolute peak exceeds 1.

    Args:
        file_path (str, Path): The path to the mp3 file.
        sample_rate (float, optional): Value forwarded to ``librosa.load`` as
            ``sr``. Per the librosa documentation, ``None`` keeps the file's
            native sampling rate.

    Returns:
        signal (np.ndarray): The decoded signal, requested as float32 from
            ``librosa.load``; a mono signal is stacked into two channels
            along axis 0.
        sample_rate (float): The sampling rate reported by ``librosa.load``.

    Raises:
        ValueError: If the file cannot be loaded. The underlying exception is
            chained as ``__cause__`` and summarised in the message.
    """
    try:
        with warnings.catch_warnings():
            # Suppress any warnings raised while the file is being decoded.
            warnings.simplefilter("ignore")
            signal, returned_sample_rate = librosa.load(
                str(file_path),
                sr=sample_rate,
                mono=False,
                res_type="soxr_hq",
                dtype=np.float32,
            )
    except Exception as error:
        # Callers rely on ValueError; include the path and cause so the
        # failure is diagnosable without walking the exception chain.
        raise ValueError(
            f"Could not load MP3 file '{file_path}': {error}"
        ) from error

    if signal.ndim == 1:
        # If mono, duplicate to stereo
        signal = np.stack([signal, signal], axis=0)

    # Peak normalization for cases when the signal has absolute values
    # greater than 1. An empty signal has no peak, so it is left untouched
    # instead of crashing inside np.max.
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak > 1:
        signal = signal / peak

    return signal, returned_sample_rate
def load_hrtf(config: DictConfig) -> dict:
    """Read the HRTF JSON file and return the entry for the evaluated split.

    Args:
        config (DictConfig): Evaluation configuration. ``config.path.hrtf_file``
            is the JSON file to read and ``config.evaluate.split`` is the
            top-level key selected from it.

    Returns:
        dict: The value stored under ``config.evaluate.split`` in the HRTF
            JSON file.
    """
    with open(config.path.hrtf_file, encoding="utf-8") as hrtf_stream:
        all_splits = json.loads(hrtf_stream.read())

    # Only the requested split's section of the file is handed back.
    split_name = config.evaluate.split
    return all_splits[split_name]
def load_listeners_and_scenes(
    config: DictConfig,
) -> tuple[dict, dict[str, Listener], dict]:
    """Load scene, listener and scene-listener pairing data for one split.

    Args:
        config (DictConfig): Evaluation configuration. This function reads
            ``config.evaluate.split``, ``config.path.scenes_file``,
            ``config.path.listeners_file`` and
            ``config.path.scenes_listeners_file``.

    Returns:
        tuple[dict, dict[str, Listener], dict]: A tuple
            ``(scenes, listeners, scenes_listeners)`` where ``scenes`` holds
            the rows of the scenes file whose ``split`` column equals the
            requested split, ``listeners`` is whatever
            ``Listener.load_listener_dict`` returns for the listeners file
            (not filtered by split), and ``scenes_listeners`` is the pairing
            file restricted to keys present in ``scenes``.

    Raises:
        ValueError: If ``config.evaluate.split`` is not one of ``train``,
            ``valid`` or ``test``. Raised before any file is read.
    """
    split = config.evaluate.split
    # Validate first so a misconfigured split fails fast, instead of after
    # file I/O or behind an unrelated file/parse error.
    if split not in ("train", "valid", "test"):
        raise ValueError(f"Unknown split {split}")

    # Load scene data; with orient="index" the top-level JSON keys become
    # the row index, which is what the returned dict is keyed by.
    with open(config.path.scenes_file, encoding="utf-8") as fp:
        df_scenes = pd.read_json(fp, orient="index")
    scenes = df_scenes[df_scenes["split"] == split].to_dict("index")

    # Load listener data (audiograms)
    listeners = Listener.load_listener_dict(config.path.listeners_file)

    # Load the scene-listener pairings and keep only scenes in this split
    with open(config.path.scenes_listeners_file, encoding="utf-8") as fp:
        scenes_listeners = json.load(fp)
    scenes_listeners = {k: v for k, v in scenes_listeners.items() if k in scenes}

    return scenes, listeners, scenes_listeners
def make_scene_listener_list(scenes_listeners, small_test=False):
    """Build the list of (scene, listener) pairs to process.

    Args:
        scenes_listeners: Mapping from each scene to an iterable of the
            listeners paired with it.
        small_test: When truthy, keep only every 15th pair (starting with the
            first) to give a standard reduced run.

    Returns:
        list: ``(scene, listener)`` tuples in iteration order of
            ``scenes_listeners`` and, within a scene, of its listeners.
    """
    pairs = []
    for scene_id in scenes_listeners:
        for listener_id in scenes_listeners[scene_id]:
            pairs.append((scene_id, listener_id))

    # A standard 'small_test' uses just 1/15 of the data
    return pairs[::15] if small_test else pairs