# Source code for recipes.cec3.baseline.enhance

""" Run the dummy enhancement. """

import json
import logging
import pathlib

import hydra
import numpy as np
from omegaconf import DictConfig
from scipy.io import wavfile
from tqdm import tqdm

from clarity.enhancer.compressor import Compressor
from clarity.enhancer.nalr import NALR
from clarity.utils.audiogram import Audiogram, Listener
from recipes.icassp_2023.baseline.evaluate import make_scene_listener_list

# Module-level logger, named after this module via ``__name__``.
# NOTE(review): not referenced in the visible part of this file.
logger = logging.getLogger(__name__)


def amplify_signal(signal, audiogram: Audiogram, enhancer, compressor):
    """Amplify a signal for a given audiogram.

    A filter is built from the audiogram with ``enhancer.build``, applied to
    the signal with ``enhancer.apply``, and the filtered signal is then passed
    through ``compressor.process``.

    Args:
        signal: Samples to amplify; handed unchanged to ``enhancer.apply``.
        audiogram: Audiogram the filter is built from.
        enhancer: Object providing ``build(audiogram)`` (returning a pair whose
            first item is the filter) and ``apply(filter, signal)``.
        compressor: Object providing ``process(signal)``, returning a triple
            whose first item is the processed signal.

    Returns:
        The first item returned by ``compressor.process``.
    """
    # Only the first element of each returned tuple is needed here.
    fir_filter, _ = enhancer.build(audiogram)
    filtered = enhancer.apply(fir_filter, signal)
    compressed, _, _ = compressor.process(filtered)
    return compressed


@hydra.main(config_path=".", config_name="config")
def enhance(cfg: DictConfig) -> None:
    """Run the dummy enhancement.

    For each scene/listener pair, the three ``{scene}_mix_CH1..3.wav`` files
    are read from ``cfg.path.scenes_folder``, scaled, and averaged. The
    average is written to ``enhanced_signals`` and a NALR-amplified,
    compressed version to ``amplified_signals``, both under ``cfg.path.exp``.

    Args:
        cfg: Hydra configuration. Fields read here: ``path.exp``,
            ``path.scenes_listeners_file``, ``path.listeners_file``,
            ``path.scenes_folder``, ``nalr``, ``compressor``,
            ``evaluate.small_test`` and ``soft_clip``.
    """
    enhanced_folder = pathlib.Path(cfg.path.exp) / "enhanced_signals"
    enhanced_folder.mkdir(parents=True, exist_ok=True)

    with open(cfg.path.scenes_listeners_file, encoding="utf-8") as scenes_file:
        scenes_listeners = json.load(scenes_file)

    listener_dict = Listener.load_listener_dict(cfg.path.listeners_file)
    enhancer = NALR(**cfg.nalr)
    compressor = Compressor(**cfg.compressor)

    amplified_folder = pathlib.Path(cfg.path.exp) / "amplified_signals"
    amplified_folder.mkdir(parents=True, exist_ok=True)

    # Every (scene, listener) combination that will be processed.
    scene_listener_pairs = make_scene_listener_list(
        scenes_listeners, cfg.evaluate.small_test
    )

    for scene, listener_id in tqdm(scene_listener_pairs):
        # The sample rate is taken from CH1 only; the rates reported for the
        # other two channels are discarded.
        sample_rate, raw_ch1 = wavfile.read(
            pathlib.Path(cfg.path.scenes_folder) / f"{scene}_mix_CH1.wav"
        )
        _, raw_ch2 = wavfile.read(
            pathlib.Path(cfg.path.scenes_folder) / f"{scene}_mix_CH2.wav"
        )
        _, raw_ch3 = wavfile.read(
            pathlib.Path(cfg.path.scenes_folder) / f"{scene}_mix_CH3.wav"
        )

        # Scale to 32-bit float; dividing by 32768 maps to [-1, 1) assuming
        # the files hold 16-bit PCM samples -- TODO confirm input format.
        full_scale = 32768.0
        float_ch1 = (raw_ch1 / full_scale).astype(np.float32)
        float_ch2 = (raw_ch2 / full_scale).astype(np.float32)
        float_ch3 = (raw_ch3 / full_scale).astype(np.float32)

        # "Dummy" enhancement: the plain average of the three channels.
        signal = (float_ch1 + float_ch2 + float_ch3) / 3

        listener = listener_dict[listener_id]

        enhanced_path = enhanced_folder / f"{scene}_{listener_id}_enhanced.wav"
        wavfile.write(enhanced_path, sample_rate, signal)

        # Apply the baseline NALR amplification: column 0 is processed with
        # the left audiogram and column 1 with the right one.
        out_l = amplify_signal(
            signal[:, 0], listener.audiogram_left, enhancer, compressor
        )
        out_r = amplify_signal(
            signal[:, 1], listener.audiogram_right, enhancer, compressor
        )
        amplified = np.stack([out_l, out_r], axis=1)

        if cfg.soft_clip:
            amplified = np.tanh(amplified)

        amplified_path = amplified_folder / f"{scene}_{listener_id}_HA-output.wav"
        wavfile.write(amplified_path, sample_rate, amplified.astype(np.float32))


if __name__ == "__main__":
    # ``enhance`` is called without arguments: the ``hydra.main`` decorator
    # is what provides ``cfg``, hence the pylint suppression.
    enhance()  # pylint: disable=no-value-for-parameter