Source code for clarity.utils.flac_encoder

"""
Class for encoding and decoding audio signals
    using flac compression.
"""

from __future__ import annotations

import logging
import tempfile

# pylint: disable=import-error, protected-access
from pathlib import Path

import numpy as np
import pyflac as pf
import soundfile as sf

from clarity.utils.signal_processing import clip_signal, resample, to_16bit

logger = logging.getLogger(__name__)



[docs]
class WavEncoder(pf.encoder._Encoder):
    """
    Class offers an adaptation of the pyflac.encoder.FileEncoder
    to work directly with WAV signals as input.

    """

    def __init__(
        self,
        signal: np.ndarray,
        sample_rate: int,
        output_file: str | Path | None = None,
        compression_level: int = 5,
        blocksize: int = 0,
        streamable_subset: bool = True,
        verify: bool = False,
    ) -> None:
        """
        Initialise the encoder.

        Args:
            signal (np.ndarray): The raw audio data to be encoded.
            sample_rate (int): The sample rate of the audio data.
            output_file (str | Path | None): Path to the output FLAC file,
                a temporary file will be created if unspecified.
            compression_level (int): The compression level parameter that
                varies from 0 (fastest) to 8 (slowest). The default setting
                is 5, see https://en.wikipedia.org/wiki/FLAC for more details.
            blocksize (int): The size of the block to be returned in the
                callback. The default is 0 which allows libFLAC to determine
                the best block size.
            streamable_subset (bool): Whether to use the streamable subset for encoding.
                If true the encoder will check settings for compatibility. If false, the
                settings may take advantage of the full range that the format allows.
            verify (bool): If `True`, the encoder will verify it's own
                encoded output by feeding it through an internal decoder and
                comparing the original signal against the decoded signal.
                If a mismatch occurs, the `process` method will raise a
                `EncoderProcessException`.  Note that this will slow the
                encoding process by the extra time required for decoding and comparison.
        """
        super().__init__()

        self.__raw_audio = signal
        self._sample_rate = sample_rate

        if output_file:
            self.__output_file = (
                Path(output_file) if isinstance(output_file, str) else output_file
            )
        else:
            with tempfile.NamedTemporaryFile(suffix=".flac") as ofile:
                self.__output_file = Path(ofile.name)

        self._blocksize = blocksize
        self._compression_level = compression_level
        self._streamable_subset = streamable_subset
        self._verify = verify
        self._initialised = False

    def _init(self):
        """
        Initialise the encoder to write to a file.

        Raises:
            EncoderInitException: if initialisation fails.
        """
        c_output_filename = pf.encoder._ffi.new(
            "char[]", str(self.__output_file).encode("utf-8")
        )
        rc = pf.encoder._lib.FLAC__stream_encoder_init_file(
            self._encoder,
            c_output_filename,
            pf.encoder._lib._progress_callback,
            self._encoder_handle,
        )
        pf.encoder._ffi.release(c_output_filename)
        if rc != pf.encoder._lib.FLAC__STREAM_ENCODER_INIT_STATUS_OK:
            raise pf.EncoderInitException(rc)

        self._initialised = True


[docs]
    def process(self) -> bytes:
        """
        Process the audio data from the WAV file.

        Returns:
            (bytes): The FLAC encoded bytes.

        Raises:
            EncoderProcessException: if an error occurs when processing the samples
        """
        super().process(self.__raw_audio)
        self.finish()
        with open(self.__output_file, "rb") as f:
            return f.read()





[docs]
class FileDecoder(pf.decoder.FileDecoder):

[docs]
    def process(self) -> tuple[np.ndarray, int]:
        """
        Overwritten version of the process method from the pyflac decoder.
        Original process returns stereo signals in float64 format.

        In this version, the data is returned using the original number
        of channels and in in16 format.

        Returns:
            (tuple): A tuple of the decoded numpy audio array, and the sample rate
                of the audio data.

        Raises:
            DecoderProcessException: if any fatal read, write, or memory allocation
                error occurred (meaning decoding must stop)
        """
        result = pf.decoder._lib.FLAC__stream_decoder_process_until_end_of_stream(
            self._decoder
        )
        if self.state != pf.decoder.DecoderState.END_OF_STREAM and not result:
            raise pf.DecoderProcessException(str(self.state))

        self.finish()
        self.__output.close()
        return sf.read(str(self.__output_file), always_2d=False, dtype="int16")





[docs]
class FlacEncoder:
    """
    Class for encoding and decoding audio signals using FLAC

    It uses the pyflac library to encode and decode the audio data.
    And offers convenient methods for encoding and decoding audio data.
    """

    def __init__(self, compression_level: int = 5) -> None:
        """
        Initialise the compressor.

        Args:
            compression_level (int): The compression level parameter that
                varies from 0 (fastest) to 8 (slowest). The default setting
                is 5, see https://en.wikipedia.org/wiki/FLAC for more details.
        """
        self.compression_level = compression_level


[docs]
    def encode(
        self,
        signal: np.ndarray,
        sample_rate: int,
        output_file: str | Path | None = None,
    ) -> bytes:
        """
        Method to encode the audio data using FLAC compressor.

        It creates a WavEncoder object and uses it to encode the audio data.

        Args:
            signal (np.ndarray): The raw audio data to be compressed.
            sample_rate (int): The sample rate of the audio data.
            output_file (str | Path): Path to where to
                save the output FLAC file. If not specified, a temporary file
                will be created.

        Returns:
            (bytes): The FLAC encoded audio signal.

        Raises:
            ValueError: If the audio signal is not in `np.int16` format.
        """
        if signal.dtype != np.int16:
            logger.error(
                "FLAC encoder only supports 16-bit integer signals, "
                f"but got {signal.dtype}"
            )
            raise ValueError(
                "FLAC encoder only supports 16-bit integer signals, "
                f"but got {signal.dtype}"
            )

        wav_encoder = WavEncoder(
            signal=signal,
            sample_rate=sample_rate,
            compression_level=self.compression_level,
            output_file=output_file,
        )
        return wav_encoder.process()



[docs]
    @staticmethod
    def decode(input_filename: Path | str) -> tuple[np.ndarray, float]:
        """
        Method to decode a flac file to wav audio data.

        It uses the pyflac library to decode the flac file.

        Args:
            input_filename (pathlib.Path | str): Path to the input FLAC file.

        Returns:
            (np.ndarray): The raw audio data.

        Raises:
            FileNotFoundError: If the flac file to decode does not exist.
        """
        input_filename = (
            Path(input_filename) if isinstance(input_filename, str) else input_filename
        )

        if not input_filename.exists():
            logger.error(f"File {input_filename} not found.")
            raise FileNotFoundError(f"File {input_filename} not found.")

        decoder = FileDecoder(input_filename)
        signal, sample_rate = decoder.process()

        return signal, float(sample_rate)





[docs]
def read_flac_signal(filename: Path) -> tuple[np.ndarray, float]:
    """Read a FLAC signal and return it as a numpy array

    Args:
        filename (Path): The path to the FLAC file to read.

    Returns:
        signal (np.ndarray): The decoded signal.
        sample_rate (float): The sample rate of the signal.
    """
    # Create encoder object
    flac_encoder = FlacEncoder()

    # Decode FLAC file
    signal, sample_rate = flac_encoder.decode(
        filename,
    )
    signal = (signal / 32768.0).astype(np.float32)

    # Load scale factor
    if filename.with_suffix(".txt").exists():
        with open(filename.with_suffix(".txt"), encoding="utf-8") as fp:
            max_value = float(fp.read())
            # Scale signal
            signal *= max_value
    return signal, sample_rate




[docs]
def save_flac_signal(
    signal: np.ndarray,
    filename: Path,
    signal_sample_rate: int,
    output_sample_rate: int | None = None,
    do_clip_signal: bool = False,
    do_soft_clip: bool = False,
    do_scale_signal: bool = False,
) -> None:
    """
    Function to save output signals.

    - The output signal will be resample to ``output_sample_rate``.
        If ``output_sample_rate`` is None, the output signal will have
        the same sample rate as the input signal.
    - The output signal will be clipped to [-1, 1] if ``do_clip_signal`` is True
        and use soft clipped if ``do_soft_clip`` is True. Note that if
        ``do_clip_signal`` is False, ``do_soft_clip`` will be ignored.
        Note that if ``do_clip_signal`` is True, ``do_scale_signal`` will be ignored.
    - The output signal will be scaled to [-1, 1] if ``do_scale_signal`` is True.
        If signal is scale, the scale factor will be saved in a TXT file.
        Note that if ``do_clip_signal`` is True, ``do_scale_signal`` will be ignored.
    - The output signal will be saved as a FLAC file.

    Args:
        signal (np.ndarray) : Signal to save
        filename (Path) : Path to save signal
        signal_sample_rate (int) : Sample rate of the input signal
        output_sample_rate (int) : Sample rate of the output signal
        do_clip_signal (bool) : Whether to clip signal
        do_soft_clip (bool) : Whether to apply soft clipping
        do_scale_signal (bool) : Whether to scale signal
    """
    # Resample signal to expected output sample rate
    if output_sample_rate is None:
        output_sample_rate = signal_sample_rate

    if signal_sample_rate != output_sample_rate:
        signal = resample(signal, signal_sample_rate, output_sample_rate)

    if do_scale_signal:
        # Scale stem signal
        max_value = np.max(np.abs(signal))
        signal = signal / max_value

        # Save scale factor
        with open(filename.with_suffix(".txt"), "w", encoding="utf-8") as file:
            file.write(f"{max_value}")

    elif do_clip_signal:
        # Clip the signal
        signal, n_clipped = clip_signal(signal, do_soft_clip)
        if n_clipped > 0:
            logger.warning(f"Writing {filename}: {n_clipped} samples clipped")

    # Convert signal to 16-bit integer
    signal = to_16bit(signal)

    # Create flac encoder object to compress and save the signal
    FlacEncoder().encode(signal, output_sample_rate, filename)