Source code for diffFx_pytorch.processors.spatial.widener

import torch 
import numpy as np 
from typing import Dict, Union
from ..base_utils import check_params 
from ..base import ProcessorsBase, EffectParam
from ..core.midside import * 

[docs]class StereoWidener(ProcessorsBase):
    """Differentiable implementation of mid-side stereo width control.
    
    This processor implements stereo width adjustment using mid-side (M/S) processing,
    allowing continuous control from mono to enhanced stereo width. It operates by
    converting the input to M/S representation, scaling the side signal, and converting
    back to left-right stereo.

    The width control is implemented using the following process:
    
    .. math::

        M_{out} = M_{in} * 2(1 - width)
        
        S_{out} = S_{in} * 2(width)

    where:
        - M is the mid (mono) signal: (L + R) / √2
        - S is the side (difference) signal: (L - R) / √2
        - width is the stereo width control parameter
        - Scaling ensures energy preservation across width settings

    Processing Chain:
        1. Convert L/R to M/S representation
        2. Scale mid and side signals based on width
        3. Convert back to L/R representation

    Args:
        sample_rate (int): Audio sample rate in Hz

    Parameters Details:
        width: Stereo width control
            - 0.0: Mono (side signal removed)
            - 0.5: Original stereo (no change)
            - 1.0: Enhanced stereo (doubled side signal)
            - Continuously variable between these points
            - Maintains constant total energy

    Note:
        - Input must be stereo (two channels)
        - Uses energy-preserving M/S conversion matrices
        - Width control affects the ratio of mid to side signal
        - Extreme width settings may cause phase issues
        - Mono compatibility is maintained across all settings

    Warning:
        When using with neural networks:
            - norm_params must be in range [0, 1]
            - Parameter will be automatically mapped to width range
            - Ensure your network output is properly normalized (e.g., using sigmoid)
            - Parameter order must match _register_default_parameters()

    Examples:
        Basic DSP Usage:
            >>> # Create a stereo widener
            >>> widener = StereoWidener(sample_rate=44100)
            >>> # Process stereo audio with direct width control
            >>> output = widener(input_audio, dsp_params={
            ...     'width': 0.75  # Enhance stereo width by 50%
            ... })

        Neural Network Control:
            >>> # 1. Simple parameter prediction
            >>> class WidthController(nn.Module):
            ...     def __init__(self, input_size):
            ...         super().__init__()
            ...         self.net = nn.Sequential(
            ...             nn.Linear(input_size, 32),
            ...             nn.ReLU(),
            ...             nn.Linear(32, 1),
            ...             nn.Sigmoid()  # Ensures output is in [0,1] range
            ...         )
            ...     
            ...     def forward(self, x):
            ...         return self.net(x)
            >>> 
            >>> # Initialize controller
            >>> widener = StereoWidener(sample_rate=44100)
            >>> controller = WidthController(input_size=16)
            >>> 
            >>> # Process with features
            >>> features = torch.randn(batch_size, 16)  # Audio features
            >>> norm_params = {'width': controller(features)}
            >>> output = widener(input_audio, norm_params=norm_params)
    """
[docs]    def _register_default_parameters(self):
        """Register the width parameter.
        
        Sets up the width parameter with range:
            - 0.0: Mono (collapse to center)
            - 0.5: No change (original stereo)
            - 1.0: Enhanced stereo (maximum width)
        """
        # 0.0 -> mono 0.5 -> no change 1.0 -> stereo
        self.params = {
            'width': EffectParam(min_val=0.0, max_val=1.0),
        } 
    
[docs]    def process(self, x: torch.Tensor, norm_params: Union[Dict[str, torch.Tensor], None] = None, dsp_params: Union[Dict[str, torch.Tensor], None] = None):
        """Process input signal through the stereo widener.
        
        Args:
            x (torch.Tensor): Input audio tensor. Shape: (batch, 2, samples)
            norm_params (Dict[str, torch.Tensor]): Normalized parameters (0 to 1)
                Must contain the following keys:
                - 'width': Stereo width control (0 to 1)
                    0.0: Mono/centered
                    0.5: Original stereo width
                    1.0: Maximum width
                Each value should be a tensor of shape (batch_size,)
            dsp_params (Dict[str, Union[float, torch.Tensor]], optional): Direct DSP parameters.
                Can specify widener parameters as:
                - float/int: Single value applied to entire batch
                - 0D tensor: Single value applied to entire batch
                - 1D tensor: Batch of values matching input batch size
                Parameters will be automatically expanded to match batch size
                and moved to input device if necessary.
                If provided, norm_params must be None.

        Returns:
            torch.Tensor: Processed stereo audio tensor. Shape: (batch, 2, samples)
            
        Raises:
            AssertionError: If input is not stereo (two channels)
        """
        check_params(norm_params, dsp_params)
        
        # get parameters 
        if norm_params is not None:
            params = self.map_parameters(norm_params)
        else:
            params = dsp_params
        
        width = params['width']
        bs, chs, seq_len = x.size()
        assert chs == 2, "Input tensor must have shape (bs, 2, seq_len)"
        
        x_ms = lr_to_ms(x, mult=1/np.sqrt(2)) 
        
        # Split M/S signals
        m, s = torch.split(x_ms, (1, 1), -2)
        
        # Adjust side signal based on width
        # width = 0.0 -> side * 0 = mono
        # width = 0.5 -> side * 1 = original stereo
        # width = 1.0 -> side * 2 = wider stereo
        width = width.view(-1, 1, 1)
        mid = m * (2 * (1 - width)) 
        side = s * (2 * width) 
        
        # Recombine M/S
        x_ms = torch.cat([mid, side], -2)
        x_lr = ms_to_lr(x_ms, mult=1/np.sqrt(2))
        
        return x_lr