# Source code for diffFx_pytorch.processors.delay.pinpong

import torch 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np 
from typing import Dict, List, Tuple, Union
from ..base import ProcessorsBase, EffectParam
from ..base_utils import check_params
from ..core.phase import unwrap_phase


class PingPongDelay(ProcessorsBase):
    """Differentiable implementation of a stereo ping-pong delay effect.

    This processor implements a stereo delay effect where echoes alternate between
    left and right channels, creating a "ping-pong" spatial pattern. The implementation
    evaluates a cross-coupled feedback structure in the frequency domain: the delay
    ``z^{-N}`` is realised as a linear phase term applied to the FFT of the input.

    Implementation is based on:

    ..  [1] Reiss, Joshua D., and Andrew McPherson.
            Audio effects: theory, implementation and application. CRC Press, 2014.
    ..  [2] Smith, Julius O. "Digital Audio Effects."
            https://ccrma.stanford.edu/~jos/fp3/Phase_Unwrapping.html

    The system is described by coupled transfer functions:

    .. math::

        H_{11}(z) = \\frac{1}{1 - b_1b_2z^{-2N}}

        H_{12}(z) = \\frac{b_1z^{-N}}{1 - b_1b_2z^{-2N}}

        H_{21}(z) = \\frac{b_2z^{-N}}{1 - b_1b_2z^{-2N}}

        H_{22}(z) = \\frac{b_1b_2z^{-2N}}{1 - b_1b_2z^{-2N}}

    where:
        - z^(-N) represents the base delay
        - b1, b2 are feedback gains for each channel
        - System stability ensured by |b1*b2| < 1

    The two output channels are formed as::

        Y1 = H11 * X_left + H12 * X_right
        Y2 = H21 * X_left + H22 * X_right

    Processing Chain:
        1. Zero-pad stereo input for delay buffer
        2. Convert to frequency domain
        3. Calculate cross-coupled transfer functions
        4. Apply transfers to each channel
        5. Convert back to time domain
        6. Mix processed signal with original

    Args:
        sample_rate (int): Audio sample rate in Hz
        param_range (Dict[str, EffectParam], optional): Parameter ranges.

    Parameters Details:
        delay_ms: Base delay time
            - Range: 0.1 to 3000.0 milliseconds
            - Controls time between alternating echoes
            - Each bounce takes this amount of time

        feedback_ch1: Left channel feedback gain
            - Range: 0.0 to 0.99
            - Controls decay of left-to-right echoes
            - Higher values create longer decay times

        feedback_ch2: Right channel feedback gain
            - Range: 0.0 to 0.99
            - Controls decay of right-to-left echoes
            - Can differ from ch1 for asymmetric patterns

        mix: Wet/dry mix ratio
            - Range: 0.0 to 1.0
            - 0.0: Only original signal
            - 1.0: Only processed signal

    Note:
        - Uses FFT-based delay for precise time shifting
        - The delay phase is passed through ``unwrap_phase`` before use
        - The input is zero-padded according to the largest delay in the batch
        - Because filtering is done by FFT multiplication, echo tails longer
          than the padded frame wrap around circularly
        - Particularly effective for:
            - Creating rhythmic spatial patterns
            - Adding stereo width and movement
            - Building complex stereo textures
        - System stability is maintained by gain limits

    Examples:
        Basic DSP Usage:
            >>> # Create a ping-pong delay
            >>> delay = PingPongDelay(sample_rate=44100)
            >>> # Process with rhythmic spatial echoes
            >>> output = delay(input_audio, dsp_params={
            ...     'delay_ms': 250.0,     # Quarter note at 120 BPM
            ...     'feedback_ch1': 0.7,   # Left to right decay
            ...     'feedback_ch2': 0.7,   # Right to left decay
            ...     'mix': 0.5            # Equal mix of dry and wet
            ... })

        Neural Network Control:
            >>> # 1. Simple parameter prediction
            >>> class PingPongController(nn.Module):
            ...     def __init__(self, input_size):
            ...         super().__init__()
            ...         self.net = nn.Sequential(
            ...             nn.Linear(input_size, 32),
            ...             nn.ReLU(),
            ...             nn.Linear(32, 4),  # 4 parameters
            ...             nn.Sigmoid()  # Ensures output is in [0,1] range
            ...         )
            ...
            ...     def forward(self, x):
            ...         return self.net(x)
            >>>
            >>> # Process with features
            >>> controller = PingPongController(input_size=16)
            >>> features = torch.randn(batch_size, 16)
            >>> norm_params = controller(features)
            >>> output = delay(input_audio, norm_params=norm_params)
    """

    def _register_default_parameters(self):
        """Register delay time, feedback, and mix parameters.

        Sets up four parameters:
            - delay_ms: Base delay time (0.1 to 3000.0 ms)
            - feedback_ch1: Left channel feedback (0.0 to 0.99)
            - feedback_ch2: Right channel feedback (0.0 to 0.99)
            - mix: Wet/dry mix ratio (0.0 to 1.0)
        """
        self.params = {
            'delay_ms': EffectParam(min_val=0.1, max_val=3000.0),
            'feedback_ch1': EffectParam(min_val=0.0, max_val=0.99),
            'feedback_ch2': EffectParam(min_val=0.0, max_val=0.99),
            'mix': EffectParam(min_val=0.0, max_val=1.0),
        }

    def process(
        self,
        x: torch.Tensor,
        norm_params: Union[Dict[str, torch.Tensor], None] = None,
        dsp_params: Union[Dict[str, Union[float, torch.Tensor]], None] = None,
    ):
        """Process input signal through the ping-pong delay.

        Args:
            x (torch.Tensor): Input audio tensor. Shape: (batch, 2, samples)
            norm_params (Dict[str, torch.Tensor]): Normalized parameters (0 to 1)
                Must contain the following keys:
                - 'delay_ms': Base delay time in milliseconds (0 to 1)
                - 'feedback_ch1': Left channel feedback (0 to 1)
                - 'feedback_ch2': Right channel feedback (0 to 1)
                - 'mix': Wet/dry balance (0 to 1)
                Each value should be a tensor of shape (batch_size,)
            dsp_params (Dict[str, Union[float, torch.Tensor]], optional): Direct DSP parameters.
                Can specify ping-pong parameters as:
                - float/int: Single value applied to entire batch
                - 0D tensor: Single value applied to entire batch
                - 1D tensor: Batch of values matching input batch size
                Non-tensor values are converted to tensors with the dtype and
                device of ``x``; tensor values are moved to the device of ``x``.
                Single values are applied to the whole batch by broadcasting.
                If provided, norm_params must be None.

        Returns:
            torch.Tensor: Processed stereo audio tensor of same shape as input. Shape: (batch, 2, samples)

        Raises:
            AssertionError: If input is not stereo (2 channels)
        """
        # Validate the norm_params / dsp_params combination, then pick the
        # parameter set that is in effect for this call.
        check_params(norm_params, dsp_params)
        if norm_params is not None:
            params = self.map_parameters(norm_params)
        else:
            params = dsp_params

        _, ch, num_samples = x.shape
        assert ch == 2, "Input must be stereo"

        def as_column(value):
            """Return ``value`` as a tensor on x's device, shaped (-1, 1, 1)."""
            if not torch.is_tensor(value):
                # Plain Python / NumPy scalars are accepted per the docstring.
                value = torch.tensor(value, dtype=x.dtype, device=x.device)
            # .to() is a no-op when the tensor already lives on x.device.
            return value.to(x.device).view(-1, 1, 1)

        delay_ms = as_column(params['delay_ms'])
        b1 = as_column(params['feedback_ch1'])
        b2 = as_column(params['feedback_ch2'])
        mix = as_column(params['mix'])

        # Longest delay in the batch, in whole samples (at least one sample).
        max_delay_samples = max(
            1,
            int(torch.max(delay_ms) * self.sample_rate / 1000)
        )

        # FFT size: next power of two that holds the signal plus the delay pad.
        padded_len = num_samples + max_delay_samples
        fft_size = 2 ** int(np.ceil(np.log2(padded_len)))

        # Left-pad by the maximum delay and right-pad up to the FFT size.
        x_padded = torch.nn.functional.pad(
            x, (max_delay_samples, fft_size - padded_len)
        )

        X = torch.fft.rfft(x_padded, n=fft_size)
        freqs = torch.fft.rfftfreq(fft_size, 1 / self.sample_rate).to(x.device)

        # Linear phase corresponding to a delay of delay_ms: z^{-N} on the
        # unit circle, one row per batch item.
        phase = -2 * np.pi * freqs * delay_ms / 1000
        phase = unwrap_phase(phase, dim=-1)
        z_n = torch.exp(1j * phase).to(X.dtype)

        # b1 * b2 * z^{-2N}: shared by the common denominator and by H22.
        loop_gain = b1 * b2 * z_n * z_n

        # eps offsets the denominator slightly so it is not exactly zero.
        eps = 1e-6
        den = 1 - loop_gain + eps

        # Cross-coupled transfer functions for ping-pong behaviour.
        H11 = 1 / den          # left input  -> left output
        H12 = b1 * z_n / den   # right input -> left output (single delay)
        H21 = b2 * z_n / den   # left input  -> right output (single delay)
        H22 = loop_gain / den  # right input -> right output (double delay)

        left_in = X[:, 0:1]
        right_in = X[:, 1:2]
        Y1 = H11 * left_in + H12 * right_in
        Y2 = H21 * left_in + H22 * right_in

        Y = torch.cat([Y1, Y2], dim=1)
        y = torch.fft.irfft(Y, n=fft_size)
        # Drop the leading pad and trim back to the input length.
        y = y[..., max_delay_samples:max_delay_samples + num_samples]

        return (1 - mix) * x + mix * y