Source code for diffFx_pytorch.processors.modulation.flanger

import torch 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np 
from typing import Dict, List, Tuple, Union
from ..base import ProcessorsBase, EffectParam
from ..base_utils import check_params
from ..core.utils import variable_delay
import math

# ref: https://www.audiokit.io/DunneAudioKit/documentation/dunneaudiokit/modulationeffects
# ref: https://ccrma.stanford.edu/~jos/pasp/Flanging.html

from torch import Tensor as T
import torch
import torch.nn as nn
from typing import Dict, Union

class Flanger(ProcessorsBase):
    """Differentiable implementation of a flanger audio effect.

    Implementation is based on:

    ..  [1] Reiss, Joshua D., and Andrew McPherson.
            Audio effects: theory, implementation and application. CRC Press, 2014.

    This processor implements a modulated delay line to create the flanger effect,
    using a low-frequency oscillator (LFO) to modulate a very short delay time.
    The implementation creates the characteristic "swooshing" sound through
    phase cancellation and reinforcement.

    Processing Chain:
        1. Generate LFO for delay modulation
        2. Calculate delay phases
        3. Apply variable delay
        4. Mix with original signal

    The transfer function is:

    .. math::

        y(t) = mix * x(t - d(t)) + (1 - mix) * x(t)

        d(t) = delay_{base} * (1 + depth * sin(2πf_rt))

    where coefficients are functions of:
        - x(t): Input signal
        - f_r: LFO rate in Hz
        - depth: Modulation depth (fraction of the base delay)
        - delay_base: Base delay time
        - mix: Wet/dry balance

    Args:
        sample_rate (int): Audio sample rate in Hz. Defaults to 44100.
        param_range: Forwarded unchanged to ``ProcessorsBase.__init__``.

    Attributes:
        sample_rate (int): Audio sample rate in Hz

    Parameters Details:
        delay_ms: Base delay time
            - Range: 1.0 to 10.0 ms
            - Controls center delay time
            - Very short delays for flanger effect

        rate: LFO modulation frequency
            - Range: 0.1 to 2.0 Hz
            - Controls modulation speed
            - Lower values create slow sweeps
            - Higher values for faster effects

        depth: Modulation intensity
            - Range: 0.0 to 1.0
            - Controls sweep width
            - Affects intensity of effect

        mix: Wet/dry balance
            - Range: 0.0 to 1.0
            - 0.0: Only clean signal
            - 1.0: Only flanged signal

    Note:
        The processor supports the following features:
            - Variable delay implementation
            - Smooth LFO modulation
            - Phase-coherent processing
            - Automatic buffer size handling
            - Efficient batch processing

    Warning:
        When using with neural networks:
            - norm_params must be in range [0, 1]
            - Parameters will be automatically mapped to ranges
            - Ensure network output is properly normalized (e.g., using sigmoid)
            - Parameter order must match _register_default_parameters()

    Examples:
        Basic DSP Usage:
            >>> # Create a flanger effect
            >>> flanger = Flanger(
            ...     sample_rate=44100
            ... )
            >>> # Process with musical settings
            >>> output = flanger(input_audio, dsp_params={
            ...     'delay_ms': 5.0,    # 5ms base delay
            ...     'rate': 0.5,        # 0.5 Hz modulation
            ...     'depth': 0.7,       # Strong sweep
            ...     'mix': 0.6          # 60% wet
            ... })

        Neural Network Control:
            >>> # Simple parameter prediction
            >>> class FlangerController(nn.Module):
            ...     def __init__(self, input_size, num_params):
            ...         super().__init__()
            ...         self.net = nn.Sequential(
            ...             nn.Linear(input_size, 32),
            ...             nn.ReLU(),
            ...             nn.Linear(32, num_params),
            ...             nn.Sigmoid()  # Ensures output is in [0,1] range
            ...         )
            ...
            ...     def forward(self, x):
            ...         return self.net(x)
            >>>
            >>> # Initialize controller
            >>> flanger = Flanger(sample_rate=44100)
            >>> num_params = flanger.count_num_parameters()  # 4 parameters
            >>> controller = FlangerController(input_size=16, num_params=num_params)
            >>>
            >>> # Process with features
            >>> features = torch.randn(batch_size, 16)  # Audio features
            >>> norm_params = controller(features)
            >>> output = flanger(input_audio, norm_params=norm_params)
    """

    def _register_default_parameters(self):
        """Register default parameters for the flanger effect.

        Sets up:
            delay_ms: Base delay time (1.0 to 10.0 ms)
            rate: LFO modulation rate (0.1 to 2.0 Hz)
            depth: Modulation intensity (0.0 to 1.0)
            mix: Wet/dry balance (0.0 to 1.0)
        """
        self.params = {
            'delay_ms': EffectParam(min_val=1.0, max_val=10.0),  # base delay in ms
            'rate': EffectParam(min_val=0.1, max_val=2.0),       # LFO rate in Hz
            'depth': EffectParam(min_val=0.0, max_val=1.0),      # fraction of base delay
            'mix': EffectParam(min_val=0.0, max_val=1.0),        # wet/dry balance
        }

    def __init__(self, sample_rate=44100, param_range=None):
        """Create the processor and register its default parameter ranges.

        Args:
            sample_rate (int): Audio sample rate in Hz.
            param_range: Forwarded unchanged to ``ProcessorsBase.__init__``.
        """
        super().__init__(sample_rate, param_range)
        self.sample_rate = sample_rate
        # NOTE(review): this re-assigns self.params to the defaults after the base
        # constructor has run; confirm it does not discard a custom param_range.
        self._register_default_parameters()

    @staticmethod
    def _as_batch_param(value, ref: torch.Tensor) -> torch.Tensor:
        """Return ``value`` as a tensor of shape (-1, 1, 1).

        Plain Python numbers are converted to tensors on ``ref``'s device and
        dtype; tensors are only reshaped.
        """
        if not torch.is_tensor(value):
            value = torch.as_tensor(value, dtype=ref.dtype, device=ref.device)
        return value.view(-1, 1, 1)

    def process(self,
        x: torch.Tensor, norm_params: Union[Dict[str, torch.Tensor], None] = None,
        dsp_params: Union[Dict[str, torch.Tensor], None] = None
    ) -> torch.Tensor:
        """Process input signal through the flanger effect.

        Args:
            x (torch.Tensor): Input audio tensor. Shape: (batch, channels, samples)
            norm_params (Dict[str, torch.Tensor]): Normalized parameters (0 to 1)
                Must contain the following keys:
                    - 'delay_ms': Base delay time (0 to 1)
                    - 'rate': LFO frequency (0 to 1)
                    - 'depth': Modulation intensity (0 to 1)
                    - 'mix': Wet/dry balance (0 to 1)
                Each value should be a tensor of shape (batch_size,)
            dsp_params (Dict[str, Union[float, torch.Tensor]], optional): Direct DSP parameters.
                Can specify flanger parameters as:
                - float/int: Single value applied to entire batch
                - 0D tensor: Single value applied to entire batch
                - 1D tensor: Batch of values matching input batch size
                If provided, norm_params must be None.

        Returns:
            torch.Tensor: Processed audio tensor of same shape as input
        """
        check_params(norm_params, dsp_params)
        params = self.map_parameters(norm_params) if norm_params is not None else dsp_params

        batch_size, n_ch, n_samples = x.shape

        # Reshape every parameter to (batch or 1, 1, 1) so it broadcasts over
        # channels and time.
        delay_ms = self._as_batch_param(params['delay_ms'], x)
        rate = self._as_batch_param(params['rate'], x)
        depth = self._as_batch_param(params['depth'], x)
        mix = self._as_batch_param(params['mix'], x)

        # Base delay in (fractional) samples.
        delay_center = delay_ms / 1000.0 * self.sample_rate

        # The modulated delay peaks at delay_center * (1 + depth); size the
        # buffer from that peak (rounded up) so the normalized phase below
        # never exceeds 1.
        # NOTE(review): assumes variable_delay expects phase in [0, 1]
        # relative to buf_size - confirm against core.utils.variable_delay.
        peak_delay = (delay_center * (1.0 + depth.abs())).max()
        max_delay_samples = max(1, math.ceil(float(peak_delay)))

        # Sample instants k / sample_rate. linspace(0, n/sr, n) would include
        # the endpoint and stretch the step to n / (sr * (n - 1)).
        time = torch.arange(n_samples, device=x.device) / self.sample_rate

        # Sinusoidal LFO, one per batch item: (batch or 1, 1, n_samples)
        delay_lfo = torch.sin(2 * math.pi * rate * time.view(1, 1, -1))

        # Instantaneous delay in samples, then normalized by the buffer size.
        delay_value = delay_lfo * (depth * delay_center) + delay_center
        delay_phase = delay_value / max_delay_samples

        # Same modulation for every channel (and every batch item when the
        # parameters were given as scalars).
        delay_phase = delay_phase.expand(batch_size, n_ch, -1)

        delayed = variable_delay(delay_phase, x, buf_size=max_delay_samples)

        # Wet/dry mix
        return mix * delayed + (1 - mix) * x

class StereoFlanger(ProcessorsBase):
    """Differentiable implementation of a stereo flanger effect with quadrature LFOs.

    Implementation is based on:

    ..  [1] Reiss, Joshua D., and Andrew McPherson.
            Audio effects: theory, implementation and application. CRC Press, 2014.

    This processor implements a stereo flanger that uses quadrature (90° phase-shifted)
    LFOs for the left and right channels, creating a wide stereo image through
    independent modulation. The implementation provides smooth phase differences
    between channels while maintaining the characteristic flanger sound.

    Processing Chain:
        1. Generate quadrature LFOs for stereo modulation
        2. Calculate independent channel delays
        3. Apply stereo variable delay
        4. Mix with original signal

    The transfer function for each channel is:

    .. math::

        y_L(t) = mix * x_L(t - d_L(t)) + (1 - mix) * x_L(t)

        y_R(t) = mix * x_R(t - d_R(t)) + (1 - mix) * x_R(t)

        d_L(t) = delay_{base} * (1 + depth * sin(2πf_rt))

        d_R(t) = delay_{base} * (1 + depth * sin(2πf_rt + π/2))

    where coefficients are functions of:
        - x_L, x_R: Left and right input signals
        - f_r: LFO rate in Hz
        - depth: Modulation depth (fraction of the base delay)
        - delay_base: Base delay time
        - mix: Wet/dry balance

    Args:
        sample_rate (int): Audio sample rate in Hz. Defaults to 44100.
        param_range: Forwarded unchanged to ``ProcessorsBase.__init__``.

    Attributes:
        sample_rate (int): Audio sample rate in Hz

    Parameters Details:
        delay_ms: Base delay time
            - Range: 1.0 to 10.0 ms
            - Controls center delay time
            - Very short delays for flanger effect

        rate: LFO modulation frequency
            - Range: 0.1 to 2.0 Hz
            - Controls modulation speed
            - Lower values create slow stereo sweeps

        depth: Modulation intensity
            - Range: 0.0 to 1.0
            - Controls stereo sweep width
            - Affects intensity of effect

        mix: Wet/dry balance
            - Range: 0.0 to 1.0
            - 0.0: Only clean signal
            - 1.0: Only flanged signal

    Note:
        The processor supports the following features:
            - Quadrature LFOs for true stereo
            - Independent channel processing
            - Phase-coherent stereo field
            - Automatic buffer size handling
            - Efficient batch processing

    Warning:
        When using with neural networks:
            - norm_params must be in range [0, 1]
            - Parameters will be automatically mapped to ranges
            - Ensure network output is properly normalized (e.g., using sigmoid)
            - Parameter order must match _register_default_parameters()
            - Input must be stereo (2 channels)

    Examples:
        Basic DSP Usage:
            >>> # Create a stereo flanger
            >>> flanger = StereoFlanger(
            ...     sample_rate=44100
            ... )
            >>> # Process with musical settings
            >>> output = flanger(input_audio, dsp_params={
            ...     'delay_ms': 5.0,    # 5ms base delay
            ...     'rate': 0.5,        # 0.5 Hz modulation
            ...     'depth': 0.7,       # Strong sweep
            ...     'mix': 0.6          # 60% wet
            ... })

        Neural Network Control:
            >>> # Simple parameter prediction
            >>> class StereoFlangerController(nn.Module):
            ...     def __init__(self, input_size, num_params):
            ...         super().__init__()
            ...         self.net = nn.Sequential(
            ...             nn.Linear(input_size, 32),
            ...             nn.ReLU(),
            ...             nn.Linear(32, num_params),
            ...             nn.Sigmoid()  # Ensures output is in [0,1] range
            ...         )
            ...
            ...     def forward(self, x):
            ...         return self.net(x)
            >>>
            >>> # Initialize controller
            >>> flanger = StereoFlanger(sample_rate=44100)
            >>> num_params = flanger.count_num_parameters()  # 4 parameters
            >>> controller = StereoFlangerController(input_size=16, num_params=num_params)
            >>>
            >>> # Process with features
            >>> features = torch.randn(batch_size, 16)  # Audio features
            >>> norm_params = controller(features)
            >>> output = flanger(input_audio, norm_params=norm_params)
    """

    def _register_default_parameters(self):
        """Register default parameters for the stereo flanger effect.

        Sets up:
            delay_ms: Base delay time (1.0 to 10.0 ms)
            rate: LFO modulation rate (0.1 to 2.0 Hz)
            depth: Modulation intensity (0.0 to 1.0)
            mix: Wet/dry balance (0.0 to 1.0)
        """
        self.params = {
            'delay_ms': EffectParam(min_val=1.0, max_val=10.0),  # base delay in ms
            'rate': EffectParam(min_val=0.1, max_val=2.0),       # LFO rate in Hz
            'depth': EffectParam(min_val=0.0, max_val=1.0),      # fraction of base delay
            'mix': EffectParam(min_val=0.0, max_val=1.0),        # wet/dry balance
        }

    def __init__(self, sample_rate=44100, param_range=None):
        """Create the processor and register its default parameter ranges.

        Args:
            sample_rate (int): Audio sample rate in Hz.
            param_range: Forwarded unchanged to ``ProcessorsBase.__init__``.
        """
        super().__init__(sample_rate, param_range)
        self.sample_rate = sample_rate
        # NOTE(review): this re-assigns self.params to the defaults after the base
        # constructor has run; confirm it does not discard a custom param_range.
        self._register_default_parameters()

    @staticmethod
    def _as_batch_param(value, ref: torch.Tensor) -> torch.Tensor:
        """Return ``value`` as a tensor of shape (-1, 1, 1).

        Plain Python numbers are converted to tensors on ``ref``'s device and
        dtype; tensors are only reshaped.
        """
        if not torch.is_tensor(value):
            value = torch.as_tensor(value, dtype=ref.dtype, device=ref.device)
        return value.view(-1, 1, 1)

    def process(self,
        x: torch.Tensor, norm_params: Union[Dict[str, torch.Tensor], None] = None,
        dsp_params: Union[Dict[str, torch.Tensor], None] = None
    ) -> torch.Tensor:
        """Process input signal through the stereo flanger effect.

        Args:
            x (torch.Tensor): Input audio tensor. Shape: (batch, 2, samples)
                Must be stereo input (2 channels).
            norm_params (Dict[str, torch.Tensor]): Normalized parameters (0 to 1)
                Must contain the following keys:
                    - 'delay_ms': Base delay time (0 to 1)
                    - 'rate': LFO frequency (0 to 1)
                    - 'depth': Modulation intensity (0 to 1)
                    - 'mix': Wet/dry balance (0 to 1)
                Each value should be a tensor of shape (batch_size,)
            dsp_params (Dict[str, Union[float, torch.Tensor]], optional): Direct DSP parameters.
                Can specify flanger parameters as:
                - float/int: Single value applied to entire batch
                - 0D tensor: Single value applied to entire batch
                - 1D tensor: Batch of values matching input batch size
                If provided, norm_params must be None.

        Returns:
            torch.Tensor: Processed stereo audio tensor. Shape: (batch, 2, samples)

        Raises:
            AssertionError: If input is not stereo (2 channels)
        """
        check_params(norm_params, dsp_params)
        params = self.map_parameters(norm_params) if norm_params is not None else dsp_params

        batch_size, n_ch, n_samples = x.shape
        # Raised explicitly (rather than via `assert`) so the check survives
        # `python -O`; the exception type and message are unchanged.
        if n_ch != 2:
            raise AssertionError("Input tensor must have shape (bs, 2, seq_len)")

        # Reshape every parameter to (batch or 1, 1, 1) so it broadcasts over
        # channels and time.
        delay_ms = self._as_batch_param(params['delay_ms'], x)
        rate = self._as_batch_param(params['rate'], x)
        depth = self._as_batch_param(params['depth'], x)
        mix = self._as_batch_param(params['mix'], x)

        # Base delay in (fractional) samples.
        delay_center = delay_ms / 1000.0 * self.sample_rate

        # The modulated delay peaks at delay_center * (1 + depth); size the
        # buffer from that peak (rounded up) so the normalized phase below
        # never exceeds 1.
        # NOTE(review): assumes variable_delay expects phase in [0, 1]
        # relative to buf_size - confirm against core.utils.variable_delay.
        peak_delay = (delay_center * (1.0 + depth.abs())).max()
        max_delay_samples = max(1, math.ceil(float(peak_delay)))

        # Sample instants k / sample_rate. linspace(0, n/sr, n) would include
        # the endpoint and stretch the step to n / (sr * (n - 1)).
        time = torch.arange(n_samples, device=x.device) / self.sample_rate

        # Quadrature LFOs: the right channel leads the left by 90 degrees.
        phase_left = 2 * math.pi * rate * time.view(1, 1, -1)
        phase_right = phase_left + math.pi / 2
        delay_lfo = torch.cat(
            [torch.sin(phase_left), torch.sin(phase_right)], dim=1
        )  # (batch or 1, 2, n_samples)

        # Per-channel instantaneous delay in samples, normalized by buffer size.
        delay_value = delay_lfo * (depth * delay_center) + delay_center
        delay_phase = delay_value / max_delay_samples

        # Repeat across the batch when the parameters were given as scalars.
        delay_phase = delay_phase.expand(batch_size, -1, -1)

        delayed = variable_delay(delay_phase, x, buf_size=max_delay_samples)

        # Wet/dry mix
        return mix * delayed + (1 - mix) * x
     
class FeedbackFlanger(ProcessorsBase):
    """Differentiable implementation of a feedback-style flanger effect.

    Implementation is based on:

    ..  [1] Reiss, Joshua D., and Andrew McPherson.
            Audio effects: theory, implementation and application. CRC Press, 2014.

    This processor implements a flanger whose delay-line input is boosted by a
    ``feedback`` gain. The reference design feeds the delayed signal back into
    the input, which creates resonant peaks in the frequency response and a
    more pronounced, characteristically "metallic" flanger sound.

    Processing Chain:
        1. Generate LFO for delay modulation
        2. Sum input with the feedback-scaled signal
        3. Apply modulated delay
        4. Mix with original signal

    The transfer function realized by the current code is:

    .. math::

        y(t) = mix * (1 + fb) * x(t - d(t)) + (1 - mix) * x(t)

        d(t) = delay_{base} * (1 + depth * sin(2πf_rt))

    where coefficients are functions of:
        - x(t): Input signal
        - f_r: LFO rate in Hz
        - depth: Modulation depth (fraction of the base delay)
        - delay_base: Base delay time
        - fb: Feedback amount
        - mix: Wet/dry balance

    Args:
        sample_rate (int): Audio sample rate in Hz. Defaults to 44100.
        param_range: Forwarded unchanged to ``ProcessorsBase.__init__``.

    Attributes:
        sample_rate (int): Audio sample rate in Hz

    Parameters Details:
        delay_ms: Base delay time
            - Range: 1.0 to 10.0 ms
            - Controls center delay time
            - Very short delays for flanger effect

        rate: LFO modulation frequency
            - Range: 0.1 to 10.0 Hz
            - Controls modulation speed
            - Lower values create slow sweeps

        depth: Modulation intensity
            - Range: 0.0 to 0.25
            - Controls sweep width
            - Affects intensity of effect

        feedback: Feedback amount
            - Range: 0.0 to 0.7
            - Controls resonance intensity
            - Higher values create metallic sound

        mix: Wet/dry balance
            - Range: 0.0 to 1.0
            - 0.0: Only clean signal
            - 1.0: Only flanged signal

    Note:
        The processor supports the following features:
            - Variable delay implementation
            - Feedback gain on the delay-line input
            - Feedback range capped at 0.7 by the default parameter range
            - Efficient batch processing

        The delay line is evaluated in a single pass: the "fed back" signal is
        the input itself, so the delay-line input is ``(1 + feedback) * x``.
        The delayed output is not recirculated; a true recursive feedback path
        (``y(t - d(t))`` fed back into the input) is not implemented here.

    Warning:
        When using with neural networks:
            - norm_params must be in range [0, 1]
            - Parameters will be automatically mapped to ranges
            - Ensure network output is properly normalized (e.g., using sigmoid)
            - Parameter order must match _register_default_parameters()
            - High feedback raises the level of the wet signal

    Examples:
        Basic DSP Usage:
            >>> # Create a feedback flanger
            >>> flanger = FeedbackFlanger(
            ...     sample_rate=44100
            ... )
            >>> # Process with musical settings
            >>> output = flanger(input_audio, dsp_params={
            ...     'delay_ms': 5.0,     # 5ms base delay
            ...     'rate': 0.5,         # 0.5 Hz modulation
            ...     'depth': 0.15,       # Moderate sweep
            ...     'feedback': 0.4,     # Medium resonance
            ...     'mix': 0.6           # 60% wet
            ... })

        Neural Network Control:
            >>> # Simple parameter prediction
            >>> class FlangerController(nn.Module):
            ...     def __init__(self, input_size, num_params):
            ...         super().__init__()
            ...         self.net = nn.Sequential(
            ...             nn.Linear(input_size, 32),
            ...             nn.ReLU(),
            ...             nn.Linear(32, num_params),
            ...             nn.Sigmoid()  # Ensures output is in [0,1] range
            ...         )
            ...
            ...     def forward(self, x):
            ...         return self.net(x)
            >>>
            >>> # Initialize controller
            >>> flanger = FeedbackFlanger(sample_rate=44100)
            >>> num_params = flanger.count_num_parameters()  # 5 parameters
            >>> controller = FlangerController(input_size=16, num_params=num_params)
            >>>
            >>> # Process with features
            >>> features = torch.randn(batch_size, 16)  # Audio features
            >>> norm_params = controller(features)
            >>> output = flanger(input_audio, norm_params=norm_params)
    """

    def _register_default_parameters(self):
        """Register default parameters for the feedback flanger effect.

        Sets up:
            delay_ms: Base delay time (1.0 to 10.0 ms)
            rate: LFO modulation rate (0.1 to 10.0 Hz)
            depth: Modulation intensity (0.0 to 0.25)
            feedback: Feedback amount (0.0 to 0.7)
            mix: Wet/dry balance (0.0 to 1.0)
        """
        self.params = {
            'delay_ms': EffectParam(min_val=1.0, max_val=10.0),  # base delay in ms
            'rate': EffectParam(min_val=0.1, max_val=10.0),      # LFO rate in Hz
            'depth': EffectParam(min_val=0.0, max_val=0.25),     # fraction of base delay
            'feedback': EffectParam(min_val=0.0, max_val=0.7),   # feedback gain
            'mix': EffectParam(min_val=0.0, max_val=1.0),        # wet/dry balance
        }

    def __init__(self, sample_rate=44100, param_range=None):
        """Create the processor and register its default parameter ranges.

        Args:
            sample_rate (int): Audio sample rate in Hz.
            param_range: Forwarded unchanged to ``ProcessorsBase.__init__``.
        """
        super().__init__(sample_rate, param_range)
        self.sample_rate = sample_rate
        # NOTE(review): this re-assigns self.params to the defaults after the base
        # constructor has run; confirm it does not discard a custom param_range.
        self._register_default_parameters()

    @staticmethod
    def _as_batch_param(value, ref: torch.Tensor) -> torch.Tensor:
        """Return ``value`` as a tensor of shape (-1, 1, 1).

        Plain Python numbers are converted to tensors on ``ref``'s device and
        dtype; tensors are only reshaped.
        """
        if not torch.is_tensor(value):
            value = torch.as_tensor(value, dtype=ref.dtype, device=ref.device)
        return value.view(-1, 1, 1)

    def process(self,
        x: torch.Tensor, norm_params: Union[Dict[str, torch.Tensor], None] = None,
        dsp_params: Union[Dict[str, torch.Tensor], None] = None
    ) -> torch.Tensor:
        """Process input signal through the feedback flanger effect.

        Args:
            x (torch.Tensor): Input audio tensor. Shape: (batch, channels, samples)
            norm_params (Dict[str, torch.Tensor]): Normalized parameters (0 to 1)
                Must contain the following keys:
                    - 'delay_ms': Base delay time (0 to 1)
                    - 'rate': LFO frequency (0 to 1)
                    - 'depth': Modulation intensity (0 to 1)
                    - 'feedback': Feedback amount (0 to 1)
                    - 'mix': Wet/dry balance (0 to 1)
                Each value should be a tensor of shape (batch_size,)
            dsp_params (Dict[str, Union[float, torch.Tensor]], optional): Direct DSP parameters.
                Can specify flanger parameters as:
                - float/int: Single value applied to entire batch
                - 0D tensor: Single value applied to entire batch
                - 1D tensor: Batch of values matching input batch size
                If provided, norm_params must be None.

        Returns:
            torch.Tensor: Processed audio tensor of same shape as input
        """
        check_params(norm_params, dsp_params)
        params = self.map_parameters(norm_params) if norm_params is not None else dsp_params

        batch_size, n_ch, n_samples = x.shape

        # Reshape every parameter to (batch or 1, 1, 1) so it broadcasts over
        # channels and time.
        delay_ms = self._as_batch_param(params['delay_ms'], x)
        rate = self._as_batch_param(params['rate'], x)
        depth = self._as_batch_param(params['depth'], x)
        mix = self._as_batch_param(params['mix'], x)
        feedback = self._as_batch_param(params['feedback'], x)

        # Base delay in (fractional) samples.
        delay_center = delay_ms / 1000.0 * self.sample_rate

        # The modulated delay peaks at delay_center * (1 + depth); size the
        # buffer from that peak (rounded up) so the normalized phase below
        # never exceeds 1.
        # NOTE(review): assumes variable_delay expects phase in [0, 1]
        # relative to buf_size - confirm against core.utils.variable_delay.
        peak_delay = (delay_center * (1.0 + depth.abs())).max()
        max_delay_samples = max(1, math.ceil(float(peak_delay)))

        # Sample instants k / sample_rate. linspace(0, n/sr, n) would include
        # the endpoint and stretch the step to n / (sr * (n - 1)).
        time = torch.arange(n_samples, device=x.device) / self.sample_rate

        # Sinusoidal LFO, one per batch item: (batch or 1, 1, n_samples)
        phase = 2 * math.pi * rate * time.view(1, 1, -1)
        delay_lfo = torch.sin(phase)

        # Instantaneous delay in samples, then normalized by the buffer size.
        delay_value = delay_lfo * (depth * delay_center) + delay_center
        delay_phase = delay_value / max_delay_samples

        # Same modulation for every channel (and every batch item when the
        # parameters were given as scalars).
        delay_phase = delay_phase.expand(batch_size, n_ch, -1)

        # The feedback signal is initialized to the dry input and never
        # updated, so the delay line is fed (1 + feedback) * x in one pass.
        delay_input = x + feedback * x
        delayed = variable_delay(delay_phase, delay_input, buf_size=max_delay_samples)

        # Wet/dry mix
        return mix * delayed + (1 - mix) * x