runn.keras.layers.mono_dense

Constrained Monotonic Neural Networks.

This Python library implements Constrained Monotonic Neural Networks as described in:

Davor Runje, Sharath M. Shankaranarayana, “Constrained Monotonic Neural Networks”, in Proceedings of the 40th International Conference on Machine Learning, 2023. Github: https://github.com/airtai/monotonic-nn/tree/main

Version: 0.3.4 (https://github.com/airtai/monotonic-nn/releases/tag/0.3.4)

This file has been modified by the authors of RUNN from the original version to make it compatible with the RUNN library.

T = TypeVar('T') module-attribute #

MonoDense(units, *, activation=None, monotonicity_indicator=1, is_convex=False, is_concave=False, activation_weights=(7.0, 7.0, 2.0), **kwargs) #

Bases: Dense

Monotonic counterpart of the regular Dense Layer of tf.keras

Constructs a new MonoDense instance.

PARAMETER DESCRIPTION
units

Positive integer, dimensionality of the output space.

TYPE: int

activation

Activation function to use; it is assumed to be a convex, monotonically increasing function such as "relu" or "elu".

TYPE: Optional[Union[str, Callable[[TensorLike], TensorLike]]] DEFAULT: None

monotonicity_indicator

Vector to indicate which of the inputs are monotonically increasing or monotonically decreasing or non-monotonic. Has value 1 for monotonically increasing, -1 for monotonically decreasing and 0 for non-monotonic.

TYPE: ArrayLike DEFAULT: 1

is_convex

convex if set to True

TYPE: bool DEFAULT: False

is_concave

concave if set to True

TYPE: bool DEFAULT: False

activation_weights

Relative weights for each type of activation (convex, concave, saturated); the default is (7.0, 7.0, 2.0). Ignored if is_convex or is_concave is set to True.

TYPE: Tuple[float, float, float] DEFAULT: (7.0, 7.0, 2.0)

**kwargs

passed as kwargs to the constructor of Dense

TYPE: Any DEFAULT: {}

Raises

ValueError: - if both is_concave and is_convex are set to True, or - if any component of activation_weights is negative or there are not exactly three components
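A minimal usage sketch (assuming TensorFlow 2.x and that MonoDense is importable from runn.keras.layers.mono_dense, as the source path above suggests); the layer is used as a drop-in replacement for tf.keras.layers.Dense:

import tensorflow as tf

from runn.keras.layers.mono_dense import MonoDense

# Three input features: the first is monotonically increasing, the second
# monotonically decreasing, the third unconstrained (values are illustrative).
inputs = tf.keras.Input(shape=(3,))
h = MonoDense(16, activation="elu", monotonicity_indicator=[1, -1, 0])(inputs)
h = MonoDense(16, activation="elu")(h)  # hidden layer keeps the default indicator 1
outputs = MonoDense(1)(h)               # linear output layer

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="adam", loss="mse")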

Source code in runn/keras/layers/mono_dense.py
def __init__(
    self,
    units: int,
    *,
    activation: Optional[Union[str, Callable[[TensorLike], TensorLike]]] = None,
    monotonicity_indicator: ArrayLike = 1,
    is_convex: bool = False,
    is_concave: bool = False,
    activation_weights: Tuple[float, float, float] = (7.0, 7.0, 2.0),
    **kwargs: Any,
):
    """Constructs a new MonoDense instance.

    Params:
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use; it is assumed to be a convex, monotonically
            increasing function such as "relu" or "elu"
        monotonicity_indicator: Vector to indicate which of the inputs are monotonically increasing or
            monotonically decreasing or non-monotonic. Has value 1 for monotonically increasing,
            -1 for monotonically decreasing and 0 for non-monotonic.
        is_convex: convex if set to True
        is_concave: concave if set to True
        activation_weights: relative weights for each type of activation, the default is (7.0, 7.0, 2.0).
            Ignored if is_convex or is_concave is set to True
        **kwargs: passed as kwargs to the constructor of `Dense`

    Raises:
        ValueError:
            - if both **is_concave** and **is_convex** are set to **True**, or
            - if any component of activation_weights is negative or there are not exactly three components
    """
    if is_convex and is_concave:
        raise ValueError("The model cannot be set to be both convex and concave (only linear functions are both).")

    if len(activation_weights) != 3:
        raise ValueError(
            f"There must be exactly three components of activation_weights, but we have this instead: {activation_weights}."
        )

    if (np.array(activation_weights) < 0).any():
        raise ValueError(
            f"Values of activation_weights must be non-negative, but we have this instead: {activation_weights}."
        )

    super(MonoDense, self).__init__(units=units, activation=None, **kwargs)

    self.units = units
    self.org_activation = activation
    self.monotonicity_indicator = monotonicity_indicator
    self.is_convex = is_convex
    self.is_concave = is_concave
    self.activation_weights = activation_weights

    (
        self.convex_activation,
        self.concave_activation,
        self.saturated_activation,
    ) = self.get_activation_functions(self.org_activation)

activation_weights = activation_weights instance-attribute #

is_concave = is_concave instance-attribute #

is_convex = is_convex instance-attribute #

monotonicity_indicator = monotonicity_indicator instance-attribute #

org_activation = activation instance-attribute #

units = units instance-attribute #

apply_activations(x, *, units, convex_activation, concave_activation, saturated_activation, is_convex=False, is_concave=False, activation_weights=(7.0, 7.0, 2.0)) #

Source code in runn/keras/layers/mono_dense.py
@tf.function
def apply_activations(
    self,
    x: TensorLike,
    *,
    units: int,
    convex_activation: Callable[[TensorLike], TensorLike],
    concave_activation: Callable[[TensorLike], TensorLike],
    saturated_activation: Callable[[TensorLike], TensorLike],
    is_convex: bool = False,
    is_concave: bool = False,
    activation_weights: Tuple[float, float, float] = (7.0, 7.0, 2.0),
) -> TensorLike:
    if convex_activation is None:
        return x

    elif is_convex:
        normalized_activation_weights = np.array([1.0, 0.0, 0.0])
    elif is_concave:
        normalized_activation_weights = np.array([0.0, 1.0, 0.0])
    else:
        if len(activation_weights) != 3:
            raise ValueError(f"activation_weights={activation_weights}")
        if (np.array(activation_weights) < 0).any():
            raise ValueError(f"activation_weights={activation_weights}")
        normalized_activation_weights = np.array(activation_weights) / sum(activation_weights)

    s_convex = round(normalized_activation_weights[0] * units)
    s_concave = round(normalized_activation_weights[1] * units)
    s_saturated = units - s_convex - s_concave

    x_convex, x_concave, x_saturated = tf.split(x, (s_convex, s_concave, s_saturated), axis=-1)

    y_convex = convex_activation(x_convex)
    y_concave = concave_activation(x_concave)
    y_saturated = saturated_activation(x_saturated)

    y = tf.concat([y_convex, y_concave, y_saturated], axis=-1)

    return y
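For intuition, the split sizes can be reproduced with plain NumPy outside of TensorFlow; the sketch below mirrors the arithmetic above (the helper name split_sizes is hypothetical):

import numpy as np

def split_sizes(units, activation_weights=(7.0, 7.0, 2.0)):
    # mirrors the arithmetic in apply_activations
    w = np.array(activation_weights) / sum(activation_weights)
    s_convex = round(w[0] * units)
    s_concave = round(w[1] * units)
    s_saturated = units - s_convex - s_concave
    return s_convex, s_concave, s_saturated

print(split_sizes(16))                   # (7, 7, 2): 7 convex, 7 concave, 2 saturated units
print(split_sizes(8, (1.0, 1.0, 1.0)))   # (3, 3, 2)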

apply_monotonicity_indicator_to_kernel(kernel, monotonicity_indicator) #

Source code in runn/keras/layers/mono_dense.py
def apply_monotonicity_indicator_to_kernel(
    self,
    kernel: tf.Variable,
    monotonicity_indicator: ArrayLike,
) -> TensorLike:
    # convert to tensor if needed and make it broadcastable to the kernel
    monotonicity_indicator = tf.convert_to_tensor(monotonicity_indicator)

    # absolute value of the kernel
    abs_kernel = tf.abs(kernel)

    # replace original kernel values for positive or negative ones where needed
    xs = tf.where(
        monotonicity_indicator == 1,
        abs_kernel,
        kernel,
    )
    xs = tf.where(monotonicity_indicator == -1, -abs_kernel, xs)

    return xs
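The effect of the sign constraint can be seen with a toy NumPy kernel (values are illustrative; the row-wise indicator broadcasts across the units dimension):

import numpy as np

kernel = np.array([[ 0.5, -0.3],
                   [-0.2,  0.4],
                   [ 0.1, -0.7]])
indicator = np.array([[1], [-1], [0]])  # one entry per input feature

constrained = np.where(indicator == 1, np.abs(kernel), kernel)
constrained = np.where(indicator == -1, -np.abs(kernel), constrained)
print(constrained)
# [[ 0.5  0.3]    row 0: forced non-negative (monotonically increasing input)
#  [-0.2 -0.4]    row 1: forced non-positive (monotonically decreasing input)
#  [ 0.1 -0.7]]   row 2: unchanged (non-monotonic input)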

build(input_shape, *args, **kwargs) #

Build

PARAMETER DESCRIPTION
input_shape

shape of the input tensor

TYPE: Tuple

args

positional arguments passed to Dense.build()

TYPE: List[Any] DEFAULT: ()

kwargs

keyword arguments passed to Dense.build()

TYPE: Any DEFAULT: {}

Source code in runn/keras/layers/mono_dense.py
def build(self, input_shape: Tuple, *args: List[Any], **kwargs: Any) -> None:
    """Build

    Args:
        input_shape: shape of the input tensor
        args: positional arguments passed to Dense.build()
        kwargs: keyword arguments passed to Dense.build()
    """
    super(MonoDense, self).build(input_shape, *args, **kwargs)
    self.monotonicity_indicator = self.get_monotonicity_indicator(
        monotonicity_indicator=self.monotonicity_indicator,
        input_shape=input_shape,
        units=self.units,
    )

call(inputs) #

Call

PARAMETER DESCRIPTION
inputs

input tensor of shape (batch_size, ..., x_length)

TYPE: TensorLike

RETURNS DESCRIPTION
TensorLike

N-D tensor with shape: (batch_size, ..., units).

Source code in runn/keras/layers/mono_dense.py
def call(self, inputs: TensorLike) -> TensorLike:
    """Call

    Args:
        inputs: input tensor of shape (batch_size, ..., x_length)

    Returns:
        N-D tensor with shape: `(batch_size, ..., units)`.

    """
    # calculate W'*x + b after we replace the kernel according to the monotonicity vector
    with self.replace_kernel_using_monotonicity_indicator(self, monotonicity_indicator=self.monotonicity_indicator):
        h = super(MonoDense, self).call(inputs)

    y = self.apply_activations(
        h,
        units=self.units,
        convex_activation=self.convex_activation,
        concave_activation=self.concave_activation,
        saturated_activation=self.saturated_activation,
        is_convex=self.is_convex,
        is_concave=self.is_concave,
        activation_weights=self.activation_weights,
    )

    return y
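Because the kernel columns for a feature with indicator 1 are forced non-negative and all three activation branches are monotonically increasing, the output is non-decreasing in that feature. A quick finite-difference check (a sketch; shapes, perturbation and tolerance are illustrative):

import numpy as np
import tensorflow as tf

from runn.keras.layers.mono_dense import MonoDense

layer = MonoDense(4, activation="elu", monotonicity_indicator=[1, -1, 0])

x = tf.random.normal((8, 3))
x_plus = x + tf.constant([[0.5, 0.0, 0.0]])  # perturb only the increasing feature

# every output unit must not decrease when the first feature increases
assert np.all(layer(x_plus).numpy() >= layer(x).numpy() - 1e-6)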

create_type_1(inputs, *, units, final_units, activation, n_layers, final_activation=None, monotonicity_indicator=1, is_convex=False, is_concave=False, dropout=None) classmethod #

Builds Type-1 monotonic network

The Type-1 architecture corresponds to the standard MLP type of neural network architecture, where the input features are concatenated into a single feature vector $\mathbf{x}$ and fed into the network, the only difference being that monotonic dense units are employed throughout instead of standard fully connected (dense) layers. For the first (input) layer, the indicator vector $\mathbf{t}$ is used to identify the monotonicity of each input feature with respect to the output: $\mathbf{t}$ is set to $1$ for components of the input feature vector that are monotonically increasing, to $-1$ for components that are monotonically decreasing, and to $0$ for non-monotonic features. The subsequent hidden layers use monotonic dense units with the indicator vector $\mathbf{t}$ always set to $1$ in order to preserve monotonicity. Finally, depending on whether the problem at hand is regression, classification, or a multi-task problem, an appropriate activation function (such as linear, sigmoid or softmax) is applied to obtain the final output.

PARAMETER DESCRIPTION
inputs

input tensor or a dictionary of tensors

TYPE: Union[TensorLike, Dict[str, TensorLike], List[TensorLike]]

units

number of units in hidden layers

TYPE: int

final_units

number of units in the output layer

TYPE: int

activation

the base activation function

TYPE: Union[str, Callable[[TensorLike], TensorLike]]

n_layers

total number of layers (hidden layers plus the output layer)

TYPE: int

final_activation

the activation function of the final layer (typically softmax, sigmoid or linear). If set to None (the default), the linear activation is used.

TYPE: Optional[Union[str, Callable[[TensorLike], TensorLike]]] DEFAULT: None

monotonicity_indicator

if a dictionary, maps names of input features to their monotonicity indicator (-1 for monotonically decreasing, 1 for monotonically increasing and 0 otherwise). If an int, all input features are set to the same monotonicity indicator.

TYPE: Union[int, Dict[str, int], List[int]] DEFAULT: 1

is_convex

set to True if a particular input feature is convex

TYPE: Union[bool, Dict[str, bool], List[bool]] DEFAULT: False

is_concave

set to True if a particular input feature is concave

TYPE: Union[bool, Dict[str, bool], List[bool]] DEFAULT: False

dropout

dropout rate. If set to float greater than 0, Dropout layers are inserted after hidden layers.

TYPE: Optional[float] DEFAULT: None

RETURNS DESCRIPTION
TensorLike

Output tensor
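A minimal functional-API sketch of a Type-1 network (the feature layout and hyperparameters are illustrative, not prescribed by the library):

import tensorflow as tf

from runn.keras.layers.mono_dense import MonoDense

# Five input features: the first two increase the output, the third decreases it,
# and the last two are unconstrained.
x = tf.keras.Input(shape=(5,))
y = MonoDense.create_type_1(
    x,
    units=64,
    final_units=1,
    activation="elu",
    n_layers=3,
    final_activation=None,                  # linear output
    monotonicity_indicator=[1, 1, -1, 0, 0],
    dropout=0.1,
)
model = tf.keras.Model(inputs=x, outputs=y)
model.compile(optimizer="adam", loss="mse")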

Source code in runn/keras/layers/mono_dense.py
@classmethod
def create_type_1(
    cls,
    inputs: Union[TensorLike, Dict[str, TensorLike], List[TensorLike]],
    *,
    units: int,
    final_units: int,
    activation: Union[str, Callable[[TensorLike], TensorLike]],
    n_layers: int,
    final_activation: Optional[Union[str, Callable[[TensorLike], TensorLike]]] = None,
    monotonicity_indicator: Union[int, Dict[str, int], List[int]] = 1,
    is_convex: Union[bool, Dict[str, bool], List[bool]] = False,
    is_concave: Union[bool, Dict[str, bool], List[bool]] = False,
    dropout: Optional[float] = None,
) -> TensorLike:
    """Builds Type-1 monotonic network

    Type-1 architecture corresponds to the standard MLP type of neural network architecture used in general, where each
    of the input features is concatenated to form one single input feature vector $\mathbf{x}$ and fed into the network,
    with the only difference being that instead of standard fully connected or dense layers, we employ monotonic dense units
    throughout. For the first (input) layer, the indicator vector $\mathbf{t}$ is used to identify the monotonicity
    property of the input feature with respect to the output. Specifically, $\mathbf{t}$ is set to $1$ for those components
    in the input feature vector that are monotonically increasing and is set to $-1$ for those components that are monotonically
    decreasing and set to $0$ if the feature is non-monotonic. For the subsequent hidden layers, monotonic dense units with the
    indicator vector $\mathbf{t}$ always being set to $1$ are used in order to preserve monotonicity. Finally, depending on
    whether the problem at hand is a regression problem or a classification problem (or even a multi-task problem), an appropriate
    activation function (such as linear activation or sigmoid or softmax) is used to obtain the final output.

    Args:
        inputs: input tensor or a dictionary of tensors
        units: number of units in hidden layers
        final_units: number of units in the output layer
        activation: the base activation function
        n_layers: total number of layers (hidden layers plus the output layer)
        final_activation: the activation function of the final layer (typically softmax, sigmoid or linear).
            If set to None (default value), then the linear activation is used.
        monotonicity_indicator: if a dictionary, then maps names of input features to their monotonicity
            indicator (-1 for monotonically decreasing, 1 for monotonically increasing and 0 otherwise). If int,
            then all input features are set to the same monotonicity indicator.
        is_convex: set to True if a particular input feature is convex
        is_concave: set to True if a particular input feature is concave
        dropout: dropout rate. If set to float greater than 0, Dropout layers are inserted after hidden layers.

    Returns:
        Output tensor

    """
    return _create_type_1(
        inputs,
        units=units,
        final_units=final_units,
        activation=activation,
        n_layers=n_layers,
        final_activation=final_activation,
        monotonicity_indicator=monotonicity_indicator,
        is_convex=is_convex,
        is_concave=is_concave,
        dropout=dropout,
    )

create_type_2(inputs, *, input_units=None, units, final_units, activation, n_layers, final_activation=None, monotonicity_indicator=1, is_convex=False, is_concave=False, dropout=None) classmethod #

Builds Type-2 monotonic network

The Type-2 architecture is another neural network architecture that can be built from the proposed monotonic dense blocks. Compared to the Type-1 architecture, the difference lies in the way input features are fed into the hidden layers. Instead of concatenating the features directly, this architecture offers the flexibility to employ any form of complex feature extractor for the non-monotonic features and to use the extracted feature vectors as inputs. Another difference is that each monotonic input is passed through a separate monotonic dense unit. This provides an advantage since, depending on whether the input is completely concave or convex or both, we can adjust the activation selection vector $\mathbf{s}$ appropriately along with an appropriate value for the indicator vector $\mathbf{t}$. Thus, each monotonic input feature has its own monotonic dense layer and, as the major difference from the Type-1 architecture, the extracted feature vectors are concatenated instead of the raw inputs. The subsequent parts of the network are the same as in the Type-1 architecture: for the remaining hidden monotonic dense units, the indicator vector $\mathbf{t}$ is always set to $1$ to preserve monotonicity.

PARAMETER DESCRIPTION
inputs

input tensor or a dictionary of tensors

TYPE: Union[TensorLike, Dict[str, TensorLike], List[TensorLike]]

input_units

used to preprocess features before entering the common mono block

TYPE: Optional[int] DEFAULT: None

units

number of units in hidden layers

TYPE: int

final_units

number of units in the output layer

TYPE: int

activation

the base activation function

TYPE: Union[str, Callable[[TensorLike], TensorLike]]

n_layers

total number of layers (hidden layers plus the output layer)

TYPE: int

final_activation

the activation function of the final layer (typically softmax, sigmoid or linear). If set to None (the default), the linear activation is used.

TYPE: Optional[Union[str, Callable[[TensorLike], TensorLike]]] DEFAULT: None

monotonicity_indicator

if a dictionary, maps names of input features to their monotonicity indicator (-1 for monotonically decreasing, 1 for monotonically increasing and 0 otherwise). If an int, all input features are set to the same monotonicity indicator.

TYPE: Union[int, Dict[str, int], List[int]] DEFAULT: 1

is_convex

set to True if a particular input feature is convex

TYPE: Union[bool, Dict[str, bool], List[bool]] DEFAULT: False

is_concave

set to True if a particular input feature is concave

TYPE: Union[bool, Dict[str, bool], List[bool]] DEFAULT: False

dropout

dropout rate. If set to float greater than 0, Dropout layers are inserted after hidden layers.

TYPE: Optional[float] DEFAULT: None

RETURNS DESCRIPTION
TensorLike

Output tensor
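A minimal sketch of a Type-2 network with one named Input per feature and per-feature settings passed as dictionaries (feature names, shapes and hyperparameters are illustrative):

import tensorflow as tf

from runn.keras.layers.mono_dense import MonoDense

inputs = {name: tf.keras.Input(shape=(1,), name=name) for name in ("a", "b", "c")}
outputs = MonoDense.create_type_2(
    inputs,
    input_units=8,                  # width of the per-feature preprocessing layers
    units=64,
    final_units=1,
    activation="elu",
    n_layers=3,
    final_activation=None,
    monotonicity_indicator={"a": 1, "b": -1, "c": 0},
    is_convex={"a": True, "b": False, "c": False},
    dropout=0.1,
)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="adam", loss="mse")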

Source code in runn/keras/layers/mono_dense.py
@classmethod
def create_type_2(
    cls,
    inputs: Union[TensorLike, Dict[str, TensorLike], List[TensorLike]],
    *,
    input_units: Optional[int] = None,
    units: int,
    final_units: int,
    activation: Union[str, Callable[[TensorLike], TensorLike]],
    n_layers: int,
    final_activation: Optional[Union[str, Callable[[TensorLike], TensorLike]]] = None,
    monotonicity_indicator: Union[int, Dict[str, int], List[int]] = 1,
    is_convex: Union[bool, Dict[str, bool], List[bool]] = False,
    is_concave: Union[bool, Dict[str, bool], List[bool]] = False,
    dropout: Optional[float] = None,
) -> TensorLike:
    """Builds Type-2 monotonic network

    Type-2 architecture is another example of a neural network architecture that can be built employing proposed
    monotonic dense blocks. The difference when compared to the architecture described above lies in the way input
    features are fed into the hidden layers of neural network architecture. Instead of concatenating the features
    directly, this architecture provides flexibility to employ any form of complex feature extractors for the
    non-monotonic features and use the extracted feature vectors as inputs. Another difference is that each monotonic
    input is passed through separate monotonic dense units. This provides an advantage since depending on whether the
    input is completely concave or convex or both, we can adjust the activation selection vector $\mathbf{s}$ appropriately
    along with an appropriate value for the indicator vector $\mathbf{t}$. Thus, each of the monotonic input features has
    a separate monotonic dense layer associated with it. As the major difference from the Type-1 architecture,
    we concatenate the feature vectors instead of concatenating the inputs directly. The subsequent parts of the network are
    similar to the architecture described above wherein for the rest of the hidden monotonic dense units, the indicator vector
    $\mathbf{t}$ is always set to $1$ to preserve monotonicity.

    Args:
        inputs: input tensor or a dictionary of tensors
        input_units: used to preprocess features before entering the common mono block
        units: number of units in hidden layers
        final_units: number of units in the output layer
        activation: the base activation function
        n_layers: total number of layers (hidden layers plus the output layer)
        final_activation: the activation function of the final layer (typically softmax, sigmoid or linear).
            If set to None (default value), then the linear activation is used.
        monotonicity_indicator: if a dictionary, then maps names of input features to their monotonicity
            indicator (-1 for monotonically decreasing, 1 for monotonically increasing and 0 otherwise). If int,
            then all input features are set to the same monotonicity indicator.
        is_convex: set to True if a particular input feature is convex
        is_concave: set to True if a particular input feature is concave
        dropout: dropout rate. If set to float greater than 0, Dropout layers are inserted after hidden layers.

    Returns:
        Output tensor

    """
    return _create_type_2(
        inputs,
        input_units=input_units,
        units=units,
        final_units=final_units,
        activation=activation,
        n_layers=n_layers,
        final_activation=final_activation,
        monotonicity_indicator=monotonicity_indicator,
        is_convex=is_convex,
        is_concave=is_concave,
        dropout=dropout,
    )

get_activation_functions(activation=None) cached #

Source code in runn/keras/layers/mono_dense.py
@lru_cache
def get_activation_functions(
    self,
    activation: Optional[Union[str, Callable[[TensorLike], TensorLike]]] = None,
) -> Tuple[
    Callable[[TensorLike], TensorLike],
    Callable[[TensorLike], TensorLike],
    Callable[[TensorLike], TensorLike],
]:
    convex_activation = tf.keras.activations.get(activation.lower() if isinstance(activation, str) else activation)

    @tf.function
    def concave_activation(x: TensorLike) -> TensorLike:
        return -convex_activation(-x)

    saturated_activation = self.get_saturated_activation(convex_activation, concave_activation)
    return convex_activation, concave_activation, saturated_activation
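The concave counterpart is just the point reflection of the chosen convex activation, concave(x) = -convex(-x); for "relu" this yields min(x, 0). A quick check with illustrative values:

import tensorflow as tf

convex = tf.keras.activations.get("relu")
concave = lambda x: -convex(-x)  # point reflection of the convex activation

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
print(convex(x).numpy())   # [0.   0.   0.   0.5  2. ]
print(concave(x).numpy())  # [-2.  -0.5  0.   0.   0. ]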

get_config() #

Get config is used for saving the model

Source code in runn/keras/layers/mono_dense.py
def get_config(self) -> Dict[str, Any]:
    """Get config is used for saving the model"""
    return dict(
        units=self.units,
        activation=self.org_activation,
        monotonicity_indicator=self.monotonicity_indicator,
        is_convex=self.is_convex,
        is_concave=self.is_concave,
        activation_weights=self.activation_weights,
    )

get_monotonicity_indicator(monotonicity_indicator, *, input_shape, units) #

Source code in runn/keras/layers/mono_dense.py
def get_monotonicity_indicator(
    self,
    monotonicity_indicator: ArrayLike,
    *,
    input_shape: Tuple[int, ...],
    units: int,
) -> TensorLike:
    # convert to tensor if needed and make it broadcastable to the kernel
    monotonicity_indicator = np.array(monotonicity_indicator)
    if len(monotonicity_indicator.shape) < 2:
        monotonicity_indicator = np.reshape(monotonicity_indicator, (-1, 1))
    elif len(monotonicity_indicator.shape) > 2:
        raise ValueError(f"monotonicity_indicator has rank greater than 2: {monotonicity_indicator.shape}")

    # shape-compatibility check: np.broadcast_to raises a ValueError if the indicator
    # cannot be broadcast against the kernel shape (input_shape[-1], units)
    monotonicity_indicator_broadcasted = np.broadcast_to(monotonicity_indicator, shape=(input_shape[-1], units))

    if not np.all((monotonicity_indicator == -1) | (monotonicity_indicator == 0) | (monotonicity_indicator == 1)):
        raise ValueError(
            f"Each element of monotonicity_indicator must be one of -1, 0, 1, but it is: '{monotonicity_indicator}'"
        )
    return monotonicity_indicator
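In NumPy terms (toy numbers), a per-feature indicator of shape (n_features,) is reshaped to (n_features, 1) so that it broadcasts row-wise against a kernel of shape (n_features, units):

import numpy as np

n_features, units = 3, 4
indicator = np.array([1, -1, 0])            # one value per input feature

indicator = np.reshape(indicator, (-1, 1))  # shape (3, 1)
np.broadcast_to(indicator, shape=(n_features, units))  # raises ValueError if incompatible

print(indicator.shape)  # (3, 1) -- broadcasts against a (3, 4) kernel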

get_saturated_activation(convex_activation, concave_activation, a=1.0, c=1.0) #

Source code in runn/keras/layers/mono_dense.py
def get_saturated_activation(
    self,
    convex_activation: Callable[[TensorLike], TensorLike],
    concave_activation: Callable[[TensorLike], TensorLike],
    a: float = 1.0,
    c: float = 1.0,
) -> Callable[[TensorLike], TensorLike]:
    @tf.function
    def saturated_activation(
        x: TensorLike,
        convex_activation: Callable[[TensorLike], TensorLike] = convex_activation,
        concave_activation: Callable[[TensorLike], TensorLike] = concave_activation,
        a: float = a,
        c: float = c,
    ) -> TensorLike:
        cc = convex_activation(tf.ones_like(x) * c)
        ccc = concave_activation(-tf.ones_like(x) * c)
        return a * tf.where(
            x <= 0,
            convex_activation(x + c) - cc,
            concave_activation(x - c) + cc,
        )

    return saturated_activation  # type: ignore
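In symbols, writing $\rho^{c}$ for the convex activation and $\rho^{\hat{c}}(x) = -\rho^{c}(-x)$ for its concave counterpart, the returned saturated activation (with the defaults $a = c = 1$) matches the tf.where above:

$$
\rho^{s}(x) = a \cdot \begin{cases} \rho^{c}(x + c) - \rho^{c}(c), & x \le 0 \\ \rho^{\hat{c}}(x - c) + \rho^{c}(c), & x > 0 \end{cases}
$$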

replace_kernel_using_monotonicity_indicator(layer, monotonicity_indicator) #

Source code in runn/keras/layers/mono_dense.py
@contextmanager
def replace_kernel_using_monotonicity_indicator(
    self,
    layer: tf.keras.layers.Dense,
    monotonicity_indicator: TensorLike,
) -> Generator[None, None, None]:
    old_kernel = layer.kernel

    layer.kernel = self.apply_monotonicity_indicator_to_kernel(layer.kernel, monotonicity_indicator)
    try:
        yield
    finally:
        layer.kernel = old_kernel
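A short sketch of the pattern used in call: the constrained kernel is only active inside the with block, and the original trainable kernel is restored afterwards (toy layer, illustrative shapes):

import tensorflow as tf

from runn.keras.layers.mono_dense import MonoDense

layer = MonoDense(4, monotonicity_indicator=[1, -1, 0])
layer.build(input_shape=(None, 3))

with layer.replace_kernel_using_monotonicity_indicator(
    layer, monotonicity_indicator=layer.monotonicity_indicator
):
    print(layer.kernel.numpy())  # rows obey the sign constraints here
print(layer.kernel.numpy())      # original (unconstrained) kernel restored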