Skip to content

RUNN (Random Utility Neural Network)

runn.models.base

runn.models.base

`optimizers = {'adadelta': Adadelta, 'adafactor': Adafactor, 'adagrad': Adagrad, 'adam': Adam, 'adamw': AdamW, 'adamax': Adamax, 'ftrl': Ftrl, 'lion': Lion, 'nadam': Nadam, 'rmsprop': RMSprop, 'sgd': SGD}` `module-attribute` #

`warning_manager = WarningManager()` `module-attribute` #

`BaseModel(attributes=None, n_alt=None, layers_dim=[25, 25], regularizer=None, regularization_rate=0.001, learning_rate=0.001, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], filename=None, warnings=True)` #

Abstract base class for all choice models.

PARAMETER	DESCRIPTION
`attributes`	List with the attribute names in the model, in the same order as in the input data. If None, the model cannot be initialized unless it is loaded from a file. Default: None. TYPE: `Optional[list]` DEFAULT: `None`
`n_alt`	Number of alternatives in the choice set. If None, the model cannot be initialized unless it is loaded from a file. Default: None. TYPE: `Optional[int]` DEFAULT: `None`
`layers_dim`	List with the number of neurons in each hidden layer, the length of the list is the number of hidden layers. Default: [25, 25]. TYPE: `list` DEFAULT: `[25, 25]`
`regularizer`	Type of regularization to apply. Possible values: 'l1', 'l2' or 'l1_l2'. Default: None. TYPE: `Optional[str]` DEFAULT: `None`
`regularization_rate`	Regularization rate if regularizer is not None. Default: 0.001. TYPE: `float` DEFAULT: `0.001`
`learning_rate`	Learning rate of the optimizer. Default: 0.001. TYPE: `float` DEFAULT: `0.001`
`optimizer`	Optimizer to use. Can be either a string or a tf.keras.optimizers.Optimizer. Default: 'adam'. TYPE: `Union[str, Optimizer]` DEFAULT: `'adam'`
`loss`	Loss function to use. Can be either a string or a tf.keras.losses.Loss. Default: 'categorical_crossentropy'. TYPE: `Union[str, Loss]` DEFAULT: `'categorical_crossentropy'`
`metrics`	List of metrics to be evaluated by the model during training and testing. Each of these can be either a string or a tf.keras.metrics.Metric. Default: ['accuracy']. TYPE: `list` DEFAULT: `['accuracy']`
`filename`	Load a previously trained model from a file. If None, a new model will be initialized. When loading a model from a file, the previous parameters will be ignored. Default: None. TYPE: `Optional[str]` DEFAULT: `None`
`warnings`	Whether to show warnings or not. Default: True. TYPE: `bool` DEFAULT: `True`

Source code in runn/models/base.py

def __init__(
    self,
    attributes: Optional[list] = None,
    n_alt: Optional[int] = None,
    layers_dim: list = [25, 25],
    regularizer: Optional[str] = None,
    regularization_rate: float = 0.001,
    learning_rate: float = 0.001,
    optimizer: Union[str, tf.keras.optimizers.Optimizer] = "adam",
    loss: Union[str, tf.keras.losses.Loss] = "categorical_crossentropy",
    metrics: list = ["accuracy"],
    filename: Optional[str] = None,
    warnings: bool = True,
) -> None:
    """Create a new model or restore a previously saved one.

    Args:
        attributes: List with the attribute names in the model, in the same order as in the input data.
            Default: None.
        n_alt: Number of alternatives in the choice set. Default: None.
        layers_dim: List with the number of neurons in each hidden layer; its length is the number of
            hidden layers. Default: [25, 25].
        regularizer: Type of regularization to apply. Possible values: 'l1', 'l2' or 'l1_l2'.
            Default: None.
        regularization_rate: Regularization rate if regularizer is not None. Default: 0.001.
        learning_rate: Learning rate of the optimizer. Default: 0.001.
        optimizer: Optimizer to use. Can be either a string or a tf.keras.optimizers.Optimizer.
            Default: 'adam'.
        loss: Loss function to use. Can be either a string or a tf.keras.losses.Loss.
            Default: 'categorical_crossentropy'.
        metrics: List of metrics to be evaluated by the model during training and testing.
            Default: ['accuracy'].
        filename: Path of a previously saved model. If None, a new model is initialized from the other
            parameters; otherwise the model is loaded and the other model parameters are not used.
            Default: None.
        warnings: Whether to show warnings or not. Default: True.

    Raises:
        ValueError: If 'filename' is neither None nor a string.
    """
    self._initialize_base_variables(warnings=warnings)
    if filename is None:
        # Initialize new model.
        # The list defaults in the signature are built once and shared by every call, so the
        # initializer receives private copies: mutating one model's configuration can then never
        # leak into the defaults seen by models created later.
        self._initialize_base_params(
            attributes=attributes,
            n_alt=n_alt,
            layers_dim=list(layers_dim) if isinstance(layers_dim, list) else layers_dim,
            regularizer=regularizer,
            regularization_rate=regularization_rate,
            learning_rate=learning_rate,
            optimizer=optimizer,
            loss=loss,
            metrics=list(metrics) if isinstance(metrics, list) else metrics,
        )
    elif isinstance(filename, str):
        # Load model from file
        self.load(filename)
    else:
        raise ValueError("The 'filename' parameter should be a string.")

`evaluate(x, y, **kwargs)` #

Returns the loss value & metrics values for the model for a given input.

PARAMETER	DESCRIPTION
`x`	Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame. TYPE: `Union[Tensor, ndarray, DataFrame]`
`y`	The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or np.ndarray. It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with one-hot encoded alternatives. TYPE: `Union[Tensor, ndarray]`
`**kwargs`	Additional arguments passed to the keras model. See tf.keras.Model.evaluate() for details. DEFAULT: `{}`

RETURNS	DESCRIPTION
`Union[float, list]`	Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). See tf.keras.Model.evaluate() for details.

Source code in runn/models/base.py

def evaluate(
    self, x: Union[tf.Tensor, np.ndarray, pd.DataFrame], y: Union[tf.Tensor, np.ndarray], **kwargs
) -> Union[float, list]:
    """Compute the loss and metric values of the fitted model on the given sample.

    Args:
        x: Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame.
        y: The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or
            np.ndarray. Either a 1D array with integers in the range [0, n_alt-1] or a 2D array with
            one-hot encoded alternatives.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.evaluate() for details.

    Returns:
        Whatever the underlying keras model's evaluate() returns: a scalar test loss or a list of scalars.
        See tf.keras.Model.evaluate() for details.

    Raises:
        Exception: If the model has not been fitted yet.
        ValueError: If y is not one-hot encoded and holds values outside [0, n_alt-1].
    """
    if self.fitted is False:
        raise Exception("The model is not fitted yet. Please call the 'fit' method first.")

    # Features: DataFrame -> ndarray -> tensor; anything else is forwarded untouched.
    features = x.values if isinstance(x, pd.DataFrame) else x
    if isinstance(features, np.ndarray):
        features = tf.convert_to_tensor(features)

    # Labels are inspected as numpy so they can be range-checked.
    choices = y.numpy() if isinstance(y, tf.Tensor) else y
    looks_one_hot = len(choices.shape) == 2 and choices.shape[1] == self.n_alt
    # Anything that is not an (n, n_alt) matrix is treated as integer labels in [0, n_alt-1].
    if not looks_one_hot and (np.any(choices < 0) or np.any(choices >= self.n_alt)):
        raise ValueError("The input parameter 'y' should contain integers in the range [0, n_alt-1].")

    return self.keras_model.evaluate(features, choices, **kwargs)

`fit(x, y, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, **kwargs)` #

Train the model.

PARAMETER	DESCRIPTION
`x`	Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame. TYPE: `Union[Tensor, ndarray, DataFrame]`
`y`	The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or np.ndarray. It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with one-hot encoded alternatives. TYPE: `Union[Tensor, ndarray]`
`batch_size`	Number of samples per gradient update. If unspecified, batch_size will default to 32. TYPE: `Optional[int]` DEFAULT: `None`
`epochs`	Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided. Default: 1. TYPE: `int` DEFAULT: `1`
`verbose`	Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 1. TYPE: `int` DEFAULT: `1`
`callbacks`	List of tf.keras.callbacks.Callback instances. List of callbacks to apply during training. See tf.keras.callbacks for details. Default: None. TYPE: `Optional[list]` DEFAULT: `None`
`validation_split`	Float between 0 and 1. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the x and y data provided, before shuffling. Default: 0.0. TYPE: `float` DEFAULT: `0.0`
`validation_data`	Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. This could be a tuple (x_val, y_val) or a tuple (x_val, y_val, val_sample_weights). Default: None. TYPE: `Optional[tuple]` DEFAULT: `None`
`**kwargs`	Additional arguments passed to the keras model. See tf.keras.Model.fit() for details. DEFAULT: `{}`

Source code in runn/models/base.py

def fit(
    self,
    x: Union[tf.Tensor, np.ndarray, pd.DataFrame],
    y: Union[tf.Tensor, np.ndarray],
    batch_size: Optional[int] = None,
    epochs: int = 1,
    verbose: int = 1,
    callbacks: Optional[list] = None,
    validation_split: float = 0.0,
    validation_data: Optional[tuple] = None,
    **kwargs,
) -> None:
    """Train the model.

    The training history is stored on the instance (``self.history``) and the model is flagged as
    fitted (``self.fitted``) once training completes.

    Args:
        x: Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame.
        y: The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or np.ndarray.
            It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with one-hot encoded
            alternatives.
        batch_size: Number of samples per gradient update. If unspecified, batch_size will default to 32.
        epochs: Number of epochs to train the model. An epoch is an iteration over the entire x and y data
            provided. Default: 1.
        verbose: Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 1.
        callbacks: List of tf.keras.callbacks.Callback instances. List of callbacks to apply during training.
            See tf.keras.callbacks for details. Default: None.
        validation_split: Float between 0 and 1. Fraction of the training data to be used as validation data.
            The model will set apart this fraction of the training data, will not train on it, and will evaluate
            the loss and any model metrics on this data at the end of each epoch. The validation data is selected
            from the last samples in the x and y data provided, before shuffling. Default: 0.0.
        validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The
            model will not be trained on this data. This could be a tuple (x_val, y_val) or a tuple (x_val, y_val,
            val_sample_weights). Default: None.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.fit() for details.

    Raises:
        ValueError: If the keras model is not initialized, if 'y' (or 'y_val') is not one-hot encoded and
            holds values outside [0, n_alt-1], or if 'validation_data' is not a tuple of length 2 or 3.
    """
    # Check if the model is initialized
    if self.keras_model is None:
        raise ValueError("The model is not initialized yet. Please initialize the model first.")

    def _checked_choices(choices, label):
        """Return the labels (tensors converted to numpy) after range-checking integer encodings."""
        if isinstance(choices, tf.Tensor):
            choices = choices.numpy()
        one_hot = len(choices.shape) == 2 and choices.shape[1] == self.n_alt
        # Not one-hot encoded, hence it should hold integers in the range [0, n_alt-1]
        if not one_hot and (np.any(choices < 0) or np.any(choices >= self.n_alt)):
            raise ValueError(
                "The input parameter '{}' should contain integers in the range [0, n_alt-1].".format(label)
            )
        return choices

    y = _checked_choices(y, "y")

    if validation_data is not None:
        # Both documented layouts are accepted: (x_val, y_val) and (x_val, y_val, val_sample_weights).
        if not (isinstance(validation_data, tuple) and len(validation_data) in (2, 3)):
            raise ValueError(
                "The 'validation_data' parameter should be a tuple (x_val, y_val) or "
                "(x_val, y_val, val_sample_weights)."
            )
        x_val, y_val = validation_data[0], validation_data[1]
        if isinstance(x_val, pd.DataFrame):
            x_val = x_val.values
        if isinstance(x_val, np.ndarray):
            x_val = tf.convert_to_tensor(x_val)
        y_val = _checked_choices(y_val, "y_val")
        # Optional sample weights are forwarded to keras untouched.
        validation_data = (x_val, y_val) + validation_data[2:]

    # Keyword arguments keep this call independent of the positional order of keras' fit() signature.
    history = self.keras_model.fit(
        x,
        y,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        validation_split=validation_split,
        validation_data=validation_data,
        **kwargs,
    )
    self.history = history.history
    self.fitted = True
    self.history = history.history
    self.fitted = True

`get_history()` #

Return the history of the model training.

RETURNS	DESCRIPTION
`dict`	A dictionary containing the loss and metrics values at the end of each epoch.

Source code in runn/models/base.py

def get_history(self) -> dict:
    """Give access to the training history recorded on this model.

    Returns:
        A dictionary containing the loss and metrics values at the end of each epoch.

    Raises:
        Exception: If no history has been recorded yet (``self.history`` is None).
    """
    recorded = self.history
    if recorded is not None:
        return recorded
    raise Exception("The model is not fitted yet. Please call the 'fit' method first.")

`get_utility(x, name)` `abstractmethod` #

Source code in runn/models/base.py

@abstractmethod
def get_utility(self, x: Union[tf.Tensor, np.ndarray, pd.DataFrame], name: str) -> np.ndarray:
    """Abstract hook that concrete models must override.

    The base implementation performs no work and always raises.

    NOTE(review): judging by the signature this presumably returns utility values computed for the
    input ``x``; the meaning of ``name`` is not visible from here -- confirm against the subclasses.

    Raises:
        NotImplementedError: Always, when the base implementation is invoked.
    """
    raise NotImplementedError()

`load(path)` `abstractmethod` #

Source code in runn/models/base.py

@abstractmethod
def load(self, path: str) -> None:
    """Abstract hook that concrete models must override to restore a model from ``path``.

    The constructor calls this method when it is given a ``filename``. The base implementation
    performs no work and always raises.

    Raises:
        NotImplementedError: Always, when the base implementation is invoked.
    """
    raise NotImplementedError()

`plot_model(filename=None, expand_nested=True, dpi=96)` #

Generate a graphical representation of the model.

PARAMETER	DESCRIPTION
`filename`	File to which the plot will be saved. If None, the plot is saved to '<ClassName>.png', where <ClassName> is the name of the model class. Default: None. TYPE: `str` DEFAULT: `None`
`expand_nested`	Whether to expand nested models into clusters. Default: True. DEFAULT: `True`
`dpi`	Resolution of the plot. Default: 96. TYPE: `int` DEFAULT: `96`

Source code in runn/models/base.py

def plot_model(self, filename: str = None, expand_nested=True, dpi: int = 96) -> None:
    """Render a diagram of the underlying keras model.

    Args:
        filename: File to which the plot will be saved. If None, the file name defaults to the class
            name followed by ".png". Default: None.
        expand_nested: Whether to expand nested models into clusters. Default: True.
        dpi: Resolution of the plot. Default: 96.

    Returns:
        The result of the underlying ``plot_model`` call, passed through unchanged.

    Raises:
        ValueError: If the keras model is not initialized.
    """
    model = self.keras_model
    if model is None:
        raise ValueError("Keras model is not initialized yet. Please initialize the model first.")

    # Fall back to "<ClassName>.png" when the caller did not pick a target file.
    target = self.__class__.__name__ + ".png" if filename is None else filename

    options = {
        "show_shapes": True,
        "show_layer_names": True,
        "expand_nested": expand_nested,
        "rankdir": "TB",
        "style": 0,
        "color": True,
        "to_file": target,
        "dpi": dpi,
    }
    return plot_model(model, **options)

`predict(x, **kwargs)` #

Predict the choice probabilities for a given input.

PARAMETER	DESCRIPTION
`x`	Input data. TYPE: `Union[Tensor, ndarray, DataFrame]`
`**kwargs`	Additional arguments passed to the keras model. See tf.keras.Model.predict() for details. DEFAULT: `{}`

RETURNS	DESCRIPTION
`ndarray`	Numpy array with the choice probabilities for each alternative.

Source code in runn/models/base.py

def predict(self, x: Union[tf.Tensor, np.ndarray, pd.DataFrame], **kwargs) -> np.ndarray:
    """Compute the choice probabilities of the fitted model for the given input.

    Args:
        x: Input data. A pd.DataFrame is reduced to its values and np.ndarray input is converted to a
            tensor before being handed to the keras model; other inputs are forwarded unchanged.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.predict() for details.

    Returns:
        Numpy array with the choice probabilities for each alternative.

    Raises:
        Exception: If the model has not been fitted yet.
    """
    if self.fitted is False:
        raise Exception("The model is not fitted yet. Please call the 'fit' method first.")

    inputs = x.values if isinstance(x, pd.DataFrame) else x
    if isinstance(inputs, np.ndarray):
        inputs = tf.convert_to_tensor(inputs)
    return self.keras_model.predict(inputs, **kwargs)

`save(path='model.zip')` `abstractmethod` #

Source code in runn/models/base.py

@abstractmethod
def save(self, path: str = "model.zip") -> None:
    """Abstract hook that concrete models must override to persist the model to ``path``.

    The base implementation performs no work and always raises.

    Raises:
        NotImplementedError: Always, when the base implementation is invoked.
    """
    raise NotImplementedError()

`summary(line_length=100, **kwargs)` #

Print a summary of the keras model.

PARAMETER	DESCRIPTION
`line_length`	Total length of printed lines. Default: 100. TYPE: `int` DEFAULT: `100`
`**kwargs`	Additional arguments passed to the keras model. See tf.keras.Model.summary() for details. DEFAULT: `{}`

Source code in runn/models/base.py

def summary(self, line_length: int = 100, **kwargs) -> None:
    """Print an overview of the model followed by the keras model summary.

    The output is a header with the class name, the data summary produced by
    ``_print_data_summary`` and finally the summary of the keras model.

    Args:
        line_length: Total length of printed lines. Default: 100.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.summary() for details.

    Raises:
        Exception: If the keras model is not initialized.
    """
    if self.keras_model is None:
        raise Exception("Keras model is not initialized yet. Please initialize the model first.")

    header = "------ {} ------".format(self.__class__.__name__)
    print(header)
    self._print_data_summary(line_length=line_length)
    print("\nSummary of the keras model:")
    self.keras_model.summary(line_length=line_length, **kwargs)