Skip to content

runn.models.base

optimizers = {'adadelta': Adadelta, 'adafactor': Adafactor, 'adagrad': Adagrad, 'adam': Adam, 'adamw': AdamW, 'adamax': Adamax, 'ftrl': Ftrl, 'lion': Lion, 'nadam': Nadam, 'rmsprop': RMSprop, 'sgd': SGD} module-attribute #

warning_manager = WarningManager() module-attribute #

BaseModel(attributes=None, n_alt=None, layers_dim=[25, 25], regularizer=None, regularization_rate=0.001, learning_rate=0.001, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], filename=None, warnings=True) #

Abstract base class for all choice models.

PARAMETER DESCRIPTION
attributes

List with the attributes names in the model, in the same order as in the input data. If None, the model cannot be initialized unless it is loaded from a file. Default: None.

TYPE: Optional[list] DEFAULT: None

n_alt

Number of alternatives in the choice set. If None, the model cannot be initialized unless it is loaded from a file. Default: None.

TYPE: Optional[int] DEFAULT: None

layers_dim

List with the number of neurons in each hidden layer, the length of the list is the number of hidden layers. Default: [25, 25].

TYPE: list DEFAULT: [25, 25]

regularizer

Type of regularization to apply. Possible values: 'l1', 'l2' or 'l1_l2'. Default: None.

TYPE: Optional[str] DEFAULT: None

regularization_rate

Regularization rate if regularizer is not None. Default: 0.001.

TYPE: float DEFAULT: 0.001

learning_rate

Learning rate of the optimizer. Default: 0.001.

TYPE: float DEFAULT: 0.001

optimizer

Optimizer to use. Can be either a string or a tf.keras.optimizers.Optimizer. Default: 'adam'.

TYPE: Union[str, Optimizer] DEFAULT: 'adam'

loss

Loss function to use. Can be either a string or a tf.keras.losses.Loss. Default: 'categorical_crossentropy'.

TYPE: Union[str, Loss] DEFAULT: 'categorical_crossentropy'

metrics

List of metrics to be evaluated by the model during training and testing. Each of this can be either a string or a tf.keras.metrics.Metric. Default: ['accuracy'].

TYPE: list DEFAULT: ['accuracy']

filename

Load a previously trained model from a file. If None, a new model will be initialized. When loading a model from a file, the previous parameters will be ignored. Default: None.

TYPE: Optional[str] DEFAULT: None

warnings

Whether to show warnings or not. Default: True.

TYPE: bool DEFAULT: True

Source code in runn/models/base.py
def __init__(
    self,
    attributes: Optional[list] = None,
    n_alt: Optional[int] = None,
    layers_dim: list = [25, 25],
    regularizer: Optional[str] = None,
    regularization_rate: float = 0.001,
    learning_rate: float = 0.001,
    optimizer: Union[str, tf.keras.optimizers.Optimizer] = "adam",
    loss: Union[str, tf.keras.losses.Loss] = "categorical_crossentropy",
    metrics: list = ["accuracy"],
    filename: Optional[str] = None,
    warnings: bool = True,
) -> None:
    """Create a new model, or restore one from a file.

    Args:
        attributes: List with the attributes names in the model, in the same order as in the input data.
            Default: None.
        n_alt: Number of alternatives in the choice set. Default: None.
        layers_dim: List with the number of neurons in each hidden layer. Default: [25, 25].
        regularizer: Type of regularization to apply ('l1', 'l2' or 'l1_l2'). Default: None.
        regularization_rate: Regularization rate if regularizer is not None. Default: 0.001.
        learning_rate: Learning rate of the optimizer. Default: 0.001.
        optimizer: Optimizer to use, either a string or a tf.keras.optimizers.Optimizer. Default: 'adam'.
        loss: Loss function to use, either a string or a tf.keras.losses.Loss.
            Default: 'categorical_crossentropy'.
        metrics: List of metrics to be evaluated by the model. Default: ['accuracy'].
        filename: Path of a previously saved model. When given, the model is loaded with ``self.load`` and
            all the previous parameters are ignored. Default: None.
        warnings: Whether to show warnings or not. Default: True.

    Raises:
        ValueError: If 'filename' is neither None nor a string.
    """
    self._initialize_base_variables(warnings=warnings)
    if filename is None:
        # Initialize new model.
        # The list defaults in the signature are single objects shared by every call. Forward copies so
        # that whatever the instance stores (and may later mutate) never aliases those shared defaults
        # or a list owned by the caller. Non-list values are forwarded untouched so that any downstream
        # type validation still sees exactly what the caller passed.
        self._initialize_base_params(
            attributes=attributes,
            n_alt=n_alt,
            layers_dim=list(layers_dim) if isinstance(layers_dim, list) else layers_dim,
            regularizer=regularizer,
            regularization_rate=regularization_rate,
            learning_rate=learning_rate,
            optimizer=optimizer,
            loss=loss,
            metrics=list(metrics) if isinstance(metrics, list) else metrics,
        )
    elif isinstance(filename, str):
        # Load model from file
        self.load(filename)
    else:
        raise ValueError("The 'filename' parameter should be a string.")

evaluate(x, y, **kwargs) #

Returns the loss value & metrics values for the model for a given input.

PARAMETER DESCRIPTION
x

Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame.

TYPE: Union[Tensor, ndarray, DataFrame]

y

The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or np.ndarray. It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with one-hot encoded alternatives.

TYPE: Union[Tensor, ndarray]

**kwargs

Additional arguments passed to the keras model. See tf.keras.Model.evaluate() for details.

DEFAULT: {}

RETURNS DESCRIPTION
Union[float, list]

Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). See tf.keras.Model.evaluate() for details.

Source code in runn/models/base.py
def evaluate(
    self, x: Union[tf.Tensor, np.ndarray, pd.DataFrame], y: Union[tf.Tensor, np.ndarray], **kwargs
) -> Union[float, list]:
    """Compute the loss and metric values of the fitted model on a labelled sample.

    Args:
        x: Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame. DataFrames and arrays are converted
            to tensors before being handed to the keras model.
        y: The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or
            np.ndarray. It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with
            one-hot encoded alternatives.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.evaluate() for details.

    Returns:
        Whatever the underlying keras model's ``evaluate`` returns: a scalar test loss or a list of scalars.
            See tf.keras.Model.evaluate() for details.

    Raises:
        Exception: If the model has not been fitted yet.
        ValueError: If 'y' is not one-hot encoded and holds values outside [0, n_alt-1].
    """
    if self.fitted is False:
        raise Exception("The model is not fitted yet. Please call the 'fit' method first.")

    # Features: DataFrame -> ndarray -> tensor; anything else is forwarded as received.
    features = x.values if isinstance(x, pd.DataFrame) else x
    if isinstance(features, np.ndarray):
        features = tf.convert_to_tensor(features)

    # Labels are inspected with numpy, so tensors are materialised first.
    labels = y.numpy() if isinstance(y, tf.Tensor) else y
    is_one_hot = len(labels.shape) == 2 and labels.shape[1] == self.n_alt
    if not is_one_hot:
        # Not one-hot encoded, hence the labels must be alternative indices in [0, n_alt-1].
        out_of_range = np.any(labels < 0) or np.any(labels >= self.n_alt)
        if out_of_range:
            raise ValueError("The input parameter 'y' should contain integers in the range [0, n_alt-1].")

    return self.keras_model.evaluate(features, labels, **kwargs)

fit(x, y, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, **kwargs) #

Train the model.

PARAMETER DESCRIPTION
x

Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame.

TYPE: Union[Tensor, ndarray, DataFrame]

y

The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or np.ndarray. It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with one-hot encoded alternatives.

TYPE: Union[Tensor, ndarray]

batch_size

Number of samples per gradient update. If unspecified, batch_size will default to 32.

TYPE: Optional[int] DEFAULT: None

epochs

Number of epochs to train the model. An epoch is an iteration over the entire x and y data provided. Default: 1.

TYPE: int DEFAULT: 1

verbose

Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 1.

TYPE: int DEFAULT: 1

callbacks

List of tf.keras.callbacks.Callback instances. List of callbacks to apply during training. See tf.keras.callbacks for details. Default: None.

TYPE: Optional[list] DEFAULT: None

validation_split

Float between 0 and 1. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the x and y data provided, before shuffling. Default: 0.0.

TYPE: float DEFAULT: 0.0

validation_data

Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. This could be a tuple (x_val, y_val) or a tuple (x_val, y_val, val_sample_weights). Default: None.

TYPE: Optional[tuple] DEFAULT: None

**kwargs

Additional arguments passed to the keras model. See tf.keras.Model.fit() for details.

DEFAULT: {}

Source code in runn/models/base.py
def fit(
    self,
    x: Union[tf.Tensor, np.ndarray, pd.DataFrame],
    y: Union[tf.Tensor, np.ndarray],
    batch_size: Optional[int] = None,
    epochs: int = 1,
    verbose: int = 1,
    callbacks: Optional[list] = None,
    validation_split: float = 0.0,
    validation_data: Optional[tuple] = None,
    **kwargs,
) -> None:
    """Train the model.

    On success the training history is stored in ``self.history`` and ``self.fitted`` is set to True.

    Args:
        x: Input data. Can be a tf.Tensor, np.ndarray or pd.DataFrame. It is passed to the keras model
            unchanged.
        y: The alternative selected by each decision maker in the sample x. Can be either a tf.Tensor or np.ndarray.
            It should be a 1D array with integers in the range [0, n_alt-1] or a 2D array with one-hot encoded
            alternatives.
        batch_size: Number of samples per gradient update. If unspecified, batch_size will default to 32.
        epochs: Number of epochs to train the model. An epoch is an iteration over the entire x and y data
            provided. Default: 1.
        verbose: Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 1.
        callbacks: List of tf.keras.callbacks.Callback instances to apply during training.
            See tf.keras.callbacks for details. Default: None.
        validation_split: Float between 0 and 1. Fraction of the training data to be used as validation data.
            The model will set apart this fraction of the training data, will not train on it, and will evaluate
            the loss and any model metrics on this data at the end of each epoch. The validation data is selected
            from the last samples in the x and y data provided, before shuffling. Default: 0.0.
        validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The
            model will not be trained on this data. This could be a tuple (x_val, y_val) or a tuple (x_val, y_val,
            val_sample_weights). x_val and y_val are normalised and validated like the training data; the
            sample weights, when present, are forwarded untouched. Default: None.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.fit() for details.

    Raises:
        ValueError: If the keras model is not initialized, if 'y' or 'y_val' are not one-hot encoded and hold
            values outside [0, n_alt-1], or if 'validation_data' is not a tuple of length 2 or 3.
    """
    # Check if the model is initialized
    if self.keras_model is None:
        raise ValueError("The model is not initialized yet. Please initialize the model first.")

    def _checked_labels(labels, arg_name):
        # Shared validation for training and validation labels: materialise tensors as numpy, then
        # require either a one-hot (n, n_alt) array or alternative indices in [0, n_alt-1].
        if isinstance(labels, tf.Tensor):
            labels = labels.numpy()
        if not (len(labels.shape) == 2 and labels.shape[1] == self.n_alt):
            if np.any(labels < 0) or np.any(labels >= self.n_alt):
                raise ValueError(
                    "The input parameter '{}' should contain integers in the range [0, n_alt-1].".format(arg_name)
                )
        return labels

    y = _checked_labels(y, "y")

    if validation_data is not None:
        # Both documented forms are accepted: (x_val, y_val) and (x_val, y_val, val_sample_weights).
        if not (isinstance(validation_data, tuple) and len(validation_data) in (2, 3)):
            raise ValueError(
                "The 'validation_data' parameter should be a tuple (x_val, y_val) or "
                "(x_val, y_val, val_sample_weights)."
            )
        x_val, y_val, *val_weights = validation_data
        if isinstance(x_val, pd.DataFrame):
            x_val = x_val.values
        if isinstance(x_val, np.ndarray):
            x_val = tf.convert_to_tensor(x_val)
        y_val = _checked_labels(y_val, "y_val")
        # Re-assemble the tuple, keeping the optional sample weights exactly as received.
        validation_data = (x_val, y_val, *val_weights)

    # Keyword arguments keep the call independent of the positional order of keras' fit() signature.
    history = self.keras_model.fit(
        x,
        y,
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        callbacks=callbacks,
        validation_split=validation_split,
        validation_data=validation_data,
        **kwargs,
    )
    self.history = history.history
    self.fitted = True

get_history() #

Return the history of the model training.

RETURNS DESCRIPTION
dict

A dictionary containing the loss and metrics values at the end of each epoch.

Source code in runn/models/base.py
def get_history(self) -> dict:
    """Return the training history recorded on this model.

    Returns:
        The object stored in ``self.history``: a dictionary containing the loss and metrics values at the
            end of each epoch.

    Raises:
        Exception: If no history is available (``self.history`` is None), i.e. the model was not fitted.
    """
    recorded = self.history
    if recorded is not None:
        return recorded
    raise Exception("The model is not fitted yet. Please call the 'fit' method first.")

get_utility(x, name) abstractmethod #

Source code in runn/models/base.py
@abstractmethod
def get_utility(self, x: Union[tf.Tensor, np.ndarray, pd.DataFrame], name: str) -> np.ndarray:
    """Abstract hook that concrete models must override.

    This base version contains no logic and always raises.

    Args:
        x: Input data (tf.Tensor, np.ndarray or pd.DataFrame).
        name: String argument whose meaning is defined by the overriding model; it is not used here.

    Returns:
        An np.ndarray in overriding implementations. Presumably the utilities computed for 'x' -- confirm
            against the concrete models.

    Raises:
        NotImplementedError: Always, when the base implementation is called.
    """
    raise NotImplementedError()

load(path) abstractmethod #

Source code in runn/models/base.py
@abstractmethod
def load(self, path: str) -> None:
    """Abstract hook that concrete models must override to restore a model from 'path'.

    The constructor calls this method when it receives a string 'filename'. This base version contains
    no logic and always raises.

    Args:
        path: Location of the stored model.

    Raises:
        NotImplementedError: Always, when the base implementation is called.
    """
    raise NotImplementedError()

plot_model(filename=None, expand_nested=True, dpi=96) #

Generate a graphical representation of the model.

PARAMETER DESCRIPTION
filename

File to which the plot will be saved. If None, the plot will only be displayed on screen. Default: None.

TYPE: str DEFAULT: None

expand_nested

Whether to expand nested models into clusters. Default: True.

DEFAULT: True

dpi

Resolution of the plot. Default: 96.

TYPE: int DEFAULT: 96

Source code in runn/models/base.py
def plot_model(self, filename: Optional[str] = None, expand_nested: bool = True, dpi: int = 96) -> Optional[object]:
    """Generate a graphical representation of the model and save it to a file.

    Args:
        filename: File to which the plot will be saved. If None, the plot is saved to '<ClassName>.png',
            where <ClassName> is the name of the model class. Default: None.
        expand_nested: Whether to expand nested models into clusters. Default: True.
        dpi: Resolution of the plot. Default: 96.

    Returns:
        Whatever the module-level ``plot_model`` helper returns for the underlying keras model.

    Raises:
        ValueError: If the keras model is not initialized yet.
    """
    if self.keras_model is None:
        raise ValueError("Keras model is not initialized yet. Please initialize the model first.")
    if filename is None:
        # The plotting helper is always given a target file, so fall back to a name derived from the class.
        filename = self.__class__.__name__ + ".png"
    # NOTE: inside the class body this name resolves to the module-level plotting helper, not this method.
    return plot_model(
        self.keras_model,
        show_shapes=True,
        show_layer_names=True,
        expand_nested=expand_nested,
        rankdir="TB",
        style=0,
        color=True,
        to_file=filename,
        dpi=dpi,
    )

predict(x, **kwargs) #

Predict the choice probabilities for a given input.

PARAMETER DESCRIPTION
x

Input data.

TYPE: Union[Tensor, ndarray, DataFrame]

**kwargs

Additional arguments passed to the keras model. See tf.keras.Model.predict() for details.

DEFAULT: {}

RETURNS DESCRIPTION
ndarray

Numpy array with the choice probabilities for each alternative.

Source code in runn/models/base.py
def predict(self, x: Union[tf.Tensor, np.ndarray, pd.DataFrame], **kwargs) -> np.ndarray:
    """Predict the choice probabilities for a given input.

    Args:
        x: Input data. A pd.DataFrame is reduced to its values, and an np.ndarray is converted to a tensor;
            any other input is forwarded to the keras model as received.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.predict() for details.

    Returns:
        The output of the keras model's ``predict``: a numpy array with the choice probabilities for each
            alternative.

    Raises:
        Exception: If the model has not been fitted yet.
    """
    if self.fitted is False:
        raise Exception("The model is not fitted yet. Please call the 'fit' method first.")

    inputs = x.values if isinstance(x, pd.DataFrame) else x
    if isinstance(inputs, np.ndarray):
        inputs = tf.convert_to_tensor(inputs)
    return self.keras_model.predict(inputs, **kwargs)

save(path='model.zip') abstractmethod #

Source code in runn/models/base.py
@abstractmethod
def save(self, path: str = "model.zip") -> None:
    """Abstract hook that concrete models must override to store the model at 'path'.

    This base version contains no logic and always raises.

    Args:
        path: Destination of the stored model. Default: 'model.zip'.

    Raises:
        NotImplementedError: Always, when the base implementation is called.
    """
    raise NotImplementedError()

summary(line_length=100, **kwargs) #

Print a summary of the keras model.

PARAMETER DESCRIPTION
line_length

Total length of printed lines. Default: 100.

TYPE: int DEFAULT: 100

**kwargs

Additional arguments passed to the keras model. See tf.keras.Model.summary() for details.

DEFAULT: {}

Source code in runn/models/base.py
def summary(self, line_length: int = 100, **kwargs) -> None:
    """Print a report of the model: a class-name banner, the data summary and the keras summary.

    Args:
        line_length: Total length of printed lines. Forwarded to both the data summary and the keras
            summary. Default: 100.
        **kwargs: Additional arguments passed to the keras model. See tf.keras.Model.summary() for details.

    Raises:
        Exception: If the keras model is not initialized yet.
    """
    if self.keras_model is None:
        raise Exception("Keras model is not initialized yet. Please initialize the model first.")

    banner = "------ {} ------".format(self.__class__.__name__)
    print(banner)
    self._print_data_summary(line_length=line_length)

    keras_heading = "\nSummary of the keras model:"
    print(keras_heading)
    self.keras_model.summary(line_length=line_length, **kwargs)