# Source code for grafei.model.normalize_features

##########################################################################
# basf2 (Belle II Analysis Software Framework)                           #
# Author: The Belle II Collaboration                                     #
#                                                                        #
# See git log for contributors and copyright holders.                    #
# This file is licensed under LGPL-3.0, see LICENSE.md.                  #
##########################################################################


import numpy as np
from typing import Union


def _power(array: np.ndarray, power: Union[int, float]):
    """Preprocessing function to take power of given feature."""
    return np.sign(array) * np.power(np.abs(array), power)


def _linear(array: np.ndarray, mu=0.0, sigma=1.0):
    """Preprocessing function to linear scale given feature."""
    return (array - mu) / sigma


# Dispatch table: process name (first element of each process entry given to
# ``normalize_features``) -> function implementing that process.
methods = {
    "power": _power,
    "linear": _linear,
}


def _normalize_column(column_name, names, array, processes):
    """Apply ``processes`` in order to the column of ``array`` called ``column_name``.

    Args:
        column_name (str): Prefixed feature name to look for in ``names``.
        names (list): Column names; position in the list is the column index.
        array (numpy.ndarray): Array indexed as ``array[:, idx]``, modified in place.
        processes (list): Sequences ``[method_name, *args]``; ``method_name``
            must be a key of the module-level ``methods`` table.

    Returns:
        bool: True if ``column_name`` was found in ``names``, False otherwise.
    """
    if column_name not in names:
        return False

    idx = names.index(column_name)
    for proc in processes:
        transform = methods[proc[0]]
        # Everything after the method name is passed positionally.
        array[:, idx] = transform(array[:, idx], *proc[1:])
    return True


def normalize_features(
    normalize=None,
    features=None,
    x=None,
    edge_features=None,
    x_edges=None,
    global_features=None,
    x_global=None,
):
    """
    Function to normalize input features in place.

    ``normalize`` should be a dictionary mapping a feature name (without the
    ``feat_``/``edge_``/``glob_`` prefix) to the list of processes to apply to it,
    in order. Each process is a sequence whose first element is the process name
    and whose remaining elements are its positional arguments, e.g.
    ``{'p': [['power', 0.5], ['linear', -0.5, 4.1]]}``.
    ``power`` and ``linear`` are the only processes supported.

    Each feature is looked up first among the node features (as ``feat_<name>``),
    then the edge features (``edge_<name>``), then the global features
    (``glob_<name>``). Only the first match is normalized; features that match
    nowhere are silently skipped.

    Args:
        normalize (dict): Normalization processes and parameters.
        features (list): List of node feature names.
        x (numpy.ndarray): Array of node features.
        edge_features (list): List of edge feature names.
        x_edges (numpy.ndarray): Array of edge features.
        global_features (list): List of global feature names.
        x_global (numpy.ndarray): Array of global features.

    Returns:
        None. ``x``, ``x_edges`` and ``x_global`` are modified in place.

    Raises:
        KeyError: If a process name is not one of the supported processes.
    """
    if not normalize:
        return

    # (prefix, column names, data array), in lookup order.
    groups = (
        ("feat", [] if features is None else features, x),
        ("edge", [] if edge_features is None else edge_features, x_edges),
        ("glob", [] if global_features is None else global_features, x_global),
    )

    for feat, processes in normalize.items():
        for prefix, names, array in groups:
            if _normalize_column(f"{prefix}_{feat}", names, array, processes):
                break  # assume no features of different type with same name