In [1]:
from functools import partial
from pprint import pprint

import numpy as np
import pandas as pd

from hyperopt import fmin, hp, space_eval, tpe, STATUS_OK, Trials
from hyperopt.pyll import scope, stochastic
from plotly import express as px
from plotly import graph_objects as go
from plotly import offline as pyo
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import cross_val_score, KFold
from sklearn.utils import check_random_state

pyo.init_notebook_mode()
In [2]:
MEDIAN_HOME_VALUE = "median_home_value"

# Load the Boston housing dataset using sklearn's helper function
boston_dataset = load_boston()
# Convert the data into a Pandas dataframe
data = np.concatenate(
    [boston_dataset["data"], boston_dataset["target"][..., np.newaxis]],
    axis=1,
)
features, target = boston_dataset["feature_names"], MEDIAN_HOME_VALUE
columns = np.concatenate([features, [target]])
boston_dataset_df = pd.DataFrame(data, columns=columns)
In [3]:
boston_dataset_df
Out[3]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT median_home_value
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33 36.2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 21.0 391.99 9.67 22.4
502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 21.0 396.90 9.08 20.6
503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 21.0 396.90 5.64 23.9
504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 21.0 393.45 6.48 22.0
505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 273.0 21.0 396.90 7.88 11.9

506 rows × 14 columns

In [4]:
# Define constant strings that we will use as keys in the “search space” dictionary below. Note that
# the convention I use throughout is to store each string in a variable whose name is the
# capitalized version of the string itself. This convention makes it easy to interpret what these
# variables mean when we encounter them in the code. For example, we know that the variable `MODEL`
# contains the string “model”. Representing strings with variables also helps me avoid typos when
# repeatedly using the same string throughout the code, since typos in variable names will be caught
# as errors by my linter.
GRADIENT_BOOSTING_REGRESSOR = "gradient_boosting_regressor"
KWARGS = "kwargs"
LEARNING_RATE = "learning_rate"
LINEAR_REGRESSION = "linear_regression"
MAX_DEPTH = "max_depth"
MODEL = "model"
MODEL_CHOICE = "model_choice"
NORMALIZE = "normalize"
N_ESTIMATORS = "n_estimators"
RANDOM_FOREST_REGRESSOR = "random_forest_regressor"
RANDOM_STATE = "random_state"

# Declare the search space for the random forest regressor model.
random_forest_regressor = {
    MODEL: RANDOM_FOREST_REGRESSOR,
    # I define the model parameters as a separate dictionary so that we can feed the parameters into
    # the `__init__` of the model with dictionary unpacking. See the `sample_to_model` function
    # defined alongside the objective function for this in action.
    KWARGS: {
        N_ESTIMATORS: scope.int(
            hp.quniform(f"{RANDOM_FOREST_REGRESSOR}__{N_ESTIMATORS}", 50, 150, 1)
        ),
        MAX_DEPTH: scope.int(
            hp.quniform(f"{RANDOM_FOREST_REGRESSOR}__{MAX_DEPTH}", 2, 12, 1)
        ),
        RANDOM_STATE: 0,
    },
}

# Declare the search space for the gradient boosting regressor model, following the same structure
# as the random forest regressor search space.
gradient_boosting_regressor = {
    MODEL: GRADIENT_BOOSTING_REGRESSOR,
    KWARGS: {
        LEARNING_RATE: scope.float(
            hp.uniform(
                f"{GRADIENT_BOOSTING_REGRESSOR}__{LEARNING_RATE}",
                0.01,
                0.15,
            )
        ),  # search over relatively small learning rates
        N_ESTIMATORS: scope.int(
            hp.quniform(f"{GRADIENT_BOOSTING_REGRESSOR}__{N_ESTIMATORS}", 50, 150, 1)
        ),
        MAX_DEPTH: scope.int(
            hp.quniform(f"{GRADIENT_BOOSTING_REGRESSOR}__{MAX_DEPTH}", 2, 12, 1)
        ),
        RANDOM_STATE: 0,
    },
}

# Combine both model search spaces with a top level "choice" between the two models to get the final
# search space.
space = {
    MODEL_CHOICE: hp.choice(
        MODEL_CHOICE,
        [
            random_forest_regressor,
            gradient_boosting_regressor,
        ],
    )
}
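Before handing the search space to `fmin`, it can be helpful to draw a few raw samples from it and confirm that the structure and value ranges look right. This is a quick, illustrative check (not part of the original run) using the `stochastic` module imported above:

# Illustrative sanity check: draw a few samples from the search space and inspect them.
for _ in range(3):
    pprint(stochastic.sample(space))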
In [5]:
# Define a few additional variables to represent strings. Note that this code expects that we have
# access to all variables that we previously defined in the "search space" code snippet.
LOSS = "loss"
STATUS = "status"

# Mapping from string name to model class that we'll use to create an initialized version of a
# model from a sample generated from the search space by hyperopt.
MODELS = {
    GRADIENT_BOOSTING_REGRESSOR: GradientBoostingRegressor,
    RANDOM_FOREST_REGRESSOR: RandomForestRegressor,
}
# Create a scoring function that we'll use in our objective
mse_scorer = make_scorer(mean_squared_error)


# Helper function that converts a sample generated by hyperopt into an initialized model. Note
# that because we split the model type and model keyword-arguments into separate key-value pairs in
# the search space declaration we are able to use dictionary unpacking to create an initialized
# version of the model.
def sample_to_model(sample):
    kwargs = sample[MODEL_CHOICE][KWARGS]
    return MODELS[sample[MODEL_CHOICE][MODEL]](**kwargs)


# Define the objective function for hyperopt. We'll fix the `dataset_df`, `features`, and `target`
# arguments with `functools.partial` to create the version of this function that we will supply as
# an argument to `fmin`.
def objective(sample, dataset_df, features, target):
    model = sample_to_model(sample)
    rng = check_random_state(0)
    # Handle randomization by shuffling when creating folds. In reality, we probably want a better
    # strategy for managing randomization than the fixed `RandomState` instance generated above.
    cv = KFold(n_splits=10, random_state=rng, shuffle=True)
    # Calculate the mean squared error for each fold. Since `n_splits` is 10, `mse` will be an
    # array of size 10 with each element representing the mean squared error for a fold.
    mse = cross_val_score(
        model,
        dataset_df.loc[:, features],
        dataset_df.loc[:, target],
        scoring=mse_scorer,
        cv=cv,
    )
    # Return average of mean squared error across all folds.
    return {LOSS: np.mean(mse), STATUS: STATUS_OK}
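As a quick smoke test (illustrative only, not part of the original run), we can draw a single sample from the search space and evaluate the objective on it directly before wiring everything into `fmin`:

# Hypothetical smoke test: build a model from one sample and score it with the objective.
sample = stochastic.sample(space)
print(sample[MODEL_CHOICE][MODEL])
print(
    objective(
        sample,
        dataset_df=boston_dataset_df,
        features=features,
        target=MEDIAN_HOME_VALUE,
    )
)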
In [6]:
# Since we defined our objective function to be generic in terms of the dataset, we use `partial`
# from the `functools` module to "fix" the `dataset_df`, `features`, and `target` arguments to the
# values that we want for this example. This gives us an objective function that takes in only one
# argument, as assumed by the `hyperopt` interface.
boston_objective = partial(
    objective, dataset_df=boston_dataset_df, features=features, target=MEDIAN_HOME_VALUE
)
# `hyperopt` tracks the results of each iteration in this `Trials` object. We’ll be collecting the
# data that we will use for visualization from this object.
trials = Trials()
rng = check_random_state(0)  # reproducibility!
# `fmin` searches for hyperparameters that “minimize” our objective (mean squared error) and returns
# the “best” set of hyperparameters.
best = fmin(boston_objective, space, algo=tpe.suggest, max_evals=1000, trials=trials, rstate=rng)
100%|██████████| 1000/1000 [47:51<00:00,  2.87s/trial, best loss: 10.319696358611179] 
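`fmin` returns the winning assignment as a flat dictionary of label-to-value pairs. To read it back in terms of our nested search space (which model won and with which keyword arguments), we can pass it through `space_eval`, imported above; a small illustrative check:

# Map the flat `best` assignment back onto the structure of the search space.
pprint(best)
pprint(space_eval(space, best))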
In [7]:
pprint([t for t in trials][:5])
[{'book_time': datetime.datetime(2020, 11, 4, 0, 51, 42, 199000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'gradient_boosting_regressor__learning_rate': [],
                    'gradient_boosting_regressor__max_depth': [],
                    'gradient_boosting_regressor__n_estimators': [],
                    'model_choice': [0],
                    'random_forest_regressor__max_depth': [0],
                    'random_forest_regressor__n_estimators': [0]},
           'tid': 0,
           'vals': {'gradient_boosting_regressor__learning_rate': [],
                    'gradient_boosting_regressor__max_depth': [],
                    'gradient_boosting_regressor__n_estimators': [],
                    'model_choice': [0],
                    'random_forest_regressor__max_depth': [5.0],
                    'random_forest_regressor__n_estimators': [90.0]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2020, 11, 4, 0, 51, 46, 83000),
  'result': {'loss': 16.359897953574603, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 0,
  'version': 0},
 {'book_time': datetime.datetime(2020, 11, 4, 0, 51, 46, 92000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'gradient_boosting_regressor__learning_rate': [1],
                    'gradient_boosting_regressor__max_depth': [1],
                    'gradient_boosting_regressor__n_estimators': [1],
                    'model_choice': [1],
                    'random_forest_regressor__max_depth': [],
                    'random_forest_regressor__n_estimators': []},
           'tid': 1,
           'vals': {'gradient_boosting_regressor__learning_rate': [0.03819110609989756],
                    'gradient_boosting_regressor__max_depth': [8.0],
                    'gradient_boosting_regressor__n_estimators': [137.0],
                    'model_choice': [1],
                    'random_forest_regressor__max_depth': [],
                    'random_forest_regressor__n_estimators': []},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2020, 11, 4, 0, 51, 52, 70000),
  'result': {'loss': 18.045981512632412, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 1,
  'version': 0},
 {'book_time': datetime.datetime(2020, 11, 4, 0, 51, 52, 81000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'gradient_boosting_regressor__learning_rate': [2],
                    'gradient_boosting_regressor__max_depth': [2],
                    'gradient_boosting_regressor__n_estimators': [2],
                    'model_choice': [2],
                    'random_forest_regressor__max_depth': [],
                    'random_forest_regressor__n_estimators': []},
           'tid': 2,
           'vals': {'gradient_boosting_regressor__learning_rate': [0.08587985607913044],
                    'gradient_boosting_regressor__max_depth': [12.0],
                    'gradient_boosting_regressor__n_estimators': [95.0],
                    'model_choice': [1],
                    'random_forest_regressor__max_depth': [],
                    'random_forest_regressor__n_estimators': []},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2020, 11, 4, 0, 51, 57, 519000),
  'result': {'loss': 21.235091223167437, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 2,
  'version': 0},
 {'book_time': datetime.datetime(2020, 11, 4, 0, 51, 57, 528000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'gradient_boosting_regressor__learning_rate': [],
                    'gradient_boosting_regressor__max_depth': [],
                    'gradient_boosting_regressor__n_estimators': [],
                    'model_choice': [3],
                    'random_forest_regressor__max_depth': [3],
                    'random_forest_regressor__n_estimators': [3]},
           'tid': 3,
           'vals': {'gradient_boosting_regressor__learning_rate': [],
                    'gradient_boosting_regressor__max_depth': [],
                    'gradient_boosting_regressor__n_estimators': [],
                    'model_choice': [0],
                    'random_forest_regressor__max_depth': [2.0],
                    'random_forest_regressor__n_estimators': [93.0]},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2020, 11, 4, 0, 52, 0, 661000),
  'result': {'loss': 23.582397665666413, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 3,
  'version': 0},
 {'book_time': datetime.datetime(2020, 11, 4, 0, 52, 0, 670000),
  'exp_key': None,
  'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
           'idxs': {'gradient_boosting_regressor__learning_rate': [4],
                    'gradient_boosting_regressor__max_depth': [4],
                    'gradient_boosting_regressor__n_estimators': [4],
                    'model_choice': [4],
                    'random_forest_regressor__max_depth': [],
                    'random_forest_regressor__n_estimators': []},
           'tid': 4,
           'vals': {'gradient_boosting_regressor__learning_rate': [0.0638511443414372],
                    'gradient_boosting_regressor__max_depth': [5.0],
                    'gradient_boosting_regressor__n_estimators': [72.0],
                    'model_choice': [1],
                    'random_forest_regressor__max_depth': [],
                    'random_forest_regressor__n_estimators': []},
           'workdir': None},
  'owner': None,
  'refresh_time': datetime.datetime(2020, 11, 4, 0, 52, 2, 875000),
  'result': {'loss': 15.253327611719737, 'status': 'ok'},
  'spec': None,
  'state': 2,
  'tid': 4,
  'version': 0}]
In [8]:
# This is a simple helper function that allows us to fill in `np.nan` when a particular
# hyperparameter is not relevant to a particular trial.
def unpack(x):
    if x:
        return x[0]
    return np.nan


# We'll first turn each trial into a series and then stack those series together as a dataframe.
trials_df = pd.DataFrame([pd.Series(t["misc"]["vals"]).apply(unpack) for t in trials])
# Then we'll add the loss and trial number as additional columns and map the model choice index back
# to the model name for convenience.
trials_df["loss"] = [t["result"]["loss"] for t in trials]
trials_df["trial_number"] = trials_df.index
trials_df[MODEL_CHOICE] = trials_df[MODEL_CHOICE].apply(
    lambda x: RANDOM_FOREST_REGRESSOR if x == 0 else GRADIENT_BOOSTING_REGRESSOR
)
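As a quick consistency check (illustrative only), the smallest loss in `trials_df` should match the best loss reported in the `fmin` progress bar above:

# The top rows here should correspond to the best loss reported by `fmin`.
trials_df.sort_values("loss").head()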
In [9]:
trials_df
Out[9]:
gradient_boosting_regressor__learning_rate gradient_boosting_regressor__max_depth gradient_boosting_regressor__n_estimators model_choice random_forest_regressor__max_depth random_forest_regressor__n_estimators loss trial_number
0 NaN NaN NaN random_forest_regressor 5.0 90.0 16.359898 0
1 0.038191 8.0 137.0 gradient_boosting_regressor NaN NaN 18.045982 1
2 0.085880 12.0 95.0 gradient_boosting_regressor NaN NaN 21.235091 2
3 NaN NaN NaN random_forest_regressor 2.0 93.0 23.582398 3
4 0.063851 5.0 72.0 gradient_boosting_regressor NaN NaN 15.253328 4
... ... ... ... ... ... ... ... ...
995 0.096458 4.0 135.0 gradient_boosting_regressor NaN NaN 12.958627 995
996 0.140628 3.0 127.0 gradient_boosting_regressor NaN NaN 11.505915 996
997 0.125948 2.0 142.0 gradient_boosting_regressor NaN NaN 12.944855 997
998 0.083939 4.0 138.0 gradient_boosting_regressor NaN NaN 13.804730 998
999 NaN NaN NaN random_forest_regressor 4.0 74.0 17.077669 999

1000 rows × 8 columns

In [10]:
# `px` is the alias for plotly's "express" module created by the conventional import
# `from plotly import express as px` above.
fig = px.scatter(trials_df, x="trial_number", y="loss")
fig.show()
In [11]:
fig = px.scatter(trials_df, x="trial_number", y="loss", color=MODEL_CHOICE)
fig.show()
In [12]:
def add_hover_data(fig, df, model_choice):
    # Filter to only columns that are relevant to the current model choice. Note that this relies on
    # the convention of including the model name in the hyperparameter name when we declare the
    # search space.
    cols = [col for col in df.columns if model_choice in col]
    fig.update_traces(
        # This specifies the data that we want to plot for the current model choice.
        customdata=df.loc[
            df[MODEL_CHOICE] == model_choice, cols + [MODEL_CHOICE]
        ],
        hovertemplate="<br>".join(
            [
                f"{col.split('__')[1]}: %{{customdata[{i}]}}"
                for i, col in enumerate(cols)
            ]
        )
        + "<extra></extra>",
        # We only apply the hover data for the current model choice.
        selector={"name": model_choice},
    )
    return fig


fig = px.scatter(
    trials_df,
    x="trial_number",
    y="loss",
    color=MODEL_CHOICE,
)
# We call the `add_hover_data` function once for each model type so that we can add different sets
# of hyperparameters as hover data for each model type.
fig = add_hover_data(fig, trials_df, RANDOM_FOREST_REGRESSOR)
fig = add_hover_data(fig, trials_df, GRADIENT_BOOSTING_REGRESSOR)
fig.show()
In [13]:
# Since max_depth == 3 outperforms other settings, we'll filter to only look at that slice. This
# creates a boolean array that we will use to filter down to relevant rows in the `trials_df`
# dataframe.
max_depth_filter = (trials_df[MODEL_CHOICE] == GRADIENT_BOOSTING_REGRESSOR) & (
    trials_df["gradient_boosting_regressor__max_depth"] == 3
)

# plotly express does not support contour plots, so we will use `graph_objects` instead. `go.Contour`
# automatically interpolates "z" values for our loss.
fig = go.Figure(
    data=go.Contour(
        z=trials_df.loc[max_depth_filter, "loss"],
        x=trials_df.loc[max_depth_filter, "gradient_boosting_regressor__learning_rate"],
        y=trials_df.loc[max_depth_filter, "gradient_boosting_regressor__n_estimators"],
        contours=dict(
            showlabels=True,  # show labels on contours
            labelfont=dict(
                size=12,
                color="white",
            ),  # label font properties
        ),
        colorbar=dict(
            title="loss",
            titleside="right",
        ),
        hovertemplate="loss: %{z}<br>learning_rate: %{x}<br>n_estimators: %{y}<extra></extra>",
    )
)

fig.update_layout(
    xaxis_title="learning_rate",
    yaxis_title="n_estimators",
    title={
        "text": "learning_rate vs. n_estimators | max_depth == 3",
        "xanchor": "center",
        "yanchor": "top",
        "x": 0.5,
    },
)

fig.show()