Hyperopt, part 3 (conditional parameters)

What (shockingly) little Hyperopt documentation exists does mention conditional hyperparameter tuning (for example, I only need a degree parameter if my SVM has a polynomial kernel). However, after trying three different examples of how to use conditional parameters, I was ready to give up, because none of them worked! Then I found a Kaggle tutorial that explained the missing piece: Hyperopt hands conditional parameters back as nested dictionaries, and I have to unpack (flatten) them myself before passing them to a classifier, because scikit-learn's set_params() can't do that for me. So here is a working (for me, at least) example of how to use conditional hyperparameters in Hyperopt with scikit-learn classifiers. You'll have to supply your own data, though…

import copy

import numpy as np

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope as ho_scope
from sklearn.model_selection import cross_val_score, LeaveOneOut, StratifiedKFold
from sklearn import svm, tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import normalize, scale
from xgboost import XGBClassifier

def f_unpack_dict(dct):
    """
    Unpacks all sub-dictionaries in given dictionary recursively. There should be no duplicated keys 
    across all nested subdictionaries, or some instances will be lost without warning
    
    Source: https://www.kaggle.com/fanvacoolt/tutorial-on-hyperopt
    
    Parameters:
    ----------------
    dct : dictionary to unpack
    
    Returns:
    ----------------
    : unpacked dictionary
    """
    
    res = {}
    for (k, v) in dct.items():
        if isinstance(v, dict):
            res = {**res, **f_unpack_dict(v)}
        else:
            res[k] = v
            
    return res
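
# To make the flattening concrete: a made-up example of the kind of nested dictionary
# hp.choice produces for the SVM space defined below, and what f_unpack_dict() turns it
# into (a flat dict that set_params() will accept).
sampled_example = {'kernel': {'kernel': 'poly', 'degree': 3}, 'C': 0.5, 'gamma': 'auto'}
print(f_unpack_dict(sampled_example))
# {'kernel': 'poly', 'degree': 3, 'C': 0.5, 'gamma': 'auto'}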

# Set up space to search.
# I want to search the space for each classifier type separately

# RandomForestClassifier objects don't have an estimators_ attribute until fitted.
# But apparently other scikit-learn classifiers do?!
rf = RandomForestClassifier(random_state=1, n_jobs=1, n_estimators=2)
rf.estimators_ = None  # different behavior from other sklearn model types

# dictionary defining search space
models_to_compare = {
    'classification_tree':{
        'clf':tree.DecisionTreeClassifier(random_state=1)
        ,'parameters':{
            'min_samples_split':ho_scope.int(hp.uniform('min_samples_split', 2, 41)) # roughly the 1 to 40 range suggested by https://arxiv.org/abs/1812.02207
            ,'min_samples_leaf':ho_scope.int(hp.uniform('min_samples_leaf', 2, 21)) # roughly the 1 to 20 range suggested by https://arxiv.org/abs/1812.02207
            , 'max_features': hp.choice('max_features', ['sqrt', 'log2'])
            ,'scale': hp.choice('scale', [0, 1])
            ,'normalize': hp.choice('normalize', [0, 1])
            # criterion doesn't make much difference https://www.unine.ch/files/live/sites/imi/files/shared/documents/papers/Gini_index_fulltext.pdf
        }
    }
    ,'xgboost':{
        'clf':XGBClassifier(random_state=1, n_jobs=1, eval_metric='logloss',  use_label_encoder=False
                            #, tree_method='gpu_hist', gpu_id=0
                           )
        , 'parameters':{
            'learning_rate':hp.loguniform('learning_rate', np.log(0.001), np.log(0.1))
            ,'n_estimators':ho_scope.int(hp.loguniform('n_estimators', np.log(10), np.log(1000)))
            ,'subsample':hp.uniform('subsample',0.1,1.0)
            ,'max_depth':ho_scope.int(hp.uniform('max_depth',1,15)) #7,9
            ,'scale': hp.choice('scale', [0, 1])
            ,'normalize': hp.choice('normalize', [0, 1])
        }
    }
    ,'random_forest':{
        'clf': rf
        ,'parameters':{
            'n_estimators':ho_scope.int(hp.loguniform('n_estimators',np.log(2),np.log(1000)))
            , 'max_features': hp.choice('max_features', ['sqrt', 'log2'])
            ,'scale': hp.choice('scale', [0, 1])
            ,'normalize': hp.choice('normalize', [0, 1])
        }
    }
    ,'svm':{
        'clf':svm.SVC(random_state=1)
        , 'parameters':{

            'kernel': hp.choice('svm_kernel', [
                {'kernel': 'linear'}
                ,{'kernel': 'sigmoid'}
                ,{'kernel': 'rbf'}
                ,{'kernel': 'poly', 'degree': ho_scope.int(hp.uniform('degree', 2, 5))}
            ])
            ,'C':hp.loguniform('C', np.log(0.001), np.log(100))
            ,'gamma': hp.choice('gamma_type', ['auto', hp.uniform('gamma_value', 0, 20)])
            ,'scale': hp.choice('scale', [0, 1])
            ,'normalize': hp.choice('normalize', [0, 1])
        }
    }
    ,'logistic':{
        'clf': LogisticRegression(random_state=1, solver='saga', n_jobs=1)
        , 'parameters':{
            'penalty': hp.choice('penalty', [
                {'penalty':'none'}
                ,{'penalty':'l1', 'C':hp.loguniform('Cl1', np.log(0.01), np.log(100))}
                ,{'penalty':'l2', 'C':hp.loguniform('Cl2', np.log(0.01), np.log(100))}
                ,{'penalty':'elasticnet', 'l1_ratio': hp.uniform('l1_ratio', 0.1, 0.9), 'C':hp.loguniform('Celastic', np.log(0.01), np.log(100))}
            ])
            ,'scale': hp.choice('scale', [0, 1])
            ,'normalize': hp.choice('normalize', [0, 1])
        }
    }
}
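
# Sanity check (optional): draw one random configuration from the SVM space to see
# what a conditional sample looks like. As far as I can tell, hyperopt's
# pyll.stochastic.sample() accepts a plain dict of hp expressions like this one.
from hyperopt.pyll import stochastic

svm_sample = stochastic.sample(models_to_compare['svm']['parameters'])
print(svm_sample)                 # 'kernel' is a nested dict; 'degree' only shows up for 'poly'
print(f_unpack_dict(svm_sample))  # flattened version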

# cross validation method
cv_method = LeaveOneOut() # ok if small dataset
#cv_method = StratifiedKFold(10) # use this if more than a few hundred or things are slow

# my training and scoring function
# (assumes X and y, your features and labels, are already defined)
def hyperopt_train_test(clf, params):
    X_ = X[:]
    if 'normalize' in params:
        if params['normalize'] == 1:
            X_ = normalize(X_)
        del params['normalize']
    if 'scale' in params:
        if params['scale'] == 1:
            X_ = scale(X_)
        del params['scale']
        #print(f_unpack_dict(params))
    clf_to_test = copy.copy(clf)
    clf_to_test = clf_to_test.set_params(**f_unpack_dict(params))
    return cross_val_score(clf_to_test, X_, y, cv=cv_method, scoring='accuracy', n_jobs=8).mean()

# Function that returns one number that hyperopt will minimize
def report_score_to_minimize(model_spec):
    return -1.0 * hyperopt_train_test(model_spec['clf'], model_spec['parameters'])

# store the best hyperparameters found for each model type
best_models = dict()
for model_type in models_to_compare:
    trials=Trials()
    best = fmin(report_score_to_minimize, models_to_compare[model_type], algo=tpe.suggest, max_evals=100, trials=trials)
    best_models[model_type] = best

best_models
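
# Note: for hp.choice parameters, fmin() reports the *index* of the winning branch,
# not the value itself. To recover the actual values, space_eval() should map the best
# point back onto the original search space (at least, that has worked for me with
# spaces like the ones above):
from hyperopt import space_eval

for model_type, best in best_models.items():
    print(model_type, space_eval(models_to_compare[model_type]['parameters'], best))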