Hyperopt, part 3 (conditional parameters)
The (shockingly) little Hyperopt documentation that exists does mention conditional hyperparameter tuning (for example, I only need a degree parameter if my SVM has a polynomial kernel). However, after trying three different examples of how to use conditional parameters, I was ready to give up, because none of them worked! Then I found a Kaggle tutorial that explained the missing piece: hp.choice hands back the chosen branch as a nested dictionary, and I have to unpack (flatten) those nested conditions myself before passing them along. Scikit-learn's set_params, for example, can't do that for me. So here is a working (for me at least) example of how to use conditional hyperparameters in Hyperopt with scikit-learn classifiers. You'll have to supply your own data, though…
import copy

import numpy as np

from hyperopt import fmin, tpe, hp, Trials
from hyperopt.pyll import scope as ho_scope
from sklearn import svm, tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, LeaveOneOut, StratifiedKFold
from sklearn.preprocessing import normalize, scale
from xgboost import XGBClassifier

# X, y = ...  # supply your own feature matrix and class labels here


def f_unpack_dict(dct):
    """
    Unpacks all sub-dictionaries in given dictionary recursively.
    There should be no duplicated keys across all nested subdictionaries,
    or some instances will be lost without warning.

    Source: https://www.kaggle.com/fanvacoolt/tutorial-on-hyperopt

    Parameters:
    ----------------
    dct : dictionary to unpack

    Returns:
    ----------------
    : unpacked dictionary
    """
    res = {}
    for (k, v) in dct.items():
        if isinstance(v, dict):
            res = {**res, **f_unpack_dict(v)}
        else:
            res[k] = v
    return res


# Set up space to search.
# I want to search the space for each classifier type separately.
# RandomForestClassifier objects don't have an estimators_ attribute until
# fitted. But apparently other scikit-learn classifiers do?!
rf = RandomForestClassifier(random_state=1, n_jobs=1, n_estimators=2)
rf.estimators_ = None  # different behavior from other sklearn model types

# dictionary defining the search space
models_to_compare = {
    'classification_tree': {
        'clf': tree.DecisionTreeClassifier(random_state=1),
        'parameters': {
            'min_samples_split': ho_scope.int(hp.uniform('min_samples_split', 2, 41)),  # 1 to 40, https://arxiv.org/abs/1812.02207
            'min_samples_leaf': ho_scope.int(hp.uniform('min_samples_leaf', 2, 21)),  # 1 to 20, https://arxiv.org/abs/1812.02207
            'max_features': hp.choice('max_features', ['sqrt', 'log2']),
            'scale': hp.choice('scale', [0, 1]),
            'normalize': hp.choice('normalize', [0, 1]),
            # criterion doesn't make much difference:
            # https://www.unine.ch/files/live/sites/imi/files/shared/documents/papers/Gini_index_fulltext.pdf
        }
    },
    'xgboost': {
        'clf': XGBClassifier(random_state=1, n_jobs=1, eval_metric='logloss',
                             use_label_encoder=False
                             # , tree_method='gpu_hist', gpu_id=0
                             ),
        'parameters': {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(0.1)),
            'n_estimators': ho_scope.int(hp.loguniform('n_estimators', np.log(10), np.log(1000))),
            'subsample': hp.uniform('subsample', 0.1, 1.0),
            'max_depth': ho_scope.int(hp.uniform('max_depth', 1, 15)),  # 7,9
            'scale': hp.choice('scale', [0, 1]),
            'normalize': hp.choice('normalize', [0, 1]),
        }
    },
    'random_forest': {
        'clf': rf,
        'parameters': {
            'n_estimators': ho_scope.int(hp.loguniform('n_estimators', np.log(2), np.log(1000))),
            'max_features': hp.choice('max_features', ['sqrt', 'log2']),
            'scale': hp.choice('scale', [0, 1]),
            'normalize': hp.choice('normalize', [0, 1]),
        }
    },
    'svm': {
        'clf': svm.SVC(random_state=1),
        'parameters': {
            # conditional parameter: 'degree' only exists for the poly kernel
            'kernel': hp.choice('svm_kernel', [
                {'kernel': 'linear'},
                {'kernel': 'sigmoid'},
                {'kernel': 'rbf'},
                {'kernel': 'poly', 'degree': ho_scope.int(hp.uniform('degree', 2, 5))},
            ]),
            'C': hp.loguniform('C', np.log(0.001), np.log(100)),
            'gamma': hp.choice('gamma_type', ['auto', hp.uniform('gamma_value', 0, 20)]),
            'scale': hp.choice('scale', [0, 1]),
            'normalize': hp.choice('normalize', [0, 1]),
        }
    },
    'logistic': {
        'clf': LogisticRegression(random_state=1, solver='saga', n_jobs=1),
        'parameters': {
            # conditional parameters: C and l1_ratio only apply to some penalties
            'penalty': hp.choice('penalty', [
                {'penalty': 'none'},  # newer scikit-learn versions want penalty=None instead of the string 'none'
                {'penalty': 'l1', 'C': hp.loguniform('Cl1', np.log(0.01), np.log(100))},
                {'penalty': 'l2', 'C': hp.loguniform('Cl2', np.log(0.01), np.log(100))},
                {'penalty': 'elasticnet', 'l1_ratio': hp.uniform('l1_ratio', 0.1, 0.9),
                 'C': hp.loguniform('Celastic', np.log(0.01), np.log(100))},
            ]),
            'scale': hp.choice('scale', [0, 1]),
            'normalize': hp.choice('normalize', [0, 1]),
        }
    },
}

# cross-validation method
cv_method = LeaveOneOut()  # ok if small dataset
# cv_method = StratifiedKFold(10)  # use this if more than a few hundred rows, or if things are slow


# my training and scoring function
def hyperopt_train_test(clf, params):
    X_ = X[:]
    # pull out the preprocessing switches before they reach set_params()
    if 'normalize' in params:
        if params['normalize'] == 1:
            X_ = normalize(X_)
        del params['normalize']
    if 'scale' in params:
        if params['scale'] == 1:
            X_ = scale(X_)
        del params['scale']
    # print(f_unpack_dict(params))
    clf_to_test = copy.copy(clf)
    # flatten the conditional (nested) parameters so set_params() accepts them
    clf_to_test = clf_to_test.set_params(**f_unpack_dict(params))
    # score on X_, the (possibly) preprocessed copy of the data
    return cross_val_score(clf_to_test, X_, y, cv=cv_method,
                           scoring='accuracy', n_jobs=8).mean()


# function that returns the one number hyperopt will minimize
def report_score_to_minimize(model_spec):
    return -1.0 * hyperopt_train_test(model_spec['clf'], model_spec['parameters'])


# store the best parameters found for each model type
best_models = dict()
for model_type in models_to_compare:
    trials = Trials()
    best = fmin(report_score_to_minimize, models_to_compare[model_type],
                algo=tpe.suggest, max_evals=100, trials=trials)
    best_models[model_type] = best

best_models  # show the best settings found (notebook-style display)
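
To make the unpacking step concrete, here is a small demonstration (my illustration, not part of the script above) of what f_unpack_dict does. The nested dictionary mimics what Hyperopt might sample from the 'svm' space when it picks the polynomial kernel; the specific values are invented.

# Invented sample: what one draw from the 'svm' space above might look like.
# The conditional branch arrives as a nested dict under 'kernel'.
sampled = {
    'kernel': {'kernel': 'poly', 'degree': 3},
    'C': 2.5,
    'gamma': 'auto',
    'scale': 1,
    'normalize': 0,
}

print(f_unpack_dict(sampled))
# {'kernel': 'poly', 'degree': 3, 'C': 2.5, 'gamma': 'auto', 'scale': 1, 'normalize': 0}

Once 'scale' and 'normalize' are stripped off, this flat dictionary is exactly what set_params() expects; the nested version would just stuff a whole dictionary into the kernel parameter, which breaks at fit time.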
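
If you just want to watch the search run before wiring in real data, a synthetic dataset is enough. This snippet is my stand-in, not part of the original recipe; define X and y this way (or however you like) before the fmin loop runs.

# Hypothetical stand-in data so the script runs end to end;
# replace with your own feature matrix X and label vector y.
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=10, n_informative=5,
                           random_state=1)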
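
One last gotcha worth knowing: fmin() reports each hp.choice parameter as an index into its options list, not as the chosen value, so best_models is full of integers like {'svm_kernel': 3}. Hyperopt's space_eval helper maps those indices back onto the search space. A minimal sketch, assuming the models_to_compare and best_models objects from above:

from hyperopt import space_eval

# Re-evaluate each search space at the best point found, so the hp.choice
# indices turn back into actual parameter values.
for model_type, best in best_models.items():
    resolved = space_eval(models_to_compare[model_type], best)
    print(model_type, resolved['parameters'])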