VotingClassifier
Hard and soft voting
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

voting = VotingClassifier(
    [('logreg', LogisticRegression(C=100)),
     ('tree', DecisionTreeClassifier(max_depth=3, random_state=0))],
    voting='soft')
voting.fit(X_train, y_train)
lr, tree = voting.estimators_
voting.score(X_test, y_test), lr.score(X_test, y_test), \
    tree.score(X_test, y_test)
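For comparison, the same ensemble with majority (hard) voting, which votes on predicted class labels instead of averaging class probabilities; a minimal sketch assuming the same X_train/X_test split:

# Hard voting: majority vote over predicted labels (sketch, same data split assumed).
hard_voting = VotingClassifier(
    [('logreg', LogisticRegression(C=100)),
     ('tree', DecisionTreeClassifier(max_depth=3, random_state=0))],
    voting='hard')
hard_voting.fit(X_train, y_train)
hard_voting.score(X_test, y_test)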
BaggingClassifier, BaggingRegressor
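A minimal bagging sketch, assuming the same train/test split: each base estimator is trained on a bootstrap sample of the training set and the predictions are averaged.

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bag 100 trees, each fit on a bootstrap sample of X_train (illustrative sketch).
bagging = BaggingClassifier(DecisionTreeClassifier(), n_estimators=100,
                            random_state=0)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)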
from sklearn.ensemble import RandomForestClassifier

train_scores = []
test_scores = []
rf = RandomForestClassifier(warm_start=True)
estimator_range = range(1, 100, 5)
for n_estimators in estimator_range:
    rf.n_estimators = n_estimators
    rf.fit(X_train, y_train)
    train_scores.append(rf.score(X_train, y_train))
    test_scores.append(rf.score(X_test, y_test))
train_scores, test_scores, oob_scores = [], [], []
feature_range = range(1, 64, 5)
for max_features in feature_range:
    rf = RandomForestClassifier(max_features=max_features, oob_score=True,
                                n_estimators=200, random_state=0)
    rf.fit(X_train, y_train)
    train_scores.append(rf.score(X_train, y_train))
    test_scores.append(rf.score(X_test, y_test))
    oob_scores.append(rf.oob_score_)
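A short matplotlib sketch (assumed here, not part of the loop above) to compare the collected curves:

import matplotlib.pyplot as plt

# Plot train, test and out-of-bag accuracy against max_features (sketch).
plt.plot(feature_range, train_scores, label="train")
plt.plot(feature_range, test_scores, label="test")
plt.plot(feature_range, oob_scores, label="oob")
plt.xlabel("max_features")
plt.ylabel("accuracy")
plt.legend()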
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = \
    train_test_split(iris.data, iris.target,
                     stratify=iris.target, random_state=1)
rf = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
rf.feature_importances_
# array([ 0.126, 0.033, 0.445, 0.396])
plt.barh(range(4), rf.feature_importances_)
plt.yticks(range(4), iris.feature_names);
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_moons
from sklearn.pipeline import make_pipeline

X, y = make_moons(noise=.4, random_state=16, n_samples=300)
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, stratify=y, random_state=0)
voting = VotingClassifier(
    [('logreg', LogisticRegression(C=100)),
     ('tree', DecisionTreeClassifier(max_depth=3)),
     ('knn', KNeighborsClassifier(n_neighbors=3))],
    voting='soft')
voting.fit(X_train, y_train)
lr, tree, knn = voting.estimators_
stacking = make_pipeline(voting, LogisticRegression(C=100))
stacking.fit(X_train, y_train)
stacking.named_steps.logisticregression.coef_
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import FunctionTransformer

# take only probabilities of the positive classes for
# more interpretable coefficients
first_stage = make_pipeline(voting,
                            FunctionTransformer(lambda X: X[:, 1::2]))
transform_cv = cross_val_predict(first_stage, X_train,
                                 y_train, cv=10, method="transform")
second_stage = LogisticRegression(C=100).fit(transform_cv, y_train)
print(second_stage.coef_)
print(second_stage.score(transform_cv, y_train))
print(second_stage.score(first_stage.transform(X_test), y_test))
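Recent scikit-learn versions also provide a built-in StackingClassifier that performs this cross-validated second stage automatically; a minimal sketch on the same data (the cv and final_estimator settings below are illustrative assumptions, not tuned values):

from sklearn.ensemble import StackingClassifier

# Base estimators are fit with cross-validation; the final estimator is trained
# on their out-of-fold predictions.
stack = StackingClassifier(
    [('logreg', LogisticRegression(C=100)),
     ('tree', DecisionTreeClassifier(max_depth=3)),
     ('knn', KNeighborsClassifier(n_neighbors=3))],
    final_estimator=LogisticRegression(C=100), cv=10)
stack.fit(X_train, y_train)
stack.score(X_test, y_test)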
Gradient boosting builds trees sequentially, each fitting the residual of the trees before it:
\( f_{1}(x) \approx y \)
\( f_{2}(x) \approx y - f_{1}(x) \)
\( f_{3}(x) \approx y - f_{1}(x) - f_{2}(x) \)
\( y \approx f_{1}(x) + f_{2}(x) + f_{3}(x) + \dots \)
With a learning rate \(\alpha\) (e.g. 0.1), each tree's contribution is shrunk:
\( f_{1}(x) \approx y \)
\( f_{2}(x) \approx y - \alpha f_{1}(x) \)
\( f_{3}(x) \approx y - \alpha f_{1}(x) - \alpha f_{2}(x) \)
\( y \approx \alpha f_{1}(x) + \alpha f_{2}(x) + \alpha f_{3}(x) + \dots \)
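A from-scratch sketch of this residual-fitting recursion with shallow regression trees. The helper names (fit_boosted_trees, boosted_predict) are made up for illustration, and real implementations fit the gradient of a loss function rather than the raw residual; this toy version corresponds to squared loss.

import numpy as np
from sklearn.tree import DecisionTreeRegressor

def fit_boosted_trees(X, y, n_trees=100, alpha=0.1, max_depth=3):
    # Toy gradient boosting for squared loss: each tree fits the current residual.
    trees, residual = [], np.asarray(y, dtype=float)
    for _ in range(n_trees):
        tree = DecisionTreeRegressor(max_depth=max_depth).fit(X, residual)
        trees.append(tree)
        residual = residual - alpha * tree.predict(X)  # shrink contribution by alpha
    return trees

def boosted_predict(trees, X, alpha=0.1):
    # The prediction is the learning-rate-weighted sum of all trees.
    return alpha * np.sum([tree.predict(X) for tree in trees], axis=0)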
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.datasets import load_boston
import numpy as np

boston = load_boston()
X_train, X_test, y_train, y_test = \
    train_test_split(boston.data, boston.target,
                     random_state=0)
gbrt = GradientBoostingRegressor().fit(X_train, y_train)
fig, axs = \
    plot_partial_dependence(gbrt, X_train,
                            np.argsort(gbrt.feature_importances_)[-6:],
                            feature_names=boston.feature_names, n_jobs=3,
                            grid_resolution=50)
plot_partial_dependence(
    gbrt, X_train, [np.argsort(gbrt.feature_importances_)[-2:]],
    feature_names=boston.feature_names,
    n_jobs=3, grid_resolution=50)
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble import GradientBoostingClassifier

# Assumes X_train, y_train hold the iris split from above;
# for a multi-class model, label=i selects the class to plot.
gbrt = GradientBoostingClassifier().fit(X_train, y_train)
for i in range(3):
    fig, axs = \
        plot_partial_dependence(gbrt, X_train, range(4), n_cols=4,
                                feature_names=iris.feature_names,
                                grid_resolution=50, label=i)
XGBoost
pip install xgboost
from xgboost import XGBClassifier
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
xgb.score(X_test, y_test)
LightGBM
pip install lightgbm
from lightgbm.sklearn import LGBMClassifier
lgbm = LGBMClassifier()
lgbm.fit(X_train, y_train)
lgbm.score(X_test, y_test)
CatBoost
pip install catboost
from catboost import CatBoostClassifier
catb = CatBoostClassifier()
catb.fit(X_train, y_train)
catb.score(X_test, y_test)
sklearn
The corresponding scikit-learn estimators are GradientBoostingClassifier and the faster, histogram-based HistGradientBoostingClassifier.
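A minimal sketch with the histogram-based estimator, assuming the same X_train/X_test split as above:

from sklearn.ensemble import HistGradientBoostingClassifier

# Histogram-based gradient boosting (sketch; older scikit-learn versions first
# required `from sklearn.experimental import enable_hist_gradient_boosting`).
hgb = HistGradientBoostingClassifier()
hgb.fit(X_train, y_train)
hgb.score(X_test, y_test)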
DESlib
Quick Example
from deslib.des.knora_e import KNORAE
# 'pool' of 10 classifiers
pool_classifiers = RandomForestClassifier(n_estimators=10)
pool_classifiers.fit(X_train, y_train)
knorae = KNORAE(pool_classifiers) # init DES model
knorae.fit(X_dsel, y_dsel) # preprocess to find good 'regions'
knorae.predict(X_test) # predict