데이터 분석/머신러닝

[ML] 10. 앙상블 - Boosting

eunnys 2023. 11. 23. 16:53

▶ Boosting - Regressor

 

from sklearn.datasets import load_diabetes
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the diabetes regression dataset as (features, target) arrays.
x, y = load_diabetes(return_X_y=True)

# Split BEFORE scaling so that the held-out rows never influence the
# preprocessing statistics (fitting the scaler on all rows leaks test data).
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=10)

# Fit the min-max scaler on the training split only, then apply the same
# transformation to both splits.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Create and train the model: an ensemble of 40 regression trees of depth 3.
# `alpha` is intentionally not passed: per the scikit-learn documentation it
# is only used by the 'huber' and 'quantile' losses, not the default loss.
gb_r = GradientBoostingRegressor(random_state=10, n_estimators=40, max_depth=3)
gb_r.fit(x_train, y_train)

# Predict on the held-out split and report the coefficient of determination.
y_hat = gb_r.predict(x_test)

print(f'결정계수: {r2_score(y_test, y_hat):.3f}')
결정계수: 0.486

 

 

▶ Boosting - Classifier

 

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve, roc_auc_score, confusion_matrix 
from sklearn.model_selection import train_test_split
import pandas as pd
# Load the breast-cancer dataset bundle (feature matrix, labels, column names).
breast = load_breast_cancer()

# Tabular view of the features with the label appended as a 'target' column.
df = pd.DataFrame(breast.data, columns=breast.feature_names).assign(target=breast.target)
data_x, data_y = breast.data, breast.target

# 80/20 split, stratified on the label so both splits keep the class ratio.
x_train, x_test, y_train, y_test = train_test_split(
    data_x,
    data_y,
    train_size=0.8,
    random_state=10,
    stratify=data_y,
)

# Create the gradient boosting classifier and train it on the training split.
gb_c = GradientBoostingClassifier(random_state=10)
gb_c.fit(x_train, y_train)

# Predict hard labels, and take the second probability column
# (the class labelled 1) as the score for the AUC.
y_hat = gb_c.predict(x_test)
positive_proba = gb_c.predict_proba(x_test)[:, 1]

accuracy = accuracy_score(y_test, y_hat)
auc = roc_auc_score(y_test, positive_proba)

print(f'정확도: {accuracy:.3f}')
print(f'AUC: {auc:.3f}')
정확도: 0.956
AUC: 0.996