LGBでクロスバリデーション

# LightGBM hyperparameters shared by the classifier built for every fold.
# n_estimators is only a generous upper bound on the number of boosting rounds.
params = dict(
    boosting_type='gbdt',
    objective='binary',
    metric='auc',
    learning_rate=0.1,
    num_leaves=16,
    n_estimators=100000,
    random_state=123,
    importance_type='gain',
)

# Accumulators filled in by the cross-validation loop:
#   metrics -> one row per fold, imp -> per-fold feature importances.
metrics = list()
imp = pd.DataFrame()

# Materialise the (train indices, validation indices) pairs of a shuffled,
# stratified split up front so the folds can be addressed by position.
n_splits = 5
cv = list(
    StratifiedKFold(
        n_splits=n_splits,
        shuffle=True,
        random_state=123,
    ).split(x_train, y_train)
)

# Cross-validation loop: for each fold, train a LightGBM classifier, score it
# on the train and validation parts, and record its feature importances.
fold_imps = []

for nfold, (idx_tr, idx_va) in enumerate(cv):
    # StratifiedKFold.split yields *positional* indices, so rows must be taken
    # with .iloc; label-based .loc only matches on a default RangeIndex.
    x_tr, y_tr = x_train.iloc[idx_tr], y_train.iloc[idx_tr]
    x_va, y_va = x_train.iloc[idx_va], y_train.iloc[idx_va]

    model = lgb.LGBMClassifier(**params)
    model.fit(
        x_tr,
        y_tr,
        eval_set=[(x_tr, y_tr), (x_va, y_va)],
        # LightGBM 4.0 removed the early_stopping_rounds / verbose keyword
        # arguments of fit(); the callbacks below are the replacement
        # (stop after 100 rounds without improvement, log every 100 rounds).
        callbacks=[
            lgb.early_stopping(stopping_rounds=100),
            lgb.log_evaluation(period=100),
        ],
    )

    # Accuracy is computed on the predicted class labels of each part.
    y_tr_pred = model.predict(x_tr)
    y_va_pred = model.predict(x_va)

    metric_tr = accuracy_score(y_tr, y_tr_pred)
    metric_va = accuracy_score(y_va, y_va_pred)

    # One row per fold: [fold number, train accuracy, validation accuracy].
    metrics.append([nfold, metric_tr, metric_va])

    fold_imps.append(
        pd.DataFrame({"col": x_train.columns, "imp": model.feature_importances_, "nfold": nfold})
    )

# Concatenate once after the loop instead of growing a DataFrame per fold.
imp = pd.concat(fold_imps, axis=0, ignore_index=True)

# Per-fold results as a 2-D array: [fold number, train accuracy, validation accuracy].
metrics = np.array(metrics)

print(metrics)

# Aggregate the importances across folds: mean and standard deviation per feature.
imp = imp.groupby("col")['imp'].agg(['mean', 'std'])
imp.columns = ['imp', 'imp_std']
imp = imp.reset_index(drop=False)
# sort_values returns a new frame; keep the result so `imp` itself ends up
# ordered by mean importance (previously the sorted frame was discarded).
imp = imp.sort_values('imp', ascending=False, ignore_index=True)
# Bare expression so that a notebook cell still displays the sorted table.
imp
    


この記事が気に入ったらサポートをしてみませんか?