Curated examples of using LightGBM from Python: training with lgb.train and the scikit-learn wrappers, cross-validation with lgb.cv, learning-rate search via incremental boosting, plotting utilities, and model export to text, JSON, and PMML formats.
"verbosity": -1}
# "seed": 8888
# folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=8888)
# idx = y_train.argsort()
# y_lab = np.repeat(list(range(50000 // 20)), 20)
# y_lab = np.asarray(sorted(list(zip(idx, y_lab))))[:, -1].astype(np.int32)
# splits = folds.split(X_train, y_lab)
# 5-fold out-of-fold (OOF) training for a LightGBM regressor.
# NOTE(review): a 10-fold stratified scheme was tried earlier (see the
# commented-out code above) and replaced by a plain shuffled KFold.
folds = KFold(n_splits=5, shuffle=True, random_state=2019)
splits = folds.split(X_train, y_train)
for fold_, (trn_idx, val_idx) in enumerate(splits):
    print("fold n°{}".format(fold_ + 1))
    trn_data = lgb.Dataset(X_train[trn_idx], y_train[trn_idx])
    val_data = lgb.Dataset(X_train[val_idx], y_train[val_idx])
    # Large round budget; early stopping on the validation fold selects
    # the effective number of iterations (clf.best_iteration).
    num_round = 20000
    clf = lgb.train(param, trn_data, num_round, valid_sets=[trn_data, val_data], verbose_eval=100,
                    early_stopping_rounds=100)
    # OOF predictions fill the validation slice; test predictions are
    # averaged across folds.
    oof_lgb[val_idx] = clf.predict(X_train[val_idx], num_iteration=clf.best_iteration)
    predictions_lgb += clf.predict(X_test, num_iteration=clf.best_iteration) / folds.n_splits
# NOTE(review): despite the "MAE CV score" label this prints 1 / (1 + MAE),
# a bounded (0, 1] score where higher is better, not the raw MAE —
# confirm this matches the intended competition metric.
print("MAE CV score: {:<8.8f}".format(1/(mean_absolute_error(oof_lgb, y_train)+1)))
print(predictions_lgb)
# NOTE(review): file names say "mse" but the arrays hold raw predictions;
# kept as-is for compatibility with downstream consumers.
np.save('val.mse_lgb.npy', oof_lgb)
np.save('test.mse_lgb.npy', predictions_lgb)
def suggest_learning_rate(self, X, y, max_boost_round):
    """Search for a good learning rate via incremental LightGBM training.

    For each candidate learning rate, a booster is trained in chunks of
    500 rounds (up to ``max_boost_round`` total) using
    ``keep_training_booster`` / ``init_model`` so every chunk continues
    the previous one, and an adjusted validation AUC is recorded after
    each chunk.

    Parameters
    ----------
    X, y : training features and labels; an 80/20 holdout split is
        carved out internally for validation.
    max_boost_round : int
        Total boosting-round budget, evaluated in steps of 500.

    Returns
    -------
    tuple
        ``(best_lr, best_boost_round)`` — the candidate learning rate and
        round count with the highest adjusted validation AUC.
    """
    lr = [0.01, 0.02, 0.03, 0.04, 0.05]
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)
    params = self.setParams(self.default_hyper_param)
    max_round = max_boost_round // 500
    auc = np.zeros([len(lr), max_round])
    for i in range(len(lr)):
        print('learning rate: %.2f' % (lr[i]))
        params['learning_rate'] = lr[i]
        # free_raw_data=False so the Dataset stays reusable across the
        # repeated incremental lgb.train calls below.
        train_data = lgb.Dataset(X_train, y_train, free_raw_data=False)
        clf = None
        for j in range(max_round):
            clf = lgb.train(params, train_data, num_boost_round=500, init_model=clf, keep_training_booster=True)
            # Adjusted score: penalize larger learning rates (-0.1*lr) and
            # reward extra rounds (+0.001*j). NOTE(review): the +j term
            # biases toward longer training — confirm this is intentional.
            auc[i, j] = roc_auc_score(y_valid, clf.predict(X_valid)) - lr[i] * 0.1 + j * 0.001
    print(auc)
    # argmax over the flattened (learning-rate, round-chunk) grid.
    idx = np.argmax(auc)
    best_lr = lr[idx // max_round]
    best_boost_round = (idx % max_round + 1) * 500
    return best_lr, best_boost_round
import json

import lightgbm as lgb
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Train a small DART-boosted binary classifier on the breast-cancer
# dataset and export the model plus reference predictions, to serve as
# fixtures for cross-implementation comparison.
X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
n_estimators = 10
d_train = lgb.Dataset(X_train, label=y_train)
params = {
    'boosting_type': 'dart',
    'objective': 'binary',
}
clf = lgb.train(params, d_train, n_estimators)
y_pred = clf.predict(X_test)
clf.save_model('lg_dart_breast_cancer.model')  # save the model in txt format
np.savetxt('lg_dart_breast_cancer_true_predictions.txt', y_pred)
np.savetxt('breast_cancer_test.tsv', X_test, delimiter='\t')
# Also dump the full model structure as JSON for inspection.
d = clf.dump_model()
with open('lg_dart_breast_cancer.json', 'w') as fout:
    json.dump(d, fout, indent=1)
def test_plot_split_value_histogram(self):
    """Check lgb.plot_split_value_histogram for both a raw Booster and
    the sklearn wrapper, including title/label/color customization."""
    # Default rendering from a raw Booster, feature selected by index.
    gbm0 = lgb.train(self.params, self.train_data, num_boost_round=10)
    ax0 = lgb.plot_split_value_histogram(gbm0, 27)
    self.assertIsInstance(ax0, matplotlib.axes.Axes)
    self.assertEqual(ax0.get_title(), 'Split value histogram for feature with index 27')
    self.assertEqual(ax0.get_xlabel(), 'Feature split value')
    self.assertEqual(ax0.get_ylabel(), 'Count')
    # Only 10 shallow trees, so at most a couple of histogram bars.
    self.assertLessEqual(len(ax0.patches), 2)
    # sklearn wrapper, feature selected by name, with the @index/name@ and
    # @feature@ placeholders in the custom title, plus custom styling.
    gbm1 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True)
    gbm1.fit(self.X_train, self.y_train)
    ax1 = lgb.plot_split_value_histogram(gbm1, gbm1.booster_.feature_name()[27], figsize=(10, 5),
                                         title='Histogram for feature @index/name@ @feature@',
                                         xlabel='x', ylabel='y', color='r')
    self.assertIsInstance(ax1, matplotlib.axes.Axes)
    # Placeholders expand to "name <feature name>" when selected by name.
    self.assertEqual(ax1.get_title(),
                     'Histogram for feature name {}'.format(gbm1.booster_.feature_name()[27]))
    self.assertEqual(ax1.get_xlabel(), 'x')
    self.assertEqual(ax1.get_ylabel(), 'y')
    self.assertLessEqual(len(ax1.patches), 2)
    for patch in ax1.patches:
        self.assertTupleEqual(patch.get_facecolor(), (1., 0, 0, 1.))  # red
    # Explicit None title/labels and a color list must not raise.
    ax2 = lgb.plot_split_value_histogram(gbm0, 27, bins=10, color=['r', 'y', 'g', 'b'],
                                         title=None, xlabel=None, ylabel=None)
# NOTE(review): fragment of a larger fixture-building helper — the
# enclosing def, the `binary` flag and the `data` frame come from outside
# this view.
if binary:
    # Presumably collapses "r" (rear-wheel drive) into "4" so the target
    # column has only two classes — TODO confirm against the full dataset.
    data["drv"] = data["drv"].replace("r", "4")
numeric_features = ["displ", "year", "cyl"]
categorical_features = ["class"]
text_features = []
# sklearn-pandas mapper: continuous passthrough for numeric columns,
# label encoding for categoricals, count-vectorized text (none here).
mapper = DataFrameMapper(
    [(numeric_features, [ContinuousDomain()])] +
    [([f], [CategoricalDomain(), PMMLLabelEncoder()]) for f in categorical_features] +
    [(f, [CategoricalDomain(), CountVectorizer(tokenizer=Splitter())]) for f in text_features]
)
pipeline = PMMLPipeline([
    ("mapper", mapper),
    ("model", LGBMClassifier(n_estimators=1000))
])
# categorical_feature=[3] flags the encoded "class" column for LightGBM —
# presumably column index 3 after mapping; verify against mapper output.
pipeline.fit(data, data["drv"], model__categorical_feature=[3])
suffix = "binary" if binary else "multiclass"
sklearn2pmml(pipeline, "test/support/python/lightgbm_" + suffix + ".pmml")
# Print a few predictions so the PMML conversion can be spot-checked.
print(list(pipeline.predict(data[:10])))
print(list(pipeline.predict_proba(data[0:1])[0]))
def _get_create_model(self, classification):
    """Return a factory that fits a fresh LightGBM estimator on (x, y).

    The estimator type is chosen once, up front: a classifier when
    ``classification`` is truthy, otherwise a regressor. The returned
    callable fits that single estimator and returns the fitted model.
    """
    estimator = LGBMClassifier() if classification else LGBMRegressor()

    def create_model(x, y):
        # fit() returns the estimator itself, so callers get the model.
        return estimator.fit(x, y)

    return create_model
def create_lightgbm_classifier(X, y):
    """Fit and return a gradient-boosted LightGBM classifier on (X, y).

    Hyper-parameters are fixed (200 trees, depth 5, learning rate 0.1,
    single thread, seeded) so results are reproducible.
    """
    estimator = LGBMClassifier(boosting_type='gbdt', learning_rate=0.1,
                               max_depth=5, n_estimators=200, n_jobs=1,
                               random_state=777)
    # fit() returns the estimator itself.
    return estimator.fit(X, y)
# NOTE(review): fragment of a larger sklearn-API eval_metric test —
# `params`, `params_fit` and the custom `constant_metric` callable are
# defined outside this view. Each case checks which metric names appear
# in evals_result_['training'] for a given objective/metric combination.
# custom metric (disable default metric)
gbm = lgb.LGBMRegressor(metric='None',
                        **params).fit(eval_metric=constant_metric, **params_fit)
# metric='None' suppresses the default, leaving only the custom 'error'.
self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('error', gbm.evals_result_['training'])
# default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1',
                        **params).fit(eval_metric=constant_metric, **params_fit)
# l1 (objective's default) plus the custom metric.
self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])
# non-default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='mape',
                        **params).fit(eval_metric=constant_metric, **params_fit)
# explicit mape replaces l1; custom metric still appended.
self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])
# multiple metrics for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric=['l1', 'gamma'],
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])
# custom metric (disable default metric for non-default objective)
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='None',
                        **params).fit(eval_metric=constant_metric, **params_fit)
# NOTE(review): fragment of a larger lgb.cv test — `params_with_metric`,
# `lgb_train` and `X_train` are defined outside this view.
# enable display training loss
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                nfold=3, stratified=False, shuffle=False,
                metrics='l1', verbose_eval=False, eval_train_metric=True)
# eval_train_metric=True adds 'train *' entries alongside 'valid *'.
self.assertIn('train l1-mean', cv_res)
self.assertIn('valid l1-mean', cv_res)
# metrics='l1' overrides whatever metric params_with_metric carried.
self.assertNotIn('train l2-mean', cv_res)
self.assertNotIn('valid l2-mean', cv_res)
# one score per boosting round.
self.assertEqual(len(cv_res['train l1-mean']), 10)
self.assertEqual(len(cv_res['valid l1-mean']), 10)
# self defined folds
tss = TimeSeriesSplit(3)
folds = tss.split(X_train)
# A pre-built split generator and the splitter object itself must
# produce identical CV results.
cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
                    verbose_eval=False)
cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
                    verbose_eval=False)
np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
# lambdarank
X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                   '../../examples/lambdarank/rank.train'))
q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  '../../examples/lambdarank/rank.train.query'))
params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
# group=q_train supplies per-query group sizes for the ranking objective.
lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
# ... with l2 metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                       metrics='l2', verbose_eval=False)
# two result entries for the single metric (mean and, presumably, stdv —
# confirm against the lgb.cv return contract).
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
# ... with NDCG (default) metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
# NOTE(review): this span largely duplicates the preceding cv-test
# fragment (likely a paste artifact) but contains the complete
# lambdarank/NDCG assertions; kept verbatim.
tss = TimeSeriesSplit(3)
folds = tss.split(X_train)
# Generator folds and the splitter object itself must agree.
cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
                    verbose_eval=False)
cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
                    verbose_eval=False)
np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
# lambdarank
X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                   '../../examples/lambdarank/rank.train'))
q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  '../../examples/lambdarank/rank.train.query'))
params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
# group=q_train supplies per-query group sizes for the ranking objective.
lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
# ... with l2 metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                       metrics='l2', verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
# ... with NDCG (default) metric
# eval_at=3 makes the default ranking metric ndcg@3.
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                       verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['ndcg@3-mean']).any())
# self defined folds with lambdarank
# GroupKFold keeps ranking groups intact across folds; results must
# match the nfold-based run above.
cv_res_lambda_obj = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10,
                           folds=GroupKFold(n_splits=3),
                           verbose_eval=False)
np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])