Dive into secure and efficient coding practices with our curated list of the top 10 examples showcasing 'xgboost' in Python. Our advanced machine learning engine meticulously scans each line of code, cross-referencing millions of open source libraries to ensure your implementation is not just functional, but also robust and secure. Elevate your models by mastering training, evaluation, prediction, and distributed workflows with XGBoost's native and scikit-learn APIs, with confidence and precision.
import numpy as np
import xgboost as xgb

# extract_feature_and_label, divide_train_and_test, data_transform_for_xgboost,
# sign and find_all_indices are project-specific helpers from the source repository.
# step 2: Select Feature
data = extract_feature_and_label(data, feature_name_list=conf['feature_name'], label_name_list=conf['label_name'])
# step 3: Preprocess
train, test = divide_train_and_test(data, conf['training_set_proportion'])
train_x, train_y = data_transform_for_xgboost(train)
test_x, test_y = data_transform_for_xgboost(test)
train_y = sign(train_y)
test_y = sign(test_y)
indices = find_all_indices(train_y, 1)
indices.extend(find_all_indices(train_y, -1))
train_x = np.array(train_x)[indices]
train_y = np.array(train_y)[indices]
dtrain = xgb.DMatrix(train_x, train_y)
param = {
    'booster': 'gbtree',
    'silent': True,
    'eta': 0.01,
    'max_depth': 5,
    'gamma': 0.1,
    'objective': 'multi:softmax',
    'num_class': 3,
    'seed': 1000,
    'scale_pos_weight': 1
}
clf = xgb.XGBClassifier(**param)
if conf['use_previous_model'] is False:
    clf.fit(train_x, train_y)
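With the classifier fitted, the held-out split prepared in step 3 can be scored. A minimal sketch, assuming use_previous_model is False so clf was actually fitted above:

test_x = np.array(test_x)
test_y = np.array(test_y)
test_pred = clf.predict(test_x)
print("held-out accuracy:", float(np.mean(test_pred == test_y)))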
def test():
    data = np.random.rand(5, 10)           # 5 entities, each contains 10 features
    label = np.random.randint(2, size=5)    # binary target
    dtrain = xgb.DMatrix(data, label=label)
    # the original snippet referenced an undefined `test`; use fresh random data for evaluation
    dtest = xgb.DMatrix(np.random.rand(5, 10), label=np.random.randint(2, size=5))
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    evallist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 10
    bst = xgb.train(param, dtrain, num_round, evallist)
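The next example compares CPU and GPU predictors but assumes X, y, and tr_size already exist. A minimal synthetic setup, purely illustrative and not part of the original snippet:

import numpy as np
import xgboost as xgb

rng = np.random.RandomState(123)
X = rng.randn(1000, 10)
y = X @ rng.randn(10) + 0.1 * rng.randn(1000)   # simple linear target plus noise
tr_size = 800                                   # first 800 rows go to training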
X_train, y_train = X[:tr_size, :], y[:tr_size]
X_test, y_test = X[tr_size:, :], y[tr_size:]
# First with cpu_predictor
params = {'tree_method': 'gpu_hist',
          'predictor': 'cpu_predictor',
          'n_jobs': -1,
          'seed': 123}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
# Now with gpu_predictor
params['predictor'] = 'gpu_predictor'
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
gpu_train_score = m.score(X_train, y_train)
gpu_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
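XGBRegressor.score follows the scikit-learn convention and returns the R^2 score, so the assertions above compare R^2 values; the same check can be written explicitly. A short sketch, assuming scikit-learn is available:

from sklearn.metrics import r2_score

# m still holds the gpu_predictor model fitted last
assert np.allclose(r2_score(y_test, m.predict(X_test)), gpu_test_score)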
def test_xgboost_direct():
    try:
        import xgboost
    except Exception as e:
        print("Skipping test_xgboost_direct!")
        return
    import shap

    N = 100
    M = 4
    X = np.random.randn(N, M)
    y = np.random.randn(N)

    model = xgboost.XGBRegressor()
    model.fit(X, y)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    assert np.allclose(shap_values[0, :], _brute_force_tree_shap(explainer.model, X[0, :]))
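Beyond the brute-force comparison, TreeExplainer output satisfies the usual SHAP additivity property: per-row SHAP values plus the expected value reproduce the model's raw prediction. A hedged sketch of that check for this regression model:

assert np.allclose(shap_values.sum(axis=1) + explainer.expected_value,
                   model.predict(X), atol=1e-4)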
def test_xgboost_classifier(output_margin):
    import xgboost as xgb

    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    gbm = xgb.sklearn.XGBClassifier()
    gbm.fit(X, y)
    gbm.predict(X, output_margin=output_margin)
    gbm.predict_proba(X, output_margin=output_margin)
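Here output_margin=True requests raw, untransformed scores; for the default binary:logistic objective these are log-odds, which the sigmoid maps back to probabilities. An illustrative sketch at the Booster level, not part of the original test:

booster = gbm.get_booster()
margin = booster.predict(xgb.DMatrix(X), output_margin=True)   # raw log-odds
prob = 1.0 / (1.0 + np.exp(-margin))                           # back to probabilities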
def train_xgboost_component(  # hypothetical reconstruction: the snippet omits the original `def` line and any leading parameters
    train,
    validation,
    static_hyperparameters,
    sample_eval_function,
    custom_input1,
    custom_input2,
    model,
    custom_output1,
):
    with train as reader:
        train_df = reader.read(concat=True)
        # assume the last column holds the label and the rest are features;
        # the sklearn-style fit() below takes arrays/DataFrames rather than DMatrix objects
        dtrain_x = train_df.iloc[:, :-1]
        dtrain_y = train_df.iloc[:, -1]
    with validation as reader:
        validation_df = reader.read(concat=True)
        dvalidation_x = validation_df.iloc[:, :-1]
        dvalidation_y = validation_df.iloc[:, -1]
    my_model = xgb.XGBModel(**static_hyperparameters)
    my_model.fit(dtrain_x,
                 dtrain_y,
                 eval_set=[(dvalidation_x, dvalidation_y)],
                 eval_metric=sample_eval_function)
    model.set(my_model)
    custom_output1.set(my_model.evals_result())
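evals_result() returns a nested dict keyed first by evaluation-set name and then by metric name, with one entry per boosting round. A small sketch of reading out the final scores (the key names shown in the comments are illustrative):

results = my_model.evals_result()
for eval_name, metrics in results.items():          # e.g. 'validation_0'
    for metric_name, history in metrics.items():    # e.g. 'rmse'
        print(eval_name, metric_name, "final:", history[-1])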
def test_xgboost_ranking():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_xgboost_ranking!")
        return
    import shap

    # train an xgboost ranker model
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    params = {'objective': 'rank:pairwise', 'learning_rate': 0.1,
              'gamma': 1.0, 'min_child_weight': 0.1,
              'max_depth': 4, 'n_estimators': 4}
    model = xgboost.sklearn.XGBRanker(**params)
    model.fit(x_train, y_train, q_train.astype(int),
              eval_set=[(x_test, y_test)], eval_group=[q_test.astype(int)])
    _validate_shap_values(model, x_test)
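For ranking objectives the group array tells XGBoost how many consecutive rows belong to each query, so its entries must sum to the number of rows. A quick sanity check on the split returned by shap.datasets.rank(), assuming it follows that convention:

assert q_train.astype(int).sum() == x_train.shape[0]
assert q_test.astype(int).sum() == x_test.shape[0]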
def test_xgboost_regression(output_margin):
    import xgboost as xgb

    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    gbm = xgb.sklearn.XGBRegressor()
    gbm.fit(X, y)
    gbm.predict(X, output_margin=output_margin)
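A hypothetical follow-up, not part of the original test, is to sanity-check the regressor's fit on the training data, assuming scikit-learn is available:

from sklearn.metrics import mean_squared_error

preds = gbm.predict(X)
print("train RMSE:", mean_squared_error(y, preds) ** 0.5)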
def run_test(name, params_fun):
    """Runs a distributed GPU test."""
    # Always call this before using the distributed module
    xgb.rabit.init()
    rank = xgb.rabit.get_rank()
    world = xgb.rabit.get_world_size()

    # Load the files; they are automatically sharded in distributed mode.
    dtrain = xgb.DMatrix('../../demo/data/agaricus.txt.train')
    dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')

    params, n_rounds = params_fun(rank)

    # Specify validation sets to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]

    # Run training; all the features of the training API are available.
    # Currently this script only supports calling train once, for fault-recovery purposes.
    bst = xgb.train(params, dtrain, n_rounds, watchlist, early_stopping_rounds=2)

    # Have each worker save its model
    model_name = "test.model.%s.%d" % (name, rank)
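The snippet is cut off here; a typical continuation, hedged and based on the older rabit API this example already uses, would save each worker's model and shut the communicator down:

    bst.save_model(model_name)   # each worker writes test.model.<name>.<rank>
    xgb.rabit.finalize()         # pair every rabit.init() with a finalize()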