Skip to content

Instantly share code, notes, and snippets.

@arc279
Created March 29, 2018 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arc279/fc5fce1eccbfbb6e464683c3e9409c4b to your computer and use it in GitHub Desktop.
Save arc279/fc5fce1eccbfbb6e464683c3e9409c4b to your computer and use it in GitHub Desktop.
dask xgboost で irisの分類までの一連の流れ
"""
# pip install pandas scikit-learn dask_xgboost
"""
from sklearn import datasets
from sklearn.model_selection import train_test_split

#--- dataset
# Load the bundled iris dataset and show its column names and class names.
iris = datasets.load_iris()
print(iris.feature_names)
print(iris.target_names)
X, y = iris.data, iris.target

# Hold out 20% of the samples for the prediction step; the fixed
# random_state keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
#--- train
# Cluster setup -- run each command in its own shell before this script:
#   dask-scheduler                # start the scheduler
#   dask-worker 127.0.0.1:8786    # start a worker
# The example below was run with 2 workers.
import dask.dataframe as dd
import dask_xgboost as dxgb
from dask.distributed import Client

# Connect to the already-running scheduler.
client = Client('127.0.0.1:8786')
print(client)

# Wrap the training features/labels as dask collections in 5-row chunks;
# both use the same chunksize.
X_train_dd = dd.from_array(X_train, columns=iris.feature_names, chunksize=5)
y_train_dd = dd.from_array(y_train, chunksize=5)
print(X_train_dd)
print(y_train_dd)

# Booster parameters: 3-class problem with per-class probability output.
params = dict(
    max_depth=3,
    eta=0.3,
    objective='multi:softprob',
    num_class=3,
)

# Train on the cluster and persist the resulting booster to disk.
bst = dxgb.train(client, params, X_train_dd, y_train_dd)
bst.save_model('model.xgb')
#--- predict
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# Reload the booster that the training step saved to disk.
booster = xgb.Booster()
booster.load_model('./model.xgb')

# Attach the loaded booster to a fresh scikit-learn style classifier.
# NOTE(review): `_Booster` and `_le` are private XGBClassifier attributes;
# this presumably depends on the installed xgboost version -- confirm.
clf = xgb.XGBClassifier()
clf._Booster = booster
# Label encoder fitted on the class names; presumably used by predict() to
# turn class indices back into names -- verify against the xgboost version.
clf._le = LabelEncoder().fit(iris.target_names)
print(clf)

# Predict on the held-out rows using a plain pandas DataFrame (no dask here).
X_test_dd = pd.DataFrame(X_test, columns=iris.feature_names)
predictions = clf.predict(X_test_dd)
print(predictions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment