# Source: GitHub gist Evanto/6ab9b7abbc1b2a6ef7d033078a4f1bcf (author @Evanto),
# last active April 12, 2021 21:05.
# NOTE: the GitHub page chrome captured with this file (skip link, star/fork
# counters, "Save ... to your computer and use it in GitHub Desktop" prompt)
# is not part of the program.
import kfp.dsl
from kfp.components import ComponentStore
# Component store used below to look components up by name.
# NOTE(review): resolving components may read definitions from the store's
# configured search locations at import time — confirm whether that needs
# network access in your environment.
store = ComponentStore.default_store
# Component factories; calling one inside a pipeline function creates a step.
chicago_taxi_dataset_op = store.load_component('datasets/Chicago_Taxi_Trips')
xgboost_train_on_csv_op = store.load_component('XGBoost/Train')
@kfp.dsl.pipeline(name='XGBoost Train')
def xgboost_train_pipeline(start_date: str = '2019-01-01',
                           end_date: str = '2019-02-01',
                           limit: int = 100):
    """Train an XGBoost regression model that predicts taxi tips.

    The pipeline has two steps: a dataset query restricted to trips whose
    ``trip_start_timestamp`` lies in ``[start_date, end_date)``, followed by
    XGBoost training on the resulting CSV.

    Args:
        start_date: Inclusive lower bound for ``trip_start_timestamp``.
        end_date: Exclusive upper bound for ``trip_start_timestamp``.
        limit: Forwarded to the dataset component as ``limit``
            (presumably the maximum number of rows fetched — confirm
            against the component definition).
    """
    features = [
        'trip_seconds', 'trip_miles', 'pickup_community_area',
        'dropoff_community_area', 'fare', 'tolls', 'extras', 'trip_total',
    ]
    target = 'tips'

    # The target column is listed first so that the training step can refer
    # to it positionally via ``label_column=0``.
    training_data_csv = chicago_taxi_dataset_op(
        select=','.join([target] + features),
        where=f'trip_start_timestamp >= "{start_date}" AND trip_start_timestamp < "{end_date}"',
        limit=limit,
    ).output

    # Training
    # The returned task object is not used afterwards, so it is not bound to
    # a name; the call itself is what adds the step.
    xgboost_train_on_csv_op(
        training_data=training_data_csv,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=200,
    )
# (GitHub page footer, not part of the program: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.")