Skip to content

Instantly share code, notes, and snippets.

View gidutz's full-sized avatar

Gad Benram gidutz

View GitHub Profile
SELECT
year,
month,
day,
cumsum,
ROUND(predicted_label) predicted,
monthly_cost actual_month_cost,
ROUND(100 * (ABS(predicted_label - monthly_cost) / monthly_cost),1) abs_err
FROM
ML.PREDICT(MODEL `billing_dataset_example.model_linear_regression`,
CREATE OR REPLACE MODEL
billing_dataset_example.model_linear_regression --model save path
OPTIONS
( model_type='linear_reg', -- As of Aug 2018 you can choose between linear regression and logistic regression
ls_init_learn_rate=.015,
l1_reg=0.1,
l2_reg=0.1,
data_split_method='seq',
data_split_col='split_col',
WITH
cumsum_table AS(
SELECT
account_name, year, month, day,
ROUND(SUM(daily_cost) OVER (PARTITION BY account_name, year, month ORDER BY day )) AS cumsum,
ROUND(AVG(daily_cost) OVER (PARTITION BY account_name, year, month ORDER BY day )) AS mean_daily_cost
FROM
`billing_dataset_example.billing_daily_monthly`
ORDER BY account_name, year, month, day),
monthly_cost_table AS (
-- https://stackoverflow.com/questions/41707583/get-total-no-of-days-in-given-month-in-google-bigquery
CREATE TEMP FUNCTION DaysInMonth(d TIMESTAMP) AS (
32 - EXTRACT(DAY FROM DATE_ADD(DATE_TRUNC(CAST (d AS DATE), MONTH), INTERVAL 31 DAY))
);
WITH monthly_cost_table AS (SELECT
account_name,
EXTRACT (MONTH FROM bill_datetime) month,
EXTRACT (YEAR FROM bill_datetime) year,
SUM(cost) monthly_cost
@gidutz
gidutz / extract_features.sql
Created June 6, 2019 12:43
chicago taxi feature extraction
WITH dataset AS( SELECT
EXTRACT(HOUR FROM trip_start_timestamp) trip_start_hour
, EXTRACT(DAYOFWEEK FROM trip_start_timestamp) trip_start_weekday
, EXTRACT(WEEK FROM trip_start_timestamp) trip_start_week
, EXTRACT(DAYOFYEAR FROM trip_start_timestamp) trip_start_yearday
, EXTRACT(MONTH FROM trip_start_timestamp) trip_start_month
, (trip_miles * 1.60934 ) / ((trip_seconds + .01) / (60 * 60)) trip_speed_kmph
, trip_miles
, pickup_latitude
@gidutz
gidutz / hyper_param_spec.json
Created June 6, 2019 12:50
chicago taxi hypertune
{
"scaleTier": "CUSTOM",
"masterType": "standard_gpu",
"args": [
"--preprocess",
"--validation_split=0.2",
"--model_type=regression",
"--hidden_units=120,60,60",
"--batch_size=128",
"--eval_frequency_secs=128",
@gidutz
gidutz / submit_train.py
Last active June 6, 2019 12:58
chicago submit training
def train_hyper_params(cloudml_client, training_inputs):
job_name = 'chicago_travel_time_training_{}'.format(datetime.utcnow().strftime('%Y%m%d%H%M%S'))
project_name = 'projects/{}'.format(project_id)
job_spec = {'jobId': job_name, 'trainingInput': training_inputs}
response = cloudml_client.projects().jobs().create(body=job_spec,
parent=project_name).execute()
print(response)
@gidutz
gidutz / deploy_model.py
Last active July 2, 2019 15:08
chicago taxi deploy model
def create_model(cloudml_client):
"""
Creates a Model entity in AI Platform
:param cloudml_client: discovery client
"""
models = cloudml_client.projects().models()
create_spec = {'name': model_name}
models.create(body=create_spec,
parent=project_name).execute()
@gidutz
gidutz / predict.py
Created July 2, 2019 15:09
chicago taxi predict
def validate_model():
"""
Function to validate the model results
"""
df_val = pd.read_csv('{}/processed_data/test.csv'.format(job_dir))
# Submit only 10 samples to the server, ignore the first column (=target column)
instances = [", ".join(x) for x in df_val.iloc[:10, 1:].astype(str).values.tolist()]
service = discovery.build('ml', 'v1')
version_name = 'projects/{}/models/{}'.format(project_id, model_name)
def get_headers(self):
headers = {
'authority': self.host,
'x-requested-with': 'XMLHttpRequest',
'origin': 'https://{}'.format(self.host),
'x-csrf-token': self.x_csrf_token,
'user-agent': '...',
'content-type': 'application/json;charset=UTF-8',
'accept': '*/*',
'sec-fetch-site': 'same-origin',