Skip to content

Instantly share code, notes, and snippets.

View joshreini1's full-sized avatar

Josh Reini joshreini1

View GitHub Profile
# Target the remote environment and create the project with logits as the
# score type.
tru.set_environment("remote")
tru.add_project("BQML Integration Example", score_type="logits")

# Select one attribution column per feature out of the repeated
# top_feature_attributions field of the explanations table.
# The table path is backtick-quoted: in GoogleSQL a single-quoted path is a
# string literal, not a table reference, so the FROM clause would be rejected.
# NOTE(review): the table name contains spaces -- confirm it against the
# dataset; underscores may have been lost when this snippet was copied.
# NOTE(review): the 'is_mobile' attribution is returned under the alias
# device_id -- confirm that column name is intended.
query_attributions = """#standardsql
SELECT
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'pageviews') AS pageviews,
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'country') AS country,
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'is_mobile') AS device_id,
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'os') AS os
FROM `gcp-sandbox-311420.bqml_tutorial.sample model preds explanations 0701 0708`
"""

# Submit the query and materialize its result via to_dataframe().
job_attr = client.query(query_attributions)
df_attr = job_attr.to_dataframe()
from google.cloud.bigquery import Client, QueryJobConfig
# Instantiate the BigQuery client with no explicit arguments.
# NOTE(review): QueryJobConfig is imported but not used in the visible code.
client = Client()
# Notebook shell escape: install the truera package.
!pip install truera
import truera
from truera.client.truera_workspace import TrueraWorkspace
from truera.client.truera_authentication import TokenAuthentication
# Open a workspace against the given URL using token authentication.
# TOKEN is not defined in this snippet; it must be set before this line runs.
tru = TrueraWorkspace('https://app.truera.net/', TokenAuthentication(TOKEN))
#standardSQL
# Train a boosted-tree classifier. The model path is backtick-quoted because
# a single-quoted path is a string literal, not an identifier.
CREATE MODEL `bqml_tutorial.sample_model`
OPTIONS(model_type='boosted_tree_classifier') AS
SELECT
# label is 0 when totals.transactions is NULL, otherwise 1
IF(totals.transactions IS NULL, 0, 1) AS label,
# NULL operating system / country values are replaced with an empty string
IFNULL(device.operatingSystem, "") as os,
device.isMobile AS is_mobile,
IFNULL(geoNetwork.country, "") AS country,
# NULL pageview counts are replaced with 0
IFNULL(totals.pageviews, 0) AS pageviews
FROM
#standardSQL
SELECT
visitorID, #passthrough
Country,
Top_feature_attributions, # from ml.explain_predict
SUM(predicted_label) as total_predicted_purchases,
label #passthrough
FROM
ML.EXPLAIN_PREDICT(MODEL `bqml_tutorial.sample_model`, (
SELECT
# Notebook shell escape: install the truera package.
! pip install truera
from truera.client.truera_workspace import TrueraWorkspace # workspace client class
from truera.client.truera_authentication import TokenAuthentication # token-based authentication
# The token string below is a placeholder; substitute a real auth token.
tru = TrueraWorkspace("https://app.truera.net", TokenAuthentication("ADD YOUR AUTH TOKEN"))
# The score_type string below is a placeholder listing the choices
# (regression, classification, probits or logits); pass one of them instead.
tru.add_project("My project", score_type="CHOOSE A SCORE TYPE: regression, classification, probits or logits")
tru.add_data_collection("data_collection_1") # the schema that will hold your data "splits" and model
tru.add_data(
data = data,
data_split_name = "train",
column_spec=ColumnSpec(
def rebalance_gender(df, data_type):
if data_type == 0:
df_female_true = df[(df['Sex'] == 'Female') & (df['PINCP'] == True)]
df_else = df[~((df['Sex'] == 'Female') & (df['PINCP'] == True))]
else:
df_female_true = df[(df['Sex_Female'] == 1) & (df['PINCP'] == True)]
df_else = df[~((df['Sex_Female'] == 1) & (df['PINCP'] == True))]
if data_type == 0:
# add data splits to the collection we just created
year_begin = 2014
year_end = 2016 # exclusive
states = ['CA', 'NY']
for year in range(year_begin, year_end):
for state in states:
tru.add_data_split(f'{year}-{state}',
pre_data = data[year][state]['data_preprocessed'].\
# Remove the 'Sex' entry from the feature map before the collection is created.
feature_map.pop('Sex')

# Create the collection with the trimmed map; provide_transform_with_model
# is explicitly disabled.
tru.add_data_collection(
    "Data Collection v2",
    pre_to_post_feature_map=feature_map,
    provide_transform_with_model=False,
)