This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tru.set_environment("remote") | |
tru.add_project("BQML Integration Example", score_type = "logits") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
query_attributions = """#standardsql | |
SELECT | |
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'pageviews') AS pageviews, | |
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'country') AS country, | |
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'is_mobile') AS device_id, | |
(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'os') AS os | |
FROM `gcp-sandbox-311420.bqml_tutorial.sample model preds explanations 0701 0708` | |
""" | |
job_attr = client.query(query_attributions) | |
df_attr = job_attr.to_dataframe() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.cloud.bigquery import Client, QueryJobConfig | |
client = Client() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install truera | |
import truera | |
from truera.client.truera_workspace import TrueraWorkspace | |
from truera.client.truera_authentication import TokenAuthentication | |
tru = TrueraWorkspace('https://app.truera.net/', TokenAuthentication(TOKEN)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardSQL | |
CREATE MODEL `bqml_tutorial.sample_model` | |
OPTIONS(model_type='boosted_tree_classifier') AS | |
SELECT | |
IF(totals.transactions IS NULL, 0, 1) AS label, | |
IFNULL(device.operatingSystem, "") as os, | |
device.isMobile AS is_mobile, | |
IFNULL(geoNetwork.country, "") AS country, | |
IFNULL(totals.pageviews, 0) AS pageviews | |
FROM |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardSQL | |
SELECT | |
visitorID, #passthrough | |
Country, | |
Top_feature_attributions, # from ml.explain_predict | |
SUM(predicted_label) as total_predicted_purchases, | |
label #passthrough | |
FROM | |
ML.EXPLAIN_PREDICT(MODEL `bqml_tutorial.sample_model`, ( | |
SELECT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
! pip install truera | |
from truera.client.truera_workspace import TrueraWorkspace #import truera | |
from truera.client.truera_authentication import TokenAuthentication # import authentication | |
tru = TrueraWorkspace("https://app.truera.net", TokenAuthentication("ADD YOUR AUTH TOKEN")) | |
tru.add_project("My project", score_type="CHOOSE A SCORE TYPE: regression, classification, probits or logits") | |
tru.add_data_collection("data_collection_1") # the schema that will hold your data "splits" and model | |
tru.add_data( | |
data = data, | |
data_split_name = "train", | |
column_spec=ColumnSpec( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def rebalance_gender(df, data_type): | |
if data_type == 0: | |
df_female_true = df[(df['Sex'] == 'Female') & (df['PINCP'] == True)] | |
df_else = df[~((df['Sex'] == 'Female') & (df['PINCP'] == True))] | |
else: | |
df_female_true = df[(df['Sex_Female'] == 1) & (df['PINCP'] == True)] | |
df_else = df[~((df['Sex_Female'] == 1) & (df['PINCP'] == True))] | |
if data_type == 0: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# add data splits to the collection we just created | |
year_begin = 2014 | |
year_end = 2016 # exclusive | |
states = ['CA', 'NY'] | |
for year in range(year_begin, year_end): | |
for state in states: | |
tru.add_data_split(f'{year}-{state}', | |
pre_data = data[year][state]['data_preprocessed'].\ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
feature_map.pop('Sex') | |
tru.add_data_collection("Data Collection v2", | |
pre_to_post_feature_map = feature_map, | |
provide_transform_with_model = False) |