Josh Reini joshreini1

## create_truera_project_bqml.py
# Select the "remote" environment on the workspace, then create the
# "BQML Integration Example" project with its score type set to "logits".
tru.set_environment("remote")
tru.add_project(
    "BQML Integration Example",
    score_type="logits",
)

## bqml_unnest_feature_attributions.py
# Flatten the explanation output so that each feature's attribution becomes its
# own column. Every scalar subquery unnests the `top_feature_attributions`
# array and keeps the attribution of the entry whose feature name matches.
#
# The table path is quoted with backticks: in BigQuery standard SQL a
# single-quoted token is a string literal, which is not a valid FROM target,
# and a table name containing spaces must be a quoted identifier.
# NOTE(review): the spaces in the table name may be underscores lost in
# copy/paste (sample_model_preds_explanations_0701_0708?) - confirm against
# the dataset before running.
# NOTE(review): the 'is_mobile' attribution is exposed as `device_id`, while the
# other columns reuse the feature name - confirm this alias is intended.
query_attributions = """#standardsql
SELECT
  (SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'pageviews') AS pageviews,
  (SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'country') AS country,
  (SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'is_mobile') AS device_id,
  (SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'os') AS os
FROM `gcp-sandbox-311420.bqml_tutorial.sample model preds explanations 0701 0708`
"""
# Submit the query through the BigQuery client and load the result into a DataFrame.
job_attr = client.query(query_attributions)
df_attr = job_attr.to_dataframe()

## bq_client_and_poll_data.py
# BigQuery client used by the query snippets (they call `client.query(...)`).
from google.cloud.bigquery import Client, QueryJobConfig
# No arguments are passed, so project and credentials are not set explicitly here.
client = Client()

## truera_workspace.py
!pip install truera

import truera
from truera.client.truera_workspace import TrueraWorkspace
from truera.client.truera_authentication import TokenAuthentication

# Wrap TOKEN (not defined in this snippet) in a token authenticator and open
# the workspace against https://app.truera.net/.
token_auth = TokenAuthentication(TOKEN)
tru = TrueraWorkspace('https://app.truera.net/', token_auth)

## bqml_gbm.sql
#standardSQL
# Create the boosted-tree classifier bqml_tutorial.sample_model from the
# columns selected below.
# The model path is backtick-quoted: a single-quoted token is a string literal
# in BigQuery standard SQL, not an identifier.
CREATE MODEL `bqml_tutorial.sample_model`
OPTIONS(model_type='boosted_tree_classifier') AS
SELECT
  IF(totals.transactions IS NULL, 0, 1) AS label, # 1 when transactions is non-NULL, else 0
  IFNULL(device.operatingSystem, "") as os,
  device.isMobile AS is_mobile,
  IFNULL(geoNetwork.country, "") AS country,
  IFNULL(totals.pageviews, 0) AS pageviews
# NOTE(review): the source table after FROM is missing from this excerpt.
FROM

## bqml-ml-explain-predict.sql
#standardSQL
SELECT
  visitorID, #passthrough
  Country,
  Top_feature_attributions, # from ml.explain_predict
  SUM(predicted_label) as total_predicted_purchases,
  label #passthrough
FROM
  ML.EXPLAIN_PREDICT(MODEL `bqml_tutorial.sample_model`, (
SELECT

## add-truera-snippet.py
! pip install truera
from truera.client.truera_workspace import TrueraWorkspace  # workspace client class
from truera.client.truera_authentication import TokenAuthentication  # token-based authentication
# Placeholder: replace "ADD YOUR AUTH TOKEN" with a real token before running.
tru = TrueraWorkspace("https://app.truera.net", TokenAuthentication("ADD YOUR AUTH TOKEN"))
# Placeholder: replace the score_type string with one of the options it lists.
tru.add_project("My project", score_type="CHOOSE A SCORE TYPE: regression, classification, probits or logits")
tru.add_data_collection("data_collection_1")  # the schema that will hold your data "splits" and model
tru.add_data(
    data = data,
    data_split_name = "train",
    column_spec=ColumnSpec(

## rebalance-gender.py
def rebalance_gender(df, data_type):
   if data_type == 0:
       df_female_true = df[(df['Sex'] == 'Female') & (df['PINCP'] == True)]
       df_else = df[~((df['Sex'] == 'Female') & (df['PINCP'] == True))]
   else:
       df_female_true = df[(df['Sex_Female'] == 1) & (df['PINCP'] == True)]
       df_else = df[~((df['Sex_Female'] == 1) & (df['PINCP'] == True))]


   if data_type == 0:

## fair-splits-without-sex.py
# add data splits to the collection we just created
year_begin = 2014
year_end = 2016 # exclusive
states = ['CA', 'NY']


for year in range(year_begin, year_end):
   for state in states:
       tru.add_data_split(f'{year}-{state}',
         pre_data = data[year][state]['data_preprocessed'].\

## fairness-new-data-collection.py
# Drop the 'Sex' entry from the feature map, then register a second data
# collection that uses the reduced map and ships no transform with the model.
feature_map.pop('Sex')
tru.add_data_collection(
    "Data Collection v2",
    pre_to_post_feature_map=feature_map,
    provide_transform_with_model=False,
)
	tru.set_environment("remote")
	tru.add_project("BQML Integration Example", score_type = "logits")
	# Flatten the explanation output so that each feature's attribution becomes
	# its own column. The table path is quoted with backticks: in BigQuery
	# standard SQL a single-quoted token is a string literal, which is not a
	# valid FROM target, and a table name containing spaces must be quoted.
	# NOTE(review): the spaces in the table name may be underscores lost in
	# copy/paste - confirm against the dataset before running.
	query_attributions = """#standardsql
	SELECT
	(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'pageviews') AS pageviews,
	(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'country') AS country,
	(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'is_mobile') AS device_id,
	(SELECT attribution FROM UNNEST(top_feature_attributions) WHERE feature = 'os') AS os
	FROM `gcp-sandbox-311420.bqml_tutorial.sample model preds explanations 0701 0708`
	"""
	# Submit the query through the BigQuery client and load the result into a DataFrame.
	job_attr = client.query(query_attributions)
	df_attr = job_attr.to_dataframe()
	from google.cloud.bigquery import Client, QueryJobConfig
	client = Client()
	!pip install truera

	import truera
	from truera.client.truera_workspace import TrueraWorkspace
	from truera.client.truera_authentication import TokenAuthentication

	tru = TrueraWorkspace('https://app.truera.net/', TokenAuthentication(TOKEN))
	#standardSQL
	# Create the boosted-tree classifier bqml_tutorial.sample_model.
	# The model path is backtick-quoted: a single-quoted token is a string
	# literal in BigQuery standard SQL, not an identifier.
	CREATE MODEL `bqml_tutorial.sample_model`
	OPTIONS(model_type='boosted_tree_classifier') AS
	SELECT
	IF(totals.transactions IS NULL, 0, 1) AS label,
	IFNULL(device.operatingSystem, "") as os,
	device.isMobile AS is_mobile,
	IFNULL(geoNetwork.country, "") AS country,
	IFNULL(totals.pageviews, 0) AS pageviews
	# NOTE(review): the source table after FROM is missing from this excerpt.
	FROM
	#standardSQL
	SELECT
	visitorID, #passthrough
	Country,
	Top_feature_attributions, # from ml.explain_predict
	SUM(predicted_label) as total_predicted_purchases,
	label #passthrough
	FROM
	ML.EXPLAIN_PREDICT(MODEL `bqml_tutorial.sample_model`, (
	SELECT
	! pip install truera
	from truera.client.truera_workspace import TrueraWorkspace #import truera
	from truera.client.truera_authentication import TokenAuthentication # import authentication
	tru = TrueraWorkspace("https://app.truera.net", TokenAuthentication("ADD YOUR AUTH TOKEN"))
	tru.add_project("My project", score_type="CHOOSE A SCORE TYPE: regression, classification, probits or logits")
	tru.add_data_collection("data_collection_1") # the schema that will hold your data "splits" and model
	tru.add_data(
	data = data,
	data_split_name = "train",
	column_spec=ColumnSpec(
	def rebalance_gender(df, data_type):
	if data_type == 0:
	df_female_true = df[(df['Sex'] == 'Female') & (df['PINCP'] == True)]
	df_else = df[~((df['Sex'] == 'Female') & (df['PINCP'] == True))]
	else:
	df_female_true = df[(df['Sex_Female'] == 1) & (df['PINCP'] == True)]
	df_else = df[~((df['Sex_Female'] == 1) & (df['PINCP'] == True))]


	if data_type == 0:
	# add data splits to the collection we just created
	year_begin = 2014
	year_end = 2016 # exclusive
	states = ['CA', 'NY']


	for year in range(year_begin, year_end):
	for state in states:
	tru.add_data_split(f'{year}-{state}',
	pre_data = data[year][state]['data_preprocessed'].\
	feature_map.pop('Sex')
	tru.add_data_collection("Data Collection v2",
	pre_to_post_feature_map = feature_map,
	provide_transform_with_model = False)