Ian Whitestone ian-whitestone

## simulations_for_post.ipynb

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                ian-whitestone
                / simulations_for_post.ipynb
            
            
              Created
              March 13, 2021 19:49
            
              
                Code for the randomization unit <> analysis unit post - https://ianwhitestone.work/randomization-unit-analysis-unit/
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## row_num.py
# Let's add a row number to indicate the first message per app & microservice
# (row_num == 1 marks the first message within each (id, topic) group).
# This code is analogous to the SQL: row_number() over (partition by id, topic order by msg_ts asc)
# Sorting by (id, msg_ts) before grouping makes cumcount() number rows in msg_ts
# order within each group; cumcount() is 0-based, hence the + 1.
# NOTE(review): assumes df is a pandas DataFrame, so the assignment back to df
# aligns the sorted result on the index rather than by position - confirm.
df['row_num'] = df.sort_values(['id', 'msg_ts'], ascending=True).groupby(['id', 'topic']).cumcount() + 1

## warehouse_uptime_example.sql
with
warehouse_periods as (
select
  warehouse_name,
  timestamp as valid_from,
  lead(timestamp) over (partition by warehouse_name order by timestamp asc) as    valid_to,
  event_name = 'RESUME_WAREHOUSE' as is_active
from snowflake.account_usage.warehouse_events_history
where
  -- double check these names, can't remember exact values

## simulations.ipynb

      
              1 file
            
          
              1 fork
            
          
              1 comment
            
          
              1 star
            
          
                ian-whitestone
                / simulations.ipynb
            
            
              Last active
              December 20, 2023 03:53
            
              
                Code for the choosing your randomization unit post - https://ianwhitestone.work/choosing-randomization-unit/
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## zappa_package_cleaner.py
"""
Read accompanying blog post: https://ianwhitestone.work/Zappa-Zip-Callbacks
"""
import os
import re
import shutil
import tarfile
import zipfile


## example_cte_query.sql
with sample_data as (
    select *
    from snowflake_sample_data.tpch_sf1.customer
),

nation_14_customers as (
    select *
    from sample_data
    where c_nationkey = 14
),

## notes.md

      
              1 file
            
          
              7 forks
            
          
              1 comment
            
          
              56 stars
            
          
                ian-whitestone
                / notes.md
            
            
              Last active
              March 1, 2023 01:45
            
              
                Best practices for Presto SQL
              
          
    Presto Specific


Don’t SELECT *; specify explicit column names (columnar store)
Avoid large JOINs (filter each table first)

In Presto, tables are joined in the order they are listed!
Join small tables earlier in the plan and leave larger fact tables to the end
Avoid cross joins or 1 to many joins as these can degrade performance


ORDER BY and GROUP BY take time

Only use ORDER BY in subqueries if it is really necessary


When using GROUP BY, order the columns from the highest cardinality (that is, the largest number of unique values) to the lowest.


## query_tags.sql
{% macro set_query_tag() -%}
    {# Start with any model-configured dict #}
    {% set tag_dict = config.get('query_tag', default={}) %}

    {# Regardless of resource type, we can always access the config via the 'model' variable #}
    {%- do tag_dict.update(
        dbt_snowflake_query_tags_version='1.1.3',
        app='dbt',
        dbt_version=dbt_version,
        project_name=project_name,

## mode_confettis.js
<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.4.0/dist/confetti.browser.min.js"></script>
 <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>


 <div id="bottom_of_page">Thanks for scrolling to the bottom 🙌</div>

 <div>Enjoy some confetti 🎉</div>

 <script>


## great_expecations_examples.py
## Pandas
import great_expectations as ge

# Build up expectations on a sample dataset and save them
train = ge.read_csv("data/npi.csv")
train.expect_column_values_to_not_be_null("NPI")
train.save_expectation_suite("npi_csv_expectations.json")

# Load in a new dataset and test them
test = ge.read_csv("data/npi_new.csv")
	# Let's add a row number to indicate the first message per app & microservice
	# This code is analogous to the SQL: row_number() over (partition by id, topic order by msg_ts asc)
	df['row_num'] = df.sort_values(['id', 'msg_ts'], ascending=True).groupby(['id', 'topic']).cumcount() + 1
	with
	warehouse_periods as (
	select
	warehouse_name,
	timestamp as valid_from,
	lead(timestamp) over (partition by warehouse_name order by timestamp asc) as valid_to,
	event_name = 'RESUME_WAREHOUSE' as is_active
	from snowflake.account_usage.warehouse_events_history
	where
	-- double check these names, can't remember exact values
	"""
	Read accompanying blog post: https://ianwhitestone.work/Zappa-Zip-Callbacks
	"""
	import os
	import re
	import shutil
	import tarfile
	import zipfile
	with sample_data as (
	select *
	from snowflake_sample_data.tpch_sf1.customer
	),

	nation_14_customers as (
	select *
	from sample_data
	where c_nationkey = 14
	),
	{% macro set_query_tag() -%}
	{# Start with any model-configured dict #}
	{% set tag_dict = config.get('query_tag', default={}) %}

	{# Regardless of resource type, we can always access the config via the 'model' variable #}
	{%- do tag_dict.update(
	dbt_snowflake_query_tags_version='1.1.3',
	app='dbt',
	dbt_version=dbt_version,
	project_name=project_name,
	<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.4.0/dist/confetti.browser.min.js"></script>
	<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>


	<div id="bottom_of_page">Thanks for scrolling to the bottom 🙌</div>

	<div>Enjoy some confetti 🎉</div>

	<script>
	## Pandas
	import great_expectations as ge

	# Build up expectations on a sample dataset and save them
	train = ge.read_csv("data/npi.csv")
	train.expect_column_values_to_not_be_null("NPI")
	train.save_expectation_suite("npi_csv_expectations.json")

	# Load in a new dataset and test them
	test = ge.read_csv("data/npi_new.csv")