GitHub gists by Bruno Gonzalez (bruno-uy)

bruno-uy / redshift_copy_error_message.sql
Created March 15, 2024 13:52
Full message for Redshift COPY error when copying from parquet files
-- stl_load_errors alone often shows a truncated or generic error for parquet COPYs; joining svl_s3log surfaces the full message
select s3l.message, s3l.*, sle.*
from stl_load_errors sle
left join svl_s3log s3l
on sle.query = s3l.query
order by sle.starttime desc
limit 10;
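For context, the kind of parquet COPY this query helps debug looks like the following sketch (the table, bucket, and IAM role names are placeholders, not from the gist):

copy analytics.events
from 's3://my-bucket/events/'
iam_role 'arn:aws:iam::123456789012:role/MyRedshiftRole'
format as parquet;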
bruno-uy / print_variable_name_and_value.py
Created April 26, 2023 14:24
Print variable name and value (Python shortest version)
variable = "This is the value"
print(f"{variable=}")  # the f-string "=" specifier requires Python 3.8+
# >>> variable='This is the value'
bruno-uy / session_cache_off_redshift.sql
Last active March 8, 2023 13:50
Set session cache off for Amazon Redshift
-- This setting disables the results cache, so we can see the full processing runtime each time we run the query
SET enable_result_cache_for_session TO OFF;
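-- The default is ON; once you're done measuring, you can restore it for the session:
SET enable_result_cache_for_session TO ON;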
bruno-uy / print_current_line.py
Created December 23, 2022 12:05
Print current line of the current script
from inspect import currentframe, getframeinfo
print(getframeinfo(currentframe()).lineno)  # prints this statement's line number (2 here)
bruno-uy / add_schema_to_search_path.sql
Created May 27, 2022 12:29
Table not showing up when querying PG_TABLE_DEF
-- Problem: you don't see all the schemas when querying PG_TABLE_DEF
-- Solution:
-- 1. First, check whether the schema you're trying to query is on the search path
show search_path;
-- 2. Add the missing one(s) to the search path (imagine the result was only public and you're missing data_warehouse and matching)
set search_path to '$user', public, data_warehouse, matching; -- Keep '$user' literally, whatever your actual username is
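-- 3. Optionally, confirm the tables now show up (example check using the schema from step 2):
show search_path;
select distinct tablename from pg_table_def where schemaname = 'data_warehouse';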
bruno-uy / git_good_practices.md
Last active October 18, 2022 14:59
Git good practices

Git good practices

  1. Write meaningful and concise commit messages:
    • ❌ "Add new feature"
    • ✅ "Change X and Y because of Z"
  2. Follow a pattern / convention for commit messages. You can check a good reference here; a sketch of such a message follows this list.
  3. Squash commits you did for testing / adding small changes. You can check how to do that here.
  4. Separate your commits into isolated units of "atomic" changes. Examples:
    • Changes in one class / file
    • A refactor done before the actual change you'll be making
    • Changes in one function, if the change is considerable
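
A sketch of a message following points 1 and 2, using the common type(scope): summary convention (the scope and wording here are made up for illustration; the linked reference may recommend a different pattern):

fix(report): exclude refunded orders from daily totals because they were double-counted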
bruno-uy / squash_git_commits.md
Last active March 23, 2023 02:37
Squash git commits into one before pushing to origin

Squash commits

Definition: combining multiple commits into one. It's more about keeping the commit history tidy than about fixing a technical problem.

First, figure out how many commits you have to squash. To check that, you can use:

git log

Say you want to combine the last 3 commits into one. You'll do a soft reset to HEAD minus 3 commits:
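
git reset --soft HEAD~3

The soft reset keeps all the changes from those commits staged; a single new commit then replaces the three (the message below is just a placeholder):

git commit -m "One commit with the combined changes"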

bruno-uy / set_pandas_display_options.py
Last active July 7, 2023 13:10
Set pandas display options
import pandas as pd
# Show up to 500 rows/columns, widen the output, and never truncate cell contents when printing DataFrames
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
pd.set_option("display.max_colwidth", None)
bruno-uy / df_to_dict_with_none.py
Created December 31, 2021 11:04
Export pandas DataFrame to a dict with None instead of nan
import pandas as pd
import numpy as np
df = pd.DataFrame({"A": [1, 2, 3], "B": [1.2, np.nan, 3.4]})
# Replacing with lists (rather than value=None) turns NaN into None and the column into object dtype
result = (
    df
    .replace([np.nan], [None], regex=False)
    .to_dict(orient="records")
)
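# result should look like:
# [{'A': 1, 'B': 1.2}, {'A': 2, 'B': None}, {'A': 3, 'B': 3.4}]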
bruno-uy / read_all_csv_gz_current_folder.py
Created December 31, 2021 10:57
Read all csv.gz files from the current folder in a pandas DataFrame
import os
import pandas as pd
df = pd.concat(
    [pd.read_csv(f, compression="gzip") for f in os.listdir() if f.endswith(".gz")],
    ignore_index=True,
)