kade kxzk

## dev_limit.jinja
{#
dev_limit

A macro to limit the number of rows
processed during development and staging.
In production, the limit is removed.

By default, the macro runs using
the bernoulli sampling method.
However, the system method is faster,

## template.jinja
{#
<insert_macro_name>

<insert_description>

usage:
    {{ insert_macro_name(some_value) }}
    (sql) -> <insert sql generated>
    {{ insert_macro_name(some_value, some_value) }}
    (sql) -> <insert sql generated>

## changelog.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                kxzk
                / changelog.md
            
            
              Created
              June 24, 2020 02:04
            
          
    ${project-name} Change Log

All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
1.0.0


engage


## apply_func_all_dtype_cols.py
# Lowercase every object-dtype column in place.
# The target labels come from select_dtypes itself rather than a hand-written
# list: assigning a DataFrame to a list of columns pairs them up by position,
# so a hard-coded list that differs in order or length from the frame's
# object columns would mislabel the data or raise. Only the selected columns
# are written, so the rest of the DataFrame isn't overwritten.
object_cols = df.select_dtypes(include=["object"]).columns
df[object_cols] = df[object_cols].apply(lambda col: col.str.lower())

## infer_dtype.py
import numpy as np
from pandas.api.types import infer_dtype

# With skipna=True the missing value is ignored, so only the two strings
# decide the inferred type.
infer_dtype(["john", np.nan, "jack"], skipna=True)
# string

# With skipna=False the NaN takes part in the inference, so the values are
# reported as a mix of types.
infer_dtype(["john", np.nan, "jack"], skipna=False)
# mixed

## checking_dtype.py
from pandas.api.types import is_numeric_dtype

# An arbitrary string is neither an array nor the name of a numeric dtype,
# so the check reports False.
is_numeric_dtype(arr_or_dtype="hello world")
# False

## testing.py
# pandas.testing is the public home of the assertion helpers;
# pandas.util.testing was deprecated in pandas 1.0 and later removed.
from pandas.testing import assert_frame_equal

# Raises AssertionError when the two frames differ.
# Equivalent helpers exist for Series and Index as well
# (assert_series_equal, assert_index_equal).
assert_frame_equal(df_1, df_2)

## cats.py
import pandas as pd
from pandas.api.types import union_categoricals

food = pd.Categorical(["burger king", "wendys"])
food_2 = pd.Categorical(["burger king", "chipotle"])

# Concatenate the values of both categoricals and merge their category
# sets into a single Categorical.
union_categoricals([food, food_2])

## square_root.py
# Add an "age_sqrt" column holding the element-wise square root of "age".
# NOTE(review): assumes `df` and `np` (numpy) are already defined by the
# surrounding session — confirm.
df["age_sqrt"] = np.sqrt(df["age"])

## reciprocal.py
# Add an "age_reciprocal" column holding 1.0 divided by each "age" value.
# NOTE(review): rows where age is 0 are not special-cased here — confirm
# that is acceptable for the data.
df["age_reciprocal"] = 1.0 / df["age"]
	{#
	dev_limit

	A macro to limit the number of rows
	processed during development and staging.
	In production, the limit is removed.

	By default, the macro runs using
	the bernoulli sampling method.
	However, the system method is faster,
	{#
	<insert_macro_name>

	<insert_description>

	usage:
	{{ insert_macro_name(some_value) }}
	(sql) -> <insert sql generated>
	{{ insert_macro_name(some_value, some_value) }}
	(sql) -> <insert sql generated>
	# Lowercase every object-dtype column in place.
	# The target labels come from select_dtypes itself rather than a
	# hand-written list: assigning a DataFrame to a list of columns pairs them
	# up by position, so a hard-coded list that differs in order or length from
	# the frame's object columns would mislabel the data or raise. Only the
	# selected columns are written, so the rest of the DataFrame isn't
	# overwritten.
	object_cols = df.select_dtypes(include=["object"]).columns
	df[object_cols] = df[object_cols].apply(lambda col: col.str.lower())
	import numpy as np
	from pandas.api.types import infer_dtype

	# With skipna=True the missing value is ignored, so only the two strings
	# decide the inferred type.
	infer_dtype(["john", np.nan, "jack"], skipna=True)
	# string

	# With skipna=False the NaN takes part in the inference, so the values are
	# reported as a mix of types.
	infer_dtype(["john", np.nan, "jack"], skipna=False)
	# mixed
	from pandas.api.types import is_numeric_dtype

	# An arbitrary string is neither an array nor the name of a numeric dtype,
	# so the check reports False.
	is_numeric_dtype(arr_or_dtype="hello world")
	# False
	# pandas.testing is the public home of the assertion helpers;
	# pandas.util.testing was deprecated in pandas 1.0 and later removed.
	from pandas.testing import assert_frame_equal

	# Raises AssertionError when the two frames differ.
	# Equivalent helpers exist for Series and Index as well
	# (assert_series_equal, assert_index_equal).
	assert_frame_equal(df_1, df_2)
	import pandas as pd
	from pandas.api.types import union_categoricals

	food = pd.Categorical(["burger king", "wendys"])
	food_2 = pd.Categorical(["burger king", "chipotle"])

	# Concatenate the values of both categoricals and merge their category
	# sets into a single Categorical.
	union_categoricals([food, food_2])