Josh Levy-Kramer joshlk

## rolling_stats.py
import pandas as pd

def nunique_rolling_time_series(data_series, step_freqency, window_size, output_name=''):
    """
    Calculate a rolling statistic of nunique of a time series. The input series has a DateTime index.
    """

    data_series = data_series.sort_index()

    min_date = data_series.index.min()

## acorn_segments_lookup.csv

          
            Acorn_Category
            Acorn_Category_Name
            Acorn_Group
            Acorn_Group_Name
            Acorn_Type
            Acorn_Type_Code
            Acorn_Type_Name

            
              1
              1 Affluent Achievers
              A
              1.A Lavish Lifestyles
              1
              1.A.1
              1.A.1 Exclusive enclaves

            
              1
              1 Affluent Achievers
              A
              1.A Lavish Lifestyles
              2
              1.A.2
              1.A.2 Metropolitan money

            
              1
              1 Affluent Achievers
              A
              1.A Lavish Lifestyles
              3
              1.A.3
              1.A.3 Large house luxury

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              4
              1.B.4
              1.B.4 Asset rich families

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              5
              1.B.5
              1.B.5 Wealthy countryside commuters

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              6
              1.B.6
              1.B.6 Financially comfortable families

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              7
              1.B.7
              1.B.7 Affluent professionals

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              8
              1.B.8
              1.B.8 Prosperous suburban families

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              9
              1.B.9
              1.B.9 Well-off edge of towners

## python_anaconda_3_and_2
To have Anaconda installed with both Python 2 and Python 3 available simultaneously:
1. Download Python Anaconda 3
2. Create a Python 2.7 environment: `conda create -n py2k python=2.7 anaconda`
3. Navigate to the environment's install directory. Something like `Anaconda3\envs\py2k`
4. Rename `python` to `python2`, and in the `Scripts` subdirectory rename `pip` to `pip2`
5. Add those two directories (the environment directory and its `Scripts` subdirectory) to the PATH

## pandas_utils.py
import pandas as pd

def clean_str_cols(df, encoding='ascii'):
    """
    As string columns are stored as 'objects' it can cause many problems, especially when reading and writig CSVs. This function
    forces the columns to be strings and be encoded as a specified encoding.

    Solves `UnicodeEncodeError` errors when using `to_csv`.
    """
    df = df.copy()

## SQLAlchemy_quickstart.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                joshlk
                / SQLAlchemy_quickstart.md
            
            
              Last active
              December 8, 2017 14:43
            
              
                SQLAlchemy quickstart, tutorial
              
          
    SQLAlchemy has two parts:
* ORM: this maps tables and the relationships between tables to Python objects
* Core: allows you to write and execute SQL using Python expressions

Core

Select statement:
import pandas as pd

  
## pandas_to_sqlite.py
import pandas as pd
from sqlalchemy import create_engine

# Read the source CSV into memory.
source_frame = pd.read_csv(
    '<INPUT_FILE_PATH>.csv',
    encoding='utf',  # Needs to be unicode for sqlite
)

# Connect to the SQLite database file and append the rows to the target
# table (if_exists='append' adds to an existing table instead of failing).
sqlite_engine = create_engine('sqlite:///<DB_PATH>.sqlite')
source_frame.to_sql(name='<TABLE_NAME>', con=sqlite_engine, if_exists='append')

## custome_aggregate_functions.sql
-- See: https://stackoverflow.com/questions/48770517/take-first-second-third-last-value-and-selecting-rows-window-function-wit
----- N = 1
-- State transition function
-- agg_state: the current state, el: new element
create or replace function lag_agg_sfunc_1(agg_state point, el float)
    returns point
    immutable
    language plpgsql
    as $$
declare

## first_not_null.sql
-- https://wiki.postgresql.org/wiki/First/last_(aggregate)
-- Create a function that always returns the first non-NULL item
CREATE OR REPLACE FUNCTION public.first_agg ( anyelement, anyelement )
RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$
        SELECT $1;
$$;

-- And then wrap an aggregate around it
CREATE AGGREGATE public.FIRST (
        sfunc    = public.first_agg,

## cross_validate_keras.py
#!/usr/bin/env python

import warnings
import numbers
import time

import numpy as np

from sklearn.base import is_classifier, clone
from sklearn.utils import indexable

## keras_one_hot_encode.py
class OneHotEncode(Embedding):
    """
    One-hot-encode layer eg. [[1, 3]] -> [[[0,1,0,0],[0,0,0,1]]]

    input_dim: Number of categories to one-hot encode
    input_length: length of X vecotrs (optional)
    mask_zero: treat 0 as masking
    """

    def __init__(self, input_dim, input_length=None, mask_zero=False):
	import pandas as pd

	def nunique_rolling_time_series(data_series, step_freqency, window_size, output_name=''):
	"""
	Calculate a rolling statistic of nunique of a time series. The input series has a DateTime index.
	"""

	data_series = data_series.sort_index()

	min_date = data_series.index.min()
Acorn_Category	Acorn_Category_Name	Acorn_Group	Acorn_Group_Name	Acorn_Type	Acorn_Type_Code	Acorn_Type_Name
1	1 Affluent Achievers	A	1.A Lavish Lifestyles	1	1.A.1	1.A.1 Exclusive enclaves
1	1 Affluent Achievers	A	1.A Lavish Lifestyles	2	1.A.2	1.A.2 Metropolitan money
1	1 Affluent Achievers	A	1.A Lavish Lifestyles	3	1.A.3	1.A.3 Large house luxury
1	1 Affluent Achievers	B	1.B Executive Wealth	4	1.B.4	1.B.4 Asset rich families
1	1 Affluent Achievers	B	1.B Executive Wealth	5	1.B.5	1.B.5 Wealthy countryside commuters
1	1 Affluent Achievers	B	1.B Executive Wealth	6	1.B.6	1.B.6 Financially comfortable families
1	1 Affluent Achievers	B	1.B Executive Wealth	7	1.B.7	1.B.7 Affluent professionals
1	1 Affluent Achievers	B	1.B Executive Wealth	8	1.B.8	1.B.8 Prosperous suburban families
1	1 Affluent Achievers	B	1.B Executive Wealth	9	1.B.9	1.B.9 Well-off edge of towners
	To have Anaconda installed with both Python 2 and Python 3 available simultaneously:
	1. Download Python Anaconda 3
	2. Create a Python 2.7 environment: `conda create -n py2k python=2.7 anaconda`
	3. Navigate to the environment's install directory. Something like `Anaconda3\envs\py2k`
	4. Rename `python` to `python2`, and in the `Scripts` subdirectory rename `pip` to `pip2`
	5. Add those two directories (the environment directory and its `Scripts` subdirectory) to the PATH
	import pandas as pd

	def clean_str_cols(df, encoding='ascii'):
	"""
	As string columns are stored as 'objects' it can cause many problems, especially when reading and writig CSVs. This function
	forces the columns to be strings and be encoded as a specified encoding.

	Solves `UnicodeEncodeError` errors when using `to_csv`.
	"""
	df = df.copy()
	import pandas as pd
	from sqlalchemy import create_engine

	# Read the source CSV into memory.
	source_frame = pd.read_csv('<INPUT_FILE_PATH>.csv', encoding='utf')  # Needs to be unicode for sqlite
	# Connect to the SQLite database file and append the rows to the target table.
	sqlite_engine = create_engine('sqlite:///<DB_PATH>.sqlite')
	source_frame.to_sql(name='<TABLE_NAME>', con=sqlite_engine, if_exists='append')
	-- See: https://stackoverflow.com/questions/48770517/take-first-second-third-last-value-and-selecting-rows-window-function-wit
	----- N = 1
	-- State transition function
	-- agg_state: the current state, el: new element
	create or replace function lag_agg_sfunc_1(agg_state point, el float)
	returns point
	immutable
	language plpgsql
	as $$
	declare
	-- https://wiki.postgresql.org/wiki/First/last_(aggregate)
	-- Create a function that always returns the first non-NULL item
	CREATE OR REPLACE FUNCTION public.first_agg ( anyelement, anyelement )
	RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$
	SELECT $1;
	$$;

	-- And then wrap an aggregate around it
	CREATE AGGREGATE public.FIRST (
	sfunc = public.first_agg,
	#!/usr/bin/env python

	import warnings
	import numbers
	import time

	import numpy as np

	from sklearn.base import is_classifier, clone
	from sklearn.utils import indexable
	class OneHotEncode(Embedding):
	"""
	One-hot-encode layer eg. [[1, 3]] -> [[[0,1,0,0],[0,0,0,1]]]

	input_dim: Number of categories to one-hot encode
	input_length: length of X vecotrs (optional)
	mask_zero: treat 0 as masking
	"""

	def __init__(self, input_dim, input_length=None, mask_zero=False):