SQLAlchemy has two parts:
* ORM: maps tables and the relationships between tables to Python objects
* Core: allows you to write and execute SQL using Python expressions
Select statement:
import pandas as pd
import pandas as pd | |
def nunique_rolling_time_series(data_series, step_freqency, window_size, output_name=''): | |
""" | |
Calculate a rolling statistic of nunique of a time series. The input series has a DateTime index. | |
""" | |
data_series = data_series.sort_index() | |
min_date = data_series.index.min() |
Acorn_Category | Acorn_Category_Name | Acorn_Group | Acorn_Group_Name | Acorn_Type | Acorn_Type_Code | Acorn_Type_Name | |
---|---|---|---|---|---|---|---|
1 | 1 Affluent Achievers | A | 1.A Lavish Lifestyles | 1 | 1.A.1 | 1.A.1 Exclusive enclaves | |
1 | 1 Affluent Achievers | A | 1.A Lavish Lifestyles | 2 | 1.A.2 | 1.A.2 Metropolitan money | |
1 | 1 Affluent Achievers | A | 1.A Lavish Lifestyles | 3 | 1.A.3 | 1.A.3 Large house luxury | |
1 | 1 Affluent Achievers | B | 1.B Executive Wealth | 4 | 1.B.4 | 1.B.4 Asset rich families | |
1 | 1 Affluent Achievers | B | 1.B Executive Wealth | 5 | 1.B.5 | 1.B.5 Wealthy countryside commuters | |
1 | 1 Affluent Achievers | B | 1.B Executive Wealth | 6 | 1.B.6 | 1.B.6 Financially comfortable families | |
1 | 1 Affluent Achievers | B | 1.B Executive Wealth | 7 | 1.B.7 | 1.B.7 Affluent professionals | |
1 | 1 Affluent Achievers | B | 1.B Executive Wealth | 8 | 1.B.8 | 1.B.8 Prosperous suburban families | |
1 | 1 Affluent Achievers | B | 1.B Executive Wealth | 9 | 1.B.9 | 1.B.9 Well-off edge of towners |
To run Anaconda Python with both Python 2 and 3 simultaneously:
1. Download Anaconda 3
2. Create a Python 2.7 environment: `conda create -n py2k python=2.7 anaconda`
3. Navigate to the install directory. Something like `Anaconda3\envs\py2k`
4. Rename python to python2, and in Scripts rename pip to pip2
5. Add those two directories to the path
import pandas as pd | |
def clean_str_cols(df, encoding='ascii'): | |
""" | |
As string columns are stored as 'objects' it can cause many problems, especially when reading and writig CSVs. This function | |
forces the columns to be strings and be encoded as a specified encoding. | |
Solves `UnicodeEncodeError` errors when using `to_csv`. | |
""" | |
df = df.copy() |
# Load a CSV file into a SQLite database table via pandas + SQLAlchemy.
import pandas as pd
from sqlalchemy import create_engine

# Read the CSV decoding as UTF-8: sqlite needs unicode str values, not bytes.
df = pd.read_csv('<INPUT_FILE_PATH>.csv', encoding='utf-8')

# Engine pointing at an on-disk SQLite database file (three slashes = relative/absolute file path).
disk_engine = create_engine('sqlite:///<DB_PATH>.sqlite')

# if_exists='append' adds rows to an existing table instead of erroring,
# so the script can be re-run to accumulate data.
df.to_sql('<TABLE_NAME>', disk_engine, if_exists='append')
-- See: https://stackoverflow.com/questions/48770517/take-first-second-third-last-value-and-selecting-rows-window-function-wit | |
----- N = 1 | |
-- State transition function | |
-- agg_state: the current state, el: new element | |
create or replace function lag_agg_sfunc_1(agg_state point, el float) | |
returns point | |
immutable | |
language plpgsql | |
as $$ | |
declare |
-- https://wiki.postgresql.org/wiki/First/last_(aggregate)
-- State-transition function for a FIRST aggregate: it keeps the running
-- state ($1) and ignores each new element ($2), so the aggregate ends up
-- returning the first value it saw. STRICT makes PostgreSQL skip NULL
-- inputs, so the result is the first non-NULL item.
CREATE OR REPLACE FUNCTION public.first_agg ( anyelement, anyelement )
RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$
        SELECT $1;
$$;
-- And then wrap an aggregate around it | |
CREATE AGGREGATE public.FIRST ( | |
sfunc = public.first_agg, |
#!/usr/bin/env python | |
import warnings | |
import numbers | |
import time | |
import numpy as np | |
from sklearn.base import is_classifier, clone | |
from sklearn.utils import indexable |
class OneHotEncode(Embedding): | |
""" | |
One-hot-encode layer eg. [[1, 3]] -> [[[0,1,0,0],[0,0,0,1]]] | |
input_dim: Number of categories to one-hot encode | |
input_length: length of X vecotrs (optional) | |
mask_zero: treat 0 as masking | |
""" | |
def __init__(self, input_dim, input_length=None, mask_zero=False): |