Paul Singman (peacing) — public GitHub gists
peacing / f_crypto_prices_daily_change.sql
Last active June 12, 2022 14:05
daily change of crypto prices

-- variant 1: filter on the pre-computed datekey column
with yest AS (
  SELECT currency, price
  FROM f_crypto_prices
  WHERE datekey = '20220315')
, today AS (
  SELECT currency, price
  FROM f_crypto_prices
  WHERE datekey = '20220316')
SELECT t.currency,
       t.price - y.price AS daily_change  -- completion sketch: the preview truncates at SELECT
FROM today t
JOIN yest y ON y.currency = t.currency;

-- variant 2: truncate the raw datetime column to a day
with yest AS (
  SELECT currency, price
  FROM f_crypto_prices
  WHERE TRUNC(datetime, 'day') = '20220315')
, today AS (
  SELECT currency, price
  FROM f_crypto_prices
  WHERE TRUNC(datetime, 'day') = '20220316')
SELECT t.currency,
       t.price - y.price AS daily_change  -- completion sketch, as above
FROM today t
JOIN yest y ON y.currency = t.currency;
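The same daily change can also be computed without the two CTEs; a hedged alternative sketch using a window function over the same table:

SELECT currency,
       datekey,
       price - LAG(price) OVER (PARTITION BY currency ORDER BY datekey) AS daily_change
FROM f_crypto_prices
WHERE datekey IN ('20220315', '20220316');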
with begin as
 (select currency, max(price) as price
  from crypto_prices_delta
  where date_key = {{date_key | sqlsafe}}
    and datetime = (select min(datetime) from crypto_prices_delta
                    where date_key = {{date_key | sqlsafe}})
  group by currency)
-- completion sketch: the preview truncates after "end as"; presumably the mirror CTE at max(datetime)
, end as
 (select currency, max(price) as price
  from crypto_prices_delta
  where date_key = {{date_key | sqlsafe}}
    and datetime = (select max(datetime) from crypto_prices_delta
                    where date_key = {{date_key | sqlsafe}})
  group by currency)
select b.currency, b.price as open_price, e.price as close_price
from begin b join end e on b.currency = e.currency;
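The {{date_key | sqlsafe}} placeholders are likely rendered with the JinjaSql templating library, which provides a sqlsafe filter that inlines the bound value directly into the SQL text. A minimal Python sketch of rendering the query above (the surrounding pipeline code is an assumption):

from jinjasql import JinjaSql

template = """
select currency, max(price) as price
from crypto_prices_delta
where date_key = {{ date_key | sqlsafe }}
"""

j = JinjaSql(param_style='pyformat')
query, bind_params = j.prepare_query(template, {'date_key': 20220316})
print(query)  # the date_key literal is inlined because of sqlsafe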
import time

counter = 0
while True:
    ################
    ## code
    ## to fetch
    ## crypto API
    ## data
    ################
    counter += 1
    time.sleep(60)  # assumption: the polling interval isn't shown in the preview
peacing / crypto_data_fetcher.py
Created March 20, 2022 21:55
saving crypto data
import requests
from datetime import datetime
import pandas as pd
import awswrangler as wr
import boto3

# key_access_aws / key_secret_aws are defined outside the preview (e.g. loaded from config)
session = boto3.session.Session(aws_access_key_id=key_access_aws,
                                aws_secret_access_key=key_secret_aws)
s3 = session.client('s3')
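The preview ends before the fetch-and-save logic. A hedged sketch of how the rest of a fetcher like this might continue from the snippet above, assuming the CoinGecko public API as the source and a placeholder S3 path (neither is shown in the gist):

def fetch_and_save_prices():
    # fetch current quotes from a public price API (assumed source)
    resp = requests.get(
        'https://api.coingecko.com/api/v3/simple/price',
        params={'ids': 'bitcoin,ethereum', 'vs_currencies': 'usd'},
    )
    resp.raise_for_status()
    prices = resp.json()  # e.g. {'bitcoin': {'usd': 41000.0}, ...}

    # shape into a frame and write to S3 with awswrangler
    df = pd.DataFrame(
        [(currency, quote['usd'], datetime.utcnow()) for currency, quote in prices.items()],
        columns=['currency', 'price', 'datetime'],
    )
    wr.s3.to_parquet(
        df=df,
        path='s3://my-bucket/crypto_prices/',  # placeholder bucket/prefix
        dataset=True,
        boto3_session=session,
    )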
import pandas as pd

# group by month using the loan created date and calculate some stats off the grouped data
def group_functions(x):
    d = {}
    d['loan_app_count'] = x['loan_amount'].count()
    d['loan_funded_count'] = x['funded_at'].count()
    d['conversion_rate'] = max(x['funded_at'].count() / x['created_at'].count(), 0)
    d['time_to_conversion_avg'] = x['time_to_conversion'].mean()
    return pd.Series(d, index=['loan_app_count', 'loan_funded_count',
                               'conversion_rate', 'time_to_conversion_avg'])  # completion: preview truncated mid-list
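A hedged usage sketch, applying the function per calendar month of created_at; the sample rows and the time_to_conversion derivation are assumptions based on the columns the function reads:

loans = pd.DataFrame({
    'loan_amount': [1000, 2000, 1500],
    'created_at': pd.to_datetime(['2022-01-05', '2022-01-20', '2022-02-10']),
    'funded_at': pd.to_datetime(['2022-01-07', None, '2022-02-12']),
})
# assumed derivation: days between application and funding (NaT stays NaN)
loans['time_to_conversion'] = (loans['funded_at'] - loans['created_at']).dt.days

monthly = loans.groupby(loans['created_at'].dt.to_period('M')).apply(group_functions)
print(monthly)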
import pyspark.pandas as ps

# data path in HDFS
loans_filename = '/FileStore/tables/loans.csv'
loans_df = ps.read_csv(
    loans_filename,
    header=None,
    names=['loan_amount', 'address', 'created_at', 'funded_at'],
    infer_datetime_format=True,
)  # completion: the preview truncates inside the call
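A quick hedged check on the loaded frame, assuming loan_amount is inferred as numeric:

print(loans_df.head())                 # rows are computed lazily on Spark
print(loans_df['loan_amount'].mean())  # distributed aggregation through the pandas API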
version: "3"
services:
  lakefs-setup:
    image: treeverse/lakefs:latest
    container_name: lakefs-setup
    depends_on:
      - postgres
      - minio-setup
    environment:
      - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=some random secret string
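The preview cuts off before the postgres and minio-setup services this container depends on. A hedged sketch of the postgres dependency, following the lakeFS quickstart compose examples rather than this gist (image tag and credentials are assumptions):

  postgres:
    image: postgres:11
    container_name: postgres
    environment:
      - POSTGRES_USER=lakefs
      - POSTGRES_PASSWORD=lakefs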
peacing / sessions.sql
Last active September 29, 2021 17:52
create table user_sessions
USING DELTA AS
( select user_id, event_date, event_action,
    SUM(is_new_session) OVER (ORDER BY user_id, event_date) AS global_session_id,
    SUM(is_new_session) OVER (PARTITION BY user_id ORDER BY event_date) AS user_session_id
  FROM
  ( select *,
      CASE WHEN unix_timestamp(event_date) - unix_timestamp(last_event) >= (24*60*60) OR
           last_event is NULL THEN 1 ELSE 0 END AS is_new_session
    from (
      -- completion sketch: the preview truncates here; last_event is presumably the same
      -- user's previous event, and the source table name is an assumption
      select *,
        LAG(event_date) OVER (PARTITION BY user_id ORDER BY event_date) AS last_event
      from user_events
    ) events_with_last
  ) flagged
)
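A hedged follow-up query against the table created above, summarizing each user session:

select user_id, user_session_id,
       count(*) as events_in_session,
       min(event_date) as session_start,
       max(event_date) as session_end
from user_sessions
group by user_id, user_session_id;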