Skip to content

Instantly share code, notes, and snippets.

Avatar

Paul Singman peacing

View GitHub Profile
View f_crypto_prices_daily_change_v2.sql
with yest AS (
SELECT currency, price
FROM f_crypto_prices
WHERE datekey = '20220315')
, today AS (
SELECT currency, price
FROM f_crypto_prices
WHERE datekey = '20220316')
SELECT
@peacing
peacing / f_crypto_prices_daily_change.sql
Last active Jun 12, 2022
daily change of crypto prices
View f_crypto_prices_daily_change.sql
with yest AS (
SELECT currency, price
FROM f_crypto_prices
WHERE TRUNC(datetime, 'day') = '20220315')
, today AS (
SELECT currency, price
FROM f_crypto_prices
WHERE TRUNC(datetime, 'day') = '20220316')
SELECT
View crypto_alerts.sql
with begin as
(select currency,
max(price) as price
from crypto_prices_delta
where date_key = {{date_key | sqlsafe}}
and datetime =
(select min(datetime)
from crypto_prices_delta
where date_key = {{date_key | sqlsafe}})
group by currency), end as
View hex_fetcher_loop.py
import time
counter = 0
while True:
################
## code
## to fetch
## crypto API
## data
View crypto_data_fetcher.py
import requests
from datetime import datetime
import pandas as pd
import awswrangler as wr
import boto3
session = boto3.session.Session(aws_access_key_id=key_access_aws, aws_secret_access_key=key_secret_aws)
s3 = session.client('s3')
View pandas_on_spark_monthly_loan_stats.py
# group by month using the loan created date and calculate some stats off the grouped data
def group_functions(x):
d = {}
d['loan_app_count'] = x['loan_amount'].count()
d['loan_funded_count'] = x['funded_at'].count()
d['conversion_rate'] = max(x['funded_at'].count() /x['created_at'].count(), 0)
d['time_to_conversion_avg'] = x['time_to_conversion'].mean()
return pd.Series(d, index=['loan_app_count', 'loan_funded_count',
View pandas_monthly_loan_stats.py
# group by month using the loan created date and calculate some stats off the grouped data
def group_functions(x):
d = {}
d['loan_app_count'] = x['loan_amount'].count()
d['loan_funded_count'] = x['funded_at'].count()
d['conversion_rate'] = max(x['funded_at'].count() / x['created_at'].count(), 0)
d['time_to_conversion_avg'] = x['time_to_conversion'].mean()
return pd.Series(d, index=['loan_app_count','loan_funded_count',
View pandas-on-spark-read-df.py
import pyspark.pandas as ps
# data path in HDFS
loans_filename = '/FileStore/tables/loans.csv'
loans_df = ps.read_csv(
loans_filename,
header=None,
names=['loan_amount', 'address', 'created_at', 'funded_at'],
infer_datetime_format=True,
View docker-compose.yml
version: "3"
services:
lakefs-setup:
image: treeverse/lakefs:latest
container_name: lakefs-setup
depends_on:
- postgres
- minio-setup
environment:
- LAKEFS_AUTH_ENCRYPT_SECRET_KEY=some random secret string
View user_sessions.sql
select sessions,
sessions_with_purchase,
sessions_with_like,
ROUND(sessions_with_purchase/sessions::FLOAT *100, 2) AS percent_sessions_with_purchase,
ROUND(sessions_with_like/sessions::FLOAT *100, 2) AS percent_sessions_with_like
FROM (
SELECT COUNT(DISTINCT global_session_id) AS sessions,
COUNT(DISTINCT CASE WHEN event_action = 'purchase' THEN global_session_id ELSE NULL END) AS sessions_with_purchase,
COUNT(DISTINCT CASE WHEN event_action = 'like' THEN global_session_id ELSE NULL END) AS sessions_with_like
FROM (