Skip to content

Instantly share code, notes, and snippets.

View rachelannelise's full-sized avatar

Annelise rachelannelise

  • Parse.ly
  • Boulder, CO
View GitHub Profile
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
from django.conf import settings
from django.db import IntegrityError
from account.models import ParselyAPIKey, Publisher
# get list of API keys
keys = Publisher.objects.get(id=3624).apikeys.all()
@rachelannelise
rachelannelise / content_dim.sql
Created July 21, 2017 01:58
DPL Star Schema Queries
--CONTENT_DIM:
--Grain: 1 row per apikey, post
select distinct
--- ids
row_number() OVER (ORDER BY
apikey,
metadata_post_id) as content_dim_id,
from __future__ import division
from mage import MageClient
from deployment import local as settings
import datetime as dt
import time
import csv
# need to run ssh tunnel (dash repo and virtualenv; ./_start -e prod)
# need to start redis server on port 6479 redis-server --port 6479
from django.conf import settings
from django.db import IntegrityError
from account.models import ParselyAPIKey, Publisher
from django.core.exceptions import ObjectDoesNotExist
import csv
result = {}
with open('iheart_timezones.csv') as f:
reader = csv.reader(f)
from __future__ import division
from mage import MageClient
from deployment import hack as settings
from six import iteritems
from redis import Redis
from elasticsearch import Elasticsearch
import datetime as dt
import time
import csv
CREATE TABLE public.tmp_dpl_staging (
-- fields
action VARCHAR(256) NOT NULL,
apikey VARCHAR(256) NOT NULL,
campaign_id VARCHAR(256),
display BOOLEAN,
display_avail_height INTEGER,
display_avail_width INTEGER,
display_pixel_depth INTEGER,
display_total_height INTEGER,
--CONTENT_DIM:
--Grain: 1 row per apikey, post
--Surrogate Key: apikey + metadata_post_id
--Dimesions: metadata
--Calculated dimensions: word_count_buckets
CREATE TABLE public.content_dim (
-- keys
content_key varchar(256),
-- idetifying fields
--USER_DIM:
--Grain: 1 row per user
--Surrogate Key: visitor_ip + visitor_network_id + visitor_site_id
--Dimensions: ip address fields
--Calculated Dimensions: user_type (loyalty, returning, new); engaged_user (currently defined as >X sessions in full data set)
CREATE TABLE public.user_dim (
-- keys
user_key INTEGER NOT NULL,
-- identifying fields
--SESSION_CONTENT_FACT:
--Grain: 1 row per apikey, session, user, post
--Metrics: engaged_time, pageviews
CREATE TABLE public.session_content_fact (
-- keys
session_content_fact_key PRIMARY KEY,
content_key VARCHAR(256),
session_fact_key VARCHAR(256),
session_start_time_key TIMESTAMP,
--SESSION_FACT:
--Grain: 1 row per apikey, session, user
--Metrics: engaged_time, pageviews, visitors, sessions
-- keys
session_fact_key INTEGER NOT NULL,
-- session_detail_key,
user_key,
session_key,
session_start_time_key TIMESTAMP,