This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create external table audience_sessions ( | |
machine_id string, | |
domain_name string, | |
url_hosts string, | |
url_dirs string, | |
url_pages string, | |
patterns string, | |
actions int, | |
start_time bigint, | |
end_time bigint, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
remove_non_ascii = lambda string: "".join(letter for letter in string if ord(letter) < 128) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Create a temp table with a sample of recent daily pivot date (Note: TEMP TABLEs only exist for the duration of your connection to the database.)-- | |
CREATE TEMP TABLE sample_daily AS(SELECT * FROM adobe_data.dat_daily_pivot WHERE sample_start_date <= '2016-03-30' ORDER BY sample_start_date DESC LIMIT 100); | |
--Check initial total_sales sum-- | |
SELECT SUM(total_sales) FROM sample_daily; | |
-->>1800.2825 | |
--Update <table_name> SET <col_name> = <what you want to update the column to> WHERE <where clause>-- | |
UPDATE sample_daily SET total_sales = total_sales * 4 WHERE sample_start_date <= '2016-03-30'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import psycopg2 as pg | |
# import pymysql | |
query = """ | |
SELECT | |
report_campaign_id, | |
SUM(impressions) | |
FROM | |
adobe_data.dat_daily_pivot | |
WHERE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bs4 | |
import requests | |
soup = bs4.BeautifulSoup(requests.get("https://support.google.com/dcm/partner/answer/6005910?hl=en").content, "html.parser") | |
table_names = [] | |
for a in soup.find_all("a"): | |
try: | |
if a.get('class')[0] == 'zippy': | |
table_names.append(a.get('id')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime as dt | |
def list_of_dates(start_date, end_date): | |
""" | |
Function returns a list of string dates in YYYY-MM-DD format between and including start and end date. | |
:param start_date: Start date string in YYYY-MM-DD format | |
:param end_date: End date string in YYYY-MM-DD format | |
:return: List of date strings | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.linalg import Vectors | |
def delete_index_from_dv(dense_vector, index): | |
""" | |
Takes PySpark dense vector, removes element at index. | |
:dense_vector: PySpark dense vector to operate on. | |
:index: Index of element to delete. | |
:return: Dense vector with element at index removed. | |
""" | |
temp_list = list(dense_vector) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_sure_digits(string): | |
import re | |
regex = u"(^\d+$)" | |
match = re.match(regex, string) | |
if match: | |
return match.group() | |
return "" | |
def make_sure_int(string): | |
try: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sudo mdadm --create --verbose /dev/md0 --level=0 --name=MY_RAID --raid-devices=2 /dev/xvdb /dev/xvdc | |
sudo cat /proc/mdstat | |
sudo mdadm --detail /dev/md0 | |
sudo mkfs.ext4 -L MY_RAID /dev/md0 | |
sudo mkdir -p /data/raid | |
sudo mount LABEL=MY_RAID /data/raid |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
from urlparse import urlparse | |
def get_top_alexa_sites(category): | |
category = category.title() | |
categories = ['Adult','Arts','Business','Computers','Games','Health','Home', | |
'Kids and Teens','News','Recreation','Reference','Regional','Science', | |
'Shopping','Society','Sports','World'] | |
assert(category in categories), "Category {} not in category list: {}".format(category, ", ".join(categories)) |
OlderNewer