Alec Barrett-Wilsdon alecbw

## [Oneliner] Print CSV Info
python3
# Paste into a python3 REPL: lists the non-empty .csv/.xlsx files in the current directory, loads each one (read_excel for .xlsx, read_csv otherwise) and prints its name, shape and columns.
import pandas as pd; import os; files = [f for f in os.listdir('.') if (os.path.isfile(f) and os.path.getsize(f) != 0 and f.endswith((".csv", ".xlsx")))]; print(files); df_tuples = [(f, pd.read_excel(f) if f.endswith(".xlsx") else pd.read_csv(f)) for f in files]; [print(df_tup[0], df_tup[1].shape, df_tup[1].columns, "\n") for df_tup in df_tuples]

## break-sls-deploy-if-env-vars-missing.yml
# Deploy guard: the command text below embeds an environment-variable reference.
# Per its own message, the intent is that an unset variable aborts the deploy.
# NOTE(review): `custom.scripts.commands` looks like serverless-plugin-scripts
# configuration — confirm the plugin is installed before relying on this.
custom:
  scripts:
    commands:
      hello: This breaks the deploy if env vars aren't set. ${env:FOOBAR}

## Track NPS Survey
// Attach the click tracker to every element with the "feedbackButton" class.
// Declared with const/let: assigning to undeclared names creates implicit
// globals and throws a ReferenceError in strict mode and ES modules.
const feedbackButtons = document.getElementsByClassName("feedbackButton");

for (let i = 0; i < feedbackButtons.length; i++) {
  feedbackButtons[i].addEventListener("click", trackClick);
}

function trackClick(event) {
  var page = window.location.pathname
  var button = event.target.id
  fetch('https://foobar.execute-api.us-west-1.amazonaws.com/prod/feedback?page=' + page + '&button=' + button)

## Garbage_Collect_DataFrames.py
import gc

# Collect the names of every module-level variable that holds a DataFrame.
# The namespace is snapshotted with list() first: iterating the live dict
# while new names get bound raises
# "RuntimeError: dictionary changed size during iteration".
to_delete = [
    name
    for name, value in list(globals().items())
    if isinstance(value, pd.DataFrame)
]

for name in to_delete:
    print(name)
    # `del <loop variable>` would only unbind the loop variable itself;
    # remove the real global binding so the DataFrame can be reclaimed.
    del globals()[name]

# Force a collection pass now that the references have been dropped.
gc.collect()

## Facebook Ads Lookup API call.py
import os

import requests

# Date bounds for the lookup (start and end are the same day here).
# NOTE(review): neither value is referenced in the visible part of this
# snippet — presumably they are appended to the query further down; confirm.
time_start = '2020-09-01'
time_end = '2020-09-01'
# Names of the fields to request.
fields = ['website_ctr','reach','adset_name','frequency','action_values','campaign_name','unique_actions','unique_clicks','video_avg_percent_watched_actions','video_p75_watched_actions','spend','cpc','video_p25_watched_actions','canvas_avg_view_time','canvas_avg_view_percent','campaign_id','video_p50_watched_actions','ctr','cpm','cpp','unique_ctr','video_avg_time_watched_actions','ad_name','impressions','labels','video_p95_watched_actions','cost_per_10_sec_video_view','ad_id','adset_id','clicks','website_purchase_roas','location','actions','cost_per_unique_click']
# Collapse the list into a single bracketed string of single-quoted,
# comma-separated names, i.e. "['website_ctr','reach',...]".
fields = "['" + "','".join(fields) + "']"

# Base insights URL for the account; raises KeyError if FB_ACCOUNT_ID is not
# set in the environment.
api_url =  'https://graph.facebook.com/v8.0/' + os.environ['FB_ACCOUNT_ID'] + "/insights?"

# First query parameter, appended directly after the "?".
api_url += "level=ad"

## Get Row Count of CSV in S3
import boto3

def get_row_count_of_s3_csv(bucket_name, path):
    sql_stmt = """SELECT count(*) FROM s3object """
    req = boto3.client('s3').select_object_content(
        Bucket=bucket_name,
        Key=path,
        ExpressionType="SQL",
        Expression=sql_stmt,
        InputSerialization = {"CSV": {"FileHeaderInfo": "Use", "AllowQuotedRecordDelimiter": True}},

## mailchimp_modify_user_tag.py
import hashlib
import requests
import os
import json


"""
Docs: https://mailchimp.com/developer/guides/organize-contacts-with-tags/#label-a-contact-with-a-tag
Emails must be MD5-hashed before making the call (this is done below).
The API returns 204 No Content regardless of whether the input is valid.

## IAM Role AttachedPolicy for awswrangler writes to Athena-table-linked S3 Data Lakes.json
{
    "Statement": [
        {
            "Action": [
                "s3:ListBucket",
                "s3:GetBucketLocation"
            ],
            "Effect": "Allow",
            "Resource": [
                "arn:aws:s3:::bucket-your-data-is-in",

## IAM Role AttachedPolicy for reading from Athena Tables.json

{
    "Statement": [
        {
            "Action": [
                "s3:ListBucket",
                "s3:GetBucketLocation"
            ],
            "Effect": "Allow",
            "Resource": [

## awswrangler Athena+Glue+Redshift Example Functions.py
import awswrangler as wr
import pandas as pd

# Get the current IAM role/user
name = wr.sts.get_current_identity_name()
arn = wr.sts.get_current_identity_arn()

# Reading files
# Each example below rebinds `df`; keep only the call you need.
# pandas read_csv keyword arguments can be passed through, e.g.
# usecols=['col_name1'] to load a subset of columns and
# parse_dates=["col_name2"] to parse date columns.
df = wr.s3.read_csv("s3://sample-bucket/sample.csv")
df = wr.s3.read_json("s3://sample-bucket/sample.json")
	python3
	import pandas as pd; import os; files = [f for f in os.listdir('.') if (os.path.isfile(f) and os.path.getsize(f) != 0 and any(x for x in [".csv", ".xlsx"] if x in f))]; print(files); df_tuples = [(f, pd.read_csv(f)) for f in files]; [print(df_tup[0], df_tup[1].shape, df_tup[1].columns, "\n") for df_tup in df_tuples]
	custom:
	scripts:
	commands:
	hello: This breaks the deploy if env vars aren't set. ${env:FOOBAR}
	feedbackButtons = document.getElementsByClassName("feedbackButton")

	for (i = 0; i < feedbackButtons.length; i++) {
	feedbackButtons[i].addEventListener("click", trackClick);
	}

	function trackClick(event) {
	var page = window.location.pathname
	var button = event.target.id
	fetch('https://foobar.execute-api.us-west-1.amazonaws.com/prod/feedback?page=' + page + '&button=' + button)
	import gc

	to_delete = []
	for name, value in vars().items():
	if isinstance(value, pd.DataFrame):
	to_delete.append(name)

	for item in to_delete:
	print(item)
	del item
	import requests

	time_start = '2020-09-01'
	time_end = '2020-09-01'
	fields = ['website_ctr','reach','adset_name','frequency','action_values','campaign_name','unique_actions','unique_clicks','video_avg_percent_watched_actions','video_p75_watched_actions','spend','cpc','video_p25_watched_actions','canvas_avg_view_time','canvas_avg_view_percent','campaign_id','video_p50_watched_actions','ctr','cpm','cpp','unique_ctr','video_avg_time_watched_actions','ad_name','impressions','labels','video_p95_watched_actions','cost_per_10_sec_video_view','ad_id','adset_id','clicks','website_purchase_roas','location','actions','cost_per_unique_click']
	fields = "['" + "','".join(fields) + "']"

	api_url = 'https://graph.facebook.com/v8.0/' + os.environ['FB_ACCOUNT_ID'] + "/insights?"

	api_url += "level=ad"
	import boto3

	def get_row_count_of_s3_csv(bucket_name, path):
	sql_stmt = """SELECT count(*) FROM s3object """
	req = boto3.client('s3').select_object_content(
	Bucket=bucket_name,
	Key=path,
	ExpressionType="SQL",
	Expression=sql_stmt,
	InputSerialization = {"CSV": {"FileHeaderInfo": "Use", "AllowQuotedRecordDelimiter": True}},
	import hashlib
	import requests
	import os
	import json


	"""
	Docs: https://mailchimp.com/developer/guides/organize-contacts-with-tags/#label-a-contact-with-a-tag
	Emails must be MD5 hashed before sending the call (such is done so below)
	The API returns 204 No Content no matter if the input is valid or invalid
	{
	"Statement": [
	{
	"Action": [
	"s3:ListBucket",
	"s3:GetBucketLocation"
	],
	"Effect": "Allow",
	"Resource": [
	"arn:aws:s3:::bucket-your-data-is-in",
	import awswrangler as wr
	import pandas as pd

	get current IAM role/user
	name = wr.sts.get_current_identity_name()
	arn = wr.sts.get_current_identity_arn()

	# Reading files
	df = wr.s3.read_csv(f"s3://sample-bucket/sample.csv") #you can optionally select a subset of columns with names=['col_name1'] and parse date cols with: parse_dates=["col_name2"]
	df = wr.s3.read_json(f"s3://sample-bucket/sample.json")