Skip to content

Instantly share code, notes, and snippets.

View benjameep's full-sized avatar

Benjamin Earl benjameep

View GitHub Profile
@benjameep
benjameep / etl_decorators.py
Last active September 7, 2023 17:26
etl_decorators
import concurrent.futures
from datetime import datetime, timedelta
import os
import json
def decorator_with_args(decorator):
"""A decorator for decorators, allowing them to be used with or without arguments."""
def wrapper(*args, **kwargs):
if len(args) == 1 and callable(args[0]):
return decorator(args[0])
@benjameep
benjameep / normalize_item.py
Last active September 12, 2023 17:21
pandas functions
from collections import UserDict
import re
import warnings
def to_snake_case(s):
    """Convert a camelCase or PascalCase string to lower-case snake_case.

    An underscore is inserted in front of every uppercase ASCII letter
    except one at the very start of the string, then the whole result is
    lower-cased. Each capital is treated separately, so runs of capitals
    are split letter by letter (``"userID"`` becomes ``"user_i_d"``).
    """
    # Zero-width match: any position that is not the start of the string
    # and is immediately followed by an uppercase letter.
    word_boundary = r'(?<!^)(?=[A-Z])'
    separated = re.sub(word_boundary, '_', s)
    return separated.lower()
class NormalizedItem(UserDict):
field_names = {}
@benjameep
benjameep / allVisibleElements.js
Created December 7, 2022 20:56
Html Analysis
function allVisibleElements(root=document.body){
const iter = document.createNodeIterator(root, NodeFilter.SHOW_ELEMENT, {
acceptNode(e){
if(e.offsetWidth && e.offsetHeight && e.getClientRects().length)
return NodeFilter.FILTER_ACCEPT
}
})
const nodes = []
let node;
while(node = iter.nextNode())
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@benjameep
benjameep / splix_servers.html
Created September 20, 2021 20:08
Refactor Splix servers json into a tabular format
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=900, initial-scale=1.0">
<title>JSON Tables</title>
<style>
table, th, td {
border-collapse: collapse;
@benjameep
benjameep / parse_timespan.py
Created September 9, 2021 17:02
parse a string timespan into milliseconds
import re
def parse_timespan(str):
SECOND = 1000
MINUTE = SECOND*60
HOUR = MINUTE*60
DAY = HOUR*24
multiplier = {
'ms': 1,
's':SECOND, 'sec':SECOND, 'second':SECOND, 'seconds':SECOND,
'm':MINUTE, 'min':MINUTE, 'mins': MINUTE, 'minute': MINUTE, 'minutes':MINUTE,
@benjameep
benjameep / find_sessions.py
Created September 8, 2021 21:05
Create session ids by grouping on person and time, splitting off into a new session whenever more than the allotted time elapses
def find_sessions(table, groupby_col, time_col, max_time_diff='P0DT0H30M0S'):
    """Assign a session id to every row of ``table``.

    A row starts a new session when it is the first row of its
    ``groupby_col`` group (its time difference is null) or when the gap to
    the previous row of the same group exceeds ``max_time_diff``.

    Args:
        table: pandas DataFrame holding the events.
        groupby_col: column (or columns) passed to ``DataFrame.groupby``.
        time_col: column whose consecutive within-group differences are
            compared against ``max_time_diff``.
        max_time_diff: largest gap that still continues a session. The
            default is the ISO 8601 duration for 30 minutes.

    Returns:
        An integer Series indexed like ``table``. Each value is the
        positional row number of the row that started the session.

    Note:
        The forward fill runs over row order, not per group, so each row is
        attached to the closest preceding session start. Rows of a group
        therefore need to be contiguous and ordered by ``time_col`` for the
        ids to be meaningful.
    """
    time_diff = table.groupby(groupby_col)[time_col].diff()
    # Positional indices of the rows that open a new session.
    new_sessions = np.where(time_diff.isnull() | (time_diff > max_time_diff))[0]
    # Float dtype so rows that continue a session can stay NaN until filled.
    sessions = pd.Series(dtype=np.float64, index=table.index)
    sessions.iloc[new_sessions] = new_sessions
    # ``fillna(method='ffill')`` is deprecated in recent pandas; ``ffill()``
    # is the supported equivalent.
    return sessions.ffill().astype(int)
@benjameep
benjameep / find_sub_networks.py
Last active September 8, 2021 19:33
Given a pandas DataFrame and a list of columns, use the columns as identifiers to find all complete networks and return a pandas Series of network ids
def find_sub_networks(table, columns):
aliases = {}
networks = {}
n_networks = 0
def get_network(item):
network = aliases.get(item)
while network in aliases:
network = aliases[network]
return network
@benjameep
benjameep / deserialize_dynamodb_object.py
Created April 15, 2021 18:19
Deserialize DynamoDB Object to Python
def deserialize_dynamodb_object(raw):
def deserialize(val, dynamodb_type):
if dynamodb_type == 'NULL':
return None
elif dynamodb_type == 'N':
return float(val) if '.' in str(val) else int(val)
elif dynamodb_type == 'M':
return deserialize_dynamodb_object(val)
elif dynamodb_type == 'L':
return [deserialize_dynamodb_object(n) for n in val]
@benjameep
benjameep / add-adobe-ecid-to-pardot-custom-field.html
Last active October 21, 2020 22:42
Grabs the Adobe MCMID value from the AMCV Cookie and reloads the page with the custom_field_parameter in the query string so that it will populate the custom field
<script>
// Name of the custom field, used as the query-string parameter to populate.
var custom_field_name = 'ecid';
// The cookie value is URL-encoded, so the MCMID sits between encoded pipes (%7C).
var mcmid = document.cookie.match(/MCMID%7C(\w+)%7C/);
var current_query = window.location.search;
// Check for the exact parameter name. A plain substring test would also match
// unrelated parameters (e.g. "decid") and wrongly skip the reload.
var has_custom_field = new URLSearchParams(current_query).has(custom_field_name);
if (!has_custom_field && mcmid) {
    current_query += current_query ? '&' : '?';
    // Assigning to location.search reloads the page with the extended query,
    // after which the parameter is present and this branch is not taken again.
    window.location.search = current_query + custom_field_name + '=' + mcmid[1];
}
</script>