Skip to content

Instantly share code, notes, and snippets.

@asifr
asifr / data.json
Last active September 19, 2015 01:30
Punchcard visualization using D3.js
[{"articles": [[2010, 6], [2011, 10], [2012, 11], [2013, 23], [2006, 1]],
"total": 51, "name": "The Journal of neuroscience"},
{"articles": [[2008, 1], [2010, 3], [2011, 4], [2012, 17], [2013, 10]],
"total": 35, "name": "Nature neuroscience"}]
@asifr
asifr / example_implementation.php
Last active August 29, 2015 14:13
An email-based authentication system
// Authentication state for the current request: not authenticated and no
// user record loaded until later code fills these in.
$IsAuth = false;
$User = array();

// Connect to the SQLite database file at BASEPATH.DBNAME.
// The PDO constructor throws PDOException when the connection fails, which is
// what the catch below handles; the error mode set afterwards only affects
// later calls on $db.
try {
    $db = new PDO('sqlite:'.BASEPATH.DBNAME);
    $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
} catch (PDOException $e) {
    die('Unable to open database. SQLite reported: '.$e->getMessage());
}

// Create the users table if it doesn't already exist.
// IF NOT EXISTS lets SQLite do the existence check itself in one statement.
// Probing sqlite_master first and chaining ->fetchAll() onto query() would be
// a fatal error whenever query() returns false (which is how failures are
// reported under ERRMODE_WARNING), and the separate check-then-create leaves a
// window in which another request can create the table first.
$db->exec("CREATE TABLE IF NOT EXISTS users (id INTEGER NOT NULL,name VARCHAR(255) DEFAULT '',email VARCHAR(255) DEFAULT '',token VARCHAR(255) DEFAULT '',vars TEXT,created INTEGER,status INTEGER NOT NULL DEFAULT 0,PRIMARY KEY (id));");
// Set token for user and send email
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@asifr
asifr / event_label.py
Last active December 3, 2020 16:48
Label each row of a dataframe with EventXHoursFromNow or EventWithinXHours
import pyspark.sql.functions as F
from pyspark.sql import Window
from pyspark.sql.column import Column
def overlaps(start_first, end_first, start_second, end_second):
    """Return whether two closed intervals overlap.

    The intervals ``[start_first, end_first]`` and
    ``[start_second, end_second]`` overlap when each one ends at or after the
    point where the other starts. Both comparisons use ``>=``, so intervals
    that merely touch at an endpoint count as overlapping.

    The two comparisons are combined with ``&`` rather than ``and`` so the
    expression also works for operands that overload ``>=`` and ``&``
    element-wise (presumably PySpark ``Column`` objects, given this file's
    imports -- confirm against callers).

    Returns:
        The value of ``(end_first >= start_second) & (end_second >= start_first)``;
        a plain ``bool`` when the arguments are ordinary comparable scalars.
    """
    return (end_first >= start_second) & (end_second >= start_first)
def eventXHrFromNow(hours: int, time_col: str, start_col: str, end_col: str) -> Column:
"""
Creates a new connection to spark and makes available:
`spark`, `sq` (`SQLContext`), `F`, and `Window` in the global namespace.
"""
from textwrap import dedent
import findspark
import os
def _formulate_pyspark_submit_args(submit_args=None):
    """Placeholder: accept ``submit_args`` and do nothing.

    The argument is ignored and the function returns ``None``.

    NOTE(review): only this stub body is visible in the listing; confirm
    against the full gist whether a real implementation is expected here.
    """
    return None
import numpy as np
def ffill(arr: np.ndarray):
    """Forward-fill NaN entries along the first axis of ``arr``.

    Each NaN is replaced by the most recent non-NaN value found earlier in the
    same column (i.e. at a smaller first-axis index). NaNs that precede the
    first valid value of a column are left as NaN. The input array is not
    modified.

    Args:
        arr: 2-D array; a 1-D array is also accepted and treated as a single
            column.

    Returns:
        A new array with the same shape as ``arr`` holding the filled values.
    """
    # A 1-D input has no second axis for the index arithmetic below, so lift
    # it to a single column and drop that column again on the way out.
    is_1d = arr.ndim == 1
    if is_1d:
        arr = arr[:, None]

    # Work on the transpose so the fill direction runs along axis 1.
    arr = arr.T
    mask = np.isnan(arr)
    # For every position keep its own index if valid, otherwise 0 ...
    idx = np.where(~mask, np.arange(mask.shape[1]), 0)
    # ... so that a running maximum yields the index of the last valid entry
    # seen so far (or 0 when none has been seen yet).
    np.maximum.accumulate(idx, axis=1, out=idx)
    # Gather the last-valid value for each position and undo the transpose.
    out = arr[np.arange(idx.shape[0])[:, None], idx].T
    return out[:, 0] if is_1d else out
from typing import Dict, List, Tuple, Optional
import numpy as np
import pandas as pd
def ffill(arr: np.ndarray):
arr = arr.T
mask = np.isnan(arr)
idx = np.where(~mask, np.arange(mask.shape[1]), 0)
np.maximum.accumulate(idx, axis=1, out=idx)
import numpy as np
def outlier_detect(data, threshold=1, method="IQR"):
assert method in ["IQR", "STD", "MAD"], "Method must be one of IQR|STD|MAD"
if method == "IQR":
IQR = np.quantile(data, 0.75) - np.quantile(data, 0.25)
lower = np.quantile(data, 0.25) - (IQR * threshold)
upper = np.quantile(data, 0.75) + (IQR * threshold)
if method == "STD":
import numpy as np
def pad_sequences(
sequences, maxlen=None, dtype="int32", padding="pre", truncating="pre", value=0.0
):
if not hasattr(sequences, "__len__"):
raise ValueError("`sequences` must be iterable.")
lengths = []
for x in sequences:
if not hasattr(x, "__len__"):
@asifr
asifr / resample.py
Last active December 6, 2020 06:19
Resample a numpy array
import numpy as np
def resample(x, t, start, end, step):
    """Place the samples of each row of ``x`` onto a regular grid of bins.

    The grid is ``bins = np.arange(start, end + step, step)``. Every sample
    ``x[i, k]`` is written to column ``np.digitize(t[i, k], bins)`` of output
    row ``i``; columns that receive no sample stay NaN.

    With ``np.digitize``'s default settings, column ``j`` (for ``j >= 1``)
    therefore receives samples whose time satisfies
    ``bins[j - 1] <= t < bins[j]``, and column 0 receives samples with
    ``t < bins[0]``. When several samples of a row map to the same column,
    one of them is kept.

    NOTE(review): a time ``>= bins[-1]`` maps to column ``len(bins)``, which is
    out of range and raises ``IndexError`` -- confirm whether callers
    guarantee ``t < bins[-1]`` or whether the index should be shifted/clipped.

    Args:
        x: 2-D array of sample values, one row per series.
        t: 2-D array of sample times, indexed row-by-row alongside ``x``.
        start: First bin edge.
        end: Upper limit used to build the grid (``end + step`` is the
            exclusive stop passed to ``np.arange``).
        step: Spacing between bin edges.

    Returns:
        Tuple ``(y, bins)`` where ``y`` has shape ``(x.shape[0], len(bins))``
        and ``bins`` is the array of bin edges.
    """
    bins = np.arange(start, end + step, step)
    inds = np.digitize(t, bins)
    n = x.shape[0]
    # Start from an all-NaN grid so bins without a sample are marked missing.
    y = np.full((n, len(bins)), np.nan)
    for i, values in enumerate(x):
        y[i, inds[i, :]] = values
    return y, bins