David Yerrington dyerrington

## removeblank.sh
# Print sshd_config without blank lines and without comment lines.
# [[:space:]] is used because "\t" inside a grep bracket expression is a
# literal backslash and "t", not a tab character.
grep -Ev '^[[:space:]]*(#|$)' /etc/ssh/sshd_config

## removeext
# Recursively delete every directory named ".ext" below the current directory.
# -prune stops find from descending into a directory that is about to be removed;
# -print0 / xargs -0 keep paths containing spaces, quotes or newlines intact.
find . -type d -name .ext -prune -print0 | xargs -0 rm -rf

## word_counts
# Total number of words across all regular files below the current directory.
find . -type f -exec cat {} + | wc -w

## reject_outliers
# Keep only the rows whose 'score' is within three standard deviations of the mean score.
scores = sql_df['score'].values
distance_from_mean = np.abs(scores - scores.mean())
sql_df[distance_from_mean <= (3 * scores.std())]

## preprocess_corpus.py
import numpy as np, pandas as pd, os, seaborn as sns, codecs
from gensim import corpora, models, similarities
from gensim.parsing.preprocessing import STOPWORDS

class preprocess_corpus(object):

    files  =  []
    dirs   =  []

    def __init__(self, dir, directory=False, stopwords_file=False):

## probability.py
import bisect
import random

def Mean(t):
    """Computes the mean of a sequence of numbers.

    Args:
        t: sequence of numbers

    Returns:

## gist:3d4cdd4d4c2a7f4a66b7
import numpy as np
import scipy as sp
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and a two-sided Student-t confidence interval.

    Args:
        data: sequence of numbers.
        confidence: confidence level of the interval (default 0.95).

    Returns:
        A tuple ``(mean, lower, upper)`` where ``lower`` and ``upper`` are the
        mean minus/plus the half-width ``sem * t_critical``.
    """
    a = np.asarray(data, dtype=float)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    # Two-sided critical value with n-1 degrees of freedom. The public
    # ``ppf`` is used instead of the private ``_ppf`` so arguments are
    # validated and the call does not depend on scipy internals.
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h

## gist:1bcbd0378d65f6562cd9
from geopy.geocoders import Bing

geolocator  =   Bing("your key here")
location = geolocator.geocode('your location here')
try:
    if not location: continue
        geo_location = {
            'origin_address': location.address,
            'origin_latitude': location.latitude,
            'origin_longitude': location.longitude

## grouper.py

# Step 1: parse the Time column into datetimes right after loading.
ufo = pd.read_csv('https://raw.githubusercontent.com/sinanuozdemir/SF_DAT_17/master/data/ufo.csv')   # can also read csvs directly from the web!
# Convert the whole column in one vectorized call instead of a per-row apply.
ufo['Time'] = pd.to_datetime(ufo['Time'])


# Step 2: group by calendar day and count the 'Shape Reported' values in each day.
ufo.groupby([pd.Grouper(key='Time', freq='1D')])[['Shape Reported']].count()

# Alternatively, you can concatenate Year, Month, and Day into a new feature and group by that.  As an engineer, I much prefer to work with strict types and leverage the Grouper approach shown above.

## auto_coefficients.py
def auto_coefficients(df):

    sorted_coefs  =   list()
    coefs         =   df.corr()

    for row_index, row_values in enumerate(coefs.values):

        for col_index, col_value in enumerate(row_values):

            if coefs.columns[row_index] == coefs.columns[col_index]:
	import numpy as np, pandas as pd, os, seaborn as sns, codecs
	from gensim import corpora, models, similarities
	from gensim.parsing.preprocessing import STOPWORDS

	class preprocess_corpus(object):

	files = []
	dirs = []

	def __init__(self, dir, directory=False, stopwords_file=False):
	import bisect
	import random

	def Mean(t):
	"""Computes the mean of a sequence of numbers.

	Args:
	t: sequence of numbers

	Returns:
	import numpy as np
	import scipy as sp
	import scipy.stats

	def mean_confidence_interval(data, confidence=0.95):
	    """Return the sample mean and a two-sided Student-t confidence interval.

	    Args:
	        data: sequence of numbers.
	        confidence: confidence level of the interval (default 0.95).

	    Returns:
	        A tuple ``(mean, lower, upper)`` where ``lower`` and ``upper`` are the
	        mean minus/plus the half-width ``sem * t_critical``.
	    """
	    a = np.asarray(data, dtype=float)
	    n = len(a)
	    m, se = np.mean(a), scipy.stats.sem(a)
	    # Two-sided critical value with n-1 degrees of freedom. The public
	    # ``ppf`` is used instead of the private ``_ppf`` so arguments are
	    # validated and the call does not depend on scipy internals.
	    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
	    return m, m - h, m + h
	from geopy.geocoders import Bing

	geolocator = Bing("your key here")
	location = geolocator.geocode('your location here')
	try:
	if not location: continue
	geo_location = {
	'origin_address': location.address,
	'origin_latitude': location.latitude,
	'origin_longitude': location.longitude

	# Step 1: parse the Time column into datetimes right after loading.
	ufo = pd.read_csv('https://raw.githubusercontent.com/sinanuozdemir/SF_DAT_17/master/data/ufo.csv') # can also read csvs directly from the web!
	# Convert the whole column in one vectorized call instead of a per-row apply.
	ufo['Time'] = pd.to_datetime(ufo['Time'])


	# Step 2: group by calendar day and count the 'Shape Reported' values in each day.
	ufo.groupby([pd.Grouper(key='Time', freq='1D')])[['Shape Reported']].count()

	# Alternatively, you can concatenate Year, Month, and Day into a new feature and group by that. As an engineer, I much prefer to work with strict types and leverage the Grouper approach shown above.
	def auto_coefficients(df):

	sorted_coefs = list()
	coefs = df.corr()

	for row_index, row_values in enumerate(coefs.values):

	for col_index, col_value in enumerate(row_values):

	if coefs.columns[row_index] == coefs.columns[col_index]: