CeShine Lee (ceshine)
@ceshine
ceshine / avazu_ftrl.go
Last active February 4, 2021 19:35
Kaggle Avazu Challenge: FTRL-Proximal with L1 & L2 implemented in Go (single-threaded)
// Based on tinrtgu's Python script here:
// https://www.kaggle.com/c/avazu-ctr-prediction/forums/t/10927/beat-the-benchmark-with-less-than-1mb-of-memory
package main

import (
	"encoding/csv"
	"os"
	"strconv"
	"hash/fnv"
	"math"
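For readers who don't read Go, the per-coordinate update this gist implements can be sketched in a few lines of Python. This is a minimal illustration of FTRL-Proximal with L1/L2 regularization in the style of tinrtgu's script, not a translation of the Go code; the hyper-parameter names (alpha, beta, l1, l2) and the binary-indicator feature assumption are carried over from that script.

```python
import math

class FTRLProximal:
    """Per-coordinate FTRL-Proximal for logistic loss (McMahan et al., 2013)."""

    def __init__(self, alpha=0.1, beta=1.0, l1=1.0, l2=1.0):
        self.alpha, self.beta, self.l1, self.l2 = alpha, beta, l1, l2
        self.z = {}  # lazily-created per-coordinate state
        self.n = {}  # sum of squared gradients per coordinate

    def _weight(self, i):
        z = self.z.get(i, 0.0)
        if abs(z) <= self.l1:
            return 0.0  # L1 keeps the weight vector sparse
        sign = 1.0 if z >= 0 else -1.0
        return -(z - sign * self.l1) / (
            (self.beta + math.sqrt(self.n.get(i, 0.0))) / self.alpha + self.l2)

    def predict(self, x):
        """x: iterable of present feature keys (binary indicators)."""
        wtx = sum(self._weight(i) for i in x)
        return 1.0 / (1.0 + math.exp(-max(min(wtx, 35.0), -35.0)))

    def update(self, x, p, y):
        g = p - y  # gradient of log loss w.r.t. the margin; x_i = 1
        for i in x:
            n = self.n.get(i, 0.0)
            sigma = (math.sqrt(n + g * g) - math.sqrt(n)) / self.alpha
            self.z[i] = self.z.get(i, 0.0) + g - sigma * self._weight(i)
            self.n[i] = n + g * g
```

Training alternates `predict` and `update` over hashed feature indices, exactly the loop shape the Go program runs over the Avazu rows.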
@ceshine
ceshine / avazu_ftrl_concurrent.go
Last active August 5, 2021 11:07
Kaggle Avazu Challenge: FTRL-Proximal with L1 & L2 implemented in Go (Concurrent/Multi-threaded)
// Based on tinrtgu's Python script here:
// https://www.kaggle.com/c/avazu-ctr-prediction/forums/t/10927/beat-the-benchmark-with-less-than-1mb-of-memory
package main

import (
	"encoding/csv"
	"os"
	"strconv"
	"hash/fnv"
	"math"
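Both versions hash raw feature strings into a fixed number of weight slots via `hash/fnv`. A sketch of that hashing trick in Python follows; the 2**20 bucket count and the `field=value` key format are illustrative assumptions, not read from the gist.

```python
def fnv1a_32(data: bytes) -> int:
    """32-bit FNV-1a, the same hash family as Go's hash/fnv."""
    h = 2166136261  # FNV offset basis
    for b in data:
        h ^= b
        h = (h * 16777619) & 0xFFFFFFFF  # FNV prime, truncated to 32 bits
    return h

def feature_bucket(field: str, value: str, n_buckets: int = 2 ** 20) -> int:
    # Map "field=value" into one of n_buckets weight slots.
    return fnv1a_32(f"{field}={value}".encode()) % n_buckets
```

Collisions are tolerated by design; with sparse CTR data the occasional shared slot costs little accuracy while keeping memory bounded.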
@ceshine
ceshine / pymc3
Last active May 26, 2016 00:22
Bayesian Logistic Regression on the Kaggle Titanic dataset via PyMC3
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Bayesian Logistic Regression on the Kaggle titanic dataset"
      ]
    },
    {
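The notebook itself relies on PyMC3's samplers. As a self-contained illustration of what is being computed, here is a tiny hand-rolled random-walk Metropolis sampler for a two-coefficient Bayesian logistic model (Normal priors, logit link). It is a sketch of the statistical idea only, not the notebook's code; PyMC3 automates all of this with far better samplers.

```python
import math
import random

def log_posterior(b0, b1, xs, ys):
    # Normal(0, 10) priors on both coefficients, Bernoulli likelihood
    # with a logit link; log1p keeps the computation numerically stable.
    lp = -(b0 * b0 + b1 * b1) / (2.0 * 10.0 ** 2)
    for x, y in zip(xs, ys):
        eta = b0 + b1 * x
        lp += y * eta - (math.log1p(math.exp(eta)) if eta < 30 else eta)
    return lp

def metropolis(xs, ys, n_iter=4000, step=0.4, seed=0):
    rng = random.Random(seed)
    b0 = b1 = 0.0
    cur = log_posterior(b0, b1, xs, ys)
    draws = []
    for _ in range(n_iter):
        c0, c1 = b0 + rng.gauss(0, step), b1 + rng.gauss(0, step)
        cand = log_posterior(c0, c1, xs, ys)
        # Accept with probability min(1, posterior ratio).
        if cand > cur or rng.random() < math.exp(cand - cur):
            b0, b1, cur = c0, c1, cand
        draws.append((b0, b1))
    return draws[n_iter // 2:]  # discard burn-in
```

With a binary predictor (e.g. sex on the Titanic data), the posterior over the slope concentrates on the log-odds difference between the two groups.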
@ceshine
ceshine / sample.py
Created July 31, 2015 04:48
Script for Sampling from Stdin
#!/usr/bin/env python
# sample: Output lines from stdin to stdout with a given probability,
# for a given duration, and with a given delay between lines.
#
# Example usage: seq 100 | sample -r 20% -d 1000
#
# Dependency: Python 2.5
#
# Original Author: http://jeroenjanssens.com
# Original Script: https://github.com/jeroenjanssens/data-science-at-the-command-line/blob/master/tools/sample
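The core of the tool is easy to sketch in modern Python 3 (the original targets Python 2.5). The flag semantics below (rate, delay, duration) are my reading of the script's description, not a verified spec.

```python
import random
import time

def sample_lines(lines, rate=0.2, delay=0.0, seconds=None, seed=None):
    """Pass each input line through with probability `rate`, optionally
    sleeping `delay` seconds between emitted lines and stopping after
    `seconds` seconds of wall-clock time."""
    rng = random.Random(seed)
    start = time.monotonic()
    for line in lines:
        if seconds is not None and time.monotonic() - start >= seconds:
            break
        if rng.random() < rate:
            if delay:
                time.sleep(delay)
            yield line
```

Wired to stdin/stdout (`for line in sample_lines(sys.stdin): sys.stdout.write(line)`), this reproduces the `seq 100 | sample -r 20%` behaviour from the usage example.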
@ceshine
ceshine / anova.R
Last active January 7, 2016 05:34
ANOVA of Observations vs. ANOVA of Bootstrapped Statistics
n.samples = 100
series.A = rnorm(n.samples, 0, 2)
series.B = rnorm(n.samples, 0.1, 2)
bundle = data.frame(y=c(series.A, series.B), x=c(rep(0, n.samples), rep(1, n.samples)))
summary(aov(y ~ factor(x), data=bundle))
resampleTotal <- function(x){
  sum(sample(x, length(x), replace=T))
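For readers following along outside R, the same bootstrap statistic (the sum of a same-size resample drawn with replacement) can be sketched in Python:

```python
import random

def resample_total(xs, rng):
    """Python counterpart of the gist's resampleTotal: sum of a
    same-size resample of xs drawn with replacement."""
    return sum(rng.choice(xs) for _ in range(len(xs)))

def bootstrap_totals(xs, n_boot=500, seed=0):
    """Draw n_boot bootstrap replicates of the total."""
    rng = random.Random(seed)
    return [resample_total(xs, rng) for _ in range(n_boot)]
```

Running ANOVA on many such replicates per series, rather than on the raw observations, is the comparison the gist sets up.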
@ceshine
ceshine / frozen_lake.py
Created October 11, 2016 14:13
A simple Q-learning algorithm for OpenAI Gym's FrozenLake environment, based on keras-rl
# -*- coding: utf-8 -*-
"""
@author: CeShine
Using keras-rl (https://github.com/matthiasplappert/keras-rl) to provide the basic framework,
and an embedding layer to make it essentially a Q-table lookup algorithm.
"""
import tempfile
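The gist wires a Q-table through keras-rl's machinery via an embedding layer. Stripped of the framework, tabular Q-learning itself fits in a dozen lines; the deterministic chain below is a toy stand-in for FrozenLake, not the gist's environment.

```python
import random

def q_learning_chain(n_states=5, episodes=500, alpha=0.5, gamma=0.9, eps=0.1, seed=0):
    """Tabular Q-learning on a toy deterministic chain: states 0..n-1,
    actions 0 (left) / 1 (right), reward 1 only on reaching the last state."""
    rng = random.Random(seed)
    q = [[0.0, 0.0] for _ in range(n_states)]
    for _ in range(episodes):
        s = 0
        while s != n_states - 1:
            # Epsilon-greedy action selection (ties break toward "right").
            a = rng.randrange(2) if rng.random() < eps else (0 if q[s][0] > q[s][1] else 1)
            s2 = max(0, s - 1) if a == 0 else s + 1
            r = 1.0 if s2 == n_states - 1 else 0.0
            # Standard Q-learning update toward r + gamma * max_a' Q(s', a').
            q[s][a] += alpha * (r + gamma * max(q[s2]) - q[s][a])
            s = s2
    return q
```

After training, the greedy policy reads the argmax of each row; on this chain it always moves right, with values decaying by gamma per step from the goal.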
@ceshine
ceshine / beijing_pm25.R
Created July 21, 2017 10:33
Beijing PM 2.5 Concentration Line Chart
if (!require(pacman)){ install.packages("pacman") }
pacman::p_load(data.table, zoo, dygraphs)
# Data Source: http://www.stateair.net/web/historical/1/1.html
quality = rbind(
fread("Beijing_2015_HourlyPM25_created20160201.csv", skip=3),
fread("Beijing_2016_HourlyPM25_created20170201.csv", skip=3),
fread("Beijing_2017_HourlyPM25_created20170705 (3).csv", skip=3)
)
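A Python counterpart of the loading step, for reference. The 3-line preamble comes from the gist's `skip=3`; I have not inspected the raw StateAir files.

```python
import csv

def load_hourly_pm25(sources):
    """Counterpart of the R snippet's fread(..., skip=3) + rbind:
    drop the 3 preamble lines each file carries, parse the remainder
    as CSV, and stack all files into one list of row dicts."""
    rows = []
    for src in sources:                      # each source: an open text file
        lines = src.read().splitlines()[3:]  # line 4 holds the column header
        rows.extend(csv.DictReader(lines))
    return rows
```

From there, resampling to a time index and plotting replaces the zoo/dygraphs part of the R script.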
@ceshine
ceshine / jupyter_notebook_config.py
Created August 23, 2017 12:31
Jupyter Notebook Post-save Hook: Auto-convert the notebook to a Python script and an HTML file
# Reference: https://svds.com/jupyter-notebook-best-practices-for-data-science/
import os
from subprocess import check_call
def post_save(model, os_path, contents_manager):
    """post-save hook for converting notebooks to .py scripts"""
    if model['type'] != 'notebook':
        return  # only do this for notebooks
    d, fname = os.path.split(os_path)
    check_call(['jupyter', 'nbconvert', '--to', 'script', fname], cwd=d)
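For the hook to fire, it has to be registered in `jupyter_notebook_config.py`. Per the referenced SVDS post, the registration is a single config line:

```python
# jupyter_notebook_config.py: run post_save after every notebook save
c.FileContentsManager.post_save_hook = post_save
```

Jupyter calls the hook with the saved model, its on-disk path, and the contents manager, which is exactly the signature `post_save` declares above.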
@ceshine
ceshine / lstm_input_dropout.py
Last active September 30, 2017 02:12
Key Code Blocks of Keras LSTM Dropout Implementation
# https://github.com/tensorflow/tensorflow/blob/v1.3.0/tensorflow/contrib/keras/python/keras/layers/recurrent.py#L1163
class LSTM(Recurrent):
    # ...
    def get_constants(self, inputs, training=None):
        # ...
        input_shape = K.int_shape(inputs)
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, int(input_dim)))
        def dropped_inputs():
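What the tiled `ones` tensor is building toward: one Bernoulli keep-mask per sample, reused at every timestep. A NumPy sketch of that behaviour for batch-major input of shape (batch, time, features); an illustration, not Keras code.

```python
import numpy as np

def input_dropout_mask(x, rate, rng):
    """One keep-mask of shape (batch, input_dim), scaled by 1/keep_prob
    (inverted dropout), applied identically at every timestep of x."""
    keep = 1.0 - rate
    mask = (rng.random((x.shape[0], x.shape[2])) < keep) / keep
    return x * mask[:, None, :]  # broadcast the same mask over the time axis
```

Sampling the mask once per sequence, rather than per timestep, is the point of constructing it from a (batch, input_dim) tensor of ones.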
@ceshine
ceshine / locked_dropout.py
Last active September 30, 2017 02:13
Key Code Blocks of Pytorch RNN Dropout Implementation
# https://github.com/salesforce/awd-lstm-lm/blob/dfd3cb0235d2caf2847a4d53e1cbd495b781b5d2/locked_dropout.py#L5
class LockedDropout(nn.Module):
    # ...
    def forward(self, x, dropout=0.5):
        if not self.training or not dropout:
            return x
        m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
        mask = Variable(m, requires_grad=False) / (1 - dropout)
        mask = mask.expand_as(x)
        return mask * x
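The same idea in NumPy for PyTorch's time-major layout (time, batch, features), including the 1/(1 - dropout) rescaling that keeps the expected activation unchanged; a sketch, not the module itself.

```python
import numpy as np

def locked_dropout_np(x, rate, rng):
    """One Bernoulli(1 - rate) mask of shape (1, batch, features), scaled
    by 1/(1 - rate), broadcast over the time axis of x: (time, batch, feat)."""
    keep = 1.0 - rate
    mask = (rng.random((1, x.shape[1], x.shape[2])) < keep) / keep
    return mask * x
```

Because the mask is sampled once per forward pass, every timestep drops the same units; that is what distinguishes this "locked" (variational) dropout from ordinary per-element dropout, and the rescaling means no correction is needed at eval time.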