Junyoung Park (Swalloow)

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.
# Example:
# spark.master spark://master:7077
spark.eventLog.enabled true
spark.eventLog.dir file:///home/ozawa/sparkeventlogs
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.driver.memory 5g
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
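These defaults are picked up by spark-submit; for completeness, a minimal PySpark sketch (not from the gist) that sets the same event-log options programmatically:

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("eventlog-example")  # illustrative app name, not from the conf above
    .config("spark.eventLog.enabled", "true")
    .config("spark.eventLog.dir", "file:///home/ozawa/sparkeventlogs")
    .getOrCreate()
)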
@Swalloow
Swalloow / members_prep.py
Last active November 30, 2017 15:17
KKBOX Feature Engineering
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.ml.feature import StringIndexer

def preprocess_age(age):
    # Clip implausible ages to -1 so they can be treated as missing
    if age > 70 or age < 5:
        return -1
    else:
        return age
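For context, a usage sketch that is not part of the gist: the input file name and the KKBOX column name 'bd' for raw age are assumptions.

spark = SparkSession.builder.appName("kkbox-members-prep").getOrCreate()
members = spark.read.csv("members.csv", header=True, inferSchema=True)

# Wrap the cleaner as a UDF and apply it to the (assumed) raw age column 'bd'
preprocess_age_udf = udf(preprocess_age, IntegerType())
members = members.withColumn("age_clean", preprocess_age_udf(col("bd")))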
@Swalloow
Swalloow / .vimrc
Last active December 23, 2017 07:15 — forked from simonista/.vimrc
A basic .vimrc file that will serve as a good template on which to build.
" Don't try to be vi compatible
set nocompatible
" Helps force plugins to load correctly when it is turned back on below
filetype off
" Load plugins here (pathogen or vundle)
set rtp+=~/.vim/bundle/Vundle.vim
call vundle#begin()
Plugin 'VundleVim/Vundle.vim'
@Swalloow
Swalloow / imperative.py
Created January 3, 2018 08:51
Imperative auto differentiation
class array(object):
    """Simple Array object that supports autodiff."""
    def __init__(self, value, name=None):
        self.value = value
        if name:
            self.grad = lambda g: {name: g}

    def __add__(self, other):
        assert isinstance(other, int)
        ret = array(self.value + other)
        # Propagate the output gradient back to this array's named inputs
        if hasattr(self, 'grad'):
            ret.grad = lambda g: self.grad(g)
        return ret
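A quick sanity check of the class as completed above (not part of the gist):

a = array(1, 'a')
b = a + 1
c = b + 3
# Seed the output gradient with 1 and propagate it back to the named input
print(c.grad(1))  # {'a': 1}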
import tensorflow as tf

class Model:

    def __init__(self, data, target):
        data_size = int(data.get_shape()[1])
        target_size = int(target.get_shape()[1])
        weight = tf.Variable(tf.truncated_normal([data_size, target_size]))
        bias = tf.Variable(tf.constant(0.1, shape=[target_size]))
        incoming = tf.matmul(data, weight) + bias
        self._prediction = tf.nn.softmax(incoming)
        # Cross-entropy needs the element-wise product, not a second reduce_sum argument
        cross_entropy = -tf.reduce_sum(target * tf.log(self._prediction))
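A minimal construction sketch (not from the gist); the placeholder shapes are illustrative, assuming MNIST-like 784-dimensional inputs and 10 classes:

data = tf.placeholder(tf.float32, [None, 784])
target = tf.placeholder(tf.float32, [None, 10])
model = Model(data, target)  # builds the softmax-regression graph defined above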
class Model:

    def __init__(self, data, target):
        self.data = data
        self.target = target
        self._prediction = None
        self._optimize = None
        self._error = None

    @property
    def prediction(self):
        # Graph construction happens lazily on first access (body truncated in the gist preview)
        if self._prediction is None:
            ...
        return self._prediction
import functools

def lazy_property(function):
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        # Evaluate the wrapped method once and cache the result on the instance
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator
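A quick illustration of the caching behaviour, not from the gist:

class Example:
    @lazy_property
    def answer(self):
        print("computing...")
        return 42

e = Example()
e.answer  # prints "computing..." once and caches 42
e.answer  # served from the cache, nothing is printed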
class Model:

    def __init__(self, data, target):
        self.data = data
        self.target = target
        # Touching the properties builds the whole graph at construction time
        self.prediction
        self.optimize
        self.error

    @lazy_property
    def prediction(self):
        # Graph construction goes here (truncated in the gist preview)
        ...
import functools
import tensorflow as tf

def define_scope(function):
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        # Build the ops once inside a variable scope named after the method, then cache them
        if not hasattr(self, attribute):
            with tf.variable_scope(function.__name__):
                setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator
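A hypothetical usage sketch (the Model and its ops are illustrative, not from the gist): each decorated method builds its ops exactly once, inside a tf.variable_scope named after the method.

class Model:
    def __init__(self, data, target):
        self.data = data
        self.target = target
        self.prediction  # ops are created under the scope 'prediction'

    @define_scope
    def prediction(self):
        weight = tf.Variable(tf.truncated_normal(
            [int(self.data.get_shape()[1]), int(self.target.get_shape()[1])]))
        return tf.nn.softmax(tf.matmul(self.data, weight))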
@Swalloow
Swalloow / dynamo_delete_all.py
Created May 18, 2018 06:38
Boto3 DynamoDB delete all items
import boto3

dynamodb = boto3.resource('dynamodb', 'region-name')
table = dynamodb.Table('table-name')

# Scan only the key attribute ('name') to keep reads cheap
scan = table.scan(
    ProjectionExpression='#k',
    ExpressionAttributeNames={
        '#k': 'name'
    }
)
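The delete step itself is cut off in the preview; a minimal sketch of the rest, assuming 'name' is the table's only key attribute:

# Delete the scanned keys in batches; for large tables the scan would also
# need to be repeated while LastEvaluatedKey is present
with table.batch_writer() as batch:
    for item in scan['Items']:
        batch.delete_item(Key={'name': item['name']})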