Skip to content

Instantly share code, notes, and snippets.

@asw456
asw456 / gist:fa309f0967a2cfa2c4e2
Last active August 27, 2015 04:30 — forked from need4spd/gist:4584416
hadoop multiple outputs map/reduce sample
//mapper
package com.tistory.devyongsik.hadoop.mapre;
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
  1. General Background and Overview
package org.apache.mahout.examples
import org.apache.mahout.drivers._
import org.apache.mahout.math.cf.SimilarityAnalysis
object RecommenderDriver extends MahoutDriver {
/**
* @param args Command line args, if empty a help message is printed.
*/
override def main(args: Array[String]): Unit = {
# Generate a Pig LOAD statement from a pipe-delimited header line read on stdin.
# Every column is declared as chararray; the statement is printed to stdout.
import sys, os, re

# Only the first line of stdin (the header row) is consumed.
line = sys.stdin.readline()
columns = line.rstrip("\n").split("|")

# One "\t<name>:chararray" field per column, separated by ", \n". Joining the
# fields avoids appending a separator after the last one and slicing it off.
fields = ", \n".join("\t{}:chararray".format(column) for column in columns)
load_statement = (
    "full_dump = LOAD 'data/FullDump_clean.csv' AS (\n"
    + fields
    + "\n);"
)

# The parenthesised form is valid under both Python 2 and Python 3.
print(load_statement)
@asw456
asw456 / e1071.R
Last active August 29, 2015 14:13 — forked from ivannp/e1071.R
svmComputeOneForecast = function(
id,
data,
response,
startPoints,
endPoints,
len,
history=500,
trace=FALSE,
kernel="radial",
import time
import numpy as np
from sklearn.datasets.samples_generator import make_blobs
from sklearn.neighbors import LSHForest
from sklearn.neighbors import NearestNeighbors
from annoy import AnnoyIndex
from pyflann import FLANN
# NOTE(review): the code that consumes these values is not visible in this
# excerpt. Presumably n_iter is the number of repetitions and n_neighbors the
# number of neighbours requested per query - confirm against the full script.
n_iter = 50
n_neighbors = 100
import os
import sys
# Constants; configure these to match your environment.
# NOTE(review): how each value is consumed is not visible in this excerpt.
HOST = 'http://localhost:9200'  # base URL of the target server (presumably Elasticsearch, given port 9200 - confirm)
INDEX = 'test'  # presumably the name of the index to write to - confirm
TYPE = 'attachment'  # presumably the document type within the index - confirm
TMP_FILE_NAME = 'tmp.json'  # presumably a scratch file for the JSON payload - confirm
# File types to index. For supported formats, see Apache Tika:
# http://tika.apache.org/1.4/formats.html
INDEX_FILE_TYPES = ['html','pdf', 'doc', 'docx', 'xls', 'xlsx', 'xml']
@asw456
asw456 / 0.setup.sh
Last active August 29, 2015 14:06 — forked from ceteri/0.setup.sh
# concatenate the raw tweet part files matching part-0000[1-3] into "minitweets"
# (note: this glob matches at most three files, part-00001 through part-00003)
cat rawtweets/part-0000[1-3] > minitweets
# change log4j properties to WARN to reduce noise during demo
# (mv renames the template into place; the .template file no longer exists afterwards)
mv conf/log4j.properties.template conf/log4j.properties
vim conf/log4j.properties # Change to WARN
# launch Spark shell REPL
./bin/spark-shell
@asw456
asw456 / bandits.py
Created September 1, 2014 09:19 — forked from syhw/bandits.py
import numpy as np
from scipy.stats import bernoulli
N_EXPS = 200 # number of experiments to run; TODO: test with 10k or 20k experiments
N_BAGS = 10 # number of bags
N_DRAWS = 100 # number of draws
SMOOTHER = 1.E-2 # shrinkage parameter
EPSILON_BANDIT = 0.1 # epsilon for the epsilon-greedy bandit
EPSILON_NUM = 1.E-9 # numerical epsilon
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.