Skip to content

Instantly share code, notes, and snippets.

@yankov
yankov / 0_reuse_code.js
Last active August 29, 2015 14:24
Here are some things you can do with Gists in GistBox.
// Example snippet: use Gists to store code you would like to remember later on.
console.log(window); // log the "window" object to the console
@yankov
yankov / gist:f196236fe5691873901e
Last active August 29, 2015 14:22
puzzles exbm
/* Given a list of numbers 0 to 10,000, return all those whose digits sum up to 20
* Digits cannot repeat. Search space should be reduced.
*/
def get20s(): Seq[Seq[Int]] = {
val ns = for {
i <- 0 to 6
j <- 1 to 7
k <- 2 to 8
if i + j + k == 11
%matplotlib inline
import md5, struct
import seaborn as sns
import matplotlib.pyplot as plt
import mmh3
# Hash helper: stringify `i`, MD5 it, and keep only bytes 12..15 of the raw digest.
# NOTE(review): relies on the `md5` module imported above, which looks Python 2-only — confirm the target interpreter.
# NOTE(review): the body's indentation appears to have been lost in this paste.
def md5hash(i):
return md5.new(str(i)).digest()[12:16]
def mm(i):
@yankov
yankov / gist:a86d44bf0e6009944c21
Created February 3, 2015 19:47
Partial OneHotEncoder
from scipy import sparse
class OneHotEncoder():
"""
OneHotEncoder takes data matrix with categorical columns and
converts it to a sparse binary matrix doing one-of-k encoding.
Parts of code borrowed from Paul Duan (www.paulduan.com)
Licence: MIT (https://github.com/pyduan/amazonaccess/blob/master/MIT-LICENSE)
# Avazu CTR prediction
# SGD Logistic regression + hashing trick.
import pandas as pd
import numpy as np
from datetime import datetime, date, time
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction import FeatureHasher
from sklearn.preprocessing import LabelEncoder
# Avazu CTR prediction
# SGD Logistic regression + one hot encoder. Score: 0.414
import pandas as pd
import numpy as np
from datetime import datetime, date, time
from OneHotEncoderPartial import OneHotEncoder
from sklearn.linear_model import SGDClassifier
# Column names used by this script.
# NOTE(review): presumably the categorical Avazu fields handed to OneHotEncoder — confirm against the code that consumes `cols`.
cols = ["C1","banner_pos","site_category", "device_type","device_conn_type","C14","C15","C16","C17","C18","C19","C20","C21", "hour"]
@yankov
yankov / gist:8df34f46cd73fbdeb2e8
Created August 8, 2014 17:08
postgres async connection pool
// Settings for the async object pool constructed below.
// NOTE(review): the units of maxIdle / validationInterval are not visible here —
// presumably milliseconds; confirm against the PoolConfiguration documentation.
val poolConfiguration = new PoolConfiguration(
maxIdle = 1000,
maxObjects = 5,
maxQueueSize = 5,
validationInterval = 1000
)
// `configuration` is defined outside this snippet.
val factory = new PostgreSQLConnectionFactory(configuration)
// Pool of PostgreSQLConnection objects, built from `factory` and `poolConfiguration`.
val pool = new SingleThreadedAsyncObjectPool[PostgreSQLConnection](factory, poolConfiguration)
@yankov
yankov / gist:7646128
Last active December 29, 2015 08:49
cassandra test
import com.datastax.driver.core._
// Build a Cluster with a single contact point at 127.0.0.1 and open a session on it.
val cluster = Cluster.builder.addContactPoint("127.0.0.1").build
val session = cluster.connect
// http://www.datastax.com/documentation/developer/java-driver/1.0/webhelp/index.html#java-driver/quick_start/qsSimpleClientAddSession_t.html
// Create the `simplex` keyspace using SimpleStrategy with a replication factor of 3.
session.execute("CREATE KEYSPACE simplex WITH replication " +
"= {'class':'SimpleStrategy', 'replication_factor':3};");
session.execute(
@yankov
yankov / gist:5843404
Created June 23, 2013 01:48
producer example
// Producer example: send one string message to the "urls" topic.
val topic = "urls"
val props = new Properties()
// NOTE(review): "0:localhost:9092" looks like a brokerId:host:port triple —
// confirm the expected format against the Kafka version in use.
props.put("broker.list", "0:localhost:9092")
// Use kafka.serializer.StringEncoder as the serializer class.
props.put("serializer.class", "kafka.serializer.StringEncoder")
val config = new ProducerConfig(props)
val producer = new Producer[String, String](config)
// `href` is defined outside this snippet; it is the payload sent to `topic`.
val data = new ProducerData[String, String](topic, href)
producer.send(data)
@yankov
yankov / gist:5538283
Created May 8, 2013 04:56
Water pouring problem solution from Odersky's course
package week7
class Pouring(capacity: Vector[Int]) {
// States
type State = Vector[Int]
val initialState = capacity map (x => 0)
// Moves