A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# The unreasonable effectiveness of Character-level Language Models\n", | |
"## (and why RNNs are still cool)\n", | |
"\n", | |
"###[Yoav Goldberg](http://www.cs.biu.ac.il/~yogo)\n", |
name := """steel""" | |
version := "1.0-SNAPSHOT" | |
lazy val root = (project in file(".")).enablePlugins(PlayScala) | |
scalaVersion := "2.11.5" | |
resolvers ++= Seq( | |
"Typesafe repository" at "https://repo.typesafe.com/typesafe/releases/", | |
"Typesafe Maven Repository" at "http://repo.typesafe.com/typesafe/maven-releases/", |
val n = 9 | |
val s = Math.sqrt(n).toInt | |
type Board = IndexedSeq[IndexedSeq[Int]] | |
def solve(board: Board, cell: Int = 0): Option[Board] = (cell%n, cell/n) match { | |
case (r, `n`) => Some(board) | |
case (r, c) if board(r)(c) > 0 => solve(board, cell + 1) | |
case (r, c) => | |
def guess(x: Int) = solve(board.updated(r, board(r).updated(c, x)), cell + 1) | |
val used = board.indices.flatMap(i => Seq(board(r)(i), board(i)(c), board(s*(r/s) + i/s)(s*(c/s) + i%s))) |
--- | |
# Dataset Stuff ------------------------------------------------- | |
# | |
data_path: ~/data | |
output_path: ~/output | |
val_size: 10000 | |
train_chunk_size: 40000 | |
[{"name":"allthethings","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/allthethings-1414024836.png"},{"name":"android","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/android-1414024011.png"},{"name":"areyoukiddingme","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/areyoukiddingme-1414024355.png"},{"name":"arrington","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/arrington-1414023805.png"},{"name":"arya","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/arya-1414028821.png"},{"name":"ashton","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/ashton-1414025136.png"},{"name":"atlassian","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/atlassian-1414025304.png"},{"name":"awesome","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/awesome-1417754492.png"},{"name":"awthanks","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/awthanks-1414025485.png"},{"name":"aww","url":"https://dujrsrsgsd3nh.cloudfront.net/img/emoticons/aww-1417754503 |
# Steps to build and install tmux from source. | |
# Takes < 25 seconds on EC2 env [even on a low-end config instance]. | |
VERSION=2.7 | |
sudo yum -y remove tmux | |
sudo yum -y install wget tar libevent-devel ncurses-devel | |
wget https://github.com/tmux/tmux/releases/download/${VERSION}/tmux-${VERSION}.tar.gz | |
tar xzf tmux-${VERSION}.tar.gz | |
rm -f tmux-${VERSION}.tar.gz | |
cd tmux-${VERSION} |
#!/usr/bin/env bash | |
## create an ubuntu 14.04 hvm instance, then from your home directory: | |
# 1. download this script | |
# wget https://gist.githubusercontent.com/waylonflinn/506f563573600d944923/raw/install-python-data-science.sh | |
# 2. make it executable | |
# chmod a+x install-python-data-science.sh |
""" | |
A deep neural network with or w/o dropout in one file. | |
License: Do What The Fuck You Want to Public License http://www.wtfpl.net/ | |
""" | |
import numpy, theano, sys, math | |
from theano import tensor as T | |
from theano import shared | |
from theano.tensor.shared_randomstreams import RandomStreams |
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
"""A simple implementation of a greedy transition-based parser. Released under BSD license.""" | |
from os import path | |
import os | |
import sys | |
from collections import defaultdict | |
import random | |
import time | |
import pickle | |
SHIFT = 0; RIGHT = 1; LEFT = 2; |