# A Collection of NLP notes
## N-grams
### Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...
(ns net.cddr.rabbitmq | |
(:import (com.rabbitmq.client Channel Connection ConnectionFactory | |
QueueingConsumer | |
RpcClient StringRpcServer) | |
(com.rabbitmq.tools.jsonrpc JsonRpcServer JsonRpcClient) | |
(com.rabbitmq.examples HelloJsonService))) | |
;; (import '(com.rabbitmq.client Channel Connection ConnectionFactory | |
;; QueueingConsumer |
For each Ruby module/class, the Ruby methods are on the left and the equivalent | |
Clojure functions and/or relevant notes are on the right. | |
For Clojure functions, symbols indicate existing method definitions, in the | |
clojure namespace if none is explicitly given. clojure.contrib.*/* functions can | |
be obtained from http://github.com/kevinoneill/clojure-contrib/tree/master; | |
ruby-to-clojure.*/* functions can be obtained from the source files in this | |
gist. | |
If no method symbol is given, we use the following notation: |
;; ## Example of timeseries aggregation in cascalog. | |
;; | |
;; (copy paste all of this in at the REPL!) | |
(use 'cascalog.api) | |
(def tseries [["ndvi" 1 0 [1 2 3 4]] | |
["ndvi" 1 2 [2 3 4 5]] | |
["ndvi" 1 1 [4 3 2 1]] | |
["ndvi" 1 4 [1 2 3 4]] |
# install the build prerequisites (C++ compiler, curl, SSL headers, Apache utilities)
sudo apt-get install g++ curl libssl-dev apache2-utils | |
# install git | |
sudo apt-get install git-core | |
# download the Node source, compile and install it | |
git clone https://github.com/joyent/node.git | |
cd node | |
./configure | |
make | |
sudo make install | |
# install the Node package manager for later use |
feed_hash = feed.to_hash | |
puts Benchmark.bm(20) { |x| | |
x.report('yaml') { (1..1000).each { feed_hash.to_yaml } } | |
x.report('yaml+zlib') { (1..1000).each { Zlib::Deflate.deflate(feed_hash.to_yaml) } } | |
x.report('json') { (1..1000).each { feed_hash.to_json } } | |
x.report('json+zlib') { (1..1000).each { Zlib::Deflate.deflate(feed_hash.to_json) } } | |
x.report('bson') { (1..1000).each { BSON.serialize feed_hash } } | |
x.report('marshal') { (1..1000).each { Marshal::dump(feed_hash) } } | |
x.report('marshal+zlib') { (1..1000).each { Zlib::Deflate.deflate(Marshal::dump(feed_hash)) } } | |
x.report('yajl') { (1..1000).each { Yajl::Encoder.encode(feed_hash) } } |
(ns aws.s3 | |
(:refer-clojure :exclude [get]) | |
(:use [clojure.walk :only (keywordize-keys stringify-keys)] | |
[clojure.contrib.def :only (defonce-)] | |
[clojure.contrib.json :only (read-json write-json)]) | |
(:import [java.io PrintWriter InputStreamReader ByteArrayInputStream ByteArrayOutputStream] | |
[java.util.zip GZIPInputStream GZIPOutputStream] | |
[com.google.common.base Charsets] | |
[com.amazonaws.services.s3 AmazonS3Client] | |
[com.amazonaws.services.s3.model Region CreateBucketRequest ObjectMetadata |
// Basic sbt build settings: project identity, Scala version, and compiler flags.
name := "theNextBigThing" | |
version := "0.1.0" | |
organization := "com.megacorp" | |
scalaVersion := "2.9.0-1" | |
// Append (rather than replace) compiler options so deprecation and unchecked
// warnings are reported with details.
scalacOptions ++= Seq("-deprecation", "-unchecked") |
;; (defproject async-test2 "0.1.0-SNAPSHOT" | |
;; :description "FIXME: write description" | |
;; :url "http://example.com/FIXME" | |
;; :license {:name "Eclipse Public License" | |
;; :url "http://www.eclipse.org/legal/epl-v10.html"} | |
;; :dependencies [[org.clojure/clojure "1.4.0"] | |
;; [http.async.client "0.4.5"]]) | |
(ns async-test2.core |
(use '[datomic.api :only (q db) :as d]) | |
;; Sample records bound to initial-data. Each map carries a :sku string,
;; a :price written as a BigDecimal literal (the M suffix), and an integer :qty.
;; NOTE(review): presumably seed data for the datomic examples that follow -- confirm.
(def initial-data | |
[{:sku "1" :price 0.95M :qty 1} | |
{:sku "2" :price 1.99M :qty 0} | |
{:sku "3" :price 1.99M :qty 0} | |
{:sku "4" :price 5.99M :qty 3} | |
{:sku "5" :price 9.99M :qty 2} | |
{:sku "6" :price 2.99M :qty 3} | |
{:sku "7" :price 2.99M :qty 2}]) |
# A Collection of NLP notes
## N-grams
### Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...