#A Collection of NLP notes
##N-grams
###Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...
#!/usr/bin/env ruby | |
# Usage: ruby detect_rhyme.rb /path/to/file.txt num | |
# Example: ruby detect_rhyme.rb 坊ちゃん.txt 3 | |
require 'MeCab' | |
def main(args) | |
path, num = args[0], args[1].to_i | |
rhyme = {} | |
File.open(path).each do |line| | |
node_list = get_node_list(line) |
#!/bin/sh | |
# Three-stage pipeline: mecab -> nkf -> sed.
#   mecab --node-format="%f[7] " : prints feature field 7 of each node followed
#       by a space (presumably the reading in the dictionary in use -- confirm).
#       No input file is given, so mecab presumably reads standard input.
#   nkf -w --hiragana : NOTE(review): looks like UTF-8 output with katakana
#       converted to hiragana -- confirm against the installed nkf version.
#   sed 's/EOS//g' : removes every occurrence of the literal text "EOS"
#       from the output.
mecab --node-format="%f[7] " | nkf -w --hiragana | sed 's/EOS//g' |
#! /usr/bin/env python | |
""" | |
Author: Jeremy M. Stober | |
Program: SOFTMAX.PY | |
Date: Wednesday, February 29 2012 | |
Description: Simple softmax function. | |
""" | |
import numpy as np | |
npa = np.array |
#A Collection of NLP notes
##N-grams
###Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...
{ | |
"IAB1": "Arts & Entertainment", | |
"IAB1-1": "Books & Literature", | |
"IAB1-2": "Celebrity Fan/Gossip", | |
"IAB1-3": "Fine Art", | |
"IAB1-4": "Humor", | |
"IAB1-5": "Movies", | |
"IAB1-6": "Music", | |
"IAB1-7": "Television", | |
"IAB2": "Automotive", |
// Copyright (c) 2013 Hiroyuki Tanaka | |
// Released under the MIT license | |
#include <stdint.h> | |
#include <cstdlib> | |
#include <cstring> | |
#include <string> | |
#include <map> | |
#include <vector> | |
#include <iostream> |
package net.mekajiki; | |
import com.ibm.icu.text.Transliterator; | |
import java.util.ArrayList; | |
import java.util.List; | |
public class Hiragana2Phoneme { | |
/**
 * Chains the two conversion helpers: the input is passed to
 * hiragana2Romaji, and that result is passed to romaji2Phoneme.
 * Both helpers are defined outside this view.
 *
 * @param text input text (presumably hiragana, judging by the method
 *             name -- confirm against callers)
 * @return whatever romaji2Phoneme returns for the romanized text
 */
public static String hiragana2Phoneme(String text) { | |
return romaji2Phoneme(hiragana2Romaji(text)); | |
} |
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# This is a simplified implementation of the LSTM language model (by Graham Neubig) | |
# | |
# LSTM Neural Networks for Language Modeling | |
# Martin Sundermeyer, Ralf Schlüter, Hermann Ney | |
# InterSpeech 2012 | |
# | |
# The structure of the model is extremely simple. At every time step we |
[alias] | |
grep-add = "!sh -c 'git ls-files -m -o --exclude-standard | grep $1 | xargs git add' -" | |
grep-add-patch = "!sh -c 'git add -p `git ls-files -m -o --exclude-standard | grep $1`' -" |