Skip to content

Instantly share code, notes, and snippets.

View indiejoseph's full-sized avatar
🏠
Working from home

Joseph cheng indiejoseph

🏠
Working from home
View GitHub Profile
语料库在线 http://www.cncorpus.org
n 名词
nt 时间名词
nd 方位名词
nl 处所名词
nh 人名
nhf 姓
nhg 名
nn 族名
http://zh.wikipedia.org/w/api.php?action=query&titles=%E8%B4%9D%E5%A1%9E%E5%B0%94%E6%9B%B2%E7%BA%BF&redirects=&converttitles=&prop=revisions&rvprop=content&format=json
@indiejoseph
indiejoseph / dbc2sbc.coffee
Created May 24, 2014 10:35
全角轉半角
DBC2SBC = (str, flag) ->
result=''
return no if str.length <= 0
for i in [0...str.length]
str1=str.charCodeAt(i)
if !flag
if str1 < 127
result += String.fromCharCode str.charCodeAt(i) + 65248
else
@indiejoseph
indiejoseph / ictclas_pos.py
Created May 24, 2014 10:37
ICTCLAS 2014 POS
POS = {
"n": { #1. 名词 (1个一类,7个二类,5个三类)
"n":"名词",
"nr":"人名",
"nr1":"汉语姓氏",
"nr2":"汉语名字",
"nrj":"日语人名",
"nrf":"音译人名",
"ns":"地名",
"nsf":"音译地名",
@indiejoseph
indiejoseph / s2t.txt
Last active August 29, 2015 14:01
簡轉繁
台 臺
啓 啟
老板 老闆
開髮 開發
爲 為
裏 裡
衆 眾
@indiejoseph
indiejoseph / BaumWelch.coffee
Last active August 29, 2015 14:01
BaumWelch Algorithm
'use strict'
_ = require 'lodash'
MIN_FLOAT = -3.14e100
# Assign `value` to the property `prop` of the receiver only when the receiver
# does not already have an own property with that name.
#
# prop  - property name to initialise.
# value - value stored when the property is missing.
#
# Returns `value` when the assignment happened, otherwise undefined.
# Note: this is attached to Object.prototype by plain assignment, so it is
# visible on every object.
Object::default = (prop, value) ->
  unless @hasOwnProperty(prop)
    @[prop] = value
Object::getValue = (prop, value) ->
@indiejoseph
indiejoseph / log_add.coffee
Created May 28, 2014 09:39
logarithm add
# Add two numbers that are stored as logarithms: returns log(exp(x) + exp(y))
# without leaving log space.
#
# x, y - log-domain operands.
#
# Returns the log of the sum of the two underlying values. When the operands
# differ by more than 30 the smaller one contributes less than exp(-30)
# relative to the larger, so the larger operand is returned unchanged.
logAdd = (x, y) ->
  maximum = Math.max x, y
  minimum = Math.min x, y
  # Infinity, -Infinity (both operands) or NaN: subtracting below would yield
  # NaN, while the maximum itself is already the correct answer.
  return maximum unless isFinite maximum
  return maximum if maximum - minimum > 30
  # log(e^max + e^min) = max + log(1 + e^(min - max)); the exponent must be
  # the non-positive difference so that exp() stays within (0, 1].
  maximum + Math.log 1 + Math.exp(minimum - maximum)
@indiejoseph
indiejoseph / sort_object.coffee
Last active August 29, 2015 14:01
Sort object by value
# Rebuild `obj` as a new object whose keys are inserted in order of their
# values, largest value first (values are compared by numeric subtraction).
obj = do (obj) ->
  byValueDesc = (left, right) -> obj[right] - obj[left]
  ordered = {}
  for key in Object.keys(obj).sort(byValueDesc)
    ordered[key] = obj[key]
  ordered
@indiejoseph
indiejoseph / helpers.coffee
Last active August 29, 2015 14:01
CoffeeScript helpers
# Fold `arr` with `+`, starting from 0, and return the accumulated result.
sum = (arr) ->
  add = (total, item) -> total + item
  arr.reduce add, 0
# Three-way comparison: 1 when x > y, -1 when x < y, and 0 otherwise.
cmp = (x, y) ->
  return 1 if x > y
  return -1 if x < y
  0
# Keys of `list` sorted by their values in descending order (numeric
# subtraction comparator). `list` must be defined by the surrounding code.
keys = Object.keys(list).sort (a, b) -> list[b] - list[a]
# Initialise an array with a default value: three elements, each 'a'.
# `_` is presumably lodash (as required elsewhere on this page) - confirm.
# The callback is written as a CoffeeScript function literal; the JavaScript
# `function () { ... }` form is a reserved word here and does not compile.
_.range(3).map -> 'a'
# string to bigram array
toBigrams = (str) ->
oneGrams = str.split('')
This file has been truncated, but you can view the full file.
阿爸 a1'ba4 18137
阿昌族 a1'chang1'zu2 50849
阿斗 a1'dou3 42632
阿飞 a1'fei1 48603
阿富汗 a1'fu4'han4 3461
阿訇 a1'hong1 34432
阿拉伯数字 a1'la1'bo2'shu4'zi4 35937
阿拉伯语 a1'la1'bo2'yu3 30476
阿妈 a1'ma1 16220