Skip to content

Instantly share code, notes, and snippets.

@yongsun
yongsun / mmseg.py
Created June 21, 2013 02:23
Implements the MMSEG Chinese word segmentation algorithm (http://technology.chtsai.org/mmseg/) in Python, using the dictionary file and character frequencies from the mmseg4j project
#!/usr/bin/python
# -*- encoding: UTF-8 -*-
import codecs
import sys
from math import log
from collections import defaultdict
class Trie (object):
class TrieNode:
@yongsun
yongsun / find_nth_int.py
Created September 12, 2012 02:41
Find the nth integer in a distributed environment
#!/usr/bin/python
import numpy.random as random
from bisect import bisect_left, bisect_right
class SlaveNode:
def __init__ (self, data):
self.data = sorted(data)
def get_range (self):
@yongsun
yongsun / proputil.py
Created July 12, 2012 10:05
A simple Python utility for updating and merging Java properties
#! /usr/bin/env python
import sys, os
import re
class Properties(object):
def __init__(self, prop_name):
self._keys = [] # the keys in parsing order
self._props = {} # the property dict
@yongsun
yongsun / lr_circles.m
Created June 14, 2012 05:29
Count the circles in the digits of numbers
X = [
1,0,3,0,0,0,0,0,1,0,0; % 7111
1,1,0,0,0,0,0,0,0,2,1; % 8809
1,0,1,2,0,0,0,0,1,0,0; % 2172
1,0,0,0,0,0,0,4,0,0,0; % 6666
1,0,4,0,0,0,0,0,0,0,0; % 1111
1,0,0,4,0,0,0,0,0,0,0; % 2222
1,0,0,1,0,0,0,2,1,0,0; % 7662
1,0,1,0,2,0,0,0,0,0,1; % 9313
1,4,0,0,0,0,0,0,0,0,0; % 0000