Bo Wang bwang482

## sentence_embedding.py
from __future__ import division
import gensim
import itertools
import numpy as np
from collections import Counter
from sklearn.decomposition import PCA


def gensim_load_vec(path):
    """Load word vectors from a word2vec-format text file.

    Args:
        path: Location of the vector file, passed straight to gensim's
            ``load_word2vec_format`` with ``binary=False`` (text format).

    Returns:
        The object gensim's loader produces for ``path``.
    """
    # gensim >= 1.0 moved ``load_word2vec_format`` from ``Word2Vec`` to
    # ``KeyedVectors`` (the old entry point now raises). Prefer the new
    # class when it exists and fall back to the legacy one otherwise.
    loader = getattr(gensim.models, "KeyedVectors", gensim.models.Word2Vec)
    w2v_model = loader.load_word2vec_format(path, binary=False)
    # NOTE(review): the visible original never handed the loaded model back
    # to the caller; returning it is backward-compatible for callers that
    # ignored the (None) result — confirm against the full file.
    return w2v_model

## kld.py
#!/usr/bin/python

import re, math, collections
from collections import Counter

def tokenize(_str):
	stopwords = ['and', 'for', 'if', 'the', 'then', 'be', 'is', 'are', 'will', 'in', 'it', 'to', 'that']
	tokens = collections.defaultdict(lambda: 0.)
	for m in re.finditer(r"(\w+)", _str, re.UNICODE):
		m = m.group(1).lower()
	from __future__ import division
	import gensim
	import itertools
	import numpy as np
	from collections import Counter
	from sklearn.decomposition import PCA


	def gensim_load_vec(path):
	w2v_model = gensim.models.Word2Vec.load_word2vec_format(path, binary=False)
	#!/usr/bin/python

	import re, math, collections
	from collections import Counter

	def tokenize(_str):
	stopwords = ['and', 'for', 'if', 'the', 'then', 'be', 'is', 'are', 'will', 'in', 'it', 'to', 'that']
	tokens = collections.defaultdict(lambda: 0.)
	for m in re.finditer(r"(\w+)", _str, re.UNICODE):
	m = m.group(1).lower()