Skip to content

Instantly share code, notes, and snippets.

#!/opt/local/bin/python
import MeCab
import math
def mecab(text):
tagger = MeCab.Tagger('-Ochasen')
node = tagger.parseToNode(text)
morphemes = []
while node:
@otknoy
otknoy / gist:8024098
Last active December 31, 2015 17:59
Python で flatten
from itertools import chain
# Sample nested list (a list of lists) to be flattened by one level.
l = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
# chain.from_iterable walks each inner list in order; list() materializes
# the resulting flat sequence.
all_doc = list(chain.from_iterable(l))
# There is also this approach: sum() with an empty-list start value
# concatenates the inner lists. The result is not stored here.
sum(l, [])
@otknoy
otknoy / gist:8177987
Last active January 1, 2016 17:29
term-score 合ってるのかわからない。
def term_score(topics):
labels_matrix = [[v for w, v in t] for t in topics]
beta_hat = numpy.array([numpy.array([w for w, v in t]) for t in topics])
gmean = scipy.stats.gmean(beta_hat)
gmean_m = numpy.tile(gmean, (len(beta_hat), 1))
ts = beta_hat * numpy.log(beta_hat / gmean_m)
new_topics = []
for i in range(len(labels_matrix)):
@otknoy
otknoy / lda
Last active August 29, 2015 13:56
LDA
#!/usr/bin/env python
import MeCab
import gensim
def tokenizer(s):
uni = s.encode('utf-8')
tagger = MeCab.Tagger("-Ochasen")
node = tagger.parseToNode(uni)
terms = []
@otknoy
otknoy / extract_date.py
Created April 9, 2014 11:16
Python で正規表現を用いて日付表現を抽出する
#!/usr/bin/env python
import re
def extract_date(s):
date_pattern = re.compile('(\d{4})/(\d{1,2})/(\d{1,2})')
result = date_pattern.search(s)
if result:
y, m, d = result.groups()
return {'year': y, 'month': m, 'day': d}
else:
@otknoy
otknoy / google-translate-auto.el
Created June 30, 2014 10:12
google-translate.el を使ったときに、翻訳する言語を自動で切り替える。
(require 'google-translate)
(global-set-key "\C-ct" 'google-translate-at-point-auto)
(defun set-google-translate-language (source target)
  "Set the default google-translate languages to SOURCE and TARGET.

SOURCE becomes the value of `google-translate-default-source-language'
and TARGET the value of `google-translate-default-target-language',
both installed through `custom-set-variables'."
  ;; `custom-set-variables' evaluates the expression in each entry later,
  ;; outside this function.  Splice the argument values in as quoted
  ;; constants so the call does not depend on SOURCE and TARGET being
  ;; visible as dynamically bound variables (they are not under
  ;; lexical-binding).
  (custom-set-variables
   `(google-translate-default-source-language ',source)
   `(google-translate-default-target-language ',target)))
@otknoy
otknoy / cos_sim.py
Last active December 1, 2016 08:32
Python + scipy で cos 類似度の計算
#!/usr/bin/env python
import numpy as np
import scipy.spatial.distance
if __name__ == '__main__':
x = np.array([1, 1, 1, 1, 1])
y = np.array([1, 0, 1, 0, 1])
z = np.array([0, 1, 0, 0, 0])
print 1 - scipy.spatial.distance.cosine(x, y)
@otknoy
otknoy / cos_sim.py
Last active September 29, 2017 13:40
Python + numpy で cos 類似度の計算
#!/usr/bin/env python
import numpy as np
def cos_sim(v1, v2):
    """Return the cosine similarity of *v1* and *v2*.

    The value is the dot product of the two inputs divided by the product
    of their norms as computed by ``np.linalg.norm`` (the Euclidean norm
    for 1-D inputs).

    Note: no guard is applied for zero-norm inputs; in that case the
    denominator is zero and the result is not a meaningful similarity
    (NumPy normally produces ``nan`` and emits a runtime warning).
    """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return np.dot(v1, v2) / norm_product
if __name__ == '__main__':
x = np.array([1, 1, 1, 1, 1])
y = np.array([1, 0, 1, 0, 1])
z = np.array([0, 1, 0, 0, 0])
@otknoy
otknoy / gist:d5ac40898c4446fb73de
Created January 27, 2015 02:40
Python で iterator の長さを取得する。
# An iterator has no len(); count its items by summing 1 per match.
# The parenthesised single-argument print works on both Python 2 and 3.
print(sum(1 for _ in re.finditer(pattern, text)))
@otknoy
otknoy / json_pretty_print.py
Created February 26, 2015 02:33
Python で日本語を含む dict を文字化けせずに表示 (ついでに整形)
import json
# Sample dict to serialize; every key needs a colon before its value.
data = {"hoge": 1, "hige": 2, "huge": 3}
# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX
# escapes, and indent=2 pretty-prints the output.
json_data = json.dumps(data, ensure_ascii=False, indent=2)
# The parenthesised single-argument print works on both Python 2 and 3.
print(json_data)