Skip to content

Instantly share code, notes, and snippets.

View yamaguchiyuto's full-sized avatar

Yuto Yamaguchi yamaguchiyuto

View GitHub Profile
@yamaguchiyuto
yamaguchiyuto / extract_nouns.py
Created June 17, 2013 03:00
Extract nouns from a given text using Python and NLTK.
import nltk
# Sample sentence whose nouns we want to pull out.
text_str = "I have written this book and these papers."

# Split the sentence into tokens, then attach a part-of-speech tag to each;
# `result` is a list of (token, tag) pairs.
text = nltk.word_tokenize(text_str)
result = nltk.pos_tag(text)

# Keep only the tokens whose tag is 'NN' or 'NNS'.
nouns = [word for word, tag in result if tag in ('NN', 'NNS')]
@yamaguchiyuto
yamaguchiyuto / twitter_timestamp_to_sec.py
Created June 17, 2013 03:22
Convert a Twitter timestamp string into seconds since the Unix epoch.
import calendar
import time

# Twitter's "created_at" format; the offset is always "+0000", i.e. UTC.
twitter_timestamp_str = "Tue Apr 16 04:00:29 +0000 2013"
# The "+0000" is matched literally rather than parsed as a timezone.
format_str = "%a %b %d %H:%M:%S +0000 %Y"

# Parse into a struct_time whose fields are the UTC wall-clock values.
encoded_timestamp = time.strptime(twitter_timestamp_str, format_str)

# calendar.timegm() interprets the struct_time as UTC. time.mktime() would
# interpret it as *local* time and shift the result by the machine's UTC
# offset, which is wrong for a "+0000" timestamp.
epoch_seconds = calendar.timegm(encoded_timestamp)
print(epoch_seconds)
@yamaguchiyuto
yamaguchiyuto / dbscan
Created June 21, 2013 14:08
DBSCAN with scikit-learn
import numpy
from scipy.spatial import distance
from sklearn.cluster import DBSCAN
# Seven 2-D sample points, one per row.
S = numpy.array([
    [0, 0.9],
    [0.1, 0.8],
    [0.9, 0.1],
    [0.85, 0.05],
    [0.9, 0.05],
    [0.05, 0.85],
    [0.5, 0.4],
])

# Configure DBSCAN (neighbourhood radius 0.2, at least 3 samples per dense
# region) and fit it to the samples; fit() returns the estimator itself.
dbs = DBSCAN(eps=0.2, min_samples=3).fit(S)

# Per-sample cluster labels; -1 marks a sample classified as noise.
# Output recorded by the original author (exact ids/dtype may differ between
# scikit-learn versions -- verify locally):
dbs.labels_ # => array([ 1., 1., 0., 0., 0., 1., -1.])
@yamaguchiyuto
yamaguchiyuto / ngram_extraction.py
Created June 24, 2013 03:04
Extract n-gram features with scikit-learn
# -*- coding: utf-8 -*-
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(analyzer='char_wb', ngram_range=(2,2), min_df = 0)
corpus = [u'私は男です私は', u'私は女です。']
for text in corpus:
print text
print
import sys
import random
import os
import tweepy
def get_auth(key_file):
    """Build a tweepy OAuth handler from credentials stored in a file.

    The first line of ``key_file`` must contain exactly four
    whitespace-separated fields, in this order: consumer key, consumer
    secret, access token, access token secret. Any further lines are ignored.

    Args:
        key_file: Path of the credentials file.

    Returns:
        A ``tweepy.OAuthHandler`` with the access token already set.

    Raises:
        IOError: If the file cannot be opened (``OSError`` on Python 3).
        ValueError: If the first line does not hold exactly four fields.
    """
    # Context manager closes the handle even if reading fails; the original
    # left the file open until garbage collection.
    with open(key_file, 'r') as key_fh:
        # split() with no argument tolerates tabs, repeated spaces and the
        # trailing newline, unlike split(' ').
        fields = key_fh.readline().split()

    if len(fields) != 4:
        raise ValueError(
            "expected 4 credential fields on the first line of %r, found %d"
            % (key_file, len(fields))
        )

    consumer_key, consumer_secret, access_token, access_token_secret = fields
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return auth
@yamaguchiyuto
yamaguchiyuto / basic_plot.py
Last active March 7, 2022 15:51
Plot degree distribution (Freq, CDF, CCDF) from edgelist data
import sys
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
def plot(data,filename,degreetype):
""" Plot Distribution """
plt.plot(range(len(data)),data,'bo')
plt.yscale('log')
plt.xscale('log')
@yamaguchiyuto
yamaguchiyuto / eigenspoke.py
Last active August 29, 2015 14:08
Plot EE-plot from edgelist [Aditya+, ICDM09]
import sys
import networkx as nx
from scipy.sparse import linalg
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
def plot(v,name,k,n=10000):
for i in range(0,k-1):
plt.plot(v[:n,i],v[:n,i+1],'r+')
@yamaguchiyuto
yamaguchiyuto / digits.py
Created December 2, 2014 14:30
scikit-learn digits dataset
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset and pick its first image.
digits = load_digits()
first_image = digits.images[0]

# Select the gray colormap before drawing so matshow() uses it.
plt.gray()
plt.matshow(first_image)

# Display the figure.
plt.show()
@yamaguchiyuto
yamaguchiyuto / get_precisions.py
Created December 2, 2014 15:32
LP and LS experiments
import sys
import numpy as np
import random
from sklearn import datasets
from sklearn.semi_supervised import label_propagation
from sklearn import svm
from sklearn.grid_search import ParameterGrid
def score(estimator, X, y, parameters, validation_true_labels, test_true_labels, validation_set, test_set, X_validation_for_svm=None, X_test_for_svm=None):
@yamaguchiyuto
yamaguchiyuto / oreore_ridge.py
Created December 5, 2014 04:04
scikit-learn-compatible Ridge Regression
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
class RidgeRegression(BaseEstimator, RegressorMixin):
def __init__(self,lamb=1.0):
self.lamb = lamb
def fit(self,X,y):
A = np.dot(X.T,X) + self.lamb * np.identity(X.shape[1])
b = np.dot(X.T,y)