Yuto Yamaguchi yamaguchiyuto

## open_random_favs.py
import sys
import random
import os
import tweepy

def get_auth(key_file):
    """Build a tweepy OAuth handler from credentials stored in a key file.

    Only the first line of ``key_file`` is read. After trailing whitespace is
    stripped, the line is split on single spaces and must yield exactly four
    fields, in this order: consumer key, consumer secret, access token,
    access token secret.

    Args:
        key_file: Path of the credentials file.

    Returns:
        The ``tweepy.OAuthHandler`` built from the consumer key/secret, after
        ``set_access_token`` has been called with the access token/secret.

    Raises:
        IOError: If ``key_file`` cannot be opened.
        ValueError: If the first line does not split into exactly four fields.
    """
    # Read under a context manager so the handle is closed as soon as the
    # line has been read, instead of lingering until garbage collection.
    with open(key_file, 'r') as key_fh:
        first_line = key_fh.readline()

    consumer_key, consumer_secret, access_token, access_token_secret = (
        first_line.rstrip().split(' '))

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return auth

## ngram_extraction.py
# -*- coding: utf-8 -*-

from sklearn.feature_extraction.text import CountVectorizer

# Vectorizer configured for character bigrams ('char_wb' analyzer with
# ngram_range=(2, 2)). It is only constructed here; the lines shown never
# call it.
cv = CountVectorizer(analyzer='char_wb', ngram_range=(2, 2), min_df=0)

corpus = [u'私は男です私は', u'私は女です。']

# Echo each document on its own line. A single parenthesized argument makes
# print(...) produce the same output whether it is parsed as a Python 2
# statement or a Python 3 function call.
for text in corpus:
    print(text)

# print('') writes the same lone newline that a bare Python 2 ``print`` did.
print('')

## dbscan
import numpy
from scipy.spatial import distance
from sklearn.cluster import DBSCAN

# Seven two-dimensional sample points to cluster.
S = numpy.array([
    [0, 0.9],
    [0.1, 0.8],
    [0.9, 0.1],
    [0.85, 0.05],
    [0.9, 0.05],
    [0.05, 0.85],
    [0.5, 0.4],
])

# fit() hands back the estimator itself, so building and fitting are chained.
dbs = DBSCAN(eps=0.2, min_samples=3).fit(S)

# Result recorded by the original author; the exact label numbering and dtype
# may differ between scikit-learn versions -- verify before relying on it.
dbs.labels_  # => array([ 1.,  1.,  0.,  0.,  0.,  1., -1.])

## twitter_timestamp_to_sec.py
import time

import calendar

# Sample timestamp string to convert.
twitter_timestamp_str = "Tue Apr 16 04:00:29 +0000 2013"

# The UTC offset is matched as the literal text "+0000", so only timestamps
# written with that offset will parse.
format_str = "%a %b %d %H:%M:%S +0000 %Y"

encoded_timestamp = time.strptime(twitter_timestamp_str, format_str)

# The parsed fields are UTC ("+0000"), so convert them with
# calendar.timegm(). time.mktime() interprets a struct_time as *local* time,
# which shifted the result by the machine's UTC offset. float() keeps the
# printed value in the same form mktime() produced.
epoch_seconds = float(calendar.timegm(encoded_timestamp))

# Single-argument print(...) works on both Python 2 and Python 3.
print(epoch_seconds)

## extract_nouns.py
import nltk

text_str = "I have written this book and these papers."

# Split the sentence into tokens, then attach a part-of-speech tag to each.
text = nltk.word_tokenize(text_str)
result = nltk.pos_tag(text)

# Keep the tokens whose tag is 'NN' or 'NNS', the two tags this snippet
# treats as nouns.
nouns = [word for word, tag in result if tag in ('NN', 'NNS')]
	import sys
	import random
	import os
	import tweepy

	def get_auth(key_file):
		"""Build a tweepy OAuth handler from credentials stored in a key file.

		Only the first line of ``key_file`` is read. After trailing whitespace
		is stripped, the line is split on single spaces and must yield exactly
		four fields, in this order: consumer key, consumer secret, access
		token, access token secret.

		Args:
			key_file: Path of the credentials file.

		Returns:
			The ``tweepy.OAuthHandler`` built from the consumer key/secret,
			after ``set_access_token`` has been called with the access
			token/secret.

		Raises:
			IOError: If ``key_file`` cannot be opened.
			ValueError: If the first line does not split into exactly four
				fields.
		"""
		# Read under a context manager so the handle is closed as soon as the
		# line has been read, instead of lingering until garbage collection.
		with open(key_file, 'r') as key_fh:
			first_line = key_fh.readline()

		consumer_key, consumer_secret, access_token, access_token_secret = (
			first_line.rstrip().split(' '))

		auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
		auth.set_access_token(access_token, access_token_secret)
		return auth
	# -*- coding: utf-8 -*-

	from sklearn.feature_extraction.text import CountVectorizer

	# Vectorizer configured for character bigrams ('char_wb' analyzer with
	# ngram_range=(2, 2)). It is only constructed here; the lines shown never
	# call it.
	cv = CountVectorizer(analyzer='char_wb', ngram_range=(2, 2), min_df=0)

	corpus = [u'私は男です私は', u'私は女です。']

	# Echo each document on its own line. The loop body is nested under the
	# ``for`` again, and a single parenthesized argument makes print(...)
	# produce the same output on Python 2 and Python 3.
	for text in corpus:
		print(text)

	# print('') writes the same lone newline that a bare Python 2 ``print`` did.
	print('')
	import numpy
	from scipy.spatial import distance
	from sklearn.cluster import DBSCAN

	# Seven two-dimensional sample points to cluster.
	S = numpy.array([
		[0, 0.9],
		[0.1, 0.8],
		[0.9, 0.1],
		[0.85, 0.05],
		[0.9, 0.05],
		[0.05, 0.85],
		[0.5, 0.4],
	])

	# fit() hands back the estimator itself, so building and fitting are chained.
	dbs = DBSCAN(eps=0.2, min_samples=3).fit(S)

	# Result recorded by the original author; the exact label numbering and
	# dtype may differ between scikit-learn versions -- verify before relying
	# on it.
	dbs.labels_  # => array([ 1.,  1.,  0.,  0.,  0.,  1., -1.])
	import time

	import calendar

	# Sample timestamp string to convert.
	twitter_timestamp_str = "Tue Apr 16 04:00:29 +0000 2013"

	# The UTC offset is matched as the literal text "+0000", so only
	# timestamps written with that offset will parse.
	format_str = "%a %b %d %H:%M:%S +0000 %Y"

	encoded_timestamp = time.strptime(twitter_timestamp_str, format_str)

	# The parsed fields are UTC ("+0000"), so convert them with
	# calendar.timegm(). time.mktime() interprets a struct_time as *local*
	# time, which shifted the result by the machine's UTC offset. float()
	# keeps the printed value in the same form mktime() produced.
	epoch_seconds = float(calendar.timegm(encoded_timestamp))

	# Single-argument print(...) works on both Python 2 and Python 3.
	print(epoch_seconds)
	import nltk

	text_str = "I have written this book and these papers."

	# Split the sentence into tokens, then attach a part-of-speech tag to each.
	text = nltk.word_tokenize(text_str)
	result = nltk.pos_tag(text)

	# Keep the tokens whose tag is 'NN' or 'NNS', the two tags this snippet
	# treats as nouns.
	nouns = [word for word, tag in result if tag in ('NN', 'NNS')]