Skip to content

Instantly share code, notes, and snippets.

@yoshi0309
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoshi0309/1dbcd8ddea554f9b35af to your computer and use it in GitHub Desktop.
Save yoshi0309/1dbcd8ddea554f9b35af to your computer and use it in GitHub Desktop.
Connpass Event Classifier using Pocket - Jubatus Hackathon - Team Paper.
#!/usr/bin/env python
# coding: utf-8

# jubatus server info.
host = 'localhost'  # jubatus classifier server host
port = 9199         # jubatus RPC port

import sys
import json
import random
import jubatus
from jubatus.common import Datum
# for connpass json data
def getTitleList(filepath):
    """Return the list of event titles from a connpass JSON dump.

    filepath -- path to a JSON file containing an "events" array whose
                elements each carry a "title" key.
    """
    # 'with' guarantees the handle is closed; the original called
    # `f.close` without parentheses, which never actually closed it.
    with open(filepath) as f:
        data = json.load(f)
    return [event["title"] for event in data["events"]]
# for hatebu feed data
def getTitleFromFeed(path):
    """Parse an RSS/Atom feed at *path* and return every entry title."""
    import feedparser  # local import: only needed by this helper
    return [entry['title'] for entry in feedparser.parse(path)['entries']]
def getTitleFromTxt(path):
    """Return the lines of a plain-text file as a list, one title per line.

    Each entry keeps its trailing newline, preserving the original
    behaviour seen by downstream training code.
    """
    # 'with' guarantees closure; the original's bare `f.close` (missing
    # parentheses) was a no-op, leaking the file handle.
    with open(path) as f:
        return list(f)
def buildTrainData(titleList, classLabel):
    """Pair every title with *classLabel* as (label, Datum) tuples for jubatus."""
    return [(classLabel, Datum({'title': title})) for title in titleList]
def train(client, traindata):
    """Shuffle *traindata* in place and feed it to the jubatus classifier.

    client    -- a jubatus Classifier client
    traindata -- list of (label, Datum) tuples; mutated by the shuffle
    """
    # Shuffle so the online learner does not see all of one label in a row.
    random.shuffle(traindata)
    client.train(traindata)
def traindata_ty():
    """Build the training set for user "ty".

    Hatebu bookmark feeds are the positive class ('興味あり'); hatena
    hotentry feeds are the negative class ('興味なし').
    """
    interesting = []
    for feed in ('data/hatebu-1.xml', 'data/hatebu-2.xml',
                 'data/hatebu-3.xml', 'data/hatebu-4.xml',
                 'data/hatebu-5.xml', 'data/hatebu-6.xml'):
        interesting.extend(getTitleFromFeed(feed))
    data = buildTrainData(interesting, '興味あり')

    # NOTE: data/hot-economics.xml was deliberately left out by the author.
    boring = []
    for feed in ('data/hot-entertainment.xml', 'data/hot-general.xml',
                 'data/hot-lif.xml', 'data/hot-social.xml'):
        boring.extend(getTitleFromFeed(feed))
    data.extend(buildTrainData(boring, '興味なし'))
    return data
def traindata_mo():
    """Build the training set for user "mo".

    Titles saved in data/pocket.txt are the positive class ('興味あり');
    hatena hotentry feeds are the negative class ('興味なし').
    """
    data = buildTrainData(getTitleFromTxt('data/pocket.txt'), '興味あり')

    # NOTE: data/hot-economics.xml was deliberately left out by the author.
    boring = []
    for feed in ('data/hot-entertainment.xml', 'data/hot-general.xml',
                 'data/hot-lif.xml', 'data/hot-social.xml'):
        boring.extend(getTitleFromFeed(feed))
    data.extend(buildTrainData(boring, '興味なし'))
    return data
def traindata(userid):
    """Build per-user training data.

    Positive examples (label '1') are the user's crawled Pocket titles
    from Postgres; negative examples (label '0') are hatena hotentry
    feeds.

    userid -- id of the user in the Crawled_Pockets table
    """
    import psycopg2
    conn = psycopg2.connect("dbname=paper_development host=localhost user=postgres password=password")
    try:
        cur = conn.cursor()
        # Parameterized query: the original concatenated userid straight
        # into the SQL text, which is an injection risk.
        cur.execute("select given_title from Crawled_Pockets where user_id=%s", [userid])
        titleList = [record[0] for record in cur.fetchall()]
        cur.close()
    finally:
        # The original never closed the connection at all.
        conn.close()

    data = buildTrainData(titleList, '1')  # '1' = interesting (興味あり)

    # NOTE: data/hot-economics.xml was deliberately left out by the author.
    negatives = []
    for feed in ('data/hot-entertainment.xml', 'data/hot-general.xml',
                 'data/hot-lif.xml', 'data/hot-social.xml'):
        negatives.extend(getTitleFromFeed(feed))
    data.extend(buildTrainData(negatives, '0'))  # '0' = not interesting (興味なし)
    return data
def predict(client):
#data = [
# Datum({'title': u'ダイエー'}),
# Datum({'title': u'機械学習'}),
# Datum({'title': u'オムニ・チャネル'}),
# Datum({'title': u'AKB48'}), #OMG!
# Datum({'title': u'消費税増税'}),
# Datum({'title': u'Apache'}),
# Datum({'title': u'プロジェクト管理'}),
# Datum({'title': u'Webデザイン'}),
# Datum({'title': u'Ocaml'}),
# Datum({'title': u'Java'}),
# Datum({'title': u'Solr'}),
# Datum({'title': u'Elasticsearch'})
# ]
eventIdList = []
import psycopg2
conn = psycopg2.connect("dbname=paper_development host=localhost user=postgres password=password")
cur = conn.cursor()
cur.execute("select * from connpasses")
records = cur.fetchall()
for record in records:
# print record[1]
# print record[2]
d = Datum({'title': record[2]})
# print record[2]
# print d
res = client.classify([d])
# sys.stdout.write(max(res[0], key=lambda x: x.score).label)
# sys.stdout.write(' ')
# sys.stdout.write(d.string_values[0][1].encode('utf-8'))
# sys.stdout.write('\n')
result = max(res[0], key=lambda x: x.score)
# print res[0]
# print d.string_values[0][1]
# max かつ score が 0.3 よりも大きい物
if result.label == '1' and result.score >= 0.3:
eventIdList.append(record[1])
print record[1], record[2], result.score # for debug
conn.close()
cur.close()
return eventIdList
def getUserIds():
    """Return [(user_id, user_name), ...] for every row of the Users table."""
    import psycopg2
    conn = psycopg2.connect("dbname=paper_development host=localhost user=postgres password=password")
    cur = conn.cursor()
    cur.execute("select * from Users")
    # Columns 0 and 1 are assumed to be id and name -- verify against schema.
    userIds = [(rec[0], rec[1]) for rec in cur.fetchall()]
    cur.close()
    conn.close()
    return userIds
def saveDataToTable(userid, eventIdList):
    """Replace the stored recommendations for *userid* with *eventIdList*.

    Deletes the user's old rows from Recommends, then inserts one row per
    recommended event id.
    """
    import psycopg2
    conn = psycopg2.connect("dbname=paper_development host=localhost user=postgres password=password")
    try:
        cur = conn.cursor()
        # Parameterized delete: the original concatenated userid into the
        # SQL text, which is an injection risk.
        cur.execute("delete from recommends where user_id=%s", [userid])
        conn.commit()
        for eventId in eventIdList:
            cur.execute("INSERT INTO Recommends(user_id,event_id) VALUES (%s,%s)", [userid, eventId])
        conn.commit()
        cur.close()
    finally:
        # Ensure the connection is released even if an insert fails.
        conn.close()
if __name__ == '__main__':
# connect to the jubatus
# client_ty = jubatus.Classifier(host, port, 'yoshida')
# client_ty.clear()
# train(client_ty, traindata_ty())
# print 'predict for ty .............................'
# predict(client_ty)
# client_mo = jubatus.Classifier(host, port, 'morimoto')
# client_mo.clear()
# train(client_mo,traindata_mo())
# print 'predict for mo .............................'
# predict(client_mo)
for user in getUserIds():
client = jubatus.Classifier(host, port, str(user[0]))
client.clear()
train(client, traindata(user[0]))
print 'predict for ' + str(user[0]) + ' ' + str(user[1]) + ' .............................'
eventIdList = predict(client)
saveDataToTable(user[0],eventIdList)
{
"method": "AROW",
"converter": {
"num_filter_types": {},
"num_filter_rules": [],
"string_filter_types": {},
"string_filter_rules": [],
"num_types": {},
"num_rules": [],
"string_types": {
"bigram": { "method": "ngram", "char_num": "3" },
"mecab": {
"method": "dynamic",
"path": "libmecab_splitter.so",
"function": "create",
"arg": "-d /var/lib/mecab/dic/ipadic"
}
},
"string_rules": [
{ "key": "*", "type": "bigram", "sample_weight": "bin", "global_weight": "bin" }
]
},
"parameter": {
"regularization_weight" : 1.0
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment