Skip to content

Instantly share code, notes, and snippets.

@shimaXX
Last active October 1, 2016 13:13
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save shimaXX/6463769 to your computer and use it in GitHub Desktop.
# coding: utf8
import numpy as np
import scipy as sp
import math
from scipy import linalg as sln
from scipy.sparse import linalg as sprsln
from svm import *
from svmutil import *
# Sizes of the synthetic data set built by create_data().
imp_num = 2000 # number of impressions (data rows) to generate
usr_num = 100 # number of users
def create_data(n_imp=None, n_usr=None, verbose=True):
    """Generate a synthetic ad-impression data set with conversion labels.

    The NumPy global random generator is re-seeded with 3 on every call, so
    the same sizes always produce the same data.

    Args:
        n_imp: Number of impressions to generate. Defaults to the
            module-level ``imp_num``.
        n_usr: Number of users to generate. Defaults to the module-level
            ``usr_num``.
        verbose: When true (the default), print two debug lines per
            impression: the user's category, user index and ad category,
            followed by the conversion label.

    Returns:
        A tuple ``(adinfo, usrinfo, deviceinfo, timeinfo, imp_usr, ct,
        ct_crt, usr_ctg_idx)``:

        * ``adinfo`` -- ``(n_imp, 1048)`` array made of five one-hot groups:
          campaign (columns 0-19), creative type (20-22), creative category
          (23-1022), creative size (1023-1037), creative position
          (1038-1047).
        * ``usrinfo`` -- ``(n_usr, 1002)`` array: 1000 per-category visit
          counts followed by two continuous columns drawn from [0, 15) and
          [0, 7).
        * ``deviceinfo`` -- ``(n_imp, 7)`` array: one-hot device (columns
          0-1) and one-hot OS (columns 2-6).
        * ``timeinfo`` -- ``(n_imp,)`` array of integral values in [0, 24).
        * ``imp_usr`` -- ``(n_imp,)`` integer array; the row of ``usrinfo``
          shown each impression.
        * ``ct`` -- ``(n_imp,)`` integer array of 0/1 conversion labels.
        * ``ct_crt`` -- ``(n_imp, 5)`` array with, per impression: user
          category, ad category, time, ad position, conversion label.
        * ``usr_ctg_idx`` -- ``(n_usr,)`` array with each user's category
          of interest.
    """
    if n_imp is None:
        n_imp = imp_num
    if n_usr is None:
        n_usr = usr_num

    np.random.seed(3)  # fixed random seed: the data set is reproducible

    def draw_index(upper):
        # One integer index per impression, uniform over [0, upper).
        # Integer dtype is required: NumPy rejects float indices.
        return np.floor(
            np.random.uniform(0, 1, size=n_imp) * upper).astype(int)

    rows = np.arange(n_imp)

    # NOTE: the order of the random draws below is significant; reordering
    # them changes the generated data for the fixed seed.

    ## ad information
    adinfo = np.zeros((n_imp, 1048))  # advertiser data
    # campaign ID
    adcmpgn_idx = draw_index(20)
    adinfo[rows, adcmpgn_idx] = 1
    # creative type
    adcr_idx = draw_index(3)
    adinfo[rows, 20 + adcr_idx] = 1
    # creative category: its range scales with the campaign index
    # (at most 19 * 50 = 950 categories, so it stays inside its group)
    adctgry_idx = np.floor(
        np.random.uniform(0, 1, size=n_imp) * adcmpgn_idx * 50).astype(int)
    adinfo[rows, 23 + adctgry_idx] = 1
    # creative size
    adcrsz_idx = draw_index(15)
    adinfo[rows, 1023 + adcrsz_idx] = 1
    # creative position
    adcrps_idx = draw_index(10)
    adinfo[rows, 1038 + adcrps_idx] = 1

    ## user information
    usrinfo = np.zeros((n_usr, 1002))  # user data
    # Category each user is interested in.
    usr_ctg_idx = np.floor(np.random.uniform(0, 1, size=n_usr) * 1000)
    # Interest-category data (per the original note: frequency of the site
    # categories visited within 4 weeks). Categories within +/-20 of the
    # user's own category get counts up to 500, all others up to 10.
    near_interest = np.abs(
        usr_ctg_idx[:, np.newaxis] - np.arange(1000)[np.newaxis, :]) <= 20
    scale = np.where(near_interest, 500, 10)
    usrinfo[:, :1000] = np.floor(
        np.random.uniform(0, 1, size=(n_usr, 1000)) * scale)
    # "rec" / "frec" in the original -- presumably recency and frequency;
    # TODO confirm with the author.
    usrinfo[:, 1000] = np.random.uniform(0, 1, size=n_usr) * 15
    usrinfo[:, 1001] = np.random.uniform(0, 1, size=n_usr) * 7

    ## device information
    deviceinfo = np.zeros((n_imp, 7))  # device data
    # device
    dvc_idx = draw_index(2)
    deviceinfo[rows, dvc_idx] = 1
    # OS
    os_idx = draw_index(5)
    deviceinfo[rows, 2 + os_idx] = 1

    ## time at which the ad was shown
    timeinfo = np.floor(np.random.uniform(0, 1, size=n_imp) * 24)

    ## which user appears at each impression
    imp_usr = draw_index(n_usr)

    # Conversion (CV) condition: the ad category lies within +/-20 of the
    # user's category, and position < 6, user column 1001 < 5,
    # campaign < 10 and size < 10.
    usr_cat = usr_ctg_idx[imp_usr]
    ct = (
        (np.abs(adctgry_idx - usr_cat) <= 20)
        & (adcrps_idx < 6)
        & (usrinfo[imp_usr, 1001] < 5)
        & (adcmpgn_idx < 10)
        & (adcrsz_idx < 10)
    ).astype(int)

    ct_crt = np.zeros((n_imp, 5))
    ct_crt[:, 0] = usr_cat
    ct_crt[:, 1] = adctgry_idx
    ct_crt[:, 2] = timeinfo
    ct_crt[:, 3] = adcrps_idx
    ct_crt[:, 4] = ct

    if verbose:
        # Single-argument print() behaves the same on Python 2 and 3.
        for i in range(n_imp):
            print("%s %s %s" % (usr_cat[i], imp_usr[i], adctgry_idx[i]))
            print(ct[i])

    return adinfo, usrinfo, deviceinfo, timeinfo, imp_usr, ct, ct_crt, usr_ctg_idx
def build_feature_matrix(ad, usr, dvc, time, imp_usr):
    """Assemble one feature row per impression.

    Each row is the concatenation of the impression's ad features, the
    features of the user shown that impression, its device features and
    its time value.

    Args:
        ad: ``(n_imp, n_ad)`` array of ad features.
        usr: ``(n_usr, n_user)`` array of user features.
        dvc: ``(n_imp, n_dev)`` array of device features.
        time: ``(n_imp,)`` array of time values.
        imp_usr: ``(n_imp,)`` array giving the row of ``usr`` for each
            impression. Float values are accepted and cast to integers.

    Returns:
        ``(n_imp, n_ad + n_user + n_dev + 1)`` array.
    """
    # NumPy rejects float indices, so cast before indexing.
    user_rows = np.asarray(imp_usr).astype(int)
    time_column = np.asarray(time).reshape(-1, 1)
    return np.hstack((ad, usr[user_rows], dvc, time_column))


def project_top_components(features, n_components):
    """Project ``features`` onto its leading right singular vectors.

    Args:
        features: ``(n_samples, n_features)`` array.
        n_components: Number of leading singular directions to keep.

    Returns:
        ``(n_samples, k)`` array, where ``k`` is ``n_components`` capped at
        ``min(n_samples, n_features)``.
    """
    # Only the leading rows of Vh are used, so the reduced SVD suffices.
    _, _, vh = sln.svd(features, full_matrices=False)
    # The right singular vectors are the ROWS of Vh; slicing columns
    # (Vh[:, :k]) would not give a projection onto the top directions.
    return np.dot(features, vh[:n_components].T)


if __name__ == '__main__':
    ad, usr, dvc, visit_time, imp_usr, cv, cv_crt, usr_ctg = create_data()
    imp_info = build_feature_matrix(ad, usr, dvc, visit_time, imp_usr)
    # Compress the feature matrix to 1000 dimensions.
    imp_info = project_top_components(imp_info, 1000)
    # Classification with an SVM (libsvm: C-SVC, linear kernel,
    # probability estimates, weight 100 on class 1).
    labels = cv.tolist()
    samples = imp_info.tolist()
    problem = svm_problem(labels, samples)
    param = svm_parameter('-s 0 -t 0 -b 1 -w1 100')
    m = svm_train(problem, param)
    # Evaluated on the training data itself (training result).
    p_labels, p_acc, p_vals = svm_predict(labels, samples, m)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(p_labels)
    print(p_acc)
    print(p_vals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment