This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf8 | |
import numpy as np | |
import scipy as sp | |
import math | |
from scipy import linalg as sln | |
from scipy.sparse import linalg as sprsln | |
from svm import * | |
from svmutil import * | |
imp_num = 2000 # number of impressions (data rows) to generate
usr_num = 100 # number of users
def create_data(n_imp=None, n_usr=None, verbose=True):
    """Generate a synthetic ad-impression data set with conversion labels.

    The random seed is fixed inside the function, so repeated calls with the
    same sizes return identical data.

    Parameters
    ----------
    n_imp : int, optional
        Number of impressions to generate. Defaults to the module-level
        ``imp_num``.
    n_usr : int, optional
        Number of users to generate. Defaults to the module-level ``usr_num``.
    verbose : bool, optional
        When true (the default, matching the original behaviour) print, for
        every impression, the user's interest category, the user index and
        the ad category, followed by the conversion label.

    Returns
    -------
    adinfo : ndarray, shape (n_imp, 1048)
        One-hot ad features: columns 0-19 campaign ID, 20-22 creative type,
        23-1022 creative category, 1023-1037 creative size,
        1038-1047 creative position.
    usrinfo : ndarray, shape (n_usr, 1002)
        Columns 0-999 hold per-category visit counts (larger within 20
        categories of the user's interest category); columns 1000 and 1001
        hold two uniform values in [0, 15) and [0, 7) (named ``rec`` and
        ``frec`` -- presumably recency and frequency).
    deviceinfo : ndarray, shape (n_imp, 7)
        One-hot device features: columns 0-1 device, 2-6 OS.
    timeinfo : ndarray, shape (n_imp,)
        Hour of the impression, an integer-valued float in [0, 24).
    imp_usr : ndarray of int, shape (n_imp,)
        Index of the user that saw each impression. Returned as integers so
        that it can be used directly as an array index.
    ct : ndarray of int, shape (n_imp,)
        Conversion label (1 = converted, 0 = not converted).
    ct_crt : ndarray, shape (n_imp, 5)
        Per impression: user interest category, ad category, hour,
        creative position, conversion label.
    usr_ctg_idx : ndarray, shape (n_usr,)
        Interest category of each user (integer-valued floats).
    """
    if n_imp is None:
        n_imp = imp_num
    if n_usr is None:
        n_usr = usr_num

    n_ctg = 1000   # number of interest-category columns in usrinfo
    window = 20    # half-width of the "matching category" window

    adinfo = np.zeros((n_imp, 1048))    # advertiser-side features
    usrinfo = np.zeros((n_usr, 1002))   # user features
    deviceinfo = np.zeros((n_imp, 7))   # device features

    np.random.seed(3)
    rows = np.arange(n_imp)

    # NOTE: the order of the random draws below follows the original code so
    # that the fixed seed keeps producing the same data set.

    ## ad information
    # campaign ID (20 values)
    adcmpgn_idx = np.floor(np.random.uniform(0, 1, size=n_imp) * 20)
    adinfo[rows, adcmpgn_idx.astype(int)] = 1
    # creative type (3 values)
    adcr_idx = np.floor(np.random.uniform(0, 1, size=n_imp) * 3)
    adinfo[rows, 20 + adcr_idx.astype(int)] = 1
    # creative category: upper bound scales with the campaign index, so the
    # largest possible value is 19 * 50 - 1 = 949
    adctgry_idx = np.floor(
        np.random.uniform(0, 1, size=n_imp) * adcmpgn_idx * 50)
    adinfo[rows, 23 + adctgry_idx.astype(int)] = 1
    # creative size (15 values)
    adcrsz_idx = np.floor(np.random.uniform(0, 1, size=n_imp) * 15)
    adinfo[rows, 1023 + adcrsz_idx.astype(int)] = 1
    # creative position (10 values)
    adcrps_idx = np.floor(np.random.uniform(0, 1, size=n_imp) * 10)
    adinfo[rows, 1038 + adcrps_idx.astype(int)] = 1

    ## user information
    # interest category of each user
    usr_ctg_idx = np.floor(np.random.uniform(0, 1, size=n_usr) * n_ctg)
    # Category visit frequencies (described in the original as the categories
    # of sites visited within 4 weeks): up to 500 for categories within
    # `window` of the user's interest category, up to 10 elsewhere. One
    # uniform draw per (user, category) cell, filled row by row.
    near_interest = np.abs(
        np.arange(n_ctg)[None, :] - usr_ctg_idx[:, None]) <= window
    scale = np.where(near_interest, 500, 10)
    usrinfo[:, 0:n_ctg] = np.floor(
        np.random.uniform(0, 1, size=(n_usr, n_ctg)) * scale)
    rec = np.random.uniform(0, 1, size=n_usr) * 15
    usrinfo[:, 1000] = rec
    frec = np.random.uniform(0, 1, size=n_usr) * 7
    usrinfo[:, 1001] = frec

    ## device information
    # device (2 values)
    dvc_idx = np.floor(np.random.uniform(0, 1, size=n_imp) * 2)
    deviceinfo[rows, dvc_idx.astype(int)] = 1
    # OS (5 values)
    os_idx = np.floor(np.random.uniform(0, 1, size=n_imp) * 5)
    deviceinfo[rows, 2 + os_idx.astype(int)] = 1

    ## time at which the ad was shown (hour of day)
    timeinfo = np.floor(np.random.uniform(0, 1, size=n_imp) * 24)

    ## which user each impression was shown to
    # Cast to int: NumPy rejects float values used as array indices.
    imp_usr = np.floor(
        np.random.uniform(0, 1, size=n_imp) * n_usr).astype(int)

    ## conversion rule
    # An impression converts when the ad category lies within `window` of the
    # user's interest category AND position < 6 AND the user's `frec` < 5
    # AND campaign < 10 AND creative size < 10.
    usr_ctg_of_imp = usr_ctg_idx[imp_usr]
    converted = (
        (np.abs(adctgry_idx - usr_ctg_of_imp) <= window)
        & (adcrps_idx < 6)
        & (usrinfo[imp_usr, 1001] < 5)
        & (adcmpgn_idx < 10)
        & (adcrsz_idx < 10)
    )
    ct = converted.astype(int)

    if verbose:
        # Single-argument print() behaves the same on Python 2 and 3.
        for i in range(n_imp):
            print("%s %s %s" % (usr_ctg_of_imp[i], imp_usr[i], adctgry_idx[i]))
            print(ct[i])

    ct_crt = np.zeros((n_imp, 5))
    ct_crt[:, 0] = usr_ctg_of_imp
    ct_crt[:, 1] = adctgry_idx
    ct_crt[:, 2] = timeinfo
    ct_crt[:, 3] = adcrps_idx
    ct_crt[:, 4] = ct

    return adinfo, usrinfo, deviceinfo, timeinfo, imp_usr, ct, ct_crt, usr_ctg_idx
if __name__ == '__main__':
    # Generate the synthetic impression log and its conversion labels.
    ad, usr, dvc, hour, imp_usr, cv, cv_crt, usr_ctg = create_data()

    # Build one feature row per impression: ad features, the features of the
    # user who saw it, device features and the hour of the impression.
    # The user index is cast to int because NumPy rejects float indices.
    usr_rows = usr[np.asarray(imp_usr).astype(int)]
    imp_info = np.hstack(
        (ad, usr_rows, dvc, np.asarray(hour).reshape(-1, 1)))

    # Compress the feature matrix by projecting it onto the leading right
    # singular vectors. scipy returns those vectors as the ROWS of Vh, so the
    # projection matrix is Vh[:k].T (the original sliced columns instead).
    # The reduced SVD is enough because only the leading rows are used.
    n_components = 1000
    _, _, Vh = sln.svd(imp_info, full_matrices=False)
    imp_info = np.dot(imp_info, Vh[:n_components].T)

    # Classify with an SVM.
    # LIBSVM options: -s 0 C-SVC, -t 0 linear kernel, -b 1 probability
    # estimates, -w1 100 weight of 100 on class 1 (the converted class).
    problem = svm_problem(cv.tolist(), imp_info.tolist())
    param = svm_parameter('-s 0 -t 0 -b 1 -w1 100')
    m = svm_train(problem, param)
    # Predict on the training data itself (training-set result only).
    p_labels, p_acc, p_vals = svm_predict(cv.tolist(), imp_info.tolist(), m)
    print(p_labels)
    print(p_acc)
    print(p_vals)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment