Last active
August 29, 2015 14:18
-
-
Save puyokw/04b8ab53bc842f96a18c to your computer and use it in GitHub Desktop.
データサイエンス・カップ 2015 春
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Missing-value imputation and attendance prediction.
#
# Step 1: fit a temperature model on the training data and use it to fill in
#         the temperature column that the test data lacks.
# Step 2: fit an attendance model on the training data and predict attendance
#         for the test data.

# Load the training data (output.csv).
data1<-read.csv("output.csv")
# Drop ID, humidity, attendance and round (節):
# - ID: the IDs of the rows to predict are far away from the training IDs
# - humidity and attendance: they are missing
# - round: carries almost the same information as the month
temp<-data1[,c(-1,-2,-5,-16)]
# Same predictors without the temperature column (15), and the temperature target.
temp.x<-data1[,c(-1,-2,-5,-15,-16)]
temp.y<-data1[,15]
# predict with gbm
library(gbm)
# Variable importance for predicting temperature.
# Stochastic gradient boosting.
set.seed(136)
gbm.fit<- gbm(気温~.,data=temp,distribution="gaussian",
var.monotone=c(0,0,0,0,0,0,0,0,0,0,0), n.trees=50000, interaction.depth=3,
cv.folds=10,shrinkage=0.005, bag.fraction=0.5)
# Find the optimal number of trees.
best.iter<-gbm.perf(gbm.fit, method="cv")
PRE<-predict(gbm.fit,data1,best.iter)
MSR<-sum((temp$気温-PRE)^2/nrow(temp))
# The RMSE is printed for reference; this fit was run to obtain the variable importance.
print(sqrt(MSR)) # 1.60792
print(best.iter) # 45045
summary(gbm.fit) # variable importance for the temperature prediction
# Use the variables ranked in the top seven by importance,
# except stadium capacity, which was dropped because it was judged to
# duplicate the information in the home team.
# SVM predicts temperature better than gbm.
library(e1071)
# epsilon and gamma were tuned to make the temperature RMSE as small as
# possible while keeping cost small.
model<-svm(temp.x[,c(1,4,5,6,7,8)],temp.y,epsilon=0.065,cost=2,gamma=1.8)
PRE<-predict(model,temp.x[,c(1,4,5,6,7,8)])
MSR<-sum((temp$気温-PRE)^2/nrow(temp))
print(sqrt(MSR)) # 0.9978517
# Load the test data (answer.csv).
data2<-read.csv("answer.csv")
temp.pre<-predict(model,data2[,c(2,6,7,8,9,10)])
# The test data has no temperature column, so append the predicted one.
data2$気温<-temp.pre
# Build the attendance prediction model.
# Stochastic gradient boosting.
set.seed(136)
gbm.fit<- gbm(観客数~.,data=data1[,c(-1,-16)],distribution="gaussian",
var.monotone=c(0,0,0,0,0,0,0,0,0,0,0,0,0), n.trees=25000, interaction.depth=3,
cv.folds=10,shrinkage=0.005, bag.fraction=0.5)
# Find the optimal number of trees.
best.iter<-gbm.perf(gbm.fit, method="cv")
PRE<-predict(gbm.fit,data1,best.iter)
MSR<-sum((data1$観客数-PRE)^2/nrow(data1))
print(sqrt(MSR)) # 2238.88
print(best.iter) # 13655
summary(gbm.fit) # variable importance for the attendance prediction
# Predict with the fitted model.
PRE<-predict(gbm.fit,data2,best.iter)
ans<-data2
ans$観客数<-round(PRE)
# Keep only column 1 (ID) and column 15 (the attendance column appended above).
ans<-ans[,c(1,15)]
# Write the result to test.csv (no header, no row names).
write.table(ans,"test.csv",sep=",",col.names=FALSE,row.names=FALSE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
/// <summary>
/// Small, stateless converters for individual fields of the match CSV files.
/// </summary>
class ConvertInput
{
    /// <summary>
    /// Converts a one-character Japanese weekday name to a number.
    /// </summary>
    /// <param name="str">Weekday name: 月, 火, 水, 木, 金, 土 or 日.</param>
    /// <returns>1 for 月 through 7 for 日; 0 for any other input.</returns>
    public int convertWeek(String str)
    {
        switch (str)
        {
            case "月": return 1;
            case "火": return 2;
            case "水": return 3;
            case "木": return 4;
            case "金": return 5;
            case "土": return 6;
            case "日": return 7;
            default: return 0;
        }
    }

    /// <summary>
    /// Extracts the decimal digits of <paramref name="str"/> as an ASCII digit string.
    /// </summary>
    /// <remarks>
    /// ASCII digits are kept as they are, full-width digits (U+FF10 to U+FF19) are
    /// folded to their ASCII counterparts, and every other character is dropped.
    /// </remarks>
    /// <param name="str">Text that may mix digits with other characters.</param>
    /// <returns>The digits in their original order; empty when there are none.</returns>
    public String convertStrangeNum(String str)
    {
        StringBuilder digits = new StringBuilder(str.Length);
        foreach (char c in str)
        {
            if (c >= '0' && c <= '9')
            {
                digits.Append(c);
            }
            else if (c >= '\uFF10' && c <= '\uFF19')
            {
                // Full-width digit: shift it into the ASCII digit range.
                digits.Append((char)('0' + (c - '\uFF10')));
            }
        }
        return digits.ToString();
    }

    /// <summary>
    /// Converts a clock time to fractional hours.
    /// </summary>
    /// <param name="str">
    /// Time text with the hour in characters 0-1 and the minutes in characters 3-4;
    /// the character at index 2 is ignored.
    /// </param>
    /// <returns>hour + minutes / 60.</returns>
    public double convertTime(String str)
    {
        // Parse with the invariant culture so the result does not depend on the machine locale.
        double hour = double.Parse(str.Substring(0, 2), CultureInfo.InvariantCulture);
        double min = double.Parse(str.Substring(3, 2), CultureInfo.InvariantCulture);
        return hour + min / 60;
    }

    /// <summary>
    /// Splits a field on '/' characters, keeping empty entries.
    /// </summary>
    /// <param name="str">The '/'-separated text.</param>
    /// <returns>The parts in order; a single-element array when there is no '/'.</returns>
    public String[] csvSep(string str)
    {
        return str.Split('/');
    }
}
/// <summary>
/// Converts the raw match files into numeric feature tables.
/// </summary>
/// <remarks>
/// Inputs (all in the working directory): stadium.csv (read as Shift_JIS),
/// condition.csv, train.csv and test.csv.
/// Outputs: output.txt (training features), answer.txt (test features) and
/// preWeather.txt (weather values looked up for the test IDs).
/// Team, stadium and TV-station names are replaced by numbers assigned in order of
/// first appearance, training rows first and test rows second.
/// </remarks>
class optDataLunch
{
    private static readonly CultureInfo Invariant = CultureInfo.InvariantCulture;

    // TV-station numbers (order of first appearance) that set neither broadcast flag.
    // Per the original author's note these are the SKY PerfecTV entry (0), its "Hikari"
    // entries (2, 4, 6, 9, 13), further SKY PerfecTV spellings (34, 35, 81) and e2 (1, 38, 40).
    // The numbers are only meaningful for the data set the author ran this on.
    private static readonly HashSet<int> ExcludedTvNumbers = new HashSet<int>
    {
        0, 2, 4, 6, 9, 13, 34, 35, 81, 1, 38, 40
    };

    // Stadium names and capacities in registration order; duplicates are kept so the
    // console listing matches the input. StadiumNumbers maps a name to its FIRST entry.
    private static readonly List<string> StadiumNames = new List<string>();
    private static readonly List<int> StadiumCapacities = new List<int>();
    private static readonly Dictionary<string, int> StadiumNumbers = new Dictionary<string, int>();

    // Team name -> number and TV-station name -> number, both in order of first appearance.
    private static readonly Dictionary<string, int> TeamNumbers = new Dictionary<string, int>();
    private static readonly Dictionary<string, int> TvNumbers = new Dictionary<string, int>();

    // Match ID -> { temperature, humidity } as read from condition.csv.
    private static readonly Dictionary<string, double[]> Conditions = new Dictionary<string, double[]>();

    /// <summary>
    /// Loads the lookup tables, writes the training features, lists the known
    /// stadiums on the console, then writes the test features.
    /// </summary>
    static void Main()
    {
        LoadStadiums();
        LoadConditions();

        ConvertInput converter = new ConvertInput();
        WriteTrainingFeatures(converter);

        for (int i = 0; i < StadiumNames.Count; ++i)
        {
            Console.WriteLine(StadiumNames[i] + " " + StadiumCapacities[i]);
        }

        WriteTestFeatures(converter);
    }

    /// <summary>Reads stadium.csv: column 0 is the stadium name, column 2 its capacity.</summary>
    private static void LoadStadiums()
    {
        using (StreamReader reader = new StreamReader("stadium.csv", Encoding.GetEncoding("shift_jis")))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }
                string[] fields = line.Split(',');
                string name = fields[0];
                int capacity = int.Parse(fields[2], Invariant);

                // First occurrence wins for lookups, matching a front-to-back search.
                if (!StadiumNumbers.ContainsKey(name))
                {
                    StadiumNumbers.Add(name, StadiumNames.Count);
                }
                StadiumNames.Add(name);
                StadiumCapacities.Add(capacity);
            }
        }
    }

    /// <summary>
    /// Reads condition.csv: column 0 is the match ID, column 4 the temperature and
    /// column 5 the humidity followed by one trailing character that is stripped.
    /// </summary>
    private static void LoadConditions()
    {
        using (StreamReader reader = new StreamReader("condition.csv"))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }
                string[] fields = line.Split(',');
                double temperature = double.Parse(fields[4], Invariant);
                double humidity = double.Parse(fields[5].Substring(0, fields[5].Length - 1), Invariant);

                // Keep the first row for an ID if the file repeats it.
                if (!Conditions.ContainsKey(fields[0]))
                {
                    Conditions.Add(fields[0], new double[] { temperature, humidity });
                }
            }
        }
    }

    /// <summary>Converts train.csv into output.txt (features plus temperature and humidity).</summary>
    private static void WriteTrainingFeatures(ConvertInput converter)
    {
        using (StreamReader input = new StreamReader("train.csv"))
        using (StreamWriter output = new StreamWriter("output.txt"))
        {
            // Columns: ID, attendance, year, league, round, match day, month, day, time,
            // home, away, capacity, NHK, local broadcast, temperature, humidity.
            output.WriteLine("ID,観客数,年度,リーグ,節,第何日,月,日,時間,ホーム,アウェイ,収容人数,NHK,地方放送,気温,湿度");
            string line;
            while ((line = input.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }
                string[] fields = line.Split(',');
                // Rows without a condition.csv entry get empty weather fields instead of crashing.
                output.WriteLine(EncodeMatch(converter, fields, 1) + WeatherFields(fields[0]));
            }
        }
    }

    /// <summary>
    /// Converts test.csv into answer.txt (features) and preWeather.txt (ID plus the
    /// temperature and humidity found for it, or empty fields when none is known).
    /// </summary>
    private static void WriteTestFeatures(ConvertInput converter)
    {
        using (StreamReader input = new StreamReader("test.csv"))
        using (StreamWriter output = new StreamWriter("answer.txt"))
        using (StreamWriter weather = new StreamWriter("preWeather.txt"))
        {
            output.WriteLine("ID,年度,リーグ,節,第何日,月,日,時間,ホーム,アウェイ,収容人数,NHK,地方放送");
            weather.WriteLine("ID,天気,湿度");
            string line;
            while ((line = input.ReadLine()) != null)
            {
                if (line.Length == 0)
                {
                    continue;
                }
                string[] fields = line.Split(',');
                output.WriteLine(EncodeMatch(converter, fields, 0));
                weather.WriteLine(fields[0] + WeatherFields(fields[0]));
            }
        }
    }

    /// <summary>
    /// Builds the feature columns shared by the training and test outputs.
    /// </summary>
    /// <param name="converter">Field converters.</param>
    /// <param name="fields">One input row split on ','.</param>
    /// <param name="shift">
    /// 1 when the row has the attendance column right after the ID (train.csv layout),
    /// 0 when it does not (test.csv layout). Every later column index moves by this amount.
    /// </param>
    /// <returns>
    /// The leading columns copied from the input (ID, attendance if present, year), then
    /// league flag, round, match day, month, day, time, home number, away number,
    /// stadium capacity, NHK flag and local-broadcast flag, comma separated.
    /// </returns>
    private static string EncodeMatch(ConvertInput converter, string[] fields, int shift)
    {
        // League flag: "0" when the second character of the league field is "1", otherwise "1".
        string league = fields[2 + shift].Substring(1, 1) == "1" ? "0" : "1";

        // Date field: month in characters 0-1, day in characters 3-4.
        string date = fields[4 + shift];
        int month = int.Parse(date.Substring(0, 2), Invariant);
        int day = int.Parse(date.Substring(3, 2), Invariant);
        double time = converter.convertTime(fields[5 + shift]);

        // Registration order matters for the assigned numbers: stadium, home, away, TV.
        int stadium = NumberOfStadium(fields[8 + shift]);
        int home = NumberOf(TeamNumbers, fields[6 + shift]);
        int away = NumberOf(TeamNumbers, fields[7 + shift]);

        // Round field: the text between the first character and '節' is the round; the
        // text two characters after '節' onwards is the match day. Only digits are kept.
        string round = fields[3 + shift];
        int mark = round.IndexOf('節');
        string term = "";
        string gameNo = round;
        if (mark >= 0)
        {
            term = mark > 0 ? round.Substring(1, mark - 1) : "";
            gameNo = mark + 2 <= round.Length ? round.Substring(mark + 2) : "";
        }
        term = converter.convertStrangeNum(term);
        gameNo = converter.convertStrangeNum(gameNo);

        // Broadcast flags: stations on the exclusion list set nothing, stations whose
        // name starts with "NHK" set the NHK flag, every other station sets the local flag.
        bool nhk = false;
        bool local = false;
        foreach (string station in converter.csvSep(fields[9 + shift]))
        {
            int tvNumber = NumberOf(TvNumbers, station);
            if (ExcludedTvNumbers.Contains(tvNumber))
            {
                continue;
            }
            if (station.StartsWith("NHK", StringComparison.Ordinal))
            {
                nhk = true;
            }
            else
            {
                local = true;
            }
        }

        StringBuilder row = new StringBuilder();
        for (int i = 0; i < 2 + shift; ++i)
        {
            row.Append(fields[i]).Append(',');
        }
        row.Append(league);
        row.Append(',').Append(term);
        row.Append(',').Append(gameNo);
        row.Append(',').Append(month);
        row.Append(',').Append(day);
        row.Append(',').Append(time.ToString(Invariant));
        row.Append(',').Append(home);
        row.Append(',').Append(away);
        row.Append(',').Append(StadiumCapacities[stadium]);
        row.Append(',').Append(nhk ? 1 : 0);
        row.Append(',').Append(local ? 1 : 0);
        return row.ToString();
    }

    /// <summary>
    /// Returns ",temperature,humidity" for a match ID, or ",," when condition.csv had no row for it.
    /// </summary>
    private static string WeatherFields(string id)
    {
        double[] values;
        if (Conditions.TryGetValue(id, out values))
        {
            return "," + values[0].ToString(Invariant) + "," + values[1].ToString(Invariant);
        }
        return ",,";
    }

    /// <summary>Returns the number assigned to a name, assigning the next free number on first sight.</summary>
    private static int NumberOf(Dictionary<string, int> table, string name)
    {
        int number;
        if (!table.TryGetValue(name, out number))
        {
            number = table.Count;
            table.Add(name, number);
        }
        return number;
    }

    /// <summary>
    /// Returns the index of a stadium, registering unknown stadiums with capacity 0.
    /// </summary>
    private static int NumberOfStadium(string name)
    {
        int number;
        if (!StadiumNumbers.TryGetValue(name, out number))
        {
            number = StadiumNames.Count;
            StadiumNumbers.Add(name, number);
            StadiumNames.Add(name);
            StadiumCapacities.Add(0);
        }
        return number;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment