Skip to content

Instantly share code, notes, and snippets.

@tommyfms2
Last active October 7, 2017 02:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tommyfms2/35385b5f7f67724e8a7e2c672b346d58 to your computer and use it in GitHub Desktop.
Save tommyfms2/35385b5f7f67724e8a7e2c672b346d58 to your computer and use it in GitHub Desktop.
chainerのデータセットの作り方 LinearやCNN ref: http://qiita.com/tommyfms2/items/c3fa0cb258c17468cb30
...
0,0,11,1,388,484,236,268,500,260,212,392,324,220,216,412,204,244,252,292,4,447,403,589,471,434,448,450,430,410,4,434,448,450,430,410,410,410,410,410,1
0,30,11,0,308,368,324,264,372,384,276,216,372,248,212,192,260,204,208,192,4,434,448,450,430,410,410,410,410,410,4,560,220,238,217,305,267,231,202,185,5
0,0,30,1,216,264,268,236,248,272,244,216,284,236,232,180,280,236,188,188,4,560,220,238,217,305,267,231,202,185,4,305,267,231,202,185,185,185,185,185,3
0,30,5,0,220,192,188,188,184,196,204,184,208,188,188,168,204,200,192,160,4,305,267,231,202,185,185,185,185,185,4,418,418,418,418,418,418,418,418,418,5
...
# Fetch MNIST through Chainer; the call returns a (train, test) pair of datasets.
train, test = chainer.datasets.get_mnist()
# Print the first training example to inspect its structure: the sample output
# shows a tuple of (float32 pixel array, integer label).
print(train[0])
python train_mnist.py
(array([ 0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0.71764708, 0.99215692, 0.99215692, 0.81176478, 0.00784314,
...
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. , 0. ,
0. , 0. , 0. , 0. ], dtype=float32), 5)
from chainer.datasets import tuple_dataset
from PIL import Image
import numpy as np
import glob
# Build Chainer train/test datasets from directories of images.
#
# Each entry is (directory, class label): every file found in the directory is
# loaded as an image and tagged with that label.
pathsAndLabels = [
    ("./imageDirectory0/", 0),
    ("./imageDirectory1/", 1),
    ("./imageDirectory2/", 2),
]

# Gather every (file path, label) pair, then shuffle so that both the train
# and the test split end up with a mix of all classes.
allData = []
for path, label in pathsAndLabels:
    for imgName in glob.glob(path + "*"):
        allData.append([imgName, label])
# NOTE: np.random.permutation returns a string array here (paths and labels
# share one dtype), so the label is parsed back to an integer below.
allData = np.random.permutation(allData)

imageData = []
labelData = []
for imgName, label in allData:
    # convert("RGB") guarantees exactly three bands, so RGBA, grayscale and
    # palette images no longer break the channel split; the context manager
    # closes the underlying file once the pixel data has been read.
    with Image.open(imgName) as img:
        bands = img.convert("RGB").split()
    # Stack the separate red, green and blue planes into one
    # (channel, height, width) float32 array scaled to [0, 1].
    imgData = np.asarray(
        [np.asarray(band, dtype=np.float32) / 255.0 for band in bands]
    )
    imageData.append(imgData)
    labelData.append(np.int32(label))

# The first 7/8 of the shuffled samples are used for training; the rest are
# held out for testing.
threshold = np.int32(len(imageData) / 8 * 7)
train = tuple_dataset.TupleDataset(imageData[0:threshold], labelData[0:threshold])
test = tuple_dataset.TupleDataset(imageData[threshold:], labelData[threshold:])
# ここが、上記したポイント部分
# Image.openで取り入れた画像のデータは
# [ [ [r,g,b], [r,g,b], ... ,[r,g,b] ],
# [ [r,g,b], [r,g,b], ... ,[r,g,b] ],
# ...
# [ [r,g,b], [r,g,b], ... ,[r,g,b] ] ]
# のようにrgbがおなじ位置に入ってる感じなのですが、これをそれぞれ赤の画像、緑の画像、青の画像の様にして行列にしないといけません。
# 以下のようなデータにする
# [ [ [ r, r, r, ... ,r ], ここから
# ...
# [ r, r, r, ... ,r ] ], ここまでが赤の画像
#
# [ [ g, g, g, ... ,g ], ここから
# ...
# [ g, g, g, ... ,g ] ], ここまでが緑の画像
#
# [ [ b, b, b, ... ,b ], ここから
# ...
# [ b, b, b, ... ,b ] ] ] ここまでが青の画像
# Build Chainer train/test datasets from the comma-separated file data.txt.
#
# Each line holds integers: the first `input_num` columns are the features
# (divided by 255.0) and column `input_num` is the class label.
# `input_num` must already be defined before this point.
train_data = []
train_label = []
with open("data.txt") as data_raw:  # closed automatically after parsing
    for line in data_raw:
        if not line.strip():
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(",")
        features = np.array(
            [int(x) / 255.0 for x in fields[0:input_num]], dtype=np.float32
        )
        label = np.int32(fields[input_num])
        train_data.append(features)
        train_label.append(label)

# Split the rows that were just parsed (not the image lists from the image
# example): the first 9/10 are used for training, the rest for testing.
threshold = np.int32(len(train_data) / 10 * 9)
train = tuple_dataset.TupleDataset(train_data[0:threshold], train_label[0:threshold])
test = tuple_dataset.TupleDataset(train_data[threshold:], train_label[threshold:])
...
# Load the MNIST dataset as a (train, test) pair of datasets.
train, test = chainer.datasets.get_mnist()
...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment