Skip to content

Instantly share code, notes, and snippets.

@takatakamanbou
Last active August 29, 2015 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takatakamanbou/413467c6c36177c5d6e3 to your computer and use it in GitHub Desktop.
Save takatakamanbou/413467c6c36177c5d6e3 to your computer and use it in GitHub Desktop.
cifar10.py & cifar10_sub150823.py
import numpy as np
import scipy as sp
import os
import cPickle
class CIFAR10( object ):
    """Reader for the pickled CIFAR-10 batch files stored in one directory.

    The directory is expected to contain 'batches.meta', 'data_batch_1' ..
    'data_batch_5' and 'test_batch'.

    NOTE: the files are read with pickle, which can execute arbitrary code
    while loading; only point this class at trusted dataset files.
    """

    def __init__( self, dirname ):
        """Remember the dataset directory and read 'batches.meta'.

        dirname: directory that holds the CIFAR-10 batch files.

        Sets self.path, self.meta (the unpickled meta dictionary) and
        self.nclass (the number of entries in meta['label_names']), then
        prints a short summary.
        """
        self.path = dirname
        # pickles are binary data, so the file is opened in 'rb' mode;
        # the with-statement closes it even if unpickling fails
        with open( os.path.join( self.path, 'batches.meta' ), 'rb' ) as f_meta:
            self.meta = cPickle.load( f_meta )
        self.nclass = len( self.meta['label_names'] )
        # one pre-formatted string per print, so the output is the same
        # whether print is a statement or a function
        print( '##### CIFAR-10 #####' )
        print( '# label_names = %s' % ( self.meta['label_names'], ) )
        print( '# num_vis =  %s' % ( self.meta['num_vis'], ) )

    def _loadBatch( self, fn ):
        """Load one batch file and return ( data, labels ).

        fn: file name of the batch, relative to self.path.

        Returns d['data'] as stored in the file and d['labels'] converted
        to a numpy array.
        """
        with open( os.path.join( self.path, fn ), 'rb' ) as f:
            d = cPickle.load( f )
        data = d['data']      # 10000 x 3072 ( 3072 = 3 x 32 x 32 ), uint8s
        labels = d['labels']  # 10000-dim, in { 0, 1, ..., 9 }
        return data, np.array( labels )

    def _loadL( self ):
        """Load the five training batches, concatenated along the sample axis."""
        batches = [ self._loadBatch( 'data_batch_%d' % i ) for i in range( 1, 6 ) ]
        dataList = [ d for d, _ in batches ]
        labelsList = [ l for _, l in batches ]
        return np.vstack( dataList ), np.hstack( labelsList )

    def _loadT( self ):
        """Load the test batch."""
        return self._loadBatch( 'test_batch' )

    ##### loading the data
    #
    def loadData( self, LT ):
        """Load the training ( LT == 'L' ) or the test ( anything else ) data.

        Returns ( X, lab, t ):
          X   : float array reshaped to N x 3 x 32 x 32 ( values are not rescaled )
          lab : integer class labels, length N
          t   : N x nclass boolean array, t[n, k] is True iff lab[n] == k
        """
        if LT == 'L':
            dat, lab = self._loadL()
        else:
            dat, lab = self._loadT()
        X = np.asarray( dat, dtype = float ).reshape( ( -1, 3, 32, 32 ) )
        # one-hot encoding by broadcasting the labels against the class ids
        t = lab[:, np.newaxis] == np.arange( self.nclass )
        return X, lab, t

    ##### generating the index of training & validation data
    #
    def genIndexLV( self, lab, seed = 0, nvalid = 1000 ):
        """Split the samples into training and validation sets.

        lab    : integer class labels, length N
        seed   : seed passed to np.random.seed ( this reseeds the global
                 numpy random generator )
        nvalid : number of validation samples picked per class ( at most;
                 a class with fewer samples contributes all of them )

        Returns ( idxL, idxV ), two boolean masks of length N that are
        complements of each other; idxV marks the validation samples.
        """
        np.random.seed( seed )
        idx = np.random.permutation( lab.shape[0] )
        idxV = np.zeros( lab.shape[0], dtype = bool )
        shuffled = lab[idx]
        # selecting the first nvalid images of each class in the shuffled order
        for ik in range( self.nclass ):
            i = np.where( shuffled == ik )[0][:nvalid]
            idxV[idx[i]] = True
        # logical complement; unary minus is not a valid way to negate booleans
        idxL = ~idxV
        return idxL, idxV
if __name__ == "__main__":
    # Demo: tile the first `nimg` training images of every class into a
    # single picture ( one row per class ) and save it as 'hoge.png'.
    import cv2

    dirCIFAR10 = './cifar10/cifar-10-batches-py'
    cifar10 = CIFAR10( dirCIFAR10 )
    dataL, labelsL = cifar10._loadL()

    # tile geometry: nclass rows x nimg columns of h x w images, `gap` pixels apart
    w = h = 32
    nclass = 10
    nimg = 10
    gap = 4
    width = nimg * ( w + gap ) + gap
    height = nclass * ( h + gap ) + gap

    # gray ( 128 ) canvas
    img = np.ones( ( height, width, 3 ), dtype = int ) * 128

    for iy in range( nclass ):
        lty = gap + iy * ( h + gap )
        members = np.where( labelsL == iy )[0]
        for ix in range( nimg ):
            ltx = gap + ix * ( w + gap )
            planes = dataL[members[ix], :].reshape( ( 3, h, w ) )
            # BGR <= RGB : the channel planes are written in reversed order
            for c in range( 3 ):
                img[lty:lty+h, ltx:ltx+w, c] = planes[2 - c, :, :]

    cv2.imwrite( 'hoge.png', img )
import numpy as np
import scipy as sp
# ZCA whitening
def ZCAtrans( Xraw, Uzca = None, eps = 0.001 ):
    """Apply ZCA whitening to a 4-dimensional data array.

    Xraw : 4-dimensional array ( e.g. N x 3 x 32 x 32 ); every sample is
           flattened to a row vector. Assumed to be zero-mean.
    Uzca : whitening matrix obtained from a previous call, or None to
           estimate it from Xraw.
    eps  : constant added to the eigenvalues before taking the square
           root ( only used when Uzca is None ).

    Returns ( X, Uzca ) when Uzca is None, and only X when a matrix is
    given; X has the same shape as Xraw.
    """
    assert Xraw.ndim == 4   # Xraw is assumed to be N x 3 x 32 x 32
    Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )

    # `is not None` rather than `!= None`: comparing an array with None is
    # elementwise, and its truth value is ambiguous
    if Uzca is not None:
        return np.dot( Xraw2, Uzca ).reshape( Xraw.shape )

    # covariance matrix; float() keeps the division a true division even
    # for integer input
    C = np.dot( Xraw2.T, Xraw2 ) / float( Xraw2.shape[0] )
    U, eva, _ = np.linalg.svd( C )   # U[:, i] is the i-th eigenvector
    sqeva = np.sqrt( eva + eps )
    Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )
    X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )
    return X, Uzca
# mini-batch indices for stochastic gradient ascent
def makebatchindex( N, batchsize ):
    """Randomly partition N samples into mini batches.

    N         : number of samples
    batchsize : number of samples per batch ( the last batch may be smaller )

    Returns a boolean array of shape ( nbatch, N ) with
    nbatch = ceil( N / batchsize ); row ib is the mask of the samples in
    batch ib. For N == 0 the result is an empty ( 0, 0 ) array.
    """
    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    # slicing past the end of idx is safe, so the last ( possibly shorter )
    # batch needs no special case
    for ib in range( nbatch ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True
    return idxB
# making translated and horizontally flipped images
def translate( Xsrc, dstshape ):
    """Randomly crop ( translate ) and horizontally flip each image.

    Xsrc     : 4-dimensional array, N x channels x height x width
               ( e.g. N x 3 x 32 x 32 )
    dstshape : ( h, w ), size of the cropped images; must not exceed the
               source size

    Every image gets its own random offset and is flipped horizontally
    with probability 0.5. Returns a new N x channels x h x w array of the
    same dtype as Xsrc. Raises ValueError if dstshape is larger than the
    source images.
    """
    assert Xsrc.ndim == 4   # Xsrc is assumed to be N x 3 x 32 x 32
    N, nch, H, W = Xsrc.shape
    h, w = dstshape
    tmax_x, tmax_y = W - w, H - h
    if tmax_x < 0 or tmax_y < 0:
        raise ValueError( 'dstshape must not be larger than the source images' )

    # the upper bound of randint is exclusive; + 1 makes every valid offset
    # ( 0 .. tmax ) reachable and keeps a full-size crop ( tmax == 0 ) legal
    tx = np.random.randint( 0, tmax_x + 1, N )
    ty = np.random.randint( 0, tmax_y + 1, N )
    noflip = np.random.random_sample( N ) < 0.5

    Xdst = np.empty( ( N, nch, h, w ), dtype = Xsrc.dtype )
    for n in range( N ):
        crop = Xsrc[n, :, ty[n]:ty[n]+h, tx[n]:tx[n]+w]
        # the flipped image is the mirror of exactly the same window
        Xdst[n, :, :, :] = crop if noflip[n] else crop[:, :, ::-1]
    return Xdst
# making translated and horizontally flipped images
def translate2( Xsrc, dstshape ):
    """Randomly crop ( translate ) and horizontally flip a whole batch.

    Xsrc     : 4-dimensional array, N x channels x height x width
               ( e.g. N x 3 x 32 x 32 )
    dstshape : ( h, w ), size of the cropped images; must not exceed the
               source size

    One random offset and one flip decision ( probability 0.5 ) are drawn
    and applied to all images. The result is a slice ( view ) of Xsrc with
    shape N x channels x h x w. Raises ValueError if dstshape is larger
    than the source images.
    """
    assert Xsrc.ndim == 4   # Xsrc is assumed to be N x 3 x 32 x 32
    H, W = Xsrc.shape[2], Xsrc.shape[3]
    h, w = dstshape
    tmax_x, tmax_y = W - w, H - h
    if tmax_x < 0 or tmax_y < 0:
        raise ValueError( 'dstshape must not be larger than the source images' )

    # the upper bound of randint is exclusive; + 1 makes every valid offset
    # ( 0 .. tmax ) reachable and keeps a full-size crop ( tmax == 0 ) legal
    tx = np.random.randint( 0, tmax_x + 1 )
    ty = np.random.randint( 0, tmax_y + 1 )
    noflip = np.random.random_sample() < 0.5

    crop = Xsrc[:, :, ty:ty+h, tx:tx+w]
    if noflip:
        return crop
    # the flipped batch is the mirror of exactly the same window
    return crop[:, :, :, ::-1]
# clipping the center of the image w/o horizontal flip
def clipcenter( Xsrc, dstshape ):
    """Clip the central h x w region of every image ( no flipping ).

    Xsrc     : 4-dimensional array, N x channels x height x width
               ( e.g. N x 3 x 32 x 32 )
    dstshape : ( h, w ), size of the clipped region

    Returns a slice ( view ) of Xsrc with shape N x channels x h x w.
    """
    assert Xsrc.ndim == 4   # Xsrc is assumed to be N x 3 x 32 x 32
    H, W = Xsrc.shape[2], Xsrc.shape[3]
    h, w = dstshape
    # floor division keeps the offsets integers, as slice indices must be
    ty, tx = ( H - h ) // 2, ( W - w ) // 2
    return Xsrc[:, :, ty:ty+h, tx:tx+w]
if __name__ == "__main__":
    # Demo: for every class, crop / flip images taken from the first 500
    # training samples with translate() and tile the first `nimg` results
    # into one picture ( one row per class ), saved as 'hoge.png'.
    import cv2
    import cifar10

    dirCIFAR10 = './cifar10/cifar-10-batches-py'
    c10 = cifar10.CIFAR10( dirCIFAR10 )
    dataL, labelsL = c10._loadL()
    Xorg = dataL[:500].reshape( ( -1, 3, 32, 32 ) )
    lab = labelsL[:500]

    # tile geometry: nclass rows x nimg columns of h x w crops, `gap` pixels apart
    w = h = 24
    nclass = 10
    nimg = 10
    gap = 4
    width = nimg * ( w + gap ) + gap
    height = nclass * ( h + gap ) + gap

    # gray ( 128 ) canvas
    img = np.ones( ( height, width, 3 ), dtype = int ) * 128

    for iy in range( nclass ):
        lty = gap + iy * ( h + gap )
        members = np.where( lab == iy )[0]
        X = translate( Xorg[members], ( h, w ) )
        for ix in range( nimg ):
            ltx = gap + ix * ( w + gap )
            planes = X[ix]
            # BGR <= RGB : the channel planes are written in reversed order
            for c in range( 3 ):
                img[lty:lty+h, ltx:ltx+w, c] = planes[2 - c, :, :]

    cv2.imwrite( 'hoge.png', img )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment