takatakamanbou/00readme.md Secret

## 00readme.md

      
    Raw
  

              00readme.md
            
          
    cifar10.py


see http://takatakamanbou.hatenablog.com/entry/2015/08/24/173834

cifar10_sub150823.py


see http://takatakamanbou.hatenablog.com/entry/2015/08/24/173834


## cifar10.py
import numpy as np
import scipy as sp
import os
import cPickle


class CIFAR10( object ):
    """Loader for the python-pickled CIFAR-10 batches stored in one directory.

    The constructor only reads ``batches.meta`` and prints a short summary;
    the image batches are read on demand by the load methods.
    """

    def __init__( self, dirname ):
        """Read ``batches.meta`` from ``dirname`` and print its contents.

        dirname -- directory holding ``batches.meta``, ``data_batch_1`` ..
                   ``data_batch_5`` and ``test_batch``
        """

        self.path = dirname
        # NOTE: unpickling executes whatever the file describes, so point this
        # class only at dataset files from a trusted source.
        # Pickles are binary data, hence 'rb'; 'with' closes the file even
        # when unpickling fails.
        with open( os.path.join( self.path, 'batches.meta' ), 'rb' ) as f_meta:
            self.meta = cPickle.load( f_meta )
        self.nclass = len( self.meta['label_names'] )

        # One pre-formatted string per call prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print( '##### CIFAR-10 #####' )
        print( '# label_names = %s' % ( self.meta['label_names'], ) )
        print( '# num_vis =  %s' % ( self.meta['num_vis'], ) )


    def _loadBatch( self, fn ):
        """Unpickle the batch file ``fn`` and return ``( data, labels )``.

        ``data`` is returned exactly as stored under the 'data' key;
        ``labels`` is the stored 'labels' entry converted to an ndarray.
        """

        with open( os.path.join( self.path, fn ), 'rb' ) as f:
            d = cPickle.load( f )
        data   = d['data']   # 10000 x 3072 ( 3072 = 3 x 32 x 32 ), uint8s
        labels = d['labels'] # 10000-dim, in { 0, 1, ..., 9 }

        return data, np.array( labels )


    def _loadL( self ):
        """Load ``data_batch_1`` .. ``data_batch_5`` and concatenate them.

        Returns ``( data, labels )`` with the batches stacked in file order.
        """

        batches = [ self._loadBatch( 'data_batch_%d' % i ) for i in range( 1, 6 ) ]
        dataList   = [ d for d, _ in batches ]
        labelsList = [ l for _, l in batches ]

        return np.vstack( dataList ), np.hstack( labelsList )


    def _loadT( self ):
        """Load the test batch and return ``( data, labels )``."""

        return self._loadBatch( 'test_batch' )


    ##### loading the data
    #
    def loadData( self, LT ):
        """Load the training or the test set as arrays.

        LT -- 'L' selects the five training batches; any other value selects
              the test batch

        Returns ``( X, lab, t )``:
          X   -- float array reshaped to N x 3 x 32 x 32; the pixel values are
                 left as stored (no division by 255)
          lab -- integer class labels, one per image
          t   -- boolean N x nclass one-hot encoding of ``lab``
        """

        if LT == 'L':
            dat, lab = self._loadL()
        else:
            dat, lab = self._loadT()

        X = np.asarray( dat, dtype = float ).reshape( ( -1, 3, 32, 32 ) )
        # t[n, k] is True exactly when image n carries label k
        t = lab[:, np.newaxis] == np.arange( self.nclass )[np.newaxis, :]

        return X, lab, t


    ##### generating the index of training & validation data
    #
    def genIndexLV( self, lab, seed = 0, nvalid = 1000 ):
        """Split the samples into a training part and a validation part.

        lab    -- 1-dim array of class labels
        seed   -- value passed to np.random.seed (this reseeds NumPy's
                  global generator)
        nvalid -- number of validation samples picked per class; a class with
                  fewer samples contributes all of them

        Returns ``( idxL, idxV )``, two boolean masks over the samples that
        are each other's complement: idxV marks validation, idxL training.
        """

        np.random.seed( seed )
        idx = np.random.permutation( lab.shape[0] )
        idxV = np.zeros( lab.shape[0], dtype = bool )

        # the first nvalid images of each class, in shuffled order
        labShuffled = lab[idx]
        for ik in range( self.nclass ):
            i = np.where( labShuffled == ik )[0][:nvalid]
            idxV[idx[i]] = True

        # '~' is the logical complement; unary minus is rejected by NumPy
        # for boolean arrays
        idxL = ~idxV

        return idxL, idxV


if __name__ == "__main__":

    import cv2

    # Demo: tile the first `nimg` training images of every class into one
    # picture (one class per row) and write it to 'hoge.png'.
    dirCIFAR10 = './cifar10/cifar-10-batches-py'
    cifar10 = CIFAR10( dirCIFAR10 )
    dataL, labelsL = cifar10._loadL()

    h = w = 32
    nclass, nimg, gap = 10, 10, 4

    # every tile is followed by a gap, plus one leading gap per axis
    height = gap + nclass * ( h + gap )
    width  = gap + nimg * ( w + gap )
    img = np.zeros( ( height, width, 3 ), dtype = int )
    img += 128  # the gaps keep this background value

    for row in range( nclass ):
        top = gap + row * ( h + gap )
        members = np.where( labelsL == row )[0]
        for col in range( nimg ):
            left = gap + col * ( w + gap )
            rgb = dataL[members[col], :].reshape( ( 3, h, w ) )
            # BGR <= RGB: reverse the channel axis, then move it last
            img[top:top+h, left:left+w, :] = rgb[::-1].transpose( 1, 2, 0 )

    cv2.imwrite( 'hoge.png', img )


## cifar10_sub150823.py
import numpy as np
import scipy as sp


# ZCA whitening
def ZCAtrans( Xraw, Uzca = None ):
    """Apply ZCA whitening to a batch of images.

    Xraw -- 4-dim array (N x 3 x 32 x 32 in this project), assumed to be
            zero-mean; it is flattened to N x D internally
    Uzca -- D x D whitening matrix from an earlier call, or None to
            estimate the matrix from Xraw itself

    Returns ``( X, Uzca )`` when Uzca was None, otherwise ``X`` alone; X has
    the shape of Xraw.
    """

    assert Xraw.ndim == 4  # Xraw is assumed to be N x 3 x 32 x 32

    Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )

    # 'is None', not '== None': '==' on an ndarray compares elementwise and
    # the resulting array has no single truth value.
    estimate = Uzca is None
    if estimate:
        # Xraw is assumed to be zero-mean; the float divisor keeps integer
        # input from being floor-divided under Python 2
        C = np.dot( Xraw2.T, Xraw2 ) / float( Xraw2.shape[0] )
        U, eva, _ = np.linalg.svd( C )  # U[:, i] is the i-th eigenvector
        # the small constant keeps near-zero eigenvalues from blowing up
        sqeva = np.sqrt( eva + 0.001 )
        Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )

    X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )

    if estimate:
        return X, Uzca
    return X


# mini batch indices for stochastic gradient ascent
def makebatchindex( N, batchsize ):
    """Randomly partition N samples into mini batches.

    N         -- number of samples
    batchsize -- samples per batch; the last batch holds the remainder when
                 N is not a multiple of batchsize

    Returns a boolean array of shape ( nbatch, N ) with
    nbatch = ceil( N / batchsize ); row ib is the mask of batch ib, and every
    sample belongs to exactly one row.  N == 0 yields an empty ( 0, 0 ) array.
    """

    idx = np.random.permutation( N )
    nbatch = int( np.ceil( float( N ) / batchsize ) )
    idxB = np.zeros( ( nbatch, N ), dtype = bool )
    # Slices clamp at the end of idx, so the short last batch needs no
    # special case and nbatch == 0 simply skips the loop.
    for ib in range( nbatch ):
        idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True

    return idxB


# making translated and horizontally flipped images
def translate( Xsrc, dstshape ):
    """Randomly crop, and with probability 1/2 horizontally flip, each image.

    Xsrc     -- 4-dim array, N x C x H x W ( N x 3 x 32 x 32 for CIFAR-10 )
    dstshape -- ( h, w ) of the crops; needs h <= H and w <= W

    Returns a new N x C x h x w array with the dtype of Xsrc.  Every image
    gets its own crop offset and its own flip decision.
    Raises ValueError when dstshape does not fit into the source images.
    """

    assert Xsrc.ndim == 4  # Xsrc is assumed to be N x 3 x 32 x 32

    N, C, H, W = Xsrc.shape
    h, w = dstshape
    if h > H or w > W:
        raise ValueError( 'dstshape %s does not fit into %d x %d images'
                          % ( ( h, w ), H, W ) )

    # randint excludes its upper bound: the +1 makes the largest offset
    # reachable and keeps a full-size crop (only offset 0) legal.
    tx = np.random.randint( 0, W - w + 1, N )
    ty = np.random.randint( 0, H - h + 1, N )
    hf = np.random.random_sample( N ) < 0.5

    Xdst = np.empty( ( N, C, h, w ), dtype = Xsrc.dtype )

    for n in range( N ):
        crop = Xsrc[n, :, ty[n]:ty[n]+h, tx[n]:tx[n]+w]
        # flipping mirrors this very window (not one shifted by a column)
        Xdst[n] = crop[:, :, ::-1] if hf[n] else crop

    return Xdst


# making translated and horizontally flipped images
# ( one offset and one flip decision shared by the whole batch )
def translate2( Xsrc, dstshape ):
    """Crop, and with probability 1/2 horizontally flip, a whole batch at once.

    Unlike a per-image augmentation, a single random offset and a single
    flip decision are drawn and applied to every image of the batch.

    Xsrc     -- 4-dim array, N x C x H x W ( N x 3 x 32 x 32 for CIFAR-10 )
    dstshape -- ( h, w ) of the crop; needs h <= H and w <= W

    Returns an N x C x h x w array obtained from Xsrc by slicing only (no
    copy is made here).
    Raises ValueError when dstshape does not fit into the source images.
    """

    assert Xsrc.ndim == 4  # Xsrc is assumed to be N x 3 x 32 x 32

    H, W = Xsrc.shape[2:]
    h, w = dstshape
    if h > H or w > W:
        raise ValueError( 'dstshape %s does not fit into %d x %d images'
                          % ( ( h, w ), H, W ) )

    # randint excludes its upper bound: the +1 makes the largest offset
    # reachable and keeps a full-size crop (only offset 0) legal.
    tx = np.random.randint( 0, W - w + 1 )
    ty = np.random.randint( 0, H - h + 1 )
    hf = np.random.random_sample() < 0.5

    crop = Xsrc[:, :, ty:ty+h, tx:tx+w]
    # flipping mirrors this very window (not one shifted by a column)
    if hf:
        return crop[:, :, :, ::-1]
    return crop


#  clipping the center of the image w/o horizontal flip
def clipcenter( Xsrc, dstshape ):
    """Cut the central h x w window out of every image, without flipping.

    Xsrc     -- 4-dim array, N x C x H x W ( N x 3 x 32 x 32 for CIFAR-10 )
    dstshape -- ( h, w ) of the window

    Returns the slice Xsrc[:, :, ty:ty+h, tx:tx+w] where ( ty, tx ) is the
    margin ( H - h ) // 2, ( W - w ) // 2; when a margin is odd the extra
    row/column is left at the bottom/right.
    """

    assert Xsrc.ndim == 4  # Xsrc is assumed to be N x 3 x 32 x 32

    H, W = Xsrc.shape[2:]
    h, w = dstshape
    # '//' keeps the offsets integral on Python 2 and 3 alike; '/' would
    # produce floats on Python 3, which cannot be used as slice bounds
    ty, tx = ( H - h ) // 2, ( W - w ) // 2

    return Xsrc[:, :, ty:ty+h, tx:tx+w]


if __name__ == "__main__":

    import cv2
    import cifar10

    # Demo: for each class, randomly crop/flip its images among the first
    # 500 training samples and tile the first `nimg` results into one row of
    # 'hoge.png'.
    dirCIFAR10 = './cifar10/cifar-10-batches-py'
    c10 = cifar10.CIFAR10( dirCIFAR10 )
    dataL, labelsL = c10._loadL()
    nsample = 500
    Xorg = dataL[:nsample].reshape( ( -1, 3, 32, 32 ) )
    lab  = labelsL[:nsample]

    h = w = 24
    nclass, nimg, gap = 10, 10, 4

    # every tile is followed by a gap, plus one leading gap per axis
    height = gap + nclass * ( h + gap )
    width  = gap + nimg * ( w + gap )
    img = np.zeros( ( height, width, 3 ), dtype = int )
    img += 128  # the gaps keep this background value

    for row in range( nclass ):
        top = gap + row * ( h + gap )
        members = np.where( lab == row )[0]
        X = translate( Xorg[members], ( h, w ) )
        for col in range( nimg ):
            left = gap + col * ( w + gap )
            # BGR <= RGB: reverse the channel axis, then move it last
            img[top:top+h, left:left+w, :] = X[col][::-1].transpose( 1, 2, 0 )

    cv2.imwrite( 'hoge.png', img )
	import numpy as np
	import scipy as sp
	import os
	import cPickle


	class CIFAR10( object ):
		"""Loader for the python-pickled CIFAR-10 batches stored in one directory.

		The constructor only reads ``batches.meta`` and prints a short summary;
		the image batches are read on demand by the load methods.
		"""

		def __init__( self, dirname ):
			"""Read ``batches.meta`` from ``dirname`` and print its contents.

			dirname -- directory holding ``batches.meta``, ``data_batch_1`` ..
			           ``data_batch_5`` and ``test_batch``
			"""

			self.path = dirname
			# NOTE: unpickling executes whatever the file describes, so point
			# this class only at dataset files from a trusted source.
			# Pickles are binary data, hence 'rb'; 'with' closes the file even
			# when unpickling fails.
			with open( os.path.join( self.path, 'batches.meta' ), 'rb' ) as f_meta:
				self.meta = cPickle.load( f_meta )
			self.nclass = len( self.meta['label_names'] )

			# One pre-formatted string per call prints the same text whether
			# print is a statement (Python 2) or a function (Python 3).
			print( '##### CIFAR-10 #####' )
			print( '# label_names = %s' % ( self.meta['label_names'], ) )
			print( '# num_vis =  %s' % ( self.meta['num_vis'], ) )


		def _loadBatch( self, fn ):
			"""Unpickle the batch file ``fn`` and return ``( data, labels )``.

			``data`` is returned exactly as stored under the 'data' key;
			``labels`` is the stored 'labels' entry converted to an ndarray.
			"""

			with open( os.path.join( self.path, fn ), 'rb' ) as f:
				d = cPickle.load( f )
			data = d['data'] # 10000 x 3072 ( 3072 = 3 x 32 x 32 ), uint8s
			labels = d['labels'] # 10000-dim, in { 0, 1, ..., 9 }

			return data, np.array( labels )


		def _loadL( self ):
			"""Load ``data_batch_1`` .. ``data_batch_5`` and concatenate them.

			Returns ``( data, labels )`` with the batches stacked in file order.
			"""

			batches = [ self._loadBatch( 'data_batch_%d' % i ) for i in range( 1, 6 ) ]
			dataList = [ d for d, _ in batches ]
			labelsList = [ l for _, l in batches ]

			return np.vstack( dataList ), np.hstack( labelsList )


		def _loadT( self ):
			"""Load the test batch and return ``( data, labels )``."""

			return self._loadBatch( 'test_batch' )


		##### loading the data
		#
		def loadData( self, LT ):
			"""Load the training or the test set as arrays.

			LT -- 'L' selects the five training batches; any other value
			      selects the test batch

			Returns ``( X, lab, t )``:
			  X   -- float array reshaped to N x 3 x 32 x 32; the pixel values
			         are left as stored (no division by 255)
			  lab -- integer class labels, one per image
			  t   -- boolean N x nclass one-hot encoding of ``lab``
			"""

			if LT == 'L':
				dat, lab = self._loadL()
			else:
				dat, lab = self._loadT()

			X = np.asarray( dat, dtype = float ).reshape( ( -1, 3, 32, 32 ) )
			# t[n, k] is True exactly when image n carries label k
			t = lab[:, np.newaxis] == np.arange( self.nclass )[np.newaxis, :]

			return X, lab, t


		##### generating the index of training & validation data
		#
		def genIndexLV( self, lab, seed = 0, nvalid = 1000 ):
			"""Split the samples into a training part and a validation part.

			lab    -- 1-dim array of class labels
			seed   -- value passed to np.random.seed (this reseeds NumPy's
			          global generator)
			nvalid -- number of validation samples picked per class; a class
			          with fewer samples contributes all of them

			Returns ``( idxL, idxV )``, two boolean masks over the samples that
			are each other's complement: idxV marks validation, idxL training.
			"""

			np.random.seed( seed )
			idx = np.random.permutation( lab.shape[0] )
			idxV = np.zeros( lab.shape[0], dtype = bool )

			# the first nvalid images of each class, in shuffled order
			labShuffled = lab[idx]
			for ik in range( self.nclass ):
				i = np.where( labShuffled == ik )[0][:nvalid]
				idxV[idx[i]] = True

			# '~' is the logical complement; unary minus is rejected by NumPy
			# for boolean arrays
			idxL = ~idxV

			return idxL, idxV



	if __name__ == "__main__":

		import cv2

		# Demo: tile the first `nimg` training images of every class into one
		# picture (one class per row) and write it to 'hoge.png'.
		dirCIFAR10 = './cifar10/cifar-10-batches-py'
		cifar10 = CIFAR10( dirCIFAR10 )
		dataL, labelsL = cifar10._loadL()

		w = h = 32
		nclass = 10
		nimg = 10
		gap = 4

		# every tile is followed by a gap, plus one leading gap per axis
		width = nimg * ( w + gap ) + gap
		height = nclass * ( h + gap ) + gap
		# mid-gray background; the gaps keep this value
		img = np.zeros( ( height, width, 3 ), dtype = int ) + 128

		for iy in range( nclass ):
			lty = iy * ( h + gap ) + gap
			idx = np.where( labelsL == iy )[0]
			for ix in range( nimg ):
				ltx = ix * ( w + gap ) + gap
				tmp = dataL[idx[ix], :].reshape( ( 3, h, w ) )
				# BGR <= RGB
				img[lty:lty+h, ltx:ltx+w, 0] = tmp[2, :, :]
				img[lty:lty+h, ltx:ltx+w, 1] = tmp[1, :, :]
				img[lty:lty+h, ltx:ltx+w, 2] = tmp[0, :, :]

		cv2.imwrite( 'hoge.png', img )
	import numpy as np
	import scipy as sp


	# ZCA whitening
	def ZCAtrans( Xraw, Uzca = None ):
		"""Apply ZCA whitening to a batch of images.

		Xraw -- 4-dim array (N x 3 x 32 x 32 in this project), assumed to be
		        zero-mean; it is flattened to N x D internally
		Uzca -- D x D whitening matrix from an earlier call, or None to
		        estimate the matrix from Xraw itself

		Returns ``( X, Uzca )`` when Uzca was None, otherwise ``X`` alone; X
		has the shape of Xraw.
		"""

		assert Xraw.ndim == 4 # Xraw is assumed to be N x 3 x 32 x 32

		Xraw2 = Xraw.reshape( ( Xraw.shape[0], -1 ) )

		# 'is None', not '== None': '==' on an ndarray compares elementwise
		# and the resulting array has no single truth value.
		estimate = Uzca is None
		if estimate:
			# Xraw is assumed to be zero-mean; the float divisor keeps integer
			# input from being floor-divided under Python 2
			C = np.dot( Xraw2.T, Xraw2 ) / float( Xraw2.shape[0] )
			U, eva, _ = np.linalg.svd( C ) # U[:, i] is the i-th eigenvector
			# the small constant keeps near-zero eigenvalues from blowing up
			sqeva = np.sqrt( eva + 0.001 )
			Uzca = np.dot( U / sqeva[np.newaxis, :], U.T )

		X = np.dot( Xraw2, Uzca ).reshape( Xraw.shape )

		if estimate:
			return X, Uzca
		return X


	# mini batch indices for stochastic gradient ascent
	def makebatchindex( N, batchsize ):
		"""Randomly partition N samples into mini batches.

		N         -- number of samples
		batchsize -- samples per batch; the last batch holds the remainder
		             when N is not a multiple of batchsize

		Returns a boolean array of shape ( nbatch, N ) with
		nbatch = ceil( N / batchsize ); row ib is the mask of batch ib, and
		every sample belongs to exactly one row.  N == 0 yields an empty
		( 0, 0 ) array.
		"""

		idx = np.random.permutation( N )
		nbatch = int( np.ceil( float( N ) / batchsize ) )
		idxB = np.zeros( ( nbatch, N ), dtype = bool )
		# Slices clamp at the end of idx, so the short last batch needs no
		# special case and nbatch == 0 simply skips the loop.
		for ib in range( nbatch ):
			idxB[ib, idx[ib*batchsize:(ib+1)*batchsize]] = True

		return idxB


	# making translated and horizontally flipped images
	def translate( Xsrc, dstshape ):
		"""Randomly crop, and with probability 1/2 horizontally flip, each image.

		Xsrc     -- 4-dim array, N x C x H x W ( N x 3 x 32 x 32 for CIFAR-10 )
		dstshape -- ( h, w ) of the crops; needs h <= H and w <= W

		Returns a new N x C x h x w array with the dtype of Xsrc.  Every image
		gets its own crop offset and its own flip decision.
		Raises ValueError when dstshape does not fit into the source images.
		"""

		assert Xsrc.ndim == 4 # Xsrc is assumed to be N x 3 x 32 x 32

		N, C, H, W = Xsrc.shape
		h, w = dstshape
		if h > H or w > W:
			raise ValueError( 'dstshape %s does not fit into %d x %d images'
			                  % ( ( h, w ), H, W ) )

		# randint excludes its upper bound: the +1 makes the largest offset
		# reachable and keeps a full-size crop (only offset 0) legal.
		tx = np.random.randint( 0, W - w + 1, N )
		ty = np.random.randint( 0, H - h + 1, N )
		hf = np.random.random_sample( N ) < 0.5

		Xdst = np.empty( ( N, C, h, w ), dtype = Xsrc.dtype )

		for n in range( N ):
			crop = Xsrc[n, :, ty[n]:ty[n]+h, tx[n]:tx[n]+w]
			# flipping mirrors this very window (not one shifted by a column)
			Xdst[n] = crop[:, :, ::-1] if hf[n] else crop

		return Xdst


	# making translated and horizontally flipped images
	# ( one offset and one flip decision shared by the whole batch )
	def translate2( Xsrc, dstshape ):
		"""Crop, and with probability 1/2 horizontally flip, a whole batch at once.

		Unlike a per-image augmentation, a single random offset and a single
		flip decision are drawn and applied to every image of the batch.

		Xsrc     -- 4-dim array, N x C x H x W ( N x 3 x 32 x 32 for CIFAR-10 )
		dstshape -- ( h, w ) of the crop; needs h <= H and w <= W

		Returns an N x C x h x w array obtained from Xsrc by slicing only (no
		copy is made here).
		Raises ValueError when dstshape does not fit into the source images.
		"""

		assert Xsrc.ndim == 4 # Xsrc is assumed to be N x 3 x 32 x 32

		H, W = Xsrc.shape[2:]
		h, w = dstshape
		if h > H or w > W:
			raise ValueError( 'dstshape %s does not fit into %d x %d images'
			                  % ( ( h, w ), H, W ) )

		# randint excludes its upper bound: the +1 makes the largest offset
		# reachable and keeps a full-size crop (only offset 0) legal.
		tx = np.random.randint( 0, W - w + 1 )
		ty = np.random.randint( 0, H - h + 1 )
		hf = np.random.random_sample() < 0.5

		crop = Xsrc[:, :, ty:ty+h, tx:tx+w]
		# flipping mirrors this very window (not one shifted by a column)
		if hf:
			return crop[:, :, :, ::-1]
		return crop



	# clipping the center of the image w/o horizontal flip
	def clipcenter( Xsrc, dstshape ):
		"""Cut the central h x w window out of every image, without flipping.

		Xsrc     -- 4-dim array, N x C x H x W ( N x 3 x 32 x 32 for CIFAR-10 )
		dstshape -- ( h, w ) of the window

		Returns the slice Xsrc[:, :, ty:ty+h, tx:tx+w] where ( ty, tx ) is the
		margin ( H - h ) // 2, ( W - w ) // 2; when a margin is odd the extra
		row/column is left at the bottom/right.
		"""

		assert Xsrc.ndim == 4 # Xsrc is assumed to be N x 3 x 32 x 32

		H, W = Xsrc.shape[2:]
		h, w = dstshape
		# '//' keeps the offsets integral on Python 2 and 3 alike; '/' would
		# produce floats on Python 3, which cannot be used as slice bounds
		ty, tx = ( H - h ) // 2, ( W - w ) // 2

		return Xsrc[:, :, ty:ty+h, tx:tx+w]



	if __name__ == "__main__":

		import cv2
		import cifar10

		# Demo: for each class, randomly crop/flip its images among the first
		# 500 training samples and tile the first `nimg` results into one row
		# of 'hoge.png'.
		dirCIFAR10 = './cifar10/cifar-10-batches-py'
		c10 = cifar10.CIFAR10( dirCIFAR10 )
		dataL, labelsL = c10._loadL()
		Xorg = dataL[:500].reshape( ( -1, 3, 32, 32 ) )
		lab = labelsL[:500]

		w = h = 24
		nclass = 10
		nimg = 10
		gap = 4

		# every tile is followed by a gap, plus one leading gap per axis
		width = nimg * ( w + gap ) + gap
		height = nclass * ( h + gap ) + gap
		# mid-gray background; the gaps keep this value
		img = np.zeros( ( height, width, 3 ), dtype = int ) + 128

		for iy in range( nclass ):
			lty = iy * ( h + gap ) + gap
			idx = np.where( lab == iy )[0]
			X = translate( Xorg[idx], ( h, w ) )
			for ix in range( nimg ):
				ltx = ix * ( w + gap ) + gap
				tmp = X[ix]
				# BGR <= RGB
				img[lty:lty+h, ltx:ltx+w, 0] = tmp[2, :, :]
				img[lty:lty+h, ltx:ltx+w, 1] = tmp[1, :, :]
				img[lty:lty+h, ltx:ltx+w, 2] = tmp[0, :, :]

		cv2.imwrite( 'hoge.png', img )