Skip to content

Instantly share code, notes, and snippets.

@Coderx7
Created February 14, 2017 11:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Coderx7/26d31414b17db9d49b4a331d143e8ec7 to your computer and use it in GitHub Desktop.
Save Coderx7/26d31414b17db9d49b4a331d143e8ec7 to your computer and use it in GitHub Desktop.
This is the script I wrote to convert the CIFAR-10/100 (GCN, whitened) datasets from pylearn2 to LMDB.
#in the name of GOD
#pylearn2 cifar10 convertor to lmdb
#by:Seyyed Hossein Hasanpour
#Coderx7@gmail.com
#2/14/2017
import numpy as np
import cPickle
import lmdb
import caffe
from caffe.proto import caffe_pb2
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The object that was pickled into ``file``.
    """
    # cPickle only exists on Python 2; fall back to the stdlib pickle module
    # so the helper keeps working where cPickle is unavailable.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    # NOTE(security): unpickling can execute arbitrary code embedded in the
    # file -- only use this on pickle files from a trusted source.
    # The context manager closes the handle even if loading raises.
    with open(file, 'rb') as fo:
        return pickle.load(fo)
def writeToLMDB(data_set, name, map_size=None):
    """Write every example of a dataset into an LMDB of Caffe Datum records.

    Each example is reshaped to (3, 32, 32), converted to a Caffe ``Datum``
    together with its integer label, serialized, and stored under a
    zero-padded decimal index key. The database is created at
    ``name + '_gcn_whitened'``.

    Args:
        data_set: Object whose ``get_data()`` returns a sequence with the
            example array first and the label array second (presumably a
            pylearn2 dataset -- confirm against the pickled objects).
        name: Prefix of the LMDB directory name.
        map_size: Optional upper bound, in bytes, for the LMDB memory map.
            Defaults to ten times the raw size of the example array.
    """
    fetched = data_set.get_data()
    data, labels = fetched[0], fetched[1]
    size = data.shape[0]
    lmdb_file = name + '_gcn_whitened'

    if map_size is None:
        # A serialized Datum plus keys and LMDB page overhead can take more
        # room than the raw array, so sizing the map to exactly data.nbytes
        # risks a MapFullError. map_size is only an upper bound; the 1 MiB
        # floor keeps an empty dataset from requesting a zero-sized map.
        map_size = max(int(data.nbytes) * 10, 1 << 20)

    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print('size =  %s' % size)
    if size:
        print('label[0] shape =  %s' % (labels[0].shape,))
        print('data[0] shape =  %s' % (data[0].shape,))

    # Keep the historical 5-digit keys, but widen them when the dataset is
    # large enough that 5 digits would break lexicographic key ordering.
    key_width = max(5, len(str(size - 1)))

    db = lmdb.open(lmdb_file, map_size=map_size)
    try:
        # One write transaction for the whole dataset: it commits when the
        # block exits normally and aborts if an exception escapes, so no
        # transaction is ever left dangling.
        with db.begin(write=True) as txn:
            for i in range(size):
                if i % 1000 == 0:
                    print(i)
                datum = caffe.io.array_to_datum(
                    data[i].reshape(3, 32, 32), int(labels[i]))
                keystr = str(i).zfill(key_width)
                txn.put(keystr.encode('ascii'), datum.SerializeToString())
    finally:
        # Always release the environment, even when writing fails.
        db.close()

    print(size)
    print('Successfully created %s' % lmdb_file)
# Load both pickled splits first (test, then train), then convert each one
# to its own LMDB in that same order.
loaded_splits = [
    (unpickle(pickle_path), lmdb_prefix)
    for pickle_path, lmdb_prefix in (
        ('./test.pkl', 'cifar10_test_lmdb'),
        ('./train.pkl', 'cifar10_train_lmdb'),
    )
]
for split, lmdb_prefix in loaded_splits:
    writeToLMDB(split, lmdb_prefix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment