Created
June 14, 2016 10:16
-
-
Save mcchae/e5b9d1440fc84f26f9cb16aed2b664c9 to your computer and use it in GitHub Desktop.
python memory limit and persistent dict and list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#coding=utf8 | |
########################################################################################## | |
import os | |
import sys | |
import psutil | |
import resource | |
import getopt | |
# from random import randint | |
from datetime import datetime | |
from persistent import pDict, pList | |
########################################################################################## | |
def set_memory_limit(limit=1024):
    """Cap the address space (RLIMIT_AS) of the current process.

    The soft and the hard limit are both set to the same value, then the
    limits are read back to confirm the kernel accepted them.

    :param limit: maximum address-space size in megabytes (default 1024)
    :return: True when both limits read back equal the requested size
    """
    limit_bytes = limit * 1024 * 1024  # megabytes -> bytes
    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
    # Read the limits back instead of trusting the call blindly.
    soft_now, hard_now = resource.getrlimit(resource.RLIMIT_AS)
    return (soft_now == limit_bytes) and (hard_now == limit_bytes)
########################################################################################## | |
def getReadableSize(lv): | |
""" | |
'1.23 GB' 처럼 TB(TerraByte), GB(GigaByte), MB(MegaByte), KB(KilloByte) 로 표시 | |
:param lv: int 또는 long 형식의 값 | |
:return: 사람이 읽기 쉬운 형태의 바이트 문자열 값 | |
""" | |
if not isinstance(lv, (int, long)): | |
return '0' | |
if lv >= 1024 * 1024 * 1024 * 1024: | |
s = "%4.2f TB" % (float(lv) / (1024 * 1024 * 1024 * 1024)) | |
elif lv >= 1024 * 1024 * 1024: | |
s = "%4.2f GB" % (float(lv) / (1024 * 1024 * 1024)) | |
elif lv >= 1024 * 1024: | |
s = "%4.2f MB" % (float(lv) / (1024 * 1024)) | |
elif lv >= 1024: | |
s = "%4.2f KB" % (float(lv) / 1024) | |
else: | |
s = "%d B" % lv | |
return s | |
########################################################################################## | |
def print_heap(msg):
    """Build a status line made of ``msg`` and the size of the process heap.

    Despite its name this function prints nothing; it returns the text.
    The heap size is the ``pss`` field of the first entry reported by
    ``psutil.Process.memory_maps(grouped=False)`` whose path contains
    '[heap]', formatted with getReadableSize(). When no such entry exists
    only ``msg`` is returned.

    :param msg: leading text; skipped when empty or None
    :return: the parts joined by a single space
    """
    parts = [msg] if msg else []
    process = psutil.Process(os.getpid())
    heap_region = next(
        (region for region in process.memory_maps(grouped=False)
         if '[heap]' in region.path),
        None)
    if heap_region is not None:
        parts.append(getReadableSize(heap_region.pss))
    return ' '.join(parts)
########################################################################################## | |
def big_dict_test(persistent=False, limit=1024):
    """Fill a dictionary with 200000 entries of about 1 KB and report heap use.

    :param persistent: use the shelve-backed pDict instead of a builtin dict
    :param limit: not used here; accepted so the caller can pass the same
        option dictionary to every test function
    :return: True on success, False when a MemoryError interrupted the fill
    """
    sts = datetime.now()
    bd = pDict() if persistent else dict()
    loop_limit = 200000
    print(print_heap('before loop [%s]' % loop_limit))
    for i in xrange(loop_limit):
        try:
            k = i
            bd[k] = '%s%s' % (k, '*' * 1024)
            # Progress report every 10000 insertions (but not at 0).
            if i and i % 10000 == 0:
                print(print_heap('Dict %s inserted...' % i))
        except MemoryError:
            print("[%s] error" % i)
            return False
    print(print_heap('after loop [%s]' % loop_limit))
    # Read one element back to show the container is usable.
    print("bd[%s]=%s" % (loop_limit // 2, bd[loop_limit // 2]))
    for k in bd.keys():
        del bd[k]
    del bd
    print(print_heap('after delete [%s]' % loop_limit))
    ets = datetime.now()
    # The original reported "big_list_test" here (copy-paste slip).
    print("big_dict_test takes %s" % (ets - sts))
    return True
########################################################################################## | |
def big_list_test(persistent=False, limit=1024):
    """Append 200000 strings of about 1 KB to a list and report heap use.

    :param persistent: use the shelve-backed pList instead of a builtin list
    :param limit: not used here; accepted so the caller can pass the same
        option dictionary to every test function
    :return: True on success, False when a MemoryError interrupted the fill
    """
    started = datetime.now()
    if persistent:
        items = pList()
    else:
        items = list()
    total = 200000
    print(print_heap('before loop [%s]' % total))
    for idx in xrange(total):
        try:
            items.append('%s%s' % (idx, '*' * 1024))
            # Progress report every 10000 appends, skipping index 0.
            if idx % 10000 == 0 and idx:
                print(print_heap('List %s inserted...' % idx))
        except MemoryError:
            print("[%s] MemoryError" % idx)
            return False
    print(print_heap('after loop [%s]' % total))
    middle = total // 2
    print("bl[%s]=%s" % (middle, items[middle]))
    del items
    print(print_heap('after delete [%s]' % total))
    finished = datetime.now()
    print("big_list_test takes %s" % (finished - started))
    return True
########################################################################################## | |
def usage(msg=None):
    """Write the command-line help to stderr and terminate with status 1.

    :param msg: optional error text written on its own line before the help
    :raises SystemExit: always, with exit code 1
    """
    if msg:
        sys.stderr.write('%s\n' % msg)
    # Fixes over the original text: "mendotory" -> "mandatory", and the
    # --help long option (accepted by the option parser) is now listed.
    sys.stderr.write('''usage:%s [options]
set memory limit and raise malloc error
[options] are:
-h, --help : show this message
-p, --persistent : use persistent dict and list
-l, --limit : memory limit in MegaBytes (mandatory)
''' % sys.argv[0])
    sys.exit(1)
########################################################################################## | |
if __name__ == '__main__':
    # Command-line entry point: parse the options, apply the memory limit,
    # then run the list test followed by the dict test.
    try:
        # "limit=" (with the trailing '=') tells getopt that --limit takes an
        # argument; without it "--limit 100" yields an empty value.
        opts, args = getopt.getopt(sys.argv[1:], "hpl:",
                                   ["help", "persistent", "limit="])
    except getopt.GetoptError as e:
        usage(str(e))
    Options = {'persistent': False}
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
        elif o in ("-p", "--persistent"):
            Options['persistent'] = True
        elif o in ("-l", "--limit"):
            try:
                Options['limit'] = int(a)
            except ValueError:
                # Show the help instead of dying with a traceback.
                usage('-l, --limit needs an integer (MegaBytes) but got "%s"' % a)
    if 'limit' not in Options:
        usage('-l, --limit is mandatory')
    limit = Options['limit']
    print("set_memory_limit(%d)=%d" % (limit, set_memory_limit(limit)))
    # Exit status: 2 when the list test fails, 1 when the dict test fails.
    if not big_list_test(**Options):
        sys.exit(2)
    if not big_dict_test(**Options):
        sys.exit(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#coding=utf8 | |
########################################################################################## | |
import os | |
import shelve | |
from random import randint | |
########################################################################################## | |
class pDict(object):
    """Dictionary-like container that keeps its entries in a shelve file.

    Every instance opens its own randomly named shelf under ``FOLER``
    (falling back to '/tmp') and removes it again when closed or garbage
    collected. Shelve only accepts string keys, so integer keys are encoded
    as ``INT_PREFIX`` plus the number right-aligned in 10 columns; the
    padding makes the sorted order of the encoded keys follow numeric order
    for non-negative integers of up to 10 digits. Other non-string keys are
    stored under ``str(key)``.
    """
    #=====================================================================================
    # Preferred folder for the shelf file (attribute name kept as-is for
    # compatibility with existing users).
    FOLER = '/ferret/tmp'
    INT_PREFIX = '__int__'
    # Suffixes a dbm backend may append to the name given to shelve.open().
    _DB_SUFFIXES = ('', '.db', '.dat', '.dir', '.bak', '.pag')
    # String / integer type sets for both Python 2 and Python 3.
    try:
        _STRING_TYPES = (basestring,)
        _INT_TYPES = (int, long)
    except NameError:
        _STRING_TYPES = (str,)
        _INT_TYPES = (int,)

    #=====================================================================================
    def _db_files(self, path):
        """Return the files that currently exist for the shelf named ``path``."""
        return [path + suffix for suffix in self._DB_SUFFIXES
                if os.path.exists(path + suffix)]

    #=====================================================================================
    def _open(self):
        """Pick an unused file name and open the backing shelf.

        :raises IOError: when neither ``FOLER`` nor '/tmp' is a directory
        """
        if not os.path.isdir(self.FOLER):
            self.FOLER = '/tmp'
        if not os.path.isdir(self.FOLER):
            raise IOError('Cannot write at folder "%s"' % self.FOLER)
        while True:
            self.filename = '%s/.%08d.__dict__' % (self.FOLER, randint(1, 99999999))
            # Also look for suffixed variants, not only the bare name.
            if not self._db_files(self.filename):
                break
        self.d = shelve.open(self.filename)

    #=====================================================================================
    def _close(self, is_delete=True):
        """Close the shelf and, unless ``is_delete`` is false, remove its files."""
        if self.d is not None:
            self.d.close()
            self.d = None
        # filename is still None when _open() failed before choosing a name.
        if is_delete and self.filename is not None:
            for path in self._db_files(self.filename):
                os.remove(path)

    #=====================================================================================
    def __init__(self, d=None):
        """Create an empty container, optionally pre-loaded from ``d``.

        :param d: dict or pDict whose items are copied in (default: nothing)
        :raises ReferenceError: when ``d`` is non-empty and of another type
        """
        self.filename = None
        self.d = None
        # Validate before opening so a bad argument leaves no file behind.
        if d is not None and d and not isinstance(d, (dict, pDict)):
            raise ReferenceError('pDict construct need only dict or pDict type but <%s>'
                                 % str(type(d)))
        self._open()
        if d is None:
            return
        for k, v in d.items():
            self.__setitem__(k, v)

    #=====================================================================================
    def __del__(self):
        self._close()

    #=====================================================================================
    def __repr__(self):
        """Render as '{key:value,...}' in sorted stored-key order; strings are quoted."""
        rl = ['{']
        for i, k in enumerate(sorted(self.d.keys())):
            if i > 0:
                rl.append(',')
            rk = self.__r_keytransform__(k)
            if isinstance(rk, self._STRING_TYPES):
                rk = '"%s"' % rk
            rv = self.d[k]
            if isinstance(rv, self._STRING_TYPES):
                rv = '"%s"' % rv
            rl.append('%s:%s' % (rk, rv))
        rl.append('}')
        return ''.join(rl)

    #=====================================================================================
    def __getitem__(self, key):
        """Return the value for ``key``; a slice returns a list of values.

        A slice is expanded with range() over integer keys. Omitted parts
        default to start=0, stop=len(self) and step=1.
        """
        if isinstance(key, slice):
            start = 0 if key.start is None else key.start
            stop = len(self) if key.stop is None else key.stop
            step = 1 if key.step is None else key.step
            return [self.d[self.__keytransform__(k)]
                    for k in range(start, stop, step)]
        return self.d[self.__keytransform__(key)]

    #=====================================================================================
    def __setitem__(self, key, value):
        self.d[self.__keytransform__(key)] = value

    #=====================================================================================
    def __delitem__(self, key):
        """Remove ``key``; the removed value is returned for direct callers."""
        stored_key = self.__keytransform__(key)
        r = self.d[stored_key]
        del self.d[stored_key]
        return r

    #=====================================================================================
    def __iter__(self):
        # Yield the caller-visible keys (same as keys()), not the encoded
        # strings used inside the shelf.
        return iter(self.keys())

    #=====================================================================================
    def __len__(self):
        return len(self.d)

    #=====================================================================================
    def __keytransform__(self, key):
        """Encode a caller key into the string key used inside the shelf."""
        if isinstance(key, self._STRING_TYPES):
            return key
        if isinstance(key, self._INT_TYPES):
            return '%s%10d' % (self.INT_PREFIX, key)
        return str(key)

    #=====================================================================================
    def __r_keytransform__(self, key):
        """Decode a stored key: INT_PREFIX-tagged keys become ints again."""
        if key.startswith(self.INT_PREFIX):
            return int(key[len(self.INT_PREFIX):].strip())
        return key

    #=====================================================================================
    def __contains__(self, key):
        return self.has_key(key)

    #=====================================================================================
    def has_key(self, key):
        # "in" works on shelves in Python 2 and 3; Shelf.has_key is 2-only.
        return self.__keytransform__(key) in self.d

    #=====================================================================================
    def keys(self):
        """Return the decoded keys as a list, in sorted stored-key order."""
        return [self.__r_keytransform__(k) for k in sorted(self.d.keys())]

    #=====================================================================================
    def values(self):
        """Return the values as a list, in sorted stored-key order."""
        return [self.d[k] for k in sorted(self.d.keys())]

    #=====================================================================================
    def items(self):
        """Yield (decoded key, value) pairs in sorted stored-key order."""
        # The key list is built up front, so the current entry may be
        # deleted while iterating.
        for k in sorted(self.d.keys()):
            yield self.__r_keytransform__(k), self.d[k]
########################################################################################## | |
class pList(pDict):
    """List-like container stored in a shelve file through pDict.

    Element ``i`` is kept under the integer key ``i`` of the underlying
    pDict; ``self.len`` tracks the number of elements. Negative indices are
    not supported by the mutating operations.
    """
    #=====================================================================================
    def __init__(self, l=None):
        """Create an empty list, optionally pre-loaded from ``l``.

        :param l: list or pList whose elements are appended (default: nothing)
        :raises ReferenceError: when ``l`` is non-empty and of another type
        """
        pDict.__init__(self)
        self.len = 0
        if l is None:
            return
        if l and not isinstance(l, (list, pList)):
            raise ReferenceError('pList construct need only list or pList type but <%s>'
                                 % str(type(l)))
        for v in l:
            self.append(v)

    #=====================================================================================
    def __repr__(self):
        """Render as '[v0,v1,...]' in index order; string values are quoted."""
        rl = ['[']
        for i in range(self.len):
            if i > 0:
                rl.append(',')
            rv = self.d[self.__keytransform__(i)]
            # (str, type(u'')) is str+unicode on Python 2 and str on Python 3.
            if isinstance(rv, (str, type(u''))):
                rv = '"%s"' % rv
            rl.append('%s' % rv)
        rl.append(']')
        return ''.join(rl)

    #=====================================================================================
    def __setitem__(self, ndx, value):
        """Store ``value`` at ``ndx``; ``ndx == len`` grows the list by one.

        :raises IndexError: when ``ndx`` is negative or greater than the length
        """
        if ndx < 0 or ndx > self.len:
            raise IndexError('Invalid index <%s>' % ndx)
        self.d[self.__keytransform__(ndx)] = value
        # Writing one past the end adds an element; count it here so the
        # length never disagrees with what is stored.
        if ndx == self.len:
            self.len += 1

    #=====================================================================================
    def __delitem__(self, ndx):
        """Remove the element at ``ndx`` and shift the following ones left.

        :raises IndexError: when ``ndx`` is negative or not below the length
        :return: the removed value (only visible to direct callers)
        """
        if ndx < 0 or ndx >= self.len:
            raise IndexError('Invalid index <%s>' % ndx)
        r = self.d[self.__keytransform__(ndx)]
        # Overwrite each slot with its successor, then drop the last slot,
        # which is now a duplicate (or the removed element itself when
        # ndx was the last index).
        for i in range(ndx, self.len - 1):
            self.d[self.__keytransform__(i)] = self.d[self.__keytransform__(i + 1)]
        del self.d[self.__keytransform__(self.len - 1)]
        self.len -= 1
        return r

    #=====================================================================================
    def __contains__(self, v):
        return v in self.values()

    #=====================================================================================
    def __iter__(self):
        return iter(self.values())

    #=====================================================================================
    def append(self, v):
        """Add ``v`` at the end of the list."""
        # __setitem__ increments self.len when writing at the end.
        self.__setitem__(self.len, v)

    #=====================================================================================
    def extend(self, l):
        """Append every element of the iterable ``l``."""
        # append() already accounts for each element; adding len(l) on top
        # (as the original did) counted every element twice.
        for item in l:
            self.append(item)
########################################################################################## | |
def test_pDict():
    """Manual demo of pDict; prints every intermediate state to stdout.

    Covers construction from a plain dict, membership tests, inserting
    integer keys, deleting entries while walking items(), and the
    keys()/values() listings.

    NOTE(review): the indentation of this function was not available in the
    copy under review; the nesting below is reconstructed. In particular the
    'd=%s' print that follows the deletion loop is assumed to sit at
    function level -- confirm against the real file.
    """
    d = {
        'abc':111,
        123:"def"
    }
    d = pDict(d)  # copy the plain dict into a shelve-backed pDict
    print('d=%s' % d)
    print('123 in d? %s' % (123 in d,))
    print('d.has_key(23)? %s' % d.has_key(23))
    for i in xrange(10):
        d[i] = '%s_%s' % (i, '*'*1)
    print('d=%s' % d)
    # pDict.items() iterates over a key list that is sorted up front, so
    # deleting the current entry inside the loop is possible.
    for k, v in d.items():
        print k, v
        # drop every integer key that is a multiple of 3
        if isinstance(k,int) and k % 3 == 0:
            del d[k]
    print('d=%s' % d)
    print('len(d)=%s' % len(d))
    for k, v in d.items():
        print k, v
    print("d.keys()=%s"%d.keys())
    print("d.values()=%s" % d.values())
########################################################################################## | |
def test_pList():
    """Manual demo of pList; prints every intermediate state to stdout.

    Covers construction from a plain list, len(), membership tests,
    append(), and deleting elements while walking the indices backwards.

    NOTE(review): the indentation of this function was not available in the
    copy under review; the nesting below is reconstructed. The first of the
    two trailing 'l=%s' prints is assumed to belong to the ``if`` branch
    (one dump after each deletion) -- confirm against the real file.
    """
    l = [
        'abc',
        123
    ]
    l = pList(l)  # copy the plain list into a shelve-backed pList
    print('l=%s' % l)
    print('len(l)=%s' % len(l))
    print('"abc" in l? %s' % ("abc" in l,))
    print('23 in l? %s' % (23 in l,))
    for i in xrange(10):
        l.append('%s_%s' % (i, '*'*1))
    print('l=%s' % l)
    # for v in l: print v
    # Walk from the last index down to 0 so that deleting element i does not
    # move the elements that are still to be visited.
    for i in range(len(l)-1,-1,-1):
        v = l[i]
        print "[%s] %s"%(i, v)
        # delete every element whose index is a multiple of 3
        if i % 3 == 0:
            del l[i]
            print('l=%s' % l)
    print('l=%s' % l)
########################################################################################## | |
def test_shelve():
    """Walk through the basic shelve operations on '/tmp/foo.dic'.

    Prints the shelf, a stored value, two membership results and the key
    list, then demonstrates that mutating a value fetched from a shelf
    opened without writeback=True does not change what is stored.
    """
    path = '/tmp/foo.dic'
    # The low-level database library may add a suffix to the file name.
    shelf = shelve.open(path)
    name = 'foo_key'
    # Store a value under the key, replacing any value already there.
    shelf[name] = 'bar_data' * 3
    print(shelf)
    # Reading gives back a COPY of the stored value.
    fetched = shelf[name]
    print(fetched)
    # Membership test with the "in" operator.
    present = name in shelf
    print(present)
    # Delete the entry, then test membership again with has_key().
    del shelf[name]
    present = shelf.has_key(name)
    print(present)
    # Listing all keys can be slow on a large shelf.
    all_keys = shelf.keys()
    print(all_keys)
    # The shelf was opened WITHOUT writeback=True, therefore:
    shelf['xx'] = range(4)   # storing works as expected, but ...
    shelf['xx'].append(5)    # ... this only mutates a temporary copy;
                             # the stored value is still range(4).
    # The careful way: fetch the copy, change it, store it back.
    copy_of_xx = shelf['xx']
    copy_of_xx.append(5)
    shelf['xx'] = copy_of_xx
    # shelve.open(path, writeback=True) would let shelf['xx'].append(5)
    # work directly, at the price of more memory and a slower close().
    shelf.close()
########################################################################################## | |
if __name__ == '__main__':
    # Running this module directly executes the pList demo; the pDict demo
    # is currently commented out.
    # test_pDict()
    test_pList()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment