# regispires/mem.py

## mem.py
import sys
import numpy as np
import pandas as pd

def sizeof_fmt(num, suffix='B'):
    """Format a quantity as a human-readable size with binary prefixes.

    Parameters
    ----------
    num : number
        The quantity to format, e.g. a byte count. May be negative.
    suffix : str
        Unit symbol appended after the prefix (default ``'B'``).

    Returns
    -------
    str
        ``num`` divided by 1024 as many times as needed for its magnitude
        to drop below 1024, shown with one decimal place and the matching
        prefix, e.g. ``'1.5 KiB'``. Anything still too large after ``'Zi'``
        is reported with the ``'Yi'`` prefix.
    """
    scaled = num
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if not abs(scaled) < 1024.0:
            # Too large for this prefix: step up to the next one.
            scaled /= 1024.0
            continue
        return "%3.1f %s%s" % (scaled, prefix, suffix)
    # All listed prefixes exhausted; express the remainder in 'Yi' units.
    return "%.1f %s%s" % (scaled, 'Yi', suffix)

def print_top_mem_vars(variables):
    """Print the ten largest entries of a name -> object mapping.

    Each object's size is taken from ``sys.getsizeof``; entries are listed
    largest first, one per line, with the size rendered by ``sizeof_fmt``.

    Usage:
    mem.print_top_mem_vars(locals())
    mem.print_top_mem_vars(globals())
    """
    measured = [(name, sys.getsizeof(value)) for name, value in variables.items()]
    # A stable descending sort keeps equally sized entries in mapping order.
    measured.sort(key=lambda pair: pair[1], reverse=True)
    for name, size in measured[:10]:
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

def reduce_mem_usage_automatic(df):
    """Reduces the memory usage of the given dataframe, in place.

    Integer columns are cast to the narrowest NumPy integer type that can
    hold the column's observed minimum and maximum (signed is preferred
    over unsigned at equal width).  Floating-point columns are passed to
    pandas' ``maybe_downcast_numeric`` for each float type from float32
    upward that is narrower than the column, keeping the first one that
    sticks.  Columns with any other dtype, including pandas extension
    dtypes such as ``category``, are left untouched.  The memory usage
    before and after, and the percentage saved, are printed.

    Parameter
    ---------
    df : dataframe
        The input data to which the operation of memory reduction will be
        performed.  Objects whose type name contains ``dask`` have their
        lazy results evaluated with ``dask.compute``.

    Returns
    -------
    None
        ``df`` is modified through column assignment.
    """
    is_dask = 'dask' in str(type(df))
    if is_dask:
        # Imported lazily so plain-pandas callers do not need dask installed.
        import dask

    def evaluate(*values):
        # dask produces lazy results that must be computed before use.
        return dask.compute(*values) if is_dask else values

    start_mem, = evaluate(df.memory_usage().sum() / 1024**2)
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))

    # Ordered by width, signed before unsigned at the same width.
    int_candidates = [
        np.dtype(t)
        for t in (np.int8, np.uint8, np.int16, np.uint16,
                  np.int32, np.uint32, np.int64, np.uint64)
    ]
    # Float typecodes from float32 upward (float16 is deliberately skipped).
    all_float_codes = np.typecodes["Float"]
    float_codes = all_float_codes[all_float_codes.index(np.dtype(np.float32).char):]

    for col in df.columns:
        col_type = df[col].dtype
        if not isinstance(col_type, np.dtype):
            # Extension dtypes (category, nullable ints, ...) have no
            # ``char``/``kind`` in the NumPy sense; leave them alone.
            continue

        if col_type.kind in 'iu':
            c_min, c_max = evaluate(df[col].min(), df[col].max())
            if pd.isna(c_min) or pd.isna(c_max):
                # Empty column: there is no range to fit.
                continue
            lo, hi = int(c_min), int(c_max)
            for candidate in int_candidates:
                if candidate.itemsize >= col_type.itemsize:
                    # Nothing narrower than the current dtype is left.
                    break
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the limit still fits.
                if limits.min <= lo and hi <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        elif col_type.char in all_float_codes:
            # from smallest to largest
            for typecode in float_codes:
                dtype = np.dtype(typecode)
                if dtype.itemsize < df[col].dtype.itemsize:
                    # NOTE(review): pd.core.dtypes.cast is pandas-internal,
                    # not documented public API; confirm it exists in the
                    # pandas versions this module must support.
                    df[col] = pd.core.dtypes.cast.maybe_downcast_numeric(df[col], dtype)

                    # successful conversion
                    if df[col].dtype == dtype:
                        break

    end_mem, = evaluate(df.memory_usage().sum() / 1024**2)
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    # Avoid dividing by zero when the frame reports no memory at all.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print("Decreased by {:.1f}%".format(saved_pct))
	import sys
	import numpy as np
	import pandas as pd

	def sizeof_fmt(num, suffix='B'):
		"""Format a quantity as a human-readable size with binary prefixes.

		Parameters
		----------
		num : number
			The quantity to format, e.g. a byte count. May be negative.
		suffix : str
			Unit symbol appended after the prefix (default ``'B'``).

		Returns
		-------
		str
			``num`` divided by 1024 until its magnitude is below 1024,
			shown with one decimal place and the matching prefix, e.g.
			``'1.5 KiB'``. Values too large for ``'Zi'`` use ``'Yi'``.
		"""
		for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
			if abs(num) < 1024.0:
				return "%3.1f %s%s" % (num, unit, suffix)
			# Too large for this prefix: step up to the next one.
			num /= 1024.0
		return "%.1f %s%s" % (num, 'Yi', suffix)

	def print_top_mem_vars(variables):
		"""Print the ten largest entries of a name -> object mapping.

		Each object's size is taken from ``sys.getsizeof``; entries are
		listed largest first, one per line, with the size rendered by
		``sizeof_fmt``.

		Usage:
		mem.print_top_mem_vars(locals())
		mem.print_top_mem_vars(globals())
		"""
		sizes = ((name, sys.getsizeof(value)) for name, value in variables.items())
		# Negated size as the key gives a descending order.
		for name, size in sorted(sizes, key=lambda x: -x[1])[:10]:
			print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

	def reduce_mem_usage_automatic(df):
		"""Reduces the memory usage of the given dataframe, in place.

		Integer columns are cast to the narrowest NumPy integer type that
		can hold the column's observed minimum and maximum (signed is
		preferred over unsigned at equal width).  Floating-point columns are
		passed to pandas' ``maybe_downcast_numeric`` for each float type
		from float32 upward that is narrower than the column, keeping the
		first one that sticks.  Columns with any other dtype, including
		pandas extension dtypes such as ``category``, are left untouched.
		The memory usage before and after, and the percentage saved, are
		printed.

		Parameter
		---------
		df : dataframe
			The input data to which the operation of memory reduction will
			be performed.  Objects whose type name contains ``dask`` have
			their lazy results evaluated with ``dask.compute``.

		Returns
		-------
		None
			``df`` is modified through column assignment.
		"""
		is_dask = 'dask' in str(type(df))
		if is_dask:
			# Imported lazily so plain-pandas callers do not need dask.
			import dask

		def evaluate(*values):
			# dask produces lazy results that must be computed before use.
			return dask.compute(*values) if is_dask else values

		start_mem, = evaluate(df.memory_usage().sum() / 1024**2)
		print("Memory usage of dataframe is {:.2f} MB".format(start_mem))

		# Ordered by width, signed before unsigned at the same width.
		int_candidates = [
			np.dtype(t)
			for t in (np.int8, np.uint8, np.int16, np.uint16,
					  np.int32, np.uint32, np.int64, np.uint64)
		]
		# Float typecodes from float32 upward (float16 is skipped).
		all_float_codes = np.typecodes["Float"]
		float_codes = all_float_codes[all_float_codes.index(np.dtype(np.float32).char):]

		for col in df.columns:
			col_type = df[col].dtype
			if not isinstance(col_type, np.dtype):
				# Extension dtypes (category, nullable ints, ...) have no
				# ``char``/``kind`` in the NumPy sense; leave them alone.
				continue

			if col_type.kind in 'iu':
				c_min, c_max = evaluate(df[col].min(), df[col].max())
				if pd.isna(c_min) or pd.isna(c_max):
					# Empty column: there is no range to fit.
					continue
				lo, hi = int(c_min), int(c_max)
				for candidate in int_candidates:
					if candidate.itemsize >= col_type.itemsize:
						# Nothing narrower than the current dtype is left.
						break
					limits = np.iinfo(candidate)
					# Inclusive bounds: a value equal to the limit still fits.
					if limits.min <= lo and hi <= limits.max:
						df[col] = df[col].astype(candidate)
						break
			elif col_type.char in all_float_codes:
				# from smallest to largest
				for typecode in float_codes:
					dtype = np.dtype(typecode)
					if dtype.itemsize < df[col].dtype.itemsize:
						# NOTE(review): pd.core.dtypes.cast is pandas-internal,
						# not documented public API; confirm it exists in the
						# pandas versions this module must support.
						df[col] = pd.core.dtypes.cast.maybe_downcast_numeric(df[col], dtype)

						# successful conversion
						if df[col].dtype == dtype:
							break

		end_mem, = evaluate(df.memory_usage().sum() / 1024**2)
		print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
		# Avoid dividing by zero when the frame reports no memory at all.
		saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
		print("Decreased by {:.1f}%".format(saved_pct))