AlenkaF/profiler_ouput.txt

## profiler_ouput.txt
Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
     7    147.8 MiB    147.8 MiB           1   @profile
     8                                         def my_func():
     9                                             # Load Vaex example
    10    173.0 MiB     25.3 MiB           1       df = vaex.example()
    11                                             # Create a virtual column
    12    173.0 MiB      0.0 MiB           1       df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")
    13
    14                                             # Create a __dataframe__ instance
    15    173.2 MiB      0.1 MiB           1       df_protocol = df.__dataframe__()
    16
    17                                             # Inspecting the metadata and selecting columns does not yet
    18                                             # materialize all the buffers
    19    173.2 MiB      0.0 MiB           1       df_protocol.num_columns()
    20    173.2 MiB      0.0 MiB           1       df_protocol.column_names()
    21
    22                                             # Chunk the data
    23    173.2 MiB      0.0 MiB           1       df_protocol.num_chunks()
    24    173.2 MiB      0.0 MiB           1       df_protocol.get_chunks(33)
    25    175.8 MiB      2.6 MiB           1       next(df_protocol.get_chunks(33)).num_rows()
    26
    27                                             # Select a subset of columns
    28    175.8 MiB      0.0 MiB           1       df_protocol.select_columns_by_name(['x', 'y']).num_rows()
    29
    30                                             # Read in the virtual column
    31    175.8 MiB      0.0 MiB           1       column = df_protocol.__dataframe__().get_column_by_name("r")
    32    175.8 MiB      0.0 MiB           1       column.size()
    33
    34                                             # Only when actually asking for the buffers of one chunk of a column,
    35                                             # the data needs to be in memory (to pass a pointer to the buffers)
    36    187.8 MiB     12.1 MiB           1       column.get_buffers()

## python_output.rst

      
    Raw
  

              python_output.rst
            
          
>>> import numpy as np
>>> import vaex

>>> # Load Vaex example
>>> df = vaex.example()
>>> # Create a virtual column
>>> df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")

>>> # Create a __dataframe__ instance
>>> df_protocol = df.__dataframe__()

>>> # Inspecting the metadata and selecting columns does not yet
>>> # materialize all the buffers
>>> df_protocol.num_columns()
11
>>> df_protocol.column_names()
['id', 'x', 'y', 'z', 'vx', 'vy', 'vz', 'E', 'L', 'Lz', 'FeH', 'r']

>>> # Chunk the data
>>> df_protocol.num_chunks()
1
>>> df_protocol.get_chunks(33)
<generator object _VaexDataFrame.get_chunks at 0x16c1703c0>
>>> next(df_protocol.get_chunks(33)).num_rows()
10000
>>> # Select a subset of columns
>>> df_protocol.select_columns_by_name(['x', 'y']).num_rows()
330000

>>> # Read in the virtual column
>>> column = df_protocol.__dataframe__().get_column_by_name("r")
>>> column.size()
330000

>>> # Only when actually asking for the buffers of one chunk of a column,
>>> # the data needs to be in memory (to pass a pointer to the buffers)
>>> column.get_buffers()
{'data': (VaexBuffer({'bufsize': 1320000, 'ptr': 5236260864, 'device': 'CPU'}), (<_DtypeKind.FLOAT: 2>, 32, '<f4', '=')), 'validity': None, 'offsets': None}

  
## script.py
from memory_profiler import profile

import numpy as np
import vaex


@profile
def my_func() -> None:
    """Walk through the dataframe interchange protocol on the Vaex example data.

    Each step is written as its own statement so that the ``@profile``
    decorator (imported from ``memory_profiler``) reports memory usage
    statement by statement. Nothing is returned; the calls are made only
    for the memory they do or do not allocate.
    """
    # Load Vaex example
    df = vaex.example()
    # Create a virtual column "r" defined by an expression over x, y and z
    df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")

    # Create a __dataframe__ instance (the interchange-protocol object)
    df_protocol = df.__dataframe__()

    # Inspecting the metadata and selecting columns does not yet
    # materialize all the buffers
    df_protocol.num_columns()
    df_protocol.column_names()

    # Chunk the data
    df_protocol.num_chunks()
    # Requesting 33 chunks only creates the iterator; no chunk is consumed here
    df_protocol.get_chunks(33)
    # Pull the first of the 33 chunks and ask for its row count
    next(df_protocol.get_chunks(33)).num_rows()

    # Select a subset of columns
    df_protocol.select_columns_by_name(['x', 'y']).num_rows()

    # Read in the virtual column ("r", added above) through the protocol object
    column = df_protocol.__dataframe__().get_column_by_name("r")
    column.size()

    # Only when actually asking for the buffers of one chunk of a column,
    # the data needs to be in memory (to pass a pointer to the buffers)
    column.get_buffers()

# Run the profiled walkthrough only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    my_func()
	Line # Mem usage Increment Occurrences Line Contents
	=============================================================
	7 147.8 MiB 147.8 MiB 1 @profile
	8 def my_func():
	9 # Load Vaex example
	10 173.0 MiB 25.3 MiB 1 df = vaex.example()
	11 # Create a virtual column
	12 173.0 MiB 0.0 MiB 1 df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")
	13
	14 # Create a __dataframe__ instance
	15 173.2 MiB 0.1 MiB 1 df_protocol = df.__dataframe__()
	16
	17 # Inspecting the metadata and selecting columns does not yet
	18 # materialize all the buffers
	19 173.2 MiB 0.0 MiB 1 df_protocol.num_columns()
	20 173.2 MiB 0.0 MiB 1 df_protocol.column_names()
	21
	22 # Chunk the data
	23 173.2 MiB 0.0 MiB 1 df_protocol.num_chunks()
	24 173.2 MiB 0.0 MiB 1 df_protocol.get_chunks(33)
	25 175.8 MiB 2.6 MiB 1 next(df_protocol.get_chunks(33)).num_rows()
	26
	27 # Select a subset of columns
	28 175.8 MiB 0.0 MiB 1 df_protocol.select_columns_by_name(['x', 'y']).num_rows()
	29
	30 # Read in the virtual column
	31 175.8 MiB 0.0 MiB 1 column = df_protocol.__dataframe__().get_column_by_name("r")
	32 175.8 MiB 0.0 MiB 1 column.size()
	33
	34 # Only when actually asking for the buffers of one chunk of a column,
	35 # the data needs to be in memory (to pass a pointer to the buffers)
	36 187.8 MiB 12.1 MiB 1 column.get_buffers()
	from memory_profiler import profile

	import numpy as np
	import vaex


	@profile
	def my_func():
	# Load Vaex example
	df = vaex.example()
	# Create a virtual column
	df.add_virtual_column("r", "sqrt(x**2 + y**2 + z**2)")

	# Create a __dataframe__ instance
	df_protocol = df.__dataframe__()

	# Inspecting the metadata and selecting columns does not yet
	# materialize all the buffers
	df_protocol.num_columns()
	df_protocol.column_names()

	# Chunk the data
	df_protocol.num_chunks()
	df_protocol.get_chunks(33)
	next(df_protocol.get_chunks(33)).num_rows()

	# Select a subset of columns
	df_protocol.select_columns_by_name(['x', 'y']).num_rows()

	# Read in the virtual column
	column = df_protocol.__dataframe__().get_column_by_name("r")
	column.size()

	# Only when actually asking for the buffers of one chunk of a column,
	# the data needs to be in memory (to pass a pointer to the buffers)
	column.get_buffers()

	if __name__ == '__main__':
	my_func()