Skip to content

Instantly share code, notes, and snippets.

@westonpace
Created June 8, 2023 18:56
Show Gist options
  • Save westonpace/361b202f10260f6be5729760eba18398 to your computer and use it in GitHub Desktop.
Save westonpace/361b202f10260f6be5729760eba18398 to your computer and use it in GitHub Desktop.
Measuring I/O usage of script
# Read a single column ("l_partkey") from the lineitem Parquet file.
# The table returned by read_table is not kept; the script only triggers the
# read so that its file I/O can be measured externally.
# NOTE(review): presumably this is the `one_column_parquet.py` script named in
# the traces -- confirm against the gist's file list.
import pyarrow.parquet as pq

parquet_path = "/home/pace/dev/data/lineitem_10.parquet"
wanted_columns = ["l_partkey"]
pq.read_table(parquet_path, columns=wanted_columns)
(arrow-release-12) pace@pace-desktop:~/dev/experiments/parquet-partial-read$ strace --quiet=attach,exit -z -f -P /home/pace/dev/data/lineitem_10.parquet python whole_file_parquet.py
[pid 182506] stat("/home/pace/dev/data/lineitem_10.parquet", {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182506] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182524] pread64(7, "\1\377\241=\0211\22\341,\r%\16\241\10Q\243E\30A\212&\300\tI\310\16\341W!\210\16\206"..., 65536, 2006496747) = 65536
[pid 182506] close(7) = 0
[pid 182506] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182506] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182506] fadvise64(7, 4, 31980683, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 31980792, 80584237, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 112565136, 42934633, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 155499868, 19818795, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 175318765, 105865224, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 281184118, 27899157, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 309083361, 30046178, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 339129641, 30045944, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 369175689, 30046294, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 399222088, 12578507, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 411800685, 257055150, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 668855964, 31996331, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 700852406, 80583996, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 781436512, 42934649, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 824371262, 19823762, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 844195126, 105864556, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 950059811, 27917327, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 977977224, 30046190, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1008023516, 30045950, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1038069570, 30046302, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1068115977, 12578489, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1080694556, 257027785, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1337722478, 32000712, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1369723301, 80582589, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1450306000, 42934648, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1493240749, 19789058, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1513029909, 105863090, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1618893128, 27895361, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1646788575, 30046191, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1676834868, 30045951, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1706880923, 30046303, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1736927331, 12578500, POSIX_FADV_WILLNEED) = 0
[pid 182506] fadvise64(7, 1749505921, 257049221, POSIX_FADV_WILLNEED) = 0
[pid 182530] pread64(7, "\25\4\25\270\250\200\1\25\244\251\200\1L\25\216\212 \25\0\22\0\0\234\224@\364\377\377\26\256\27\0"..., 80584237, 31980792) = 80584237
[pid 182528] pread64(7, "\25\4\258\25<L\25\16\25\0\22\0\0\34l\1\0\0\0\2\0\0\0\3\0\0\0\4\0\0\0"..., 19818795, 155499868) = 19818795
[pid 182527] pread64(7, "\25\4\25\232\1\25hL\25\26\25\0\22\0\0M\0\0\5\1\0\4\5\6\4\0\t\t\7\0\n\t"..., 27899157, 281184118) = 27899157
[pid 182532] pread64(7, "\25\4\25\220\232\1\25\246\232\1L\25\304&\25\0\22\0\0\210M\364\304\1B%\0\0R%\0\0"..., 30045944, 339129641) = 30045944
[pid 182524] pread64(7, "\25\4\25\360\216\200\1\25\304\217\200\1L\25\334\203 \25\0\22\0\0\270\207@\364\377\377\1\0\0\0"..., 31980683, 4) = 31980683
[pid 182531] pread64(7, "\25\4\25\360\235\1\25\372\235\1L\25\274'\25\0\22\0\0\370N\364w'`%\0\0~%\0\0"..., 30046178, 309083361) = 30046178
[pid 182528] pread64(7, "\25\4\25\200\1\25\206\1L\25\10\25\0\22\0\0@\360?\21\0\0\0DELIVER I"..., 12578507, 399222088) = 12578507
[pid 182530] pread64(7, "\25\4\25\330\237\1\25\342\237\1L\25\366'\25\0\22\0\0\354O\364\353'i%\0\0\206%\0\0"..., 30046294, 369175689) = 30046294
[pid 182529] pread64(7, "\25\4\25\200\3520\25\264\3520L\25\300\232\f\25\0\22\0\0\200\265\30\364\377\377n,\1\0\204\35"..., 42934633, 112565136) = 42934633
[pid 182524] pread64(7, "\25\4\25\350\253\200\1\25\324\254\200\1L\25\372\212 \25\0\22\0\0\364\225@\364\377\377\376\207\23\0"..., 80583996, 700852406) = 80583996
[pid 182528] pread64(7, "\25\4\258\25<L\25\16\25\0\22\0\0\34l\2\0\0\0\3\0\0\0\4\0\0\0\5\0\0\0"..., 19823762, 824371262) = 19823762
[pid 182532] pread64(7, "\25\4\25\320\216\200\1\25\274\217\200\1L\25\324\203 \25\0\22\0\0\250\207@\364\377\377\347/1\1"..., 31996331, 668855964) = 31996331
[pid 182529] pread64(7, "\25\4\25\232\1\25hL\25\26\25\0\22\0\0M\0\0\5\1\0\5\5\6\4\0\2\t\7\0\3\t"..., 27917327, 950059811) = 27917327
[pid 182531] pread64(7, "\25\4\25\200\3520\25\260\3520L\25\300\232\f\25\0\22\0\0\200\265\30\364\377\377\223\23\0\0\351["..., 42934649, 781436512) = 42934649
[pid 182524] pread64(7, "\25\4\25\360\235\1\25\372\235\1L\25\274'\25\0\22\0\0\370N\364w'\r$\0\0\340#\0\0"..., 30046190, 977977224) = 30046190
[pid 182529] pread64(7, "\25\4\25\200\1\25\206\1L\25\10\25\0\22\0\0@\360?\21\0\0\0DELIVER I"..., 12578489, 1068115977) = 12578489
[pid 182528] pread64(7, "\25\4\25\220\232\1\25\232\232\1L\25\304&\25\0\22\0\0\210M\364\207&\4$\0\0\30$\0\0"..., 30045950, 1008023516) = 30045950
[pid 182532] pread64(7, "\25\4\25\320\237\1\25\332\237\1L\25\364'\25\0\22\0\0\350O\364\347'\24$\0\0\361#\0\0"..., 30046302, 1038069570) = 30046302
[pid 182529] pread64(7, "\25\4\25\300\203\200\1\25\252\204\200\1L\25\360\200 \25\0\22\0\0\340\201@\364\377\377<G\33\0"..., 80582589, 1369723301) = 80582589
[pid 182526] pread64(7, "\25\4\25\302\232\200\1\25\272\350`L\25\256\250\22\25\0\22\0\0\241\215@\30\0\0\0\0002yv"..., 105865224, 175318765) = 105865224
[pid 182524] pread64(7, "\25\4\25\230\204\200\1\25\204\205\200\1L\25\206\201 \25\0\22\0\0\214\202@\364\377\377\203Cb\2"..., 32000712, 1337722478) = 32000712
[pid 182532] pread64(7, "\25\4\258\25<L\25\16\25\0\22\0\0\34l\2\0\0\0\3\0\0\0\4\0\0\0\5\0\0\0"..., 19789058, 1493240749) = 19789058
[pid 182528] pread64(7, "\25\4\25\200\3520\25\260\3520L\25\300\232\f\25\0\22\0\0\200\265\30\364\377\377(\365\0\0\210\20"..., 42934648, 1450306000) = 42934648
[pid 182526] pread64(7, "\25\4\25\232\1\25hL\25\26\25\0\22\0\0M\0\0\5\1\0\2\5\6\4\0\4\t\7\r\1\0"..., 27895361, 1618893128) = 27895361
[pid 182524] pread64(7, "\25\4\25\360\235\1\25\372\235\1L\25\274'\25\0\22\0\0\370N\364w'c'\0\0u'\0\0"..., 30046191, 1646788575) = 30046191
[pid 182532] pread64(7, "\25\4\25\220\232\1\25\232\232\1L\25\304&\25\0\22\0\0\210M\364\207&\202'\0\0X'\0\0"..., 30045951, 1676834868) = 30045951
[pid 182530] pread64(7, "\25\4\25\250\274\200\1\25\362\201aL\25\230\255\22\25\0\22\0\0\224\236@\30\0\0\0\0\25 g"..., 105864556, 844195126) = 105864556
[pid 182526] pread64(7, "\25\4\25\200\1\25\206\1L\25\10\25\0\22\0\0@\360?\21\0\0\0DELIVER I"..., 12578500, 1736927331) = 12578500
[pid 182528] pread64(7, "\25\4\25\320\237\1\25\332\237\1L\25\364'\25\0\22\0\0\350O\364\347'f'\0\0\203'\0\0"..., 30046303, 1706880923) = 30046303
[pid 182529] pread64(7, "\25\4\25\346\201\200\1\25\312\324`L\25\352\244\22\25\0\22\0\0\363\200@\30\0\0\0\0e`R"..., 105863090, 1513029909) = 105863090
[pid 182527] pread64(7, "\25\4\25\236\315\200\1\25\210\2116L\25\352\226\4\25\0\22\0\0\317\246@\360O\27\0\0\0eg"..., 257055150, 411800685) = 257055150
[pid 182531] pread64(7, "\25\4\25\252\315\201\1\25\312\3166L\25\270\233\4\25\0\22\0\0\325\346@\300\27\0\0\0ull"..., 257027785, 1080694556) = 257027785
[pid 182524] pread64(7, "\25\4\25\342\333\202\1\25\342\3756L\25\310\237\4\25\0\22\0\0\361\255A\300\f\0\0\0tly"..., 257049221, 1749505921) = 257049221
[pid 182543] close(7) = 0
(arrow-release-12) pace@pace-desktop:~/dev/experiments/parquet-partial-read$ strace --quiet=attach,exit -z -f -P /home/pace/dev/data/lineitem_10.parquet python one_column_parquet.py
[pid 182444] stat("/home/pace/dev/data/lineitem_10.parquet", {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182444] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182463] pread64(7, "\1\377\241=\0211\22\341,\r%\16\241\10Q\243E\30A\212&\300\tI\310\16\341W!\210\16\206"..., 65536, 2006496747) = 65536
[pid 182444] close(7) = 0
[pid 182444] openat(AT_FDCWD, "/home/pace/dev/data/lineitem_10.parquet", O_RDONLY) = 7
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182444] fstat(7, {st_mode=S_IFREG|0664, st_size=2006562283, ...}) = 0
[pid 182444] fadvise64(7, 31980792, 80584237, POSIX_FADV_WILLNEED) = 0
[pid 182444] fadvise64(7, 700852406, 80583996, POSIX_FADV_WILLNEED) = 0
[pid 182444] fadvise64(7, 1369723301, 80582589, POSIX_FADV_WILLNEED) = 0
[pid 182463] pread64(7, "\25\4\25\270\250\200\1\25\244\251\200\1L\25\216\212 \25\0\22\0\0\234\224@\364\377\377\26\256\27\0"..., 80584237, 31980792) = 80584237
[pid 182465] pread64(7, "\25\4\25\350\253\200\1\25\324\254\200\1L\25\372\212 \25\0\22\0\0\364\225@\364\377\377\376\207\23\0"..., 80583996, 700852406) = 80583996
[pid 182466] pread64(7, "\25\4\25\300\203\200\1\25\252\204\200\1L\25\360\200 \25\0\22\0\0\340\201@\364\377\377<G\33\0"..., 80582589, 1369723301) = 80582589
[pid 182470] close(7) = 0
# Read the lineitem Parquet file with no column selection passed to read_table.
# The table returned by read_table is not kept; the script only triggers the
# read so that its file I/O can be measured externally.
# NOTE(review): presumably this is the `whole_file_parquet.py` script named in
# the traces -- confirm against the gist's file list.
import pyarrow.parquet as pq

parquet_path = "/home/pace/dev/data/lineitem_10.parquet"
pq.read_table(parquet_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment