Skip to content

Instantly share code, notes, and snippets.

@heronshoes
Last active April 7, 2022 10:56
Show Gist options
  • Save heronshoes/8c7c53e666d3a569a6e077a76e5c849a to your computer and use it in GitHub Desktop.
Save heronshoes/8c7c53e666d3a569a6e077a76e5c849a to your computer and use it in GitHub Desktop.
CSV_benchmark

Benchmark of CSV parsing

This is a CSV parsing benchmark for Ruby.

The main article is written in the Jupyter notebook (ipynb) below.

Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "24dc973b-e47b-4e17-9513-bc3067c86a1b",
"metadata": {},
"source": [
"## Benchmark of Ruby CSV readers"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1688425e-c08e-4460-9999-05793d23e1f2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"ruby 3.1.1p18 (2022-02-18 revision 53f5fc4236) [x86_64-darwin20]\""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"RUBY_DESCRIPTION"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "6f8c68d1-a46a-40ff-aea2-979cbde56bf7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{:csv=>\"3.2.2\", :Arrow=>\"7.0.0\", :DuckDB=>\"0.3.2.0\"}"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'csv'\n",
"require 'fastest_csv'\n",
"require 'arrow'\n",
"require 'duckdb'\n",
"require 'benchmark/ips'\n",
"\n",
"{ csv: CSV::VERSION,\n",
"# fastestCSV: FastestCSV::VERSION, # => 0.0.4; left out because the gem does not 'require' its version.rb.\n",
" Arrow: Arrow::VERSION,\n",
" DuckDB: DuckDB::VERSION}"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "6f545a8e-8f35-4720-a1ae-cf31a7731572",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
":benchmark_read_csv"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Benchmark the CSV readers (CSV.read, FastestCSV, Arrow::Table.load, DuckDB) on the given file.\n",
"def benchmark_read_csv(file)\n",
" Benchmark.ips do |b|\n",
" b.time = 10\n",
" b.report(\"CSV.read\") { CSV.read(file) }\n",
" b.report(\"FastestCSV\") { FastestCSV.read(file) }\n",
" b.report(\"Arrow::table\") { Arrow::Table.load(file) }\n",
" b.report(\"DuckDB\") {\n",
" db = DuckDB::Database.open\n",
" con = db.connect\n",
" con.query(\"SELECT * FROM read_csv_auto('#{file}')\")\n",
" }\n",
" b.compare!(order: :baseline)\n",
" end\n",
" nil\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "d055e32c-10d4-4f28-a3b4-cdae9dd4b0eb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
":download"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# CSV file downloader.\n",
"# Saves the CSV file locally (unless it already exists) and returns its file name.\n",
"def download(url)\n",
" require 'open-uri'\n",
" \n",
" file = File.basename(url)\n",
" unless File.exist?(file)\n",
" URI.open(url) { |r| IO.copy_stream(r, file) }\n",
" end\n",
" file\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "2a5c1183-57c2-4dbe-9676-70b960e1205b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"0.2.8\""
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'rover'\n",
"require './prepend_rover'\n",
"Rover::VERSION"
]
},
{
"cell_type": "markdown",
"id": "bfe36b99-7d0e-4413-bcdb-c8d2960c7e61",
"metadata": {},
"source": [
"### diamonds dataset"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "d781708d-0771-4d40-b742-1d03f9c86fbc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warming up --------------------------------------\n",
" CSV.read 1.000 i/100ms\n",
" FastestCSV 1.000 i/100ms\n",
" Arrow::table 5.000 i/100ms\n",
" DuckDB 1.000 i/100ms\n",
"Calculating -------------------------------------\n",
" CSV.read 1.619 (± 0.0%) i/s - 17.000 in 10.566813s\n",
" FastestCSV 7.132 (±14.0%) i/s - 71.000 in 10.134367s\n",
" Arrow::table 75.964 (±18.4%) i/s - 710.000 in 10.010672s\n",
" DuckDB 9.939 (±20.1%) i/s - 96.000 in 10.082964s\n",
"\n",
"Comparison:\n",
" CSV.read: 1.6 i/s\n",
" Arrow::table: 76.0 i/s - 46.92x (± 0.00) faster\n",
" DuckDB: 9.9 i/s - 6.14x (± 0.00) faster\n",
" FastestCSV: 7.1 i/s - 4.40x (± 0.00) faster\n",
"\n"
]
}
],
"source": [
"diamonds = \n",
" download(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv\")\n",
"\n",
"benchmark_read_csv(diamonds)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "3cabca90-94ea-47da-bf28-db0cd899e6c0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rover::DataFrame : 53940 observations of 10 variables.\n",
"vars : 7 numeric, 3 ruby objects\n",
" 1 :carat <float> [0.23, 0.21, 0.23, 0.29, 0.31, ...], 273 levels\n",
" 2 :cut <object> {\"Ideal\"=>21551, \"Premium\"=>13791, \"Good\"=>4906, \"Very Good\"=>12082, \"Fair\"=>1610}, 5 levels\n",
" 3 :color <object> [E, E, E, I, J, ...], 7 levels\n",
" 4 :clarity <object> [SI2, SI1, VS1, VS2, SI2, ...], 8 levels\n",
" 5 :depth <float> [61.5, 59.8, 56.9, 62.4, 63.3, ...], 184 levels\n",
" 6 :table <float> [55.0, 61.0, 65.0, 58.0, 58.0, ...], 127 levels\n",
" 7 :price <int> [326, 326, 327, 334, 335, ...], 11602 levels\n",
" 8 :x <float> [3.95, 3.89, 4.05, 4.2, 4.34, ...], 554 levels\n",
" 9 :y <float> [3.98, 3.84, 4.07, 4.23, 4.35, ...], 552 levels\n",
"10 :z <float> [2.43, 2.31, 2.31, 2.63, 2.75, ...], 375 levels\n",
"\n"
]
},
{
"data": {
"text/html": [
"<table><tr><th>carat</th><th>cut</th><th>color</th><th>clarity</th><th>depth</th><th>table</th><th>price</th><th>x</th><th>y</th><th>z</th></tr><tr><td>0.23</td><td>Ideal</td><td>E</td><td>SI2</td><td>61.5</td><td>55.0</td><td>326</td><td>3.95</td><td>3.98</td><td>2.43</td></tr><tr><td>0.21</td><td>Premium</td><td>E</td><td>SI1</td><td>59.8</td><td>61.0</td><td>326</td><td>3.89</td><td>3.84</td><td>2.31</td></tr><tr><td>0.23</td><td>Good</td><td>E</td><td>VS1</td><td>56.9</td><td>65.0</td><td>327</td><td>4.05</td><td>4.07</td><td>2.31</td></tr><tr><td colspan='10'>&#8942;</td></tr><tr><td>0.7</td><td>Very Good</td><td>D</td><td>SI1</td><td>62.8</td><td>60.0</td><td>2757</td><td>5.66</td><td>5.68</td><td>3.56</td></tr><tr><td>0.86</td><td>Premium</td><td>H</td><td>SI2</td><td>61.0</td><td>58.0</td><td>2757</td><td>6.15</td><td>6.12</td><td>3.74</td></tr><tr><td>0.75</td><td>Ideal</td><td>D</td><td>SI2</td><td>62.2</td><td>55.0</td><td>2757</td><td>5.83</td><td>5.87</td><td>3.64</td></tr></table>"
],
"text/plain": [
"Rover::DataFrame : 53940 observations of 10 variables.\n",
"vars : 7 numeric, 3 ruby objects\n",
" 1 :carat <float> [0.23, 0.21, 0.23, 0.29, 0.31, ...], 273 levels\n",
" 2 :cut <object> {\"Ideal\"=>21551, \"Premium\"=>13791, \"Good\"=>4906, \"Very Good\"=>12082, \"Fair\"=>1610}, 5 levels\n",
" 3 :color <object> [E, E, E, I, J, ...], 7 levels\n",
" 4 :clarity <object> [SI2, SI1, VS1, VS2, SI2, ...], 8 levels\n",
" 5 :depth <float> [61.5, 59.8, 56.9, 62.4, 63.3, ...], 184 levels\n",
" 6 :table <float> [55.0, 61.0, 65.0, 58.0, 58.0, ...], 127 levels\n",
" 7 :price <int> [326, 326, 327, 334, 335, ...], 11602 levels\n",
" 8 :x <float> [3.95, 3.89, 4.05, 4.2, 4.34, ...], 554 levels\n",
" 9 :y <float> [3.98, 3.84, 4.07, 4.23, 4.35, ...], 552 levels\n",
"10 :z <float> [2.43, 2.31, 2.31, 2.63, 2.75, ...], 375 levels\n"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p Rover.read_csv(diamonds)"
]
},
{
"cell_type": "markdown",
"id": "c0fa85ce-e85b-4787-b5e5-7a36215614d6",
"metadata": {},
"source": [
"### weather dataset"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "809c0203-6aee-428f-b4ad-38c7b717dcf2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warming up --------------------------------------\n",
" CSV.read 1.000 i/100ms\n",
" FastestCSV 1.000 i/100ms\n",
" Arrow::table 9.000 i/100ms\n",
" DuckDB 1.000 i/100ms\n",
"Calculating -------------------------------------\n",
" CSV.read 3.063 (± 0.0%) i/s - 31.000 in 10.153527s\n",
" FastestCSV 16.495 (±30.3%) i/s - 151.000 in 10.076591s\n",
" Arrow::table 95.463 (±12.6%) i/s - 945.000 in 10.073966s\n",
" DuckDB 7.357 (± 0.0%) i/s - 74.000 in 10.062782s\n",
"\n",
"Comparison:\n",
" CSV.read: 3.1 i/s\n",
" Arrow::table: 95.5 i/s - 31.17x (± 0.00) faster\n",
" FastestCSV: 16.5 i/s - 5.39x (± 0.00) faster\n",
" DuckDB: 7.4 i/s - 2.40x (± 0.00) faster\n",
"\n"
]
}
],
"source": [
"weather =\n",
" download(\"https://vincentarelbundock.github.io/Rdatasets/csv/nycflights13/weather.csv\")\n",
"benchmark_read_csv(weather)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "c33b8b3c-e72a-4bca-982f-acd6cfafd140",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rover::DataFrame : 26115 observations of 16 variables.\n",
"vars : 7 numeric, 9 ruby objects\n",
" 1 : <int> [1, 2, 3, 4, 5, ...], 26115 levels\n",
" 2 :origin <object> {\"EWR\"=>8703, \"JFK\"=>8706, \"LGA\"=>8706}, 3 levels\n",
" 3 :year <int> {2013=>26115}, 1 level\n",
" 4 :month <int> [1, 1, 1, 1, 1, ...], 12 levels\n",
" 5 :day <int> [1, 1, 1, 1, 1, ...], 31 levels\n",
" 6 :hour <int> [1, 2, 3, 4, 5, ...], 24 levels\n",
" 7 :temp <object> [39.02, 39.02, 39.02, 39.92, 39.02, ...], 174 levels\n",
" 8 :dewp <object> [26.06, 26.96, 28.04, 28.04, 28.04, ...], 154 levels\n",
" 9 :humid <object> [59.37, 61.63, 64.43, 62.21, 64.43, ...], 2500 levels\n",
"10 :wind_dir <object> [270, 250, 240, 250, 260, ...], 38 levels\n",
"11 :wind_speed <object> [10.35702, 8.05546, 11.5078, 12.65858, 12.65858, ...], 37 levels\n",
"12 :wind_gust <object> [NA, NA, NA, NA, NA, ...], 38 levels\n",
"13 :precip <float> [0.0, 0.0, 0.0, 0.0, 0.0, ...], 59 levels\n",
"14 :pressure <object> [1012, 1012.3, 1012.5, 1012.2, 1011.9, ...], 469 levels\n",
"15 :visib <float> [10.0, 10.0, 10.0, 10.0, 10.0, ...], 20 levels\n",
"16 :time_hour <object> [2013-01-01 01:00:00, 2013-01-01 02:00:00, 2013-01-01 03:00:00, 2013-01-01 04:00:00, 2013-01-01 05:00:00, ...], 8713 levels\n",
"\n"
]
},
{
"data": {
"text/html": [
"<table><tr><th></th><th>origin</th><th>year</th><th>month</th><th>day</th><th>hour</th><th>temp</th><th>&#8230;</th><th>wind_dir</th><th>wind_speed</th><th>wind_gust</th><th>precip</th><th>pressure</th><th>visib</th><th>time_hour</th></tr><tr><td>1</td><td>EWR</td><td>2013</td><td>1</td><td>1</td><td>1</td><td>39.02</td><td rowspan='3'>&#8230;</td><td>270</td><td>10.35702</td><td>NA</td><td>0.0</td><td>1012.0</td><td>10.0</td><td>2013-01-01 01:00:00</td></tr><tr><td>2</td><td>EWR</td><td>2013</td><td>1</td><td>1</td><td>2</td><td>39.02</td><td>250</td><td>8.05546</td><td>NA</td><td>0.0</td><td>1012.3</td><td>10.0</td><td>2013-01-01 02:00:00</td></tr><tr><td>3</td><td>EWR</td><td>2013</td><td>1</td><td>1</td><td>3</td><td>39.02</td><td>240</td><td>11.5078</td><td>NA</td><td>0.0</td><td>1012.5</td><td>10.0</td><td>2013-01-01 03:00:00</td></tr><tr><td colspan='7'>&#8942;</td><td>&#8945;</td><td colspan='7'>&#8942;</td></tr><tr><td>26113</td><td>LGA</td><td>2013</td><td>12</td><td>30</td><td>16</td><td>32.0</td><td rowspan='3'>&#8230;</td><td>340</td><td>14.96014</td><td>23.0156</td><td>0.0</td><td>1019.5</td><td>10.0</td><td>2013-12-30 16:00:00</td></tr><tr><td>26114</td><td>LGA</td><td>2013</td><td>12</td><td>30</td><td>17</td><td>30.92</td><td>320</td><td>17.2617</td><td>NA</td><td>0.0</td><td>1019.9</td><td>10.0</td><td>2013-12-30 17:00:00</td></tr><tr><td>26115</td><td>LGA</td><td>2013</td><td>12</td><td>30</td><td>18</td><td>28.94</td><td>330</td><td>18.41248</td><td>NA</td><td>0.0</td><td>1020.9</td><td>10.0</td><td>2013-12-30 18:00:00</td></tr></table>"
],
"text/plain": [
"Rover::DataFrame : 26115 observations of 16 variables.\n",
"vars : 7 numeric, 9 ruby objects\n",
" 1 : <int> [1, 2, 3, 4, 5, ...], 26115 levels\n",
" 2 :origin <object> {\"EWR\"=>8703, \"JFK\"=>8706, \"LGA\"=>8706}, 3 levels\n",
" 3 :year <int> {2013=>26115}, 1 level\n",
" 4 :month <int> [1, 1, 1, 1, 1, ...], 12 levels\n",
" 5 :day <int> [1, 1, 1, 1, 1, ...], 31 levels\n",
" 6 :hour <int> [1, 2, 3, 4, 5, ...], 24 levels\n",
" 7 :temp <object> [39.02, 39.02, 39.02, 39.92, 39.02, ...], 174 levels\n",
" 8 :dewp <object> [26.06, 26.96, 28.04, 28.04, 28.04, ...], 154 levels\n",
" 9 :humid <object> [59.37, 61.63, 64.43, 62.21, 64.43, ...], 2500 levels\n",
"10 :wind_dir <object> [270, 250, 240, 250, 260, ...], 38 levels\n",
"11 :wind_speed <object> [10.35702, 8.05546, 11.5078, 12.65858, 12.65858, ...], 37 levels\n",
"12 :wind_gust <object> [NA, NA, NA, NA, NA, ...], 38 levels\n",
"13 :precip <float> [0.0, 0.0, 0.0, 0.0, 0.0, ...], 59 levels\n",
"14 :pressure <object> [1012, 1012.3, 1012.5, 1012.2, 1011.9, ...], 469 levels\n",
"15 :visib <float> [10.0, 10.0, 10.0, 10.0, 10.0, ...], 20 levels\n",
"16 :time_hour <object> [2013-01-01 01:00:00, 2013-01-01 02:00:00, 2013-01-01 03:00:00, 2013-01-01 04:00:00, 2013-01-01 05:00:00, ...], 8713 levels\n"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p Rover.read_csv(weather)"
]
},
{
"cell_type": "markdown",
"id": "47904afa-9991-4d69-8705-a69c16f606ed",
"metadata": {},
"source": [
"### flights dataset"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "7bcf5383-def3-4582-968c-2b07e97ecd4b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warming up --------------------------------------\n",
" CSV.read 1.000 i/100ms\n",
" FastestCSV 1.000 i/100ms\n",
" Arrow::table 1.000 i/100ms\n",
" DuckDB 1.000 i/100ms\n",
"Calculating -------------------------------------\n",
" CSV.read 0.076 (± 0.0%) i/s - 1.000 in 13.116515s\n",
" FastestCSV 0.127 (± 0.0%) i/s - 2.000 in 34.110241s\n",
" Arrow::table 6.327 (±15.8%) i/s - 61.000 in 10.045798s\n",
" DuckDB 1.161 (± 0.0%) i/s - 12.000 in 10.407487s\n",
"\n",
"Comparison:\n",
" CSV.read: 0.1 i/s\n",
" Arrow::table: 6.3 i/s - 82.99x (± 0.00) faster\n",
" DuckDB: 1.2 i/s - 15.23x (± 0.00) faster\n",
" FastestCSV: 0.1 i/s - 1.67x (± 0.00) faster\n",
"\n"
]
}
],
"source": [
"flights =\n",
" download(\"https://vincentarelbundock.github.io/Rdatasets/csv/nycflights13/flights.csv\")\n",
"benchmark_read_csv(flights)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "7e2f752b-e93d-4162-824d-96b5009a067d",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rover::DataFrame : 336776 observations of 20 variables.\n",
"vars : 10 numeric, 10 ruby objects\n",
" 1 : <int> [1, 2, 3, 4, 5, ...], 336776 levels\n",
" 2 :year <int> {2013=>336776}, 1 level\n",
" 3 :month <int> [1, 1, 1, 1, 1, ...], 12 levels\n",
" 4 :day <int> [1, 1, 1, 1, 1, ...], 31 levels\n",
" 5 :dep_time <object> [517, 533, 542, 544, 554, ...], 1319 levels\n",
" 6 :sched_dep_time <int> [515, 529, 540, 545, 600, ...], 1021 levels\n",
" 7 :dep_delay <object> [2, 4, 2, -1, -6, ...], 528 levels\n",
" 8 :arr_time <object> [830, 850, 923, 1004, 812, ...], 1412 levels\n",
" 9 :sched_arr_time <int> [819, 830, 850, 1022, 837, ...], 1163 levels\n",
"10 :arr_delay <object> [11, 20, 33, -18, -25, ...], 578 levels\n",
"11 :carrier <object> [UA, UA, AA, B6, DL, ...], 16 levels\n",
"12 :flight <int> [1545, 1714, 1141, 725, 461, ...], 3844 levels\n",
"13 :tailnum <object> [N14228, N24211, N619AA, N804JB, N668DN, ...], 4044 levels\n",
"14 :origin <object> {\"EWR\"=>120835, \"LGA\"=>104662, \"JFK\"=>111279}, 3 levels\n",
"15 :dest <object> [IAH, IAH, MIA, BQN, ATL, ...], 105 levels\n",
"16 :air_time <object> [227, 227, 160, 183, 116, ...], 510 levels\n",
"17 :distance <int> [1400, 1416, 1089, 1576, 762, ...], 214 levels\n",
"18 :hour <int> [5, 5, 5, 5, 6, ...], 20 levels\n",
"19 :minute <int> [15, 29, 40, 45, 0, ...], 60 levels\n",
"20 :time_hour <object> [2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 06:00:00, ...], 6936 levels\n",
"\n"
]
},
{
"data": {
"text/html": [
"<table><tr><th></th><th>year</th><th>month</th><th>day</th><th>dep_time</th><th>sched_dep_time</th><th>dep_delay</th><th>&#8230;</th><th>origin</th><th>dest</th><th>air_time</th><th>distance</th><th>hour</th><th>minute</th><th>time_hour</th></tr><tr><td>1</td><td>2013</td><td>1</td><td>1</td><td>517</td><td>515</td><td>2</td><td rowspan='3'>&#8230;</td><td>EWR</td><td>IAH</td><td>227</td><td>1400</td><td>5</td><td>15</td><td>2013-01-01 05:00:00</td></tr><tr><td>2</td><td>2013</td><td>1</td><td>1</td><td>533</td><td>529</td><td>4</td><td>LGA</td><td>IAH</td><td>227</td><td>1416</td><td>5</td><td>29</td><td>2013-01-01 05:00:00</td></tr><tr><td>3</td><td>2013</td><td>1</td><td>1</td><td>542</td><td>540</td><td>2</td><td>JFK</td><td>MIA</td><td>160</td><td>1089</td><td>5</td><td>40</td><td>2013-01-01 05:00:00</td></tr><tr><td colspan='7'>&#8942;</td><td>&#8945;</td><td colspan='7'>&#8942;</td></tr><tr><td>336774</td><td>2013</td><td>9</td><td>30</td><td>NA</td><td>1210</td><td>NA</td><td rowspan='3'>&#8230;</td><td>LGA</td><td>BNA</td><td>NA</td><td>764</td><td>12</td><td>10</td><td>2013-09-30 12:00:00</td></tr><tr><td>336775</td><td>2013</td><td>9</td><td>30</td><td>NA</td><td>1159</td><td>NA</td><td>LGA</td><td>CLE</td><td>NA</td><td>419</td><td>11</td><td>59</td><td>2013-09-30 11:00:00</td></tr><tr><td>336776</td><td>2013</td><td>9</td><td>30</td><td>NA</td><td>840</td><td>NA</td><td>LGA</td><td>RDU</td><td>NA</td><td>431</td><td>8</td><td>40</td><td>2013-09-30 08:00:00</td></tr></table>"
],
"text/plain": [
"Rover::DataFrame : 336776 observations of 20 variables.\n",
"vars : 10 numeric, 10 ruby objects\n",
" 1 : <int> [1, 2, 3, 4, 5, ...], 336776 levels\n",
" 2 :year <int> {2013=>336776}, 1 level\n",
" 3 :month <int> [1, 1, 1, 1, 1, ...], 12 levels\n",
" 4 :day <int> [1, 1, 1, 1, 1, ...], 31 levels\n",
" 5 :dep_time <object> [517, 533, 542, 544, 554, ...], 1319 levels\n",
" 6 :sched_dep_time <int> [515, 529, 540, 545, 600, ...], 1021 levels\n",
" 7 :dep_delay <object> [2, 4, 2, -1, -6, ...], 528 levels\n",
" 8 :arr_time <object> [830, 850, 923, 1004, 812, ...], 1412 levels\n",
" 9 :sched_arr_time <int> [819, 830, 850, 1022, 837, ...], 1163 levels\n",
"10 :arr_delay <object> [11, 20, 33, -18, -25, ...], 578 levels\n",
"11 :carrier <object> [UA, UA, AA, B6, DL, ...], 16 levels\n",
"12 :flight <int> [1545, 1714, 1141, 725, 461, ...], 3844 levels\n",
"13 :tailnum <object> [N14228, N24211, N619AA, N804JB, N668DN, ...], 4044 levels\n",
"14 :origin <object> {\"EWR\"=>120835, \"LGA\"=>104662, \"JFK\"=>111279}, 3 levels\n",
"15 :dest <object> [IAH, IAH, MIA, BQN, ATL, ...], 105 levels\n",
"16 :air_time <object> [227, 227, 160, 183, 116, ...], 510 levels\n",
"17 :distance <int> [1400, 1416, 1089, 1576, 762, ...], 214 levels\n",
"18 :hour <int> [5, 5, 5, 5, 6, ...], 20 levels\n",
"19 :minute <int> [15, 29, 40, 45, 0, ...], 60 levels\n",
"20 :time_hour <object> [2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 06:00:00, ...], 6936 levels\n"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p Rover.read_csv(flights)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Ruby 3.1.1",
"language": "ruby",
"name": "ruby"
},
"language_info": {
"file_extension": ".rb",
"mimetype": "application/x-ruby",
"name": "ruby",
"version": "3.1.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment