Last active
March 26, 2024 23:25
Revisions
-
hinkelman revised this gist
Mar 26, 2024 . 1 changed file with 80 additions and 103 deletions.There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,104 +1,86 @@ (import (dataframe)) (define flights (tsv->dataframe "nycflights.tsv")) (dataframe-dim flights) ;; 3.1 Introduction ;; 3.1.3 (-> flights (dataframe-filter* (dest) (string=? dest "IAH")) (dataframe-aggregate* (year month day) (arr_delay (arr_delay) (inexact (mean arr_delay)))) (dataframe-display)) ;; 3.2 Rows ;; 3.2.1 (define delayed-flights (-> flights (dataframe-filter* (dep_delay) (and (not (na? dep_delay)) (> dep_delay 120))))) (apply min ($ delayed-flights 'dep_delay)) (-> flights (dataframe-filter* (month day) (and (not (or (na? month) (na? day))) (= month 1) (= day 1))) (dataframe-glimpse)) (define jan-feb-flights (-> flights (dataframe-filter* (month) (and (not (na? month)) (or (= month 1) (= month 2)))))) (remove-duplicates ($ jan-feb-flights 'month)) (-> flights (dataframe-filter* (month) (and (not (na? month)) (member month '(1 2)))) (dataframe-glimpse)) ;; 3.2.3 (-> flights (dataframe-filter* (year month day dep_time) (not (or (na? year) (na? month) (na? day) (na? dep_time)))) (dataframe-sort* (< year) (< month) (< day) (< dep_time)) (dataframe-glimpse)) (-> flights (dataframe-filter* (dep_delay) (not (na? dep_delay))) (dataframe-sort* (> dep_delay)) (dataframe-glimpse)) ;; 3.2.4 (-> flights (dataframe-unique) (dataframe-glimpse)) (-> flights (dataframe-select* origin dest) (dataframe-unique) (dataframe-glimpse)) ;; .keep_all in dplyr::distinct keeps all columns ;; for the first occurrence of the unique combos @@ -112,226 +94,221 @@ (> n)) (dataframe-display)) ;; 3.2.5 Exercises ;; not included in blog post (and only did 1-4) ;; 1. Single pipeline for each condition (-> flights (dataframe-filter* (arr_delay) (and (not (na? arr_delay)) (>= arr_delay 2))) (dataframe-glimpse)) (-> flights (dataframe-filter* (dest) (member dest '("IAH" "HOU"))) (dataframe-glimpse)) (-> flights (dataframe-filter* (carrier) (member carrier '("UA" "AA" "DL"))) (dataframe-glimpse)) (-> flights (dataframe-filter* (month) (and (not (na? month)) (member month '(7 8 9)))) (dataframe-glimpse))) (-> flights (dataframe-filter* (dep_delay arr_delay) (and (not (or (na? dep_delay) (na? arr_delay))) (<= dep_delay 0) (>= arr_delay 120))) (dataframe-glimpse)) (-> flights (dataframe-filter* (dep_delay arr_delay) (and (not (or (na? dep_delay) (na? arr_delay))) ;; no flights met this condition (>= dep_delay 60) (<= arr_delay -30)))) ;; no flights met this condition ;; triggers an exception ;; 2. Sort for longest departure delays and earliest flights (-> flights (dataframe-filter* (dep_delay) (not (na? dep_delay))) (dataframe-sort* (> dep_delay)) (dataframe-glimpse)) (-> flights (dataframe-filter* (dep_time) (not (na? dep_time))) (dataframe-sort* (< dep_time)) (dataframe-glimpse)) ;; 3. Sort flights to find the fastest flights (-> flights (dataframe-filter* (distance air_time) (not (or (na? distance) (na? air_time)))) (dataframe-modify* (speed (distance air_time) (* (/ distance air_time) 60))) (dataframe-sort* (> speed)) (dataframe-glimpse)) ;; 4. Was there a flight on every day in 2013? (remove-duplicates ($ flights 'year)) (define flights2 (-> flights (dataframe-filter* (month day) (not (or (na? month) (na? day)))) (dataframe-modify* (month-day (month day) (string-append (number->string month) "-" (number->string day)))))) (= 365 (length (remove-duplicates ($ flights2 'month-day)))) ;; 3.3 Columns ;; 3.3.1 (-> flights (dataframe-filter* (dep_delay arr_delay distance air_time) (not (or (na? dep_delay) (na? arr_delay) (na? distance) (na? air_time)))) (dataframe-modify* (gain (dep_delay arr_delay) (- dep_delay arr_delay)) (speed (distance air_time) (* (/ distance air_time) 60))) (dataframe-glimpse)) ;; no equivalent to .before or .after ;; using select instead to achieve similar affect as .after example (-> flights (dataframe-filter* (dep_delay arr_delay distance air_time) (not (or (na? dep_delay) (na? arr_delay) (na? distance) (na? air_time)))) (dataframe-modify* (gain (dep_delay arr_delay) (- dep_delay arr_delay)) (speed (distance air_time) (inexact (* (/ distance air_time) 60)))) (dataframe-select* year month day gain speed) (dataframe-display)) ;; 3.3.2 (-> flights (dataframe-select* year month day) (dataframe-display)) ;; none of the other advanced select examples work in dataframe->rowtable ;; 3.3.3 (-> flights (dataframe-rename* (tailnum tail_num)) (dataframe-glimpse)) ;; 3.3.4 ;; no equivalent ;; 3.3.5 exercises ;; skipped ;; 3.4 pipe (-> flights (dataframe-filter* (dest distance air_time) (and (not (or (na? dest) (na? distance) (na? air_time))) (string=? dest "IAH"))) (dataframe-modify* (speed (distance air_time) (inexact (* (/ distance air_time) 60)))) (dataframe-select* year month day dep_time carrier flight speed) (dataframe-sort* (> speed)) (dataframe-display)) (dataframe-display (dataframe-sort* (dataframe-select* (dataframe-modify* (dataframe-filter* flights (dest distance air_time) (and (not (or (na? dest) (na? distance) (na? air_time))) (string=? dest "IAH"))) (speed (distance air_time) (inexact (* (/ distance air_time) 60)))) year month day dep_time carrier flight speed) (> speed))) (define flights1 (dataframe-filter* flights (dest distance air_time) (and (not (or (na? dest) (na? distance) (na? air_time))) (string=? dest "IAH")))) (define flights2 (dataframe-modify* flights1 (speed (distance air_time) (inexact (* (/ distance air_time) 60))))) (define flights3 (dataframe-select* flights2 year month day dep_time carrier flight speed)) (dataframe-display (dataframe-sort* flights3 (> speed))) ;; 3.5 Groups ;; 3.5.2 ;; no automatic sorting in dataframe-aggregate (-> flights (dataframe-aggregate* (month) (avg_delay (dep_delay) (inexact (mean dep_delay)))) (dataframe-sort* (< month)) (dataframe-display)) (-> flights (dataframe-filter* (dep_delay) (not (na? dep_delay))) (dataframe-aggregate* (month) (avg_delay (dep_delay) (inexact (mean dep_delay))) (n (dep_delay) (length dep_delay))) (dataframe-sort* (< month)) (dataframe-display)) ;; 3.5.3 slice ;; no equivalent ;; 3.5.5 (define daily_flights (dataframe-aggregate* -
hinkelman created this gist
Jul 8, 2023 .There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,340 @@ (import (chez-stats) (dataframe)) (define flights (-> (read-delim "nycflights.tsv" #\tab) (rowtable->dataframe))) ;; using this as a check of missingness (define (all-true? . x) (for-all (lambda (y) y) x)) (dataframe-dim flights) ;; 4.1 Introduction ;; 4.1.3 (-> flights (dataframe-filter* (arr_delay dest) (and (all-true? arr_delay) (string=? dest "IAH"))) (dataframe-aggregate* (year month day) (arr_delay (arr_delay) (sum arr_delay))) (dataframe-display)) ;; 4.2 Rows ;; 4.2.1 (define delayed-flights (-> flights (dataframe-filter* (dep_delay) (and (all-true? dep_delay) (> dep_delay 120))))) (apply min ($ delayed-flights 'dep_delay)) (-> flights (dataframe-filter* (month day) (and (all-true? month day) (= month 1) (= day 1))) (dataframe-display)) (define jan-feb-flights (-> flights (dataframe-filter* (month) (and (all-true? month) (or (= month 1) (= month 2)))))) (dataframe-values-unique jan-feb-flights 'month) (-> flights (dataframe-filter* (month) (and (all-true? month) (member month '(1 2)))) (dataframe-display)) (define jan1 (-> flights (dataframe-filter* (month day) (and (all-true? month) (= month 1) (= day 1))))) ;; 4.2.3 (-> flights (dataframe-filter* (year month day dep_time) (all-true? year month day dep_time)) (dataframe-sort* (< year) (< month) (< day) (< dep_time)) (dataframe-display)) (-> flights (dataframe-filter* (dep_delay) (all-true? dep_delay)) (dataframe-sort* (> dep_delay)) (dataframe-display)) ;; 4.2.4 (-> flights (dataframe-unique) (dataframe-display)) (-> flights (dataframe-select 'origin 'dest) (dataframe-unique) (dataframe-display)) ;; .keep_all in dplyr::distinct keeps all columns ;; for the first occurrence of the unique combos ;; no equivalent in dataframe-unique (-> flights (dataframe-aggregate* (origin dest) (n (origin) (length origin))) (dataframe-sort* (> n)) (dataframe-display)) ;; 4.2.5 Exercises ;; not included in blog post (and only did 1-4) ;; 1. Single pipeline for each condition (-> flights (dataframe-filter* (arr_delay) (and (all-true? arr_delay) (>= arr_delay 2))) (dataframe-display)) (-> flights (dataframe-filter* (dest) (member dest '("IAH" "HOU"))) (dataframe-display)) (-> flights (dataframe-filter* (carrier) (member carrier '("UA" "AA" "DL"))) (dataframe-display)) (-> flights (dataframe-filter* (month) (and (all-true? month) (member month '(7 8 9)))) (dataframe-display)) (-> flights (dataframe-filter* (dep_delay arr_delay) (and (all-true? dep_delay arr_delay) (<= dep_delay 0) (>= arr_delay 120))) (dataframe-display)) (-> flights (dataframe-filter* (dep_delay arr_delay) (and (all-true? dep_delay arr_delay) ;; no flights met this condition (>= dep_delay 60) (<= arr_delay -30))) ;; no flights met this condition ;; empty dataframes trigger assertion-violation ;; in dataframe-display (dataframe-display)) ;; 2. Sort for longest departure delays and earliest flights (-> flights (dataframe-filter* (dep_delay) (all-true? dep_delay)) (dataframe-sort* (> dep_delay)) (dataframe-display)) (-> flights (dataframe-filter* (dep_time) (all-true? dep_time)) (dataframe-sort* (< dep_time)) (dataframe-display)) ;; 3. Sort flights to find the fastest flights (-> flights (dataframe-filter* (distance air_time) (all-true? distance air_time)) (dataframe-modify* (speed (distance air_time) (* (/ distance air_time) 60))) (dataframe-sort* (> speed)) (dataframe-display)) ;; 4. Was there a flight on every day in 2013? (dataframe-values-unique flights 'year) (define flights2 (-> flights (dataframe-filter* (month day) (all-true? month day)) (dataframe-modify* (month-day (month day) (string-append (number->string month) "-" (number->string day)))))) (= 365 (length (dataframe-values-unique flights2 'month-day))) ;; 4.3 Columns ;; 4.3.1 (-> flights (dataframe-filter* (dep_delay arr_delay distance air_time) (all-true? dep_delay arr_delay distance air_time)) (dataframe-modify* (gain (dep_delay arr_delay) (- dep_delay arr_delay)) (speed (distance air_time) (* (/ distance air_time) 60))) (dataframe-display)) ;; no equivalent to .before or .after ;; using select instead to achieve similar affect as .after example (-> flights (dataframe-filter* (dep_delay arr_delay distance air_time) (all-true? dep_delay arr_delay distance air_time)) (dataframe-modify* (gain (dep_delay arr_delay) (- dep_delay arr_delay)) (speed (distance air_time) (* (/ distance air_time) 60))) (dataframe-select 'year 'month 'day 'gain 'speed) (dataframe-display)) ;; 4.3.2 (-> flights (dataframe-select 'year 'month 'day) (dataframe-display)) ;; none of the other advanced select examples work in dataframe->rowtable ;; 4.3.3 (-> flights (dataframe-rename '(tailnum tail_num)) (dataframe-display)) ;; 4.3.4 ;; no equivalent ;; 4.3.5 exercises ;; skipped ;; 4.4 pipe (-> flights (dataframe-filter* (dest distance air_time) (and (all-true? dest distance air_time) (string=? dest "IAH"))) (dataframe-modify* (speed (distance air_time) (exact->inexact (* (/ distance air_time) 60)))) (dataframe-select 'year 'month 'day 'dep_time 'carrier 'flight 'speed) (dataframe-sort* (> speed)) (dataframe-display)) (dataframe-display (dataframe-sort* (dataframe-select (dataframe-modify* (dataframe-filter* flights (dest distance air_time) (and (all-true? dest distance air_time) (string=? dest "IAH"))) (speed (distance air_time) (exact->inexact (* (/ distance air_time) 60)))) 'year 'month 'day 'dep_time 'carrier 'flight 'speed) (> speed))) (define flights1 (dataframe-filter* flights (dest distance air_time) (and (all-true? dest distance air_time) (string=? dest "IAH")))) (define flights2 (dataframe-modify* flights1 (speed (distance air_time) (exact->inexact (* (/ distance air_time) 60))))) (define flights3 (dataframe-select flights2 'year 'month 'day 'dep_time 'carrier 'flight 'speed)) (dataframe-display (dataframe-sort* flights3 (> speed))) ;; 4.5 Groups ;; 4.5.2 ;; no automatic sorting in dataframe-aggregate (-> flights (dataframe-filter* (dep_delay) (all-true? dep_delay)) (dataframe-aggregate* (month) (avg_delay (dep_delay) (exact->inexact (mean dep_delay)))) (dataframe-sort* (< month)) (dataframe-display)) (-> flights (dataframe-filter* (dep_delay) (all-true? dep_delay)) (dataframe-aggregate* (month) (avg_delay (dep_delay) (exact->inexact (mean dep_delay))) (n (dep_delay) (length dep_delay))) (dataframe-sort* (< month)) (dataframe-display)) ;; 4.5.3 slice ;; no equivalent ;; 4.5.5 (define daily_flights (dataframe-aggregate* flights (year month day) (n (dep_time) (length dep_time))))