Skip to content

Instantly share code, notes, and snippets.

@DavisVaughan
Last active July 7, 2017 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DavisVaughan/36030e82efb227eddfc8104bfc0d7935 to your computer and use it in GitHub Desktop.
Save DavisVaughan/36030e82efb227eddfc8104bfc0d7935 to your computer and use it in GitHub Desktop.
odbc-access-testing
library(DBI)
library(dbplyr)
suppressPackageStartupMessages(library(dplyr))
#> Warning: package 'dplyr' was built under R version 3.4.1
options(tibble.width = Inf)
cn <- dbConnect(odbc::odbc(), dsn = "access-odbc")
#dbListTables(cn)
#### Make a tbl connection --------------------------------------------
pe2000 <- tbl(cn, "PE2000")
#### Testing tbl connection -------------------------------------------
# No TOP needed
show_query(pe2000)
#> <SQL>
#> SELECT *
#> FROM `PE2000`
collect_pe2000 <- collect(pe2000)
collect_pe2000
#> # A tibble: 1,802,000 x 3
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 NA 0 0.0000000000
#> 2 1 1 -0.0052702289
#> 3 1 2 0.0226466382
#> 4 1 3 -0.0077055008
#> 5 1 4 0.0078160326
#> 6 1 5 0.0055670534
#> 7 1 6 0.0193203957
#> 8 1 7 0.0102388967
#> 9 1 8 0.0005576238
#> 10 1 9 0.0154860805
#> # ... with 1,801,990 more rows
# TOP used
show_query(head(pe2000, 10))
#> <SQL>
#> SELECT TOP 10 *
#> FROM `PE2000`
pe2000
#> # Source: table<PE2000> [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 NA 0 0.0000000000
#> 2 1 1 -0.0052702289
#> 3 1 2 0.0226466382
#> 4 1 3 -0.0077055008
#> 5 1 4 0.0078160326
#> 6 1 5 0.0055670534
#> 7 1 6 0.0193203957
#> 8 1 7 0.0102388967
#> 9 1 8 0.0005576238
#> 10 1 9 0.0154860805
#> # ... with more rows
head(pe2000, 10)
#> # Source: lazy query [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 NA 0 0.0000000000
#> 2 1 1 -0.0052702289
#> 3 1 2 0.0226466382
#> 4 1 3 -0.0077055008
#> 5 1 4 0.0078160326
#> 6 1 5 0.0055670534
#> 7 1 6 0.0193203957
#> 8 1 7 0.0102388967
#> 9 1 8 0.0005576238
#> 10 1 9 0.0154860805
#> # ... with more rows
#### Testing DISTINCT -----------------------------------------------
pe2000_dis <- pe2000 %>% distinct()
show_query(pe2000_dis)
#> <SQL>
#> SELECT DISTINCT *
#> FROM `PE2000`
pe2000_dis
#> # Source: lazy query [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 NA 0 0.0000000000
#> 2 1 1 -0.0052702289
#> 3 1 2 0.0226466382
#> 4 1 3 -0.0077055008
#> 5 1 4 0.0078160326
#> 6 1 5 0.0055670534
#> 7 1 6 0.0193203957
#> 8 1 7 0.0102388967
#> 9 1 8 0.0005576238
#> 10 1 9 0.0154860805
#> # ... with more rows
#### (FIX) Testing SELECT --------------------------------------------
pe_trial <- pe2000 %>% select(Trial)
show_query(pe_trial)
#> <SQL>
#> SELECT `Trial` AS `Trial`
#> FROM `PE2000`
# Circular reference caused by alias 'Trial' in query definition's SELECT list.
# Cant do `Trial` as `Trial` in Access
pe_trial
#> Error: <SQL> 'SELECT TOP 10 `Trial` AS `Trial`
#> FROM `PE2000`'
#> nanodbc/nanodbc.cpp:1587: HY000: [Microsoft][ODBC Microsoft Access Driver] Circular reference caused by alias 'Trial' in query definition's SELECT list.
#### Testing WHERE ----------------------------------------------------
pe_where <- pe2000 %>% filter(Trial == 1)
show_query(pe_where)
#> <SQL>
#> SELECT *
#> FROM `PE2000`
#> WHERE (`Trial` = 1.0)
pe_where
#> # Source: lazy query [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 1 1 -0.0052702289
#> 2 1 2 0.0226466382
#> 3 1 3 -0.0077055008
#> 4 1 4 0.0078160326
#> 5 1 5 0.0055670534
#> 6 1 6 0.0193203957
#> 7 1 7 0.0102388967
#> 8 1 8 0.0005576238
#> 9 1 9 0.0154860805
#> 10 1 10 0.0111156869
#> # ... with more rows
#### Testing GROUP BY ----------------------------------------------------
# It correctly doesn't show up until a real grouped computation is done
pe_gb <- pe2000 %>% group_by(Trial)
show_query(pe_gb)
#> <SQL>
#> SELECT *
#> FROM `PE2000`
pe_gb
#> # Source: table<PE2000> [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> # Groups: Trial
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 NA 0 0.0000000000
#> 2 1 1 -0.0052702289
#> 3 1 2 0.0226466382
#> 4 1 3 -0.0077055008
#> 5 1 4 0.0078160326
#> 6 1 5 0.0055670534
#> 7 1 6 0.0193203957
#> 8 1 7 0.0102388967
#> 9 1 8 0.0005576238
#> 10 1 9 0.0154860805
#> # ... with more rows
# Now it shows up
pe_gb_sum <- pe_gb %>% summarise(sum = sum(Trial))
show_query(pe_gb_sum)
#> <SQL>
#> SELECT `Trial`, SUM(`Trial`) AS `sum`
#> FROM `PE2000`
#> GROUP BY `Trial`
pe_gb_sum
#> # Source: lazy query [?? x 2]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial sum
#> <int> <dbl>
#> 1 NA NA
#> 2 1 900
#> 3 2 1802
#> 4 3 2703
#> 5 4 3604
#> 6 5 4505
#> 7 6 5406
#> 8 7 6307
#> 9 8 7208
#> 10 9 8109
#> # ... with more rows
#### (Fix?) Testing ORDER BY ----------------------------------------------------
# Why does order by display 2000 rows??
pe_order <- pe2000 %>%
arrange(Timestep)
show_query(pe_order)
#> <SQL>
#> SELECT *
#> FROM `PE2000`
#> ORDER BY `Timestep`
# This displays 2000 rows?
# pe_order
head(pe_order)
#> # Source: lazy query [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> # Ordered by: Timestep
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 1599 0 0
#> 2 1592 0 0
#> 3 1596 0 0
#> 4 1601 0 0
#> 5 1595 0 0
#> 6 1588 0 0
#### (NEED TEST) Testing HAVING ------------------------------------------
# ???
#### (ANTI, FULL) Testing Joins --------------------------------------------------------
bar <- tbl(cn, "BAR Wide")
# Left
pe_bar_lj <- left_join(pe2000, bar, by = c("Trial", "Timestep"))
show_query(pe_bar_lj)
#> <SQL>
#> SELECT `TBL_LEFT`.`Trial` AS `Trial`, `TBL_LEFT`.`Timestep` AS `Timestep`, `TBL_LEFT`.`"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` AS `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, `TBL_RIGHT`.`"BAR"` AS `"BAR"`
#> FROM `PE2000` AS `TBL_LEFT`
#> LEFT JOIN `BAR Wide` AS `TBL_RIGHT`
#> ON (`TBL_LEFT`.`Trial` = `TBL_RIGHT`.`Trial` AND `TBL_LEFT`.`Timestep` = `TBL_RIGHT`.`Timestep`)
pe_bar_lj
#> # Source: lazy query [?? x 4]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` `"BAR"`
#> <int> <int> <dbl> <dbl>
#> 1 NA 0 0.0000000000 NA
#> 2 1 1 -0.0052702289 -0.0097456345
#> 3 1 2 0.0226466382 0.0078704412
#> 4 1 3 -0.0077055008 -0.0053305307
#> 5 1 4 0.0078160326 0.0077478051
#> 6 1 5 0.0055670534 -0.0030354456
#> 7 1 6 0.0193203957 0.0058512031
#> 8 1 7 0.0102388967 -0.0053858780
#> 9 1 8 0.0005576238 -0.0095552582
#> 10 1 9 0.0154860805 -0.0005211968
#> # ... with more rows
# Right
pe_bar_rj <- right_join(pe2000, bar, by = c("Trial", "Timestep"))
show_query(pe_bar_rj)
#> <SQL>
#> SELECT `TBL_RIGHT`.`Trial` AS `Trial`, `TBL_RIGHT`.`Timestep` AS `Timestep`, `TBL_LEFT`.`"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` AS `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, `TBL_RIGHT`.`"BAR"` AS `"BAR"`
#> FROM `PE2000` AS `TBL_LEFT`
#> RIGHT JOIN `BAR Wide` AS `TBL_RIGHT`
#> ON (`TBL_LEFT`.`Trial` = `TBL_RIGHT`.`Trial` AND `TBL_LEFT`.`Timestep` = `TBL_RIGHT`.`Timestep`)
pe_bar_rj
#> # Source: lazy query [?? x 4]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` `"BAR"`
#> <int> <int> <dbl> <dbl>
#> 1 4 595 -0.007811856 0.015956031
#> 2 4 596 0.021175964 0.020085728
#> 3 4 597 0.024332792 -0.010334556
#> 4 4 598 -0.060647186 -0.009334307
#> 5 4 599 0.025617452 0.003846061
#> 6 4 600 0.003405601 0.013955507
#> 7 4 601 -0.006339721 0.020214965
#> 8 4 602 0.022980868 0.021987957
#> 9 4 603 0.024024698 0.024954847
#> 10 4 604 0.015283395 0.004594106
#> # ... with more rows
# Inner
pe_bar_ij <- inner_join(pe2000, bar, by = c("Trial", "Timestep"))
show_query(pe_bar_ij)
#> <SQL>
#> SELECT `TBL_LEFT`.`Trial` AS `Trial`, `TBL_LEFT`.`Timestep` AS `Timestep`, `TBL_LEFT`.`"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` AS `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, `TBL_RIGHT`.`"BAR"` AS `"BAR"`
#> FROM `PE2000` AS `TBL_LEFT`
#> INNER JOIN `BAR Wide` AS `TBL_RIGHT`
#> ON (`TBL_LEFT`.`Trial` = `TBL_RIGHT`.`Trial` AND `TBL_LEFT`.`Timestep` = `TBL_RIGHT`.`Timestep`)
pe_bar_ij
#> # Source: lazy query [?? x 4]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` `"BAR"`
#> <int> <int> <dbl> <dbl>
#> 1 1 1 -0.0052702289 -0.0097456345
#> 2 1 2 0.0226466382 0.0078704412
#> 3 1 3 -0.0077055008 -0.0053305307
#> 4 1 4 0.0078160326 0.0077478051
#> 5 1 5 0.0055670534 -0.0030354456
#> 6 1 6 0.0193203957 0.0058512031
#> 7 1 7 0.0102388967 -0.0053858780
#> 8 1 8 0.0005576238 -0.0095552582
#> 9 1 9 0.0154860805 -0.0005211968
#> 10 1 10 0.0111156869 0.0088736209
#> # ... with more rows
# Semi
pe_bar_sj <- semi_join(pe2000, bar, by = c("Trial", "Timestep"))
show_query(pe_bar_sj)
#> <SQL>
#> SELECT * FROM `PE2000` AS `TBL_LEFT`
#>
#> WHERE EXISTS (
#> SELECT 1 FROM `BAR Wide` AS `TBL_RIGHT`
#> WHERE (`TBL_LEFT`.`Trial` = `TBL_RIGHT`.`Trial` AND `TBL_LEFT`.`Timestep` = `TBL_RIGHT`.`Timestep`)
#> )
pe_bar_sj
#> # Source: lazy query [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 1 1 -0.0052702289
#> 2 1 2 0.0226466382
#> 3 1 3 -0.0077055008
#> 4 1 4 0.0078160326
#> 5 1 5 0.0055670534
#> 6 1 6 0.0193203957
#> 7 1 7 0.0102388967
#> 8 1 8 0.0005576238
#> 9 1 9 0.0154860805
#> 10 1 10 0.0111156869
#> # ... with more rows
# (THIS ONE RAN FOREVER?) Anti
# pe_bar_aj <- anti_join(pe2000, bar, by = c("Trial", "Timestep"))
# show_query(pe_bar_aj)
# pe_bar_aj
# Full
# Coalesce not supported. NZ() is equivalent with 2 params?
pe_bar_fj <- full_join(pe2000, bar, by = c("Trial", "Timestep"))
show_query(pe_bar_fj)
#> <SQL>
#> SELECT coalesce(`TBL_LEFT`.`Trial`, `TBL_RIGHT`.`Trial`) AS `Trial`, coalesce(`TBL_LEFT`.`Timestep`, `TBL_RIGHT`.`Timestep`) AS `Timestep`, `TBL_LEFT`.`"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` AS `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, `TBL_RIGHT`.`"BAR"` AS `"BAR"`
#> FROM `PE2000` AS `TBL_LEFT`
#> FULL JOIN `BAR Wide` AS `TBL_RIGHT`
#> ON (`TBL_LEFT`.`Trial` = `TBL_RIGHT`.`Trial` AND `TBL_LEFT`.`Timestep` = `TBL_RIGHT`.`Timestep`)
pe_bar_fj
#> Error: <SQL> 'SELECT TOP 10 *
#> FROM (SELECT coalesce(`TBL_LEFT`.`Trial`, `TBL_RIGHT`.`Trial`) AS `Trial`, coalesce(`TBL_LEFT`.`Timestep`, `TBL_RIGHT`.`Timestep`) AS `Timestep`, `TBL_LEFT`.`"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` AS `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, `TBL_RIGHT`.`"BAR"` AS `"BAR"`
#> FROM `PE2000` AS `TBL_LEFT`
#> FULL JOIN `BAR Wide` AS `TBL_RIGHT`
#> ON (`TBL_LEFT`.`Trial` = `TBL_RIGHT`.`Trial` AND `TBL_LEFT`.`Timestep` = `TBL_RIGHT`.`Timestep`)
#> ) `tzftrwguau`'
#> nanodbc/nanodbc.cpp:1587: 42000: [Microsoft][ODBC Microsoft Access Driver] Syntax error in FROM clause.
#### (Logical = 0/-1) Conversion ------------------------------------------------------
pe_conv <- pe2000 %>%
mutate(Timestep_num = as.numeric(Timestep),
Timestep_dbl = as.double(Timestep),
Timestep_int = as.integer(Timestep),
Timestep_log = as.logical(Timestep),
Timestep_chr = as.character(Timestep),
Timestep_date = as.Date("2017-01-01")
)
show_query(pe_conv)
#> <SQL>
#> SELECT `Trial`, `Timestep`, `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, CDbl(`Timestep`) AS `Timestep_num`, CDbl(`Timestep`) AS `Timestep_dbl`, CInt(`Timestep`) AS `Timestep_int`, CBool(`Timestep`) AS `Timestep_log`, CStr(`Timestep`) AS `Timestep_chr`, CDate('2017-01-01') AS `Timestep_date`
#> FROM `PE2000`
pe_conv
#> # Source: lazy query [?? x 9]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` Timestep_num Timestep_dbl Timestep_int Timestep_log Timestep_chr Timestep_date
#> <int> <int> <dbl> <dbl> <dbl> <int> <int> <chr> <dttm>
#> 1 NA 0 0.0000000000 0 0 0 0 0 2017-01-01
#> 2 1 1 -0.0052702289 1 1 1 -1 1 2017-01-01
#> 3 1 2 0.0226466382 2 2 2 -1 2 2017-01-01
#> 4 1 3 -0.0077055008 3 3 3 -1 3 2017-01-01
#> 5 1 4 0.0078160326 4 4 4 -1 4 2017-01-01
#> 6 1 5 0.0055670534 5 5 5 -1 5 2017-01-01
#> 7 1 6 0.0193203957 6 6 6 -1 6 2017-01-01
#> 8 1 7 0.0102388967 7 7 7 -1 7 2017-01-01
#> 9 1 8 0.0005576238 8 8 8 -1 8 2017-01-01
#> 10 1 9 0.0154860805 9 9 9 -1 9 2017-01-01
#> # ... with more rows
#### Math ------------------------------------------------------------
pe_math <- pe2000 %>%
mutate(ts_exp = exp(Timestep),
ts_sqrt = sqrt(Timestep),
ts_atan = atan(Timestep),
ts_power = Timestep ^ 3,
ts_floor = floor(Timestep+.01),
ts_ceiling = ceiling(Timestep+.01),
ts_ceiling_whole_num = ceiling(Timestep)
)
show_query(pe_math)
#> <SQL>
#> SELECT `Trial`, `Timestep`, `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, EXP(`Timestep`) AS `ts_exp`, SQR(`Timestep`) AS `ts_sqrt`, ATN(`Timestep`) AS `ts_atan`, `Timestep` ^ 3.0 AS `ts_power`, INT(`Timestep` + 0.01) AS `ts_floor`, INT(`Timestep` + 0.01 + .9999999999) AS `ts_ceiling`, INT(`Timestep` + .9999999999) AS `ts_ceiling_whole_num`
#> FROM `PE2000`
pe_math
#> # Source: lazy query [?? x 10]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` ts_exp ts_sqrt ts_atan ts_power ts_floor ts_ceiling ts_ceiling_whole_num
#> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 NA 0 0.0000000000 1.000000 0.000000 0.0000000 0 0 1 0
#> 2 1 1 -0.0052702289 2.718282 1.000000 0.7853982 1 1 2 1
#> 3 1 2 0.0226466382 7.389056 1.414214 1.1071487 8 2 3 2
#> 4 1 3 -0.0077055008 20.085537 1.732051 1.2490458 27 3 4 3
#> 5 1 4 0.0078160326 54.598150 2.000000 1.3258177 64 4 5 4
#> 6 1 5 0.0055670534 148.413159 2.236068 1.3734008 125 5 6 5
#> 7 1 6 0.0193203957 403.428793 2.449490 1.4056476 216 6 7 6
#> 8 1 7 0.0102388967 1096.633158 2.645751 1.4288993 343 7 8 7
#> 9 1 8 0.0005576238 2980.957987 2.828427 1.4464413 512 8 9 8
#> 10 1 9 0.0154860805 8103.083928 3.000000 1.4601391 729 9 10 9
#> # ... with more rows
#### Strings ---------------------------------------------------------
pe_strings <- pe2000 %>%
mutate(
test_str = " hello world ",
test_nchar = nchar(test_str),
test_substr = substr(test_str, 2, 5), # remember empty space at beginning
test_trimws = trimws(test_str),
test_nchar_trim = trimws(test_str) %>% nchar()
)
show_query(pe_strings)
#> <SQL>
#> SELECT `Trial`, `Timestep`, `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, `test_str`, LEN(`test_str`) AS `test_nchar`, RIGHT(LEFT(`test_str`, 5.0), 4.0) AS `test_substr`, TRIM(`test_str`) AS `test_trimws`, LEN(TRIM(`test_str`)) AS `test_nchar_trim`
#> FROM (SELECT `Trial`, `Timestep`, `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, ' hello world ' AS `test_str`
#> FROM `PE2000`) `amgqbcvkeh`
pe_strings
#> # Source: lazy query [?? x 8]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` test_str test_nchar test_substr test_trimws test_nchar_trim
#> <int> <int> <dbl> <chr> <int> <chr> <chr> <int>
#> 1 NA 0 0.0000000000 hello world 13 hell hello world 11
#> 2 1 1 -0.0052702289 hello world 13 hell hello world 11
#> 3 1 2 0.0226466382 hello world 13 hell hello world 11
#> 4 1 3 -0.0077055008 hello world 13 hell hello world 11
#> 5 1 4 0.0078160326 hello world 13 hell hello world 11
#> 6 1 5 0.0055670534 hello world 13 hell hello world 11
#> 7 1 6 0.0193203957 hello world 13 hell hello world 11
#> 8 1 7 0.0102388967 hello world 13 hell hello world 11
#> 9 1 8 0.0005576238 hello world 13 hell hello world 11
#> 10 1 9 0.0154860805 hello world 13 hell hello world 11
#> # ... with more rows
#### Logic ---------------------------------------------------------
pe_logic <- pe2000 %>%
mutate(
test_null = is.null(Trial),
test_na = is.na(Trial),
test_ifelse = ifelse(Trial == 1, "its a 1", "its not a 1")
)
show_query(pe_logic)
#> <SQL>
#> SELECT `Trial`, `Timestep`, `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, IIF(ISNULL(`Trial`), 1, 0) AS `test_null`, IIF(ISNULL(`Trial`), 1, 0) AS `test_na`, IIF(`Trial` = 1.0, 'its a 1', 'its not a 1') AS `test_ifelse`
#> FROM `PE2000`
pe_logic
#> # Source: lazy query [?? x 6]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` test_null test_na test_ifelse
#> <int> <int> <dbl> <int> <int> <chr>
#> 1 NA 0 0.0000000000 1 1 its not a 1
#> 2 1 1 -0.0052702289 0 0 its a 1
#> 3 1 2 0.0226466382 0 0 its a 1
#> 4 1 3 -0.0077055008 0 0 its a 1
#> 5 1 4 0.0078160326 0 0 its a 1
#> 6 1 5 0.0055670534 0 0 its a 1
#> 7 1 6 0.0193203957 0 0 its a 1
#> 8 1 7 0.0102388967 0 0 its a 1
#> 9 1 8 0.0005576238 0 0 its a 1
#> 10 1 9 0.0154860805 0 0 its a 1
#> # ... with more rows
#### Dates ---------------------------------------------------------
pe_dates <- pe2000 %>%
mutate(
test_date = Sys.Date()
)
show_query(pe_dates)
#> <SQL>
#> SELECT `Trial`, `Timestep`, `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`, DATE() AS `test_date`
#> FROM `PE2000`
pe_dates
#> # Source: lazy query [?? x 4]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"` test_date
#> <int> <int> <dbl> <dttm>
#> 1 NA 0 0.0000000000 2017-07-07
#> 2 1 1 -0.0052702289 2017-07-07
#> 3 1 2 0.0226466382 2017-07-07
#> 4 1 3 -0.0077055008 2017-07-07
#> 5 1 4 0.0078160326 2017-07-07
#> 6 1 5 0.0055670534 2017-07-07
#> 7 1 6 0.0193203957 2017-07-07
#> 8 1 7 0.0102388967 2017-07-07
#> 9 1 8 0.0005576238 2017-07-07
#> 10 1 9 0.0154860805 2017-07-07
#> # ... with more rows
#### Agg functions -------------------------------------------------
pe_agg <- pe2000 %>%
group_by(Timestep) %>%
summarise(
test_mean = mean(Trial),
test_sd = sd(Trial),
test_var = var(Trial)
)
show_query(pe_agg)
#> <SQL>
#> SELECT `Timestep`, AVG(`Trial`) AS `test_mean`, STDEV(`Trial`) AS `test_sd`, VAR(`Trial`) AS `test_var`
#> FROM `PE2000`
#> GROUP BY `Timestep`
pe_agg
#> # Source: lazy query [?? x 4]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Timestep test_mean test_sd test_var
#> <int> <dbl> <dbl> <dbl>
#> 1 0 1001.0 577.2059 333166.7
#> 2 1 1000.5 577.4946 333500.0
#> 3 2 1000.5 577.4946 333500.0
#> 4 3 1000.5 577.4946 333500.0
#> 5 4 1000.5 577.4946 333500.0
#> 6 5 1000.5 577.4946 333500.0
#> 7 6 1000.5 577.4946 333500.0
#> 8 7 1000.5 577.4946 333500.0
#> 9 8 1000.5 577.4946 333500.0
#> 10 9 1000.5 577.4946 333500.0
#> # ... with more rows
#### (FIX) What happens when convering with a NA in the col? ---------------
# Notice the NA in Trial
head(pe2000)
#> # Source: lazy query [?? x 3]
#> # Database: ACCESS
#> # 12.00.0000[admin@ACCESS/R:\Life_Modeling_Team\Vaughan\Economic
#> # Scenario Database 2017 Part 1.accdb]
#> Trial Timestep `"ESGAssetsEquityAssetsPrivateEquityTotalReturn"`
#> <int> <int> <dbl>
#> 1 NA 0 0.000000000
#> 2 1 1 -0.005270229
#> 3 1 2 0.022646638
#> 4 1 3 -0.007705501
#> 5 1 4 0.007816033
#> 6 1 5 0.005567053
# Invalid use of Null
pe2000 %>%
mutate(trial_num = as.double(Trial))
#> Error in result_fetch(res@ptr, n, ...): nanodbc/nanodbc.cpp:2525: 22018: [Microsoft][ODBC Microsoft Access Driver] Invalid use of Null
#> Warning in dbClearResult(res): Result already cleared
#### (FIX) Can't copy to Access / Write tables --------------------------------------------
# I don't think temporary tables work, even though they say they do online
# https://msdn.microsoft.com/en-us/library/office/bb177893(v=office.12).aspx
# https://stackoverflow.com/questions/29698198/ms-access-database-2010-how-to-create-temporary-table-procedure-view-from-quer
# Syntax error in CREATE TABLE statement.
#<SQL> CREATE TEMPORARY TABLE `mtcars` (
# `row_names` VARCHAR(255),
# `mpg` DOUBLE,
# `cyl` DOUBLE,
# `disp` DOUBLE,
# `hp` DOUBLE,
# `drat` DOUBLE,
# `wt` DOUBLE,
# `qsec` DOUBLE,
# `vs` DOUBLE,
# `am` DOUBLE,
# `gear` DOUBLE,
# `carb` DOUBLE
# )
copy_to(cn, mtcars)
#> Error in new_result(connection@ptr, statement): nanodbc/nanodbc.cpp:1344: 42000: [Microsoft][ODBC Microsoft Access Driver] Syntax error in CREATE TABLE statement.
# There is actually a deeper problem, when you turn temporary off,
# it actually can't insert_results_dataframe
# This has been posted on odbc's github issues
# It does create the table though!!
# Invalid precision value
copy_to(cn, mtcars, temporary = FALSE)
#> Error in result_insert_dataframe(rs@ptr, values): nanodbc/nanodbc.cpp:1944: HY104: [Microsoft][ODBC Microsoft Access Driver]Invalid precision value
# The table is there, just empty
"mtcars" %in% dbListTables(cn)
#> [1] TRUE
# You can't drop them with overwrite = TRUE though, the DROP TABLE syntax is wrong
# Syntax error in DROP TABLE or DROP INDEX.
# <SQL> DROP TABLE IF EXISTS mtcars
# Access doesn't support IF EXISTS
# https://stackoverflow.com/questions/5847770/how-to-drop-table-in-access-if-exists
copy_to(cn, mtcars, temporary = FALSE, overwrite = TRUE)
#> Error in new_result(connection@ptr, statement): nanodbc/nanodbc.cpp:1344: 42000: [Microsoft][ODBC Microsoft Access Driver] Syntax error in DROP TABLE or DROP INDEX.
# But you can drop them with db_drop_table, as it doesnt check if it exists
db_drop_table(cn, "mtcars")
#> [1] 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment