Skip to content

Instantly share code, notes, and snippets.

@ateucher
Last active December 16, 2022 18:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ateucher/8a143749f1d2d44e72e8526903f7ae5f to your computer and use it in GitHub Desktop.
Save ateucher/8a143749f1d2d44e72e8526903f7ae5f to your computer and use it in GitHub Desktop.
library(duckdb)
#> Loading required package: DBI
library(glue)

url <- "http://www.env.gov.bc.ca/wsd/data_searches/water/Discharge.csv"

conn <- DBI::dbConnect(duckdb(), ":memory:")

# Install and load duckdb extensions to read from http
DBI::dbExecute(conn, "INSTALL 'httpfs';")
#> [1] 0
DBI::dbExecute(conn, "LOAD 'httpfs';")
#> [1] 0

# Describe the table
dbGetQuery(conn, glue("DESCRIBE SELECT * from '{url}';"))
#> Warning in duckdb_post_execute(res, out): NAs introduced by coercion
#>       column_name column_type null  key default extra
#> 1     Location ID     VARCHAR  YES <NA>    <NA>  <NA>
#> 2   Location Name     VARCHAR  YES <NA>    <NA>  <NA>
#> 3          Status     VARCHAR  YES <NA>    <NA>  <NA>
#> 4        Latitude      DOUBLE  YES <NA>    <NA>  <NA>
#> 5       Longitude      DOUBLE  YES <NA>    <NA>  <NA>
#> 6  Date/Time(UTC)   TIMESTAMP  YES <NA>    <NA>  <NA>
#> 7       Parameter     VARCHAR  YES <NA>    <NA>  <NA>
#> 8           Value      DOUBLE  YES <NA>    <NA>  <NA>
#> 9            Unit     VARCHAR  YES <NA>    <NA>  <NA>
#> 10          Grade     VARCHAR  YES <NA>    <NA>  <NA>


# Get the first 20 rows
dbGetQuery(conn, glue("SELECT * from '{url}' LIMIT 20;"))
#>    Location ID             Location Name Status Latitude Longitude
#> 1     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 2     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 3     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 4     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 5     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 6     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 7     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 8     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 9     08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 10    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 11    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 12    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 13    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 14    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 15    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 16    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 17    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 18    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 19    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#> 20    08HA0022 Koksilah River at Trestle Active 48.75529 -123.6831
#>         Date/Time(UTC) Parameter Value  Unit     Grade
#> 1  2022-10-01 00:00:00 Discharge 0.319 m^3/s Undefined
#> 2  2022-10-01 01:00:00 Discharge 0.317 m^3/s Undefined
#> 3  2022-10-01 02:00:00 Discharge 0.316 m^3/s Undefined
#> 4  2022-10-01 03:00:00 Discharge 0.311 m^3/s Undefined
#> 5  2022-10-01 04:00:00 Discharge 0.307 m^3/s Undefined
#> 6  2022-10-01 05:00:00 Discharge 0.305 m^3/s Undefined
#> 7  2022-10-01 06:00:00 Discharge 0.302 m^3/s Undefined
#> 8  2022-10-01 07:00:00 Discharge 0.304 m^3/s Undefined
#> 9  2022-10-01 08:00:00 Discharge 0.302 m^3/s Undefined
#> 10 2022-10-01 09:00:00 Discharge 0.300 m^3/s Undefined
#> 11 2022-10-01 10:00:00 Discharge 0.300 m^3/s Undefined
#> 12 2022-10-01 11:00:00 Discharge 0.298 m^3/s Undefined
#> 13 2022-10-01 12:00:00 Discharge 0.300 m^3/s Undefined
#> 14 2022-10-01 13:00:00 Discharge 0.299 m^3/s Undefined
#> 15 2022-10-01 14:00:00 Discharge 0.296 m^3/s Undefined
#> 16 2022-10-01 15:00:00 Discharge 0.298 m^3/s Undefined
#> 17 2022-10-01 16:00:00 Discharge 0.299 m^3/s Undefined
#> 18 2022-10-01 17:00:00 Discharge 0.300 m^3/s Undefined
#> 19 2022-10-01 18:00:00 Discharge 0.304 m^3/s Undefined
#> 20 2022-10-01 19:00:00 Discharge 0.306 m^3/s Undefined

# Get a particular station (note double-quoted column names
# when there are spaces)

# List station names
dbGetQuery(conn, glue("SELECT DISTINCT \"Location Name\" from '{url}';"))
#>                                          Location Name
#> 1                            Koksilah River at Trestle
#> 2             Chemainus River Upstream of Bannon Creek
#> 3                 Tsolum River at Fitzgerald Rd bridge
#> 4                        Haslam Creek near Rail Bridge
#> 5                   Nanoose Creek near HWY 19 Crossing
#> 6                   Black Creek at Miracle Beach Drive
#> 7                        Close Creek on Meare's Island
#> 8                      Duteau at Whitevale Road (DUT1)
#> 9        Nicklen Creek at outlet of Nicklen Dam (NIK1)
#> 10              Clapperton Creek near the Mouth (CLP2)
#> 11                          Guichon Creek at the mouth
#> 12                               Lillooet River at FSR
#> 13                                   Fitzsimmons Creek
#> 14                          Green River at Nairn Falls
#> 15      Archibald Creek at 142 Street - City of Surrey
#> 16             Latimer Creek at 192nd - City of Surrey
#> 17 Little Campbell Creek at 12 Avenue - City of Surrey
#> 18         Quibble Creek at 88 Avenue - City of Surrey
#> 19                        Equesis Creek near the Mouth
#> 20                         Vaseux Creek near the Mouth
#> 21                 Middle Vernon Creek at Reimche Road

res1 <- dbGetQuery(
  conn, 
  glue(
    "SELECT * 
    FROM '{url}'
    WHERE
      \"Location Name\" = 'Vaseux Creek near the Mouth';
    "
  )
)

head(res1)
#>   Location ID               Location Name Status Latitude Longitude
#> 1    08NM0003 Vaseux Creek near the Mouth Active 49.24553 -119.5247
#> 2    08NM0003 Vaseux Creek near the Mouth Active 49.24553 -119.5247
#> 3    08NM0003 Vaseux Creek near the Mouth Active 49.24553 -119.5247
#> 4    08NM0003 Vaseux Creek near the Mouth Active 49.24553 -119.5247
#> 5    08NM0003 Vaseux Creek near the Mouth Active 49.24553 -119.5247
#> 6    08NM0003 Vaseux Creek near the Mouth Active 49.24553 -119.5247
#>        Date/Time(UTC) Parameter Value  Unit     Grade
#> 1 2022-10-01 00:00:00 Discharge     0 m^3/s Undefined
#> 2 2022-10-01 01:00:00 Discharge     0 m^3/s Undefined
#> 3 2022-10-01 02:00:00 Discharge     0 m^3/s Undefined
#> 4 2022-10-01 03:00:00 Discharge     0 m^3/s Undefined
#> 5 2022-10-01 04:00:00 Discharge     0 m^3/s Undefined
#> 6 2022-10-01 05:00:00 Discharge     0 m^3/s Undefined

# Get last 24 hours:
ref_time <- format(Sys.time() - 60 * 60* 24, tz = "UTC")

res2 <- dbGetQuery(
  conn, 
  glue(
    "SELECT * 
    FROM '{url}'
    WHERE
      \"Date/Time(UTC)\" >= strptime('{ref_time}', '%x %X');
    "
  )
)

head(res2)
#>   Location ID                 Location Name Status Latitude Longitude
#> 1    08HB0007 Haslam Creek near Rail Bridge Active 49.06096 -123.8801
#> 2    08HB0007 Haslam Creek near Rail Bridge Active 49.06096 -123.8801
#> 3    08HB0007 Haslam Creek near Rail Bridge Active 49.06096 -123.8801
#> 4    08HB0007 Haslam Creek near Rail Bridge Active 49.06096 -123.8801
#> 5    08HB0007 Haslam Creek near Rail Bridge Active 49.06096 -123.8801
#> 6    08HB0007 Haslam Creek near Rail Bridge Active 49.06096 -123.8801
#>        Date/Time(UTC) Parameter Value  Unit     Grade
#> 1 2022-12-15 19:00:00 Discharge 3.285 m^3/s Undefined
#> 2 2022-12-15 20:00:00 Discharge 3.281 m^3/s Undefined
#> 3 2022-12-15 21:00:00 Discharge 3.266 m^3/s Undefined
#> 4 2022-12-15 22:00:00 Discharge 3.264 m^3/s Undefined
#> 5 2022-12-15 23:00:00 Discharge 3.247 m^3/s Undefined
#> 6 2022-12-16 00:00:00 Discharge 3.243 m^3/s Undefined

Created on 2022-12-16 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment