Skip to content

Instantly share code, notes, and snippets.

@sckott
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sckott/3ac81a3a4e1e477b33a7 to your computer and use it in GitHub Desktop.
Save sckott/3ac81a3a4e1e477b33a7 to your computer and use it in GitHub Desktop.

rnoaa is an R package for getting data from various NOAA data sources. A new data source we're working on is GHCND (Global Historical Climatology Network - Daily), which is served from the FTP server described at http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/. Below are examples of what we've got so far. We'd love feedback!

Installation, loading

install.packages("devtools")
devtools::install_github("ropensci/rnoaa")
library("rnoaa")

Usage

Get metadata

head(ghcnd_states())
#>   code           name
#> 1   AB        ALBERTA
#> 2   AK         ALASKA
#> 3   AL        ALABAMA
#> 4   AR       ARKANSAS
#> 5   AS AMERICAN SAMOA
#> 6   AZ        ARIZONA

Get stations (the ghcnd-stations and ghcnd-inventory files, merged)

(stations <- ghcnd_stations())
#> <GHCND Station Data>
#> Size: 554871 X 10
#> 
#>             id latitude longitude elevation                  name gsn_flag
#> 1  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 2  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 3  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 4  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 5  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 6  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 7  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 8  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 9  ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> 10 ACW00011604  17.1167  -61.7833      10.1 ST JOHNS COOLIDGE FLD         
#> ..         ...      ...       ...       ...                   ...      ...
#> Variables not shown: wmo_id (chr), element (chr), first_year (int),
#>      last_year (int)

Get data

ghcnd(stationid="AGE00147704")
#> <GHCND Data>
#> Size: 964 X 128
#> Source: ~/.rnoaa/ghcnd/AGE00147704.dly
#> 
#>             id year month element VALUE1 MFLAG1 QFLAG1 SFLAG1 VALUE2
#> 1  AGE00147704 1909    11    TMAX  -9999     NA                -9999
#> 2  AGE00147704 1909    11    TMIN  -9999     NA                -9999
#> 3  AGE00147704 1909    11    PRCP  -9999     NA                -9999
#> 4  AGE00147704 1909    12    TMAX    170     NA             E    160
#> 5  AGE00147704 1909    12    TMIN     90     NA             E    100
#> 6  AGE00147704 1909    12    PRCP     23     NA             E      0
#> 7  AGE00147704 1910     1    TMAX    140     NA             E    120
#> 8  AGE00147704 1910     1    TMIN    100     NA             E     80
#> 9  AGE00147704 1910     1    PRCP     81     NA             E    176
#> 10 AGE00147704 1910     2    TMAX    110     NA             E    140
#> ..         ...  ...   ...     ...    ...    ...    ...    ...    ...
#> Variables not shown: MFLAG2 (lgl), QFLAG2 (chr), SFLAG2 (chr), VALUE3
#>      (int), MFLAG3 (lgl), QFLAG3 (chr), SFLAG3 (chr), VALUE4 (int), MFLAG4
#>      (lgl), QFLAG4 (chr), SFLAG4 (chr), VALUE5 (int), MFLAG5 (lgl), QFLAG5
#>      (chr), SFLAG5 (chr), VALUE6 (int), MFLAG6 (lgl), QFLAG6 (chr), SFLAG6
#>      (chr), VALUE7 (int), MFLAG7 (lgl), QFLAG7 (lgl), SFLAG7 (chr), VALUE8
#>      (int), MFLAG8 (lgl), QFLAG8 (chr), SFLAG8 (chr), VALUE9 (int), MFLAG9
#>      (lgl), QFLAG9 (chr), SFLAG9 (chr), VALUE10 (int), MFLAG10 (lgl),
#>      QFLAG10 (chr), SFLAG10 (chr), VALUE11 (int), MFLAG11 (lgl), QFLAG11
#>      (chr), SFLAG11 (chr), VALUE12 (int), MFLAG12 (lgl), QFLAG12 (chr),
#>      SFLAG12 (chr), VALUE13 (int), MFLAG13 (lgl), QFLAG13 (chr), SFLAG13
#>      (chr), VALUE14 (int), MFLAG14 (lgl), QFLAG14 (chr), SFLAG14 (chr),
#>      VALUE15 (int), MFLAG15 (lgl), QFLAG15 (chr), SFLAG15 (chr), VALUE16
#>      (int), MFLAG16 (lgl), QFLAG16 (chr), SFLAG16 (chr), VALUE17 (int),
#>      MFLAG17 (lgl), QFLAG17 (chr), SFLAG17 (chr), VALUE18 (int), MFLAG18
#>      (lgl), QFLAG18 (chr), SFLAG18 (chr), VALUE19 (int), MFLAG19 (lgl),
#>      QFLAG19 (lgl), SFLAG19 (chr), VALUE20 (int), MFLAG20 (lgl), QFLAG20
#>      (lgl), SFLAG20 (chr), VALUE21 (int), MFLAG21 (lgl), QFLAG21 (chr),
#>      SFLAG21 (chr), VALUE22 (int), MFLAG22 (lgl), QFLAG22 (chr), SFLAG22
#>      (chr), VALUE23 (int), MFLAG23 (lgl), QFLAG23 (lgl), SFLAG23 (chr),
#>      VALUE24 (int), MFLAG24 (lgl), QFLAG24 (lgl), SFLAG24 (chr), VALUE25
#>      (int), MFLAG25 (lgl), QFLAG25 (lgl), SFLAG25 (chr), VALUE26 (int),
#>      MFLAG26 (lgl), QFLAG26 (chr), SFLAG26 (chr), VALUE27 (int), MFLAG27
#>      (lgl), QFLAG27 (lgl), SFLAG27 (chr), VALUE28 (int), MFLAG28 (lgl),
#>      QFLAG28 (lgl), SFLAG28 (chr), VALUE29 (int), MFLAG29 (lgl), QFLAG29
#>      (lgl), SFLAG29 (chr), VALUE30 (int), MFLAG30 (lgl), QFLAG30 (chr),
#>      SFLAG30 (chr), VALUE31 (int), MFLAG31 (lgl), QFLAG31 (lgl), SFLAG31
#>      (chr)
ghcnd(stations$data$id[40])
#> <GHCND Data>
#> Size: 2386 X 128
#> Source: ~/.rnoaa/ghcnd/AGE00135039.dly
#> 
#>             id year month element VALUE1 MFLAG1 QFLAG1 SFLAG1 VALUE2
#> 1  AGE00135039 1852     1    TMAX    160     NA     NA      E    158
#> 2  AGE00135039 1852     1    TMIN     90     NA     NA      E     93
#> 3  AGE00135039 1852     1    PRCP      0     NA     NA      E      0
#> 4  AGE00135039 1852     2    TMAX  -9999     NA     NA           140
#> 5  AGE00135039 1852     2    TMIN  -9999     NA     NA            55
#> 6  AGE00135039 1852     2    PRCP      0     NA     NA      E      0
#> 7  AGE00135039 1852     3    TMAX    150     NA     NA      E    140
#> 8  AGE00135039 1852     3    TMIN     50     NA     NA      E     40
#> 9  AGE00135039 1852     3    PRCP      0     NA     NA      E      0
#> 10 AGE00135039 1852     4    TMAX    225     NA     NA      E    220
#> ..         ...  ...   ...     ...    ...    ...    ...    ...    ...
#> Variables not shown: MFLAG2 (lgl), QFLAG2 (lgl), SFLAG2 (chr), VALUE3
#>      (int), MFLAG3 (lgl), QFLAG3 (lgl), SFLAG3 (chr), VALUE4 (int), MFLAG4
#>      (lgl), QFLAG4 (lgl), SFLAG4 (chr), VALUE5 (int), MFLAG5 (lgl), QFLAG5
#>      (lgl), SFLAG5 (chr), VALUE6 (int), MFLAG6 (lgl), QFLAG6 (lgl), SFLAG6
#>      (chr), VALUE7 (int), MFLAG7 (lgl), QFLAG7 (chr), SFLAG7 (chr), VALUE8
#>      (int), MFLAG8 (lgl), QFLAG8 (chr), SFLAG8 (chr), VALUE9 (int), MFLAG9
#>      (lgl), QFLAG9 (chr), SFLAG9 (chr), VALUE10 (int), MFLAG10 (lgl),
#>      QFLAG10 (chr), SFLAG10 (chr), VALUE11 (int), MFLAG11 (lgl), QFLAG11
#>      (chr), SFLAG11 (chr), VALUE12 (int), MFLAG12 (lgl), QFLAG12 (chr),
#>      SFLAG12 (chr), VALUE13 (int), MFLAG13 (lgl), QFLAG13 (chr), SFLAG13
#>      (chr), VALUE14 (int), MFLAG14 (lgl), QFLAG14 (chr), SFLAG14 (chr),
#>      VALUE15 (int), MFLAG15 (lgl), QFLAG15 (chr), SFLAG15 (chr), VALUE16
#>      (int), MFLAG16 (lgl), QFLAG16 (chr), SFLAG16 (chr), VALUE17 (int),
#>      MFLAG17 (lgl), QFLAG17 (chr), SFLAG17 (chr), VALUE18 (int), MFLAG18
#>      (lgl), QFLAG18 (chr), SFLAG18 (chr), VALUE19 (int), MFLAG19 (lgl),
#>      QFLAG19 (chr), SFLAG19 (chr), VALUE20 (int), MFLAG20 (lgl), QFLAG20
#>      (chr), SFLAG20 (chr), VALUE21 (int), MFLAG21 (lgl), QFLAG21 (chr),
#>      SFLAG21 (chr), VALUE22 (int), MFLAG22 (lgl), QFLAG22 (chr), SFLAG22
#>      (chr), VALUE23 (int), MFLAG23 (lgl), QFLAG23 (chr), SFLAG23 (chr),
#>      VALUE24 (int), MFLAG24 (lgl), QFLAG24 (chr), SFLAG24 (chr), VALUE25
#>      (int), MFLAG25 (lgl), QFLAG25 (chr), SFLAG25 (chr), VALUE26 (int),
#>      MFLAG26 (lgl), QFLAG26 (chr), SFLAG26 (chr), VALUE27 (int), MFLAG27
#>      (lgl), QFLAG27 (chr), SFLAG27 (chr), VALUE28 (int), MFLAG28 (lgl),
#>      QFLAG28 (chr), SFLAG28 (chr), VALUE29 (int), MFLAG29 (lgl), QFLAG29
#>      (lgl), SFLAG29 (chr), VALUE30 (int), MFLAG30 (lgl), QFLAG30 (lgl),
#>      SFLAG30 (chr), VALUE31 (int), MFLAG31 (lgl), QFLAG31 (lgl), SFLAG31
#>      (chr)

Manipulate data

Using built-in functions

dat <- ghcnd(stationid="AGE00147704")
alldat <- ghcnd_splitvars(dat)

Plot data

library("ggplot2")
ggplot(subset(alldat$tmax, tmax >= 0), aes(date, tmax)) + geom_point()

img

Using dplyr

library("dplyr")
dat <- ghcnd(stationid="AGE00147704")
dat$data %>%
 filter(element == "PRCP", year == 1909)
#>            id year month element VALUE1 MFLAG1 QFLAG1 SFLAG1 VALUE2 MFLAG2
#> 1 AGE00147704 1909    11    PRCP  -9999     NA                -9999     NA
#> 2 AGE00147704 1909    12    PRCP     23     NA             E      0     NA
#>   QFLAG2 SFLAG2 VALUE3 MFLAG3 QFLAG3 SFLAG3 VALUE4 MFLAG4 QFLAG4 SFLAG4
#> 1                -9999     NA                -9999     NA              
#> 2             E      0     NA             E      0     NA             E
#>   VALUE5 MFLAG5 QFLAG5 SFLAG5 VALUE6 MFLAG6 QFLAG6 SFLAG6 VALUE7 MFLAG7
#> 1  -9999     NA                -9999     NA                -9999     NA
#> 2      0     NA             E      0     NA             E      0     NA
#>   QFLAG7 SFLAG7 VALUE8 MFLAG8 QFLAG8 SFLAG8 VALUE9 MFLAG9 QFLAG9 SFLAG9
#> 1     NA         -9999     NA                -9999     NA              
#> 2     NA      E    250     NA             E     75     NA             E
#>   VALUE10 MFLAG10 QFLAG10 SFLAG10 VALUE11 MFLAG11 QFLAG11 SFLAG11 VALUE12
#> 1   -9999      NA                   -9999      NA                   -9999
#> 2     131      NA               E       0      NA               E       0
#>   MFLAG12 QFLAG12 SFLAG12 VALUE13 MFLAG13 QFLAG13 SFLAG13 VALUE14 MFLAG14
#> 1      NA                   -9999      NA                   -9999      NA
#> 2      NA               E       0      NA               E       0      NA
#>   QFLAG14 SFLAG14 VALUE15 MFLAG15 QFLAG15 SFLAG15 VALUE16 MFLAG16 QFLAG16
#> 1                   -9999      NA                   -9999      NA        
#> 2               E       0      NA               E       0      NA        
#>   SFLAG16 VALUE17 MFLAG17 QFLAG17 SFLAG17 VALUE18 MFLAG18 QFLAG18 SFLAG18
#> 1           -9999      NA                   -9999      NA                
#> 2       E       0      NA               E       0      NA               E
#>   VALUE19 MFLAG19 QFLAG19 SFLAG19 VALUE20 MFLAG20 QFLAG20 SFLAG20 VALUE21
#> 1   -9999      NA      NA           -9999      NA      NA           -9999
#> 2       0      NA      NA       E       0      NA      NA       E       0
#>   MFLAG21 QFLAG21 SFLAG21 VALUE22 MFLAG22 QFLAG22 SFLAG22 VALUE23 MFLAG23
#> 1      NA                   -9999      NA                      22      NA
#> 2      NA               E       0      NA               E       0      NA
#>   QFLAG23 SFLAG23 VALUE24 MFLAG24 QFLAG24 SFLAG24 VALUE25 MFLAG25 QFLAG25
#> 1      NA       E       9      NA      NA       E       5      NA      NA
#> 2      NA       E       0      NA      NA       E       0      NA      NA
#>   SFLAG25 VALUE26 MFLAG26 QFLAG26 SFLAG26 VALUE27 MFLAG27 QFLAG27 SFLAG27
#> 1       E       0      NA               E      86      NA      NA       E
#> 2       E       0      NA               E       0      NA      NA       E
#>   VALUE28 MFLAG28 QFLAG28 SFLAG28 VALUE29 MFLAG29 QFLAG29 SFLAG29 VALUE30
#> 1       0      NA      NA       E      28      NA      NA       E       0
#> 2       0      NA      NA       E       0      NA      NA       E       0
#>   MFLAG30 QFLAG30 SFLAG30 VALUE31 MFLAG31 QFLAG31 SFLAG31
#> 1      NA               E   -9999      NA      NA        
#> 2      NA               E      57      NA      NA       E

Search based on variable and/or date. The ghcnd_search() function wraps ghcnd() and ghcnd_splitvars(): it downloads the data first, then filters the rows to get the data you want.

Just variable

ghcnd_search("AGE00147704", var = "PRCP")
#> Source: local data frame [9,803 x 6]
#> 
#>             id  prcp       date mflag qflag sflag
#> 1  AGE00147704 -9999 1909-11-01    NA            
#> 2  AGE00147704    23 1909-12-01    NA           E
#> 3  AGE00147704    81 1910-01-01    NA           E
#> 4  AGE00147704     0 1910-02-01    NA           E
#> 5  AGE00147704    18 1910-03-01    NA           E
#> 6  AGE00147704     0 1910-04-01    NA           E
#> 7  AGE00147704   223 1910-05-01    NA           E
#> 8  AGE00147704     0 1910-06-01    NA           E
#> 9  AGE00147704     0 1910-07-01    NA           E
#> 10 AGE00147704     0 1910-08-01    NA           E
#> ..         ...   ...        ...   ...   ...   ...

Min date with single variable

ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01")
#> Source: local data frame [6,423 x 6]
#> 
#>             id prcp       date mflag qflag sflag
#> 1  AGE00147704    0 1920-02-01    NA           E
#> 2  AGE00147704    0 1920-03-01    NA           E
#> 3  AGE00147704    0 1920-04-01    NA           E
#> 4  AGE00147704    0 1920-05-01    NA           E
#> 5  AGE00147704    0 1920-06-01    NA           E
#> 6  AGE00147704    0 1920-07-01    NA           E
#> 7  AGE00147704    0 1920-08-01    NA           E
#> 8  AGE00147704    0 1920-09-01    NA           E
#> 9  AGE00147704    0 1920-10-01    NA           E
#> 10 AGE00147704    0 1920-11-01    NA           E
#> ..         ...  ...        ...   ...   ...   ...

Max date with single variable

ghcnd_search("AGE00147704", var = "PRCP", date_max = "1915-01-01")
#> Source: local data frame [1,887 x 6]
#> 
#>             id  prcp       date mflag qflag sflag
#> 1  AGE00147704 -9999 1909-11-01    NA            
#> 2  AGE00147704    23 1909-12-01    NA           E
#> 3  AGE00147704    81 1910-01-01    NA           E
#> 4  AGE00147704     0 1910-02-01    NA           E
#> 5  AGE00147704    18 1910-03-01    NA           E
#> 6  AGE00147704     0 1910-04-01    NA           E
#> 7  AGE00147704   223 1910-05-01    NA           E
#> 8  AGE00147704     0 1910-06-01    NA           E
#> 9  AGE00147704     0 1910-07-01    NA           E
#> 10 AGE00147704     0 1910-08-01    NA           E
#> ..         ...   ...        ...   ...   ...   ...

Min and max date with single variable

ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01", date_max = "1925-01-01")
#> Source: local data frame [1,826 x 6]
#> 
#>             id prcp       date mflag qflag sflag
#> 1  AGE00147704    0 1920-02-01    NA           E
#> 2  AGE00147704    0 1920-03-01    NA           E
#> 3  AGE00147704    0 1920-04-01    NA           E
#> 4  AGE00147704    0 1920-05-01    NA           E
#> 5  AGE00147704    0 1920-06-01    NA           E
#> 6  AGE00147704    0 1920-07-01    NA           E
#> 7  AGE00147704    0 1920-08-01    NA           E
#> 8  AGE00147704    0 1920-09-01    NA           E
#> 9  AGE00147704    0 1920-10-01    NA           E
#> 10 AGE00147704    0 1920-11-01    NA           E
#> ..         ...  ...        ...   ...   ...   ...

Min and max date with all variables

ghcnd_search("AGE00147704", date_min = "1920-01-01", date_max = "1925-01-01")
#> $tmax
#> Source: local data frame [1,826 x 6]
#> 
#>             id tmax       date mflag qflag sflag
#> 1  AGE00147704  150 1920-02-01    NA           E
#> 2  AGE00147704  150 1920-03-01    NA           E
#> 3  AGE00147704  170 1920-04-01    NA           E
#> 4  AGE00147704  190 1920-05-01    NA           E
#> 5  AGE00147704  240 1920-06-01    NA           E
#> 6  AGE00147704  310 1920-07-01    NA           E
#> 7  AGE00147704  320 1920-08-01    NA           E
#> 8  AGE00147704  270 1920-09-01    NA           E
#> 9  AGE00147704  280 1920-10-01    NA           E
#> 10 AGE00147704  220 1920-11-01    NA           E
#> ..         ...  ...        ...   ...   ...   ...
#> 
#> $tmin
#> Source: local data frame [1,826 x 6]
#> 
#>             id tmin       date mflag qflag sflag
#> 1  AGE00147704  100 1920-02-01    NA           E
#> 2  AGE00147704   80 1920-03-01    NA           E
#> 3  AGE00147704   90 1920-04-01    NA           E
#> 4  AGE00147704  140 1920-05-01    NA           E
#> 5  AGE00147704  180 1920-06-01    NA           E
#> 6  AGE00147704  200 1920-07-01    NA           E
#> 7  AGE00147704  210 1920-08-01    NA           E
#> 8  AGE00147704  190 1920-09-01    NA           E
#> 9  AGE00147704  200 1920-10-01    NA           E
#> 10 AGE00147704  150 1920-11-01    NA           E
#> ..         ...  ...        ...   ...   ...   ...
#> 
#> $prcp
#> Source: local data frame [1,826 x 6]
#> 
#>             id prcp       date mflag qflag sflag
#> 1  AGE00147704    0 1920-02-01    NA           E
#> 2  AGE00147704    0 1920-03-01    NA           E
#> 3  AGE00147704    0 1920-04-01    NA           E
#> 4  AGE00147704    0 1920-05-01    NA           E
#> 5  AGE00147704    0 1920-06-01    NA           E
#> 6  AGE00147704    0 1920-07-01    NA           E
#> 7  AGE00147704    0 1920-08-01    NA           E
#> 8  AGE00147704    0 1920-09-01    NA           E
#> 9  AGE00147704    0 1920-10-01    NA           E
#> 10 AGE00147704    0 1920-11-01    NA           E
#> ..         ...  ...        ...   ...   ...   ...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment