Skip to content

Instantly share code, notes, and snippets.

# zenodo large file upload
library(httr)
library(dplyr)
library(purrr)
# Zenodo API credentials and upload target.
# Create a personal access token at:
# https://zenodo.org/account/settings/applications/
token <- ""
# Id of the deposit to upload into; per the original note, fill in the
# deposit form first (presumably on Zenodo -- confirm).
deposit_id <- 6137047
@jhnwllr
jhnwllr / html_table_for_outlook.r
Created December 22, 2021 15:20
Make an HTML table for pasting into Outlook
library(dplyr)
library(purrr)
library(xtable)
d = readr::read_tsv("C:/Users/ftw712/Desktop/Legume/data/legume_unmatched.tsv") %>%
arrange(-count) %>%
glimpse() %>%
head(30) %>%
mutate(n_suggestions = map_int(v_scientificname,~nrow(rgbif::name_suggest(.x)$data))) %>%
mutate(n_suggestions = format(n_suggestions, nsmall = 0)) %>%
We can't make this file beautiful and searchable because it's too large.
scientificName
Abarema abbottii
Abarema acreana
Abarema adenophora
Abarema alexandri
Abarema asplenifolia
Abarema auriculata
Abarema barbouriana
Abarema barnebyana
Abarema brachystachya
library(ggplot2)
library(ggtext)
library(extrafont)
# Font setup via extrafont: import fonts, register them for the "win" device,
# then list the fonts known to the Windows graphics device.
# NOTE(review): font_import() looks like a one-off, slow, interactive step --
# confirm whether it really has to run on every execution of this script.
font_import()
loadfonts(device = "win")
windowsFonts()

# Axis tick positions: about 7 "pretty" breaks spanning 0 to 900,000.
breaks <- scales::pretty_breaks(n = 7)(c(0, 900e3))
# Tick labels built from the breaks; presumably rendered in thousands with a
# "K" suffix (unit_scale = 1e-3) -- verify against gbifapi::plot_label_maker.
labels <- gbifapi::plot_label_maker(
  breaks,
  unit_MK = "K",
  unit_scale = 1e-3
)
@jhnwllr
jhnwllr / classic_bar_plot
Created October 25, 2021 10:49
Classic GBIF barplot used for everything
library(ggplot2)
# Axis tick positions: about 5 "pretty" breaks spanning 0 to 3,000,000.
breaks <- scales::pretty_breaks(n = 5)(c(0, 3e6))
# Tick labels built from the breaks; presumably rendered in thousands with a
# "K" suffix (unit_scale = 1e-3) -- verify against gbifapi::plot_label_maker.
labels <- gbifapi::plot_label_maker(
  breaks,
  unit_MK = "K",
  unit_scale = 1e-3
)
p = ggplot(pd,aes(class,n)) +
scale_y_continuous(breaks = breaks,label = labels) +
geom_col(stat="identity",fill="#4B9E46") +
coord_flip() +
xlab("") +
# example showing usefulness of GBIF API when combined with purrr
library(dplyr)
library(httr)
library(purrr)
library(jsonlite)
"Trochilidae" %>% # sci name for humming birds
paste0("https://api.gbif.org/v1/species/match?name=",.) %>%
GET() %>%
content() %>%
We can't make this file beautiful and searchable because it's too large.
0f85c38c-92ad-407b-b21e-54932bdc6058 1
267cc444-4df8-4d9e-8a6b-6b13e3600a84 92
a0992ffa-00c1-4d7b-8366-e18c9ec1a168 5
7df0563f-2415-43c3-8865-f803f92ca5fe 16
08b92fdc-9bba-469a-8382-0d7f0fb89bb7 1
93806ea9-3fff-4376-8d8e-062e5ece35bf 2
fc802397-f995-4145-93d7-fee098887602 11
ae830ee1-1095-40a7-8384-95d7039acaa4 4
dc935b9b-2f74-4b50-8dd3-9c4a91dbfe44 5
5b0dfb7c-9e7e-4e3e-b438-5e25073759ba 1
# Copy a local file to the custom_download directory on the remote host.
FILE_NAME="Taxon.tsv"
COPY_DIR="/cygdrive/c/Users/ftw712/Desktop/"
# Quote the expansion so a path containing spaces or glob characters reaches
# scp as a single argument instead of being word-split by the shell.
scp -r "${COPY_DIR}${FILE_NAME}" jwaller@c5gateway-vh.gbif.org:/mnt/auto/misc/download.gbif.org/custom_download/jwaller/
# Zip the entries of the directory `dir` into an archive whose name is `dir`.
# `dir` is used here as a path variable (expected to be set outside this
# line); list.files() is the function that base::dir() is an alias for.
utils::zip(
  zipfile = dir,
  files = list.files(dir, full.names = TRUE)
)
import org.apache.spark.sql.functions._
// wasbs:// location of the occurrence parquet files; the trailing /* selects
// every entry under occurrence.parquet.
val wasbs_path = "wasbs://gbif@ai4edataeuwest.blob.core.windows.net/occurrence/20210413/occurrence.parquet/*"
// Load the parquet files into a DataFrame.
val df = spark.read.parquet(wasbs_path)
// Number species total: count of distinct specieskey values.
// NOTE(review): a null specieskey would presumably count as one value -- confirm.
df.select("specieskey").distinct().count()
// Number species by Kingdom: distinct (kingdom, specieskey) pairs, counted per
// kingdom and shown in descending order of count.
df.select("kingdom","specieskey").distinct().groupBy("kingdom").count().orderBy(desc("count")).show()
// Number records total