Erik (erikgregorywebb): GitHub gists
library(tidyverse)
library(rvest)
# read in parcel list
url = 'https://gist.githubusercontent.com/erikgregorywebb/741b472a8ae1bb97d617eda3d6aaea3d/raw/aef7b5083670659788a009185e0e055d0ac1b350/gila-buttes-parcel-search.csv'
parcels_df = read_csv(url)
parcels = parcels_df %>% pull(`PARCEL NUMBER`) %>% str_replace_all('-', '')
# scrape details
gila-buttes-parcel-search.csv (preview):
PARCEL NUMBER,OWNER NAME,MAILING CITY & STATE,SUBDIVISION NAME
509-84-3490,VALDEZ RACHEL SUSAN,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3500,PAUL SCHLANGER & MICHELLE SCHLANGER LLC,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3510,DR HORTON INC,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3520,BALAORO MARK B & LAURA TRS,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3530,CLEMENT MATTHEW,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3540,BALAORO MARK B & LAURA TRS,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3550,VENZON JENNIFER ANN & ROBBY PATDO,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3560,BOOKER WILLIAM DEVON JR,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3570,GEARY JAMES H,CASA GRANDE AZ,GILA BUTTES PHASE 1
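The script's preview cuts off at the `# scrape details` comment, with the CSV above supplying the parcel list it reads. A minimal sketch of what the detail-scraping loop might look like; the parcel-viewer URL pattern here is a hypothetical placeholder, not the gist's actual endpoint:

# loop over the cleaned parcel numbers and collect the details table from each page
details = list()
for (p in parcels) {
  Sys.sleep(2)  # pause between requests
  parcel_url = paste0('https://example-assessor.gov/parcel/', p)  # hypothetical URL pattern
  parcel_page = read_html(parcel_url)
  details[[p]] = parcel_page %>% html_table() %>% nth(1)
}
details_df = bind_rows(details, .id = 'parcel')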
library(rvest)
library(stringr)
url = 'https://mesamarathon.com/results?pk=7675253'
page = read_html(url)
# get full-size photo URLs by stripping the '_thumb' suffix from each thumbnail src
paths = page %>% html_nodes('.photo') %>% html_attr('src') %>% str_replace('_thumb', '')
setwd("~/Downloads")
# download each photo under a sequential file name
for (i in 1:length(paths)) {
  file_name = paste('file-kristin-', i, '.jpg', sep = '')
  download.file(paths[i], file_name, mode = 'wb')  # 'wb' keeps the JPEGs intact on Windows
}
# import libraries
library(aws.s3)
library(tidyverse)
library(jsonlite)
library(stringr)
library(ggbump)
library(cowplot)
# setting keys (placeholder values; the secret-key line completes the
# truncated call using aws.s3's standard environment variable names)
Sys.setenv("AWS_ACCESS_KEY_ID" = "AWS_ACCESS_KEY_ID",
           "AWS_SECRET_ACCESS_KEY" = "AWS_SECRET_ACCESS_KEY")
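The preview ends at credential setup. For context, a minimal sketch of the kind of call aws.s3 enables next; the file, object, and bucket names are hypothetical:

# upload a local file to S3 (file/object/bucket names are hypothetical)
put_object(file = 'output.csv', object = 'data/output.csv', bucket = 'my-example-bucket')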
library(rvest)
library(tidyverse)
url = 'https://getclair.com/about/team'
page = read_html(url)
raw = page %>% html_nodes('.transition-all') %>% html_text2()
df = tibble(name = str_replace(raw[str_detect(raw, '\n\n')], '\n\n', ', '), scraped_at = Sys.time())
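Since every row carries a scraped_at timestamp, a natural next step is appending each snapshot to a running log; a minimal sketch, assuming a local CSV whose path is a placeholder:

# append this snapshot to a running log, writing the header only on the first run
log_path = 'clair-team-log.csv'  # placeholder path
write_csv(df, log_path, append = file.exists(log_path))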
library(tidyverse)
library(rvest)
library(stringr)
# scrape all the article links, following pagination until there is no next page
all_links = c()
url = 'https://www.theneedledrop.com/articles?category=Reviews'
while (url != 'end') {
  Sys.sleep(3)
  page = read_html(url)
  # collect the article links on the current page (selector assumed; the preview cuts off here)
  all_links = c(all_links, page %>% html_nodes('article a') %>% html_attr('href'))
  # follow the next-page link if present, otherwise end the loop (selector assumed)
  next_path = page %>% html_node('a.pagination-next') %>% html_attr('href')
  url = ifelse(is.na(next_path), 'end', paste0('https://www.theneedledrop.com', next_path))
}
library(tidyverse)
library(rvest)
### EXTRACT
# marathon
url = 'https://mesamarathon.com/results?sort=&race=166724&date=&event=Marathon&gender=&division=&search=&page_167031=1&size_167031=100000&page_167067=1&size_167067=25'
page = read_html(url)
raw_marathon = page %>% html_table() %>% nth(2)  # the marathon results are the second table on the page
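The snippet stops at extraction. A sketch of the transform step it implies; the lower-cased column names (notably time) are assumptions about the results table, not values verified against the page:

# standardize column names and convert chip times to seconds (column names assumed)
library(lubridate)
marathon = raw_marathon %>%
  rename_with(tolower) %>%
  mutate(finish_seconds = period_to_seconds(hms(time)))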
# import libraries
library(tidyverse)
library(rvest)
library(scales)
# get links to data files
url = 'https://github.com/erikgregorywebb/github-actions-demo/tree/main/data'
page = read_html(url)
all_paths = page %>% html_nodes('a') %>% html_attr('href')
paths = all_paths[str_detect(all_paths, '/erikgregorywebb/github-actions-demo/blob/main/data/fm-rates')]
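The blob paths point at GitHub's HTML pages rather than the files themselves, so the likely next step is swapping in raw.githubusercontent.com and stacking the files; this sketch assumes the fm-rates files are CSVs, as their names suggest:

# convert blob page paths to raw file URLs, then read and stack the CSVs
raw_urls = paste0('https://raw.githubusercontent.com', str_replace(paths, '/blob/', '/'))
rates = map_df(raw_urls, read_csv)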
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
# get page content
url = 'http://www.freddiemac.com/'
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")
# data source: https://www.cargurus.com/Cars/price-trends/
library(tidyverse)
library(dplyr)
library(httr)
library(jsonlite)
library(broom)
library(lubridate)
library(scales)
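The preview shows only the setup. A minimal sketch of what the httr/jsonlite pairing implies for pulling the price-trend data; the endpoint URL is a hypothetical placeholder, since the gist's actual request isn't shown:

# request price-trend JSON and parse it (endpoint is a hypothetical placeholder)
res = GET('https://www.cargurus.com/Cars/price-trends/example.json')
trends_raw = fromJSON(content(res, as = 'text', encoding = 'UTF-8'))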