Erik (erikgregorywebb): GitHub gists
library(tidyverse)
library(rvest)
# read in parcel list
url = 'https://gist.githubusercontent.com/erikgregorywebb/741b472a8ae1bb97d617eda3d6aaea3d/raw/aef7b5083670659788a009185e0e055d0ac1b350/gila-buttes-parcel-search.csv'
parcels_df = read_csv(url)
parcels = parcels_df %>% pull(`PARCEL NUMBER`) %>% str_replace_all('-', '')
# scrape details
gila-buttes-parcel-search.csv (preview):
PARCEL NUMBER,OWNER NAME,MAILING CITY & STATE,SUBDIVISION NAME
509-84-3490,VALDEZ RACHEL SUSAN,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3500,PAUL SCHLANGER & MICHELLE SCHLANGER LLC,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3510,DR HORTON INC,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3520,BALAORO MARK B & LAURA TRS,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3530,CLEMENT MATTHEW,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3540,BALAORO MARK B & LAURA TRS,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3550,VENZON JENNIFER ANN & ROBBY PATDO,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3560,BOOKER WILLIAM DEVON JR,CASA GRANDE AZ,GILA BUTTES PHASE 1
509-84-3570,GEARY JAMES H,CASA GRANDE AZ,GILA BUTTES PHASE 1
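The script's preview cuts off at the `# scrape details` comment, with the CSV above supplying the parcel list it reads. A minimal sketch of what the detail-scraping loop might look like; the parcel-viewer URL pattern here is a hypothetical placeholder, not the gist's actual endpoint:

# loop over the cleaned parcel numbers and collect the details table from each page
details = list()
for (p in parcels) {
  Sys.sleep(2)  # pause between requests
  parcel_url = paste0('https://example-assessor.gov/parcel/', p)  # hypothetical URL pattern
  parcel_page = read_html(parcel_url)
  details[[p]] = parcel_page %>% html_table() %>% nth(1)
}
details_df = bind_rows(details, .id = 'parcel')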
library(rvest)
library(stringr)
url = 'https://mesamarathon.com/results?pk=7675253'
page = read_html(url)
# get full-size photo URLs by stripping the '_thumb' suffix from each thumbnail src
paths = page %>% html_nodes('.photo') %>% html_attr('src') %>% str_replace('_thumb', '')
setwd("~/Downloads")
# download each photo under a sequential file name
for (i in 1:length(paths)) {
  file_name = paste('file-kristin-', i, '.jpg', sep = '')
  download.file(paths[i], file_name, mode = 'wb')  # 'wb' keeps the JPEGs intact on Windows
}
# import libraries
library(aws.s3)
library(tidyverse)
library(jsonlite)
library(stringr)
library(ggbump)
library(cowplot)
# setting keys (placeholder values; the secret-key line completes the
# truncated call using aws.s3's standard environment variable names)
Sys.setenv("AWS_ACCESS_KEY_ID" = "AWS_ACCESS_KEY_ID",
           "AWS_SECRET_ACCESS_KEY" = "AWS_SECRET_ACCESS_KEY")
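The preview ends at credential setup. For context, a minimal sketch of the kind of call aws.s3 enables next; the file, object, and bucket names are hypothetical:

# upload a local file to S3 (file/object/bucket names are hypothetical)
put_object(file = 'output.csv', object = 'data/output.csv', bucket = 'my-example-bucket')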
library(rvest)
library(tidyverse)
url = 'https://getclair.com/about/team'
page = read_html(url)
raw = page %>% html_nodes('.transition-all') %>% html_text2()
df = tibble(name = str_replace(raw[str_detect(raw, '\n\n')], '\n\n', ', '), scraped_at = Sys.time())
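Since every row carries a scraped_at timestamp, a natural next step is appending each snapshot to a running log; a minimal sketch, assuming a local CSV whose path is a placeholder:

# append this snapshot to a running log, writing the header only on the first run
log_path = 'clair-team-log.csv'  # placeholder path
write_csv(df, log_path, append = file.exists(log_path))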
library(tidyverse)
library(rvest)
library(stringr)
# scrape all the article links, following pagination until there is no next page
all_links = c()
url = 'https://www.theneedledrop.com/articles?category=Reviews'
while (url != 'end') {
  Sys.sleep(3)
  page = read_html(url)
  # collect the article links on the current page (selector assumed; the preview cuts off here)
  all_links = c(all_links, page %>% html_nodes('article a') %>% html_attr('href'))
  # follow the next-page link if present, otherwise end the loop (selector assumed)
  next_path = page %>% html_node('a.pagination-next') %>% html_attr('href')
  url = ifelse(is.na(next_path), 'end', paste0('https://www.theneedledrop.com', next_path))
}
library(tidyverse)
library(rvest)
### EXTRACT
# marathon
url = 'https://mesamarathon.com/results?sort=&race=166724&date=&event=Marathon&gender=&division=&search=&page_167031=1&size_167031=100000&page_167067=1&size_167067=25'
page = read_html(url)
raw_marathon = page %>% html_table() %>% nth(2)  # the marathon results are the second table on the page
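The snippet stops at extraction. A sketch of the transform step it implies; the lower-cased column names (notably time) are assumptions about the results table, not values verified against the page:

# standardize column names and convert chip times to seconds (column names assumed)
library(lubridate)
marathon = raw_marathon %>%
  rename_with(tolower) %>%
  mutate(finish_seconds = period_to_seconds(hms(time)))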
# import libraries
library(tidyverse)
library(rvest)
library(scales)
# get links to data files
url = 'https://github.com/erikgregorywebb/github-actions-demo/tree/main/data'
page = read_html(url)
all_paths = page %>% html_nodes('a') %>% html_attr('href')
paths = all_paths[str_detect(all_paths, '/erikgregorywebb/github-actions-demo/blob/main/data/fm-rates')]
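The blob paths point at GitHub's HTML pages rather than the files themselves, so the likely next step is swapping in raw.githubusercontent.com and stacking the files; this sketch assumes the fm-rates files are CSVs, as their names suggest:

# convert blob page paths to raw file URLs, then read and stack the CSVs
raw_urls = paste0('https://raw.githubusercontent.com', str_replace(paths, '/blob/', '/'))
rates = map_df(raw_urls, read_csv)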
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
# get page content
url = 'http://www.freddiemac.com/'
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")
# data source: https://www.cargurus.com/Cars/price-trends/
library(tidyverse)
library(dplyr)
library(httr)
library(jsonlite)
library(broom)
library(lubridate)
library(scales)
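The preview shows only the setup. A minimal sketch of what the httr/jsonlite pairing implies for pulling the price-trend data; the endpoint URL is a hypothetical placeholder, since the gist's actual request isn't shown:

# request price-trend JSON and parse it (endpoint is a hypothetical placeholder)
res = GET('https://www.cargurus.com/Cars/price-trends/example.json')
trends_raw = fromJSON(content(res, as = 'text', encoding = 'UTF-8'))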