## SaintZeno/positional_adp_merge.R

## positional_adp_merge.R
options(stringsAsFactors = FALSE)
library(jsonlite)
library(dplyr)


## params
# Season(s) whose ADP is requested from the FFCalc API further down.
years_adp <- 2018

# Season(s) of FFStats rows that are kept after the position files are stacked.
years_metrics <- 2016:2017

# Directory the input csv files are read from; the output csv is written here too.
fp <- '/Users/zenomuscarella/Documents/FF_projs/data/ffstatistics/'

## merging all FFStats player data
# Read the four per-position FFStats exports and tag each frame with its
# position label. `temp` is kept as a named list (qb, rb, wr, te) because the
# row-count check further down sums the rows of its elements.
position_files <- c(qb = 'qbdata.csv', rb = 'rbdata.csv',
                    wr = 'wrdata.csv', te = 'tedata.csv')
position_labels <- c(qb = 'QB', rb = 'RB', wr = 'WR', te = 'TE')

temp <- Map(
  function(csv_name, label) {
    pos_df <- read.csv(paste0(fp, csv_name))
    pos_df$position <- label
    pos_df
  },
  position_files,
  position_labels
)

# The position files do not all share the same columns, so every frame is
# padded with NA columns for whatever it lacks; rbind() needs matching names.
all_cols <- unique(unlist(lapply(temp, names)))

temp <- lapply(temp, function(pos_df) {
  pos_df[, setdiff(all_cols, names(pos_df))] <- NA
  pos_df
})

# Stack the padded frames into a single data frame.
ffstats <- data.frame(do.call(rbind, temp))


## format some of the data

# Normalise the column names: lower-case them, collapse every run of periods
# into a single period and drop a trailing period, so that for example
# 'PPG..4pt.TD.' becomes 'ppg.4pt.td'.
# A regex is used instead of a fixed '..' -> '.' swap because the fixed swap
# only halves longer runs ('a...b' would stay 'a..b', and 'a...' would keep a
# trailing period after the single strip).
names(ffstats) <- gsub('\\.+', '.', tolower(names(ffstats)))
names(ffstats) <- sub('\\.$', '', names(ffstats))


## check rows match
# The stacked frame should have as many rows as the per-position frames
# combined. The two values are only meant to be compared by eye; nothing is
# enforced here.
nrow(ffstats)
sum(vapply(temp, nrow, integer(1)))


## subset down to the seasons listed in years_metrics
in_scope <- ffstats$year %in% years_metrics
ffstats <- data.frame(ffstats[in_scope, ])

# Periods are stripped from player names; the ADP sources get the same
# treatment before they are merged on `player`.
ffstats$player <- gsub('.', '', ffstats$player, fixed = TRUE)

# Where the PPR figures are missing, fall back to the 4pt-TD figures, then
# rank players by points per game within each position (1 = highest ppg).
# order(order(x)) turns a sort permutation into ranks; ties keep their row
# order and NA values are ranked last.
# NOTE(review): ranks are computed within position only, so every season in
# years_metrics is ranked together, while the z-scores at the end of the
# script are grouped by position AND season -- confirm this is intended.
ffstats <- ffstats %>%
  mutate(
    ppg.ppr = ifelse(is.na(ppg.ppr), ppg.4pt.td, ppg.ppr),
    posrank.ppr = ifelse(is.na(posrank.ppr), posrank.4pt.td, posrank.ppr)
  ) %>%
  group_by(position) %>%
  mutate(ppg_pos_rank = order(order(ppg.ppr, decreasing = TRUE))) %>%
  data.frame()

#### ffstats data merged! -- could also replace the above w/ the db file but... Zeno doesn't have it
####

## Pull the FFcalc ADP!!
## FFCalc actually has a solid API we can use, so there is no need to import pesky CSVs!

# Download one season of 12-team PPR ADP from the fantasyfootballcalculator.com
# API and return its `players` table as a plain data frame with lower-case
# column names, the requested season in `year`, and two added columns:
#   pos_redraft_adp - rank of `adp` within the player's position (1 = earliest)
#   redraft_adp     - copy of `adp`
# names(fromJSON(adp_url)) is worth a look: the response carries more than
# just the `players` element used here.
fetch_ffcalc_adp <- function(season) {
  adp_url <- paste0(
    'https://fantasyfootballcalculator.com/api/v1/adp/ppr?teams=12&year=',
    season
  )

  players <- fromJSON(adp_url)$players
  names(players) <- tolower(names(players))
  players$year <- season

  ## order(order(x)) converts the sort permutation into positional ADP ranks
  ranked <- players %>%
    group_by(position) %>%
    mutate(pos_redraft_adp = order(order(adp, decreasing = FALSE)),
           redraft_adp = adp)

  data.frame(ranked)
}

# One frame per requested season, stacked with the last season first.
ffcalc <- do.call(rbind, rev(lapply(years_adp, fetch_ffcalc_adp)))

# Strip periods from the names so they line up with the ffstats player names.
ffcalc$player <- gsub('.', '', ffcalc$name, fixed = TRUE)


## try to run a dirty merge

# Left join: every ffstats row is kept, and rows without a matching
# player/position in ffcalc get NA in the ADP columns.
res <- merge(x = ffstats, y = ffcalc,
             by = c('player', 'position'), all.x = TRUE)

# merge() suffixes columns present in both inputs with .x (ffstats) and
# .y (ffcalc). The ffstats season is kept under a clearer name and both
# suffixed year columns are dropped.
res$year_metric <- res$year.x
for (suffixed_year in c('year.x', 'year.y')) {
  res[[suffixed_year]] <- NULL
}

# Use the ffstats-side team as the team column.
res$team <- res$team.x

### pull in dlf data

# Read the DLF ADP export and lower-case its column names.
dlf <- read.csv(paste0(fp, 'dlfadp players.csv'))
names(dlf) <- tolower(names(dlf))

# Keep only the text before the first ', ' in the raw player field.
# sub() works element-wise, so each row keeps its own value. The previous
# do.call(rbind, strsplit(...))[, 1] built a matrix from the split pieces,
# which warns when rows split into different numbers of pieces and loses
# rows whose player field is empty (strsplit() returns character(0) for '').
dlf$player <- sub(', .*$', '', dlf$player)

# Strip periods so the names line up with the player names already in `res`.
dlf$player <- gsub('.', '', dlf$player, fixed = TRUE)

# Give the DLF ADP columns names that will not collide with the redraft ADP.
dlf$pos_dyno_adp <- dlf$posadp
dlf$dyno_adp <- dlf$adp

# Left join onto the running result; rows without a DLF match get NA.
res <- merge(res, dlf, by = c('player', 'position'), all.x = TRUE)

# age.y is the DLF-side age column (merge() suffixes the second input with .y).
res$age <- res$age.y

# Columns carried into the final data set.
to_keep <-
  c('player', 'position', 'team', 'age', 'year_metric',
    'pos_redraft_adp', 'pos_dyno_adp', 'redraft_adp',
    'dyno_adp', 'ppg.ppr', 'ppg_pos_rank')

# z-score of a numeric vector, returned as a plain vector. scale() on its own
# returns a one-column matrix, which mutate() would store as a matrix column.
zscore <- function(x) {
  as.numeric(scale(x, center = TRUE, scale = TRUE))
}

# Standardise the ADP and production measures within each position/season.
# all_of() makes it explicit that to_keep is an external character vector
# (a bare vector inside select() is deprecated), and ungroup() drops the
# grouping so it cannot leak into later operations.
temp <-
  res %>%
  select(all_of(to_keep)) %>%
  group_by(position, year_metric) %>%
  mutate(
    pos_redraft_adp_zscore = zscore(pos_redraft_adp),
    pos_dyno_adp_zscore = zscore(pos_dyno_adp),
    ppg.ppr_zscore = zscore(ppg.ppr),
    ppg_pos_rank_zscore = zscore(ppg_pos_rank)
  ) %>%
  ungroup() %>%
  data.frame()

# Write the to_keep columns followed by the four z-score columns. Previously
# res[, to_keep] was written, so the z-scores computed above were discarded;
# the original columns keep their names and order, the new ones are appended.
write.csv(temp, paste0(fp, 'ffstats_playerrankings_data.csv'))
	## NOTE: a second, tab-indented verbatim copy of the whole script stood here.
	## It re-read every csv, queried the ADP API a second time and rewrote the
	## same output file, so it has been removed; the pipeline above is the only
	## copy that needs to run.