Created
May 30, 2021 20:31
-
-
Save BillPetti/7216bd30522361705e959768f66ce5a1 to your computer and use it in GitHub Desktop.
Joining results table and urls where there are no common keys to use
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# first table of game results, where some games where not played | |
table_1 <- tibble(game = seq(1,10,1), | |
result = c('W', 'W', "Ppd", | |
"W", "W", "W", | |
"Ppd", "W", "W", | |
"Ppd")) | |
# second table where urls from completed games were scraped; for rows where result == Ppd there are no urls to scrape | |
table_2 <- tibble(url = rep("Yes", 7)) | |
# no key to join the two tables on | |
# filter the first table removing those rows where we won't have a url in the second table | |
binded <- table_1 %>% | |
filter(result != 'Ppd') %>% | |
bind_cols(table_2) | |
# now join the new table and the original one | |
table_1 %>% | |
left_join(binded, by = c('game', 'result')) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
with rs_games as (
select 'W' as game_state union all
select 'W' union all
select 'Ppd' union all
select 'W' union all
select 'W' union all
select 'W' union all
select 'Ppd' union all
select 'W' union all
select 'W' union all
select 'Ppd'
)
, rs_url as (
select 'Yes' as game_url union all
select 'Yes' union all
select 'Yes' union all
select 'Yes' union all
select 'Yes' union all
select 'Yes' union all
select 'Yes'
)
, rs_games_ext as (
select row_number() over (order by true) as rn_g
, case when game_state = 'Ppd' then null else row_number() over (partition by case when game_state = 'Ppd' then 0 else 1 end order by true) end as rn_good
, g.* from rs_games g
)
, rs_url_ext as (
select row_number() over (order by true) as rn_u
, u.* from rs_url u
)
, rs_match as (
select *
from rs_games_ext g left join rs_url_ext u
on g.rn_good = u.rn_u
)
select *
except(rn_good, rn_u)
from rs_match order by rn_g