Skip to content

Instantly share code, notes, and snippets.

@stevepowell99
Created October 8, 2023 18:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stevepowell99/ae16b31779d1e462e5a3c7003eef79a1 to your computer and use it in GitHub Desktop.
Save stevepowell99/ae16b31779d1e462e5a3c7003eef79a1 to your computer and use it in GitHub Desktop.
Causal Map 3 Functions
# These are all the main Causal Map 3 functions except for the NLP functions which are in a separate file.
# constants -----------------------------------------------------------------
contrary_color <- "#f2ab73"
ordinary_color <- "#058488"
# helpers -----------------------------------------------------------------
## standard functions -----------------------------------------------------------------
## from DT package
## overwriting the tidyr one because it is so picky
replace_na <- function(vec,rep){
# map(vec,~{if(is.na(.)) rep else .}) %>% unlist()
vec[is.na(vec)] <- rep
vec
}
replace_empty <- function(x,replacement=0){
if(x=="") replacement else x
}
replace_null <- function(x,replacement=0){
if(is.null(x)) replacement else x
}
replace_Inf <- function(x,replacement=0){
# browser()
ifelse(is.infinite(x),replacement , x)
}
replace_inf <- replace_Inf #alias
replace_zero <- function(x,replacement=0){
if(length(x)==0) replacement else x
}
collap <- function(vec,sep="\n"){
vec %>% paste0(collapse=sep)
}
collapc <- function(vec){
vec %>% collap(",")
}
uncollapc <- function(vec){
vec %>% map(function(x){
str_split(x,pattern=",") %>% pluck(1)
})
}
xc <- function(x, sep = " ") {
str_split(x, sep)[[1]]
}
`%notin%` <- Negate(`%in%`)
time_stamp <- function(){
Sys.time() %>% str_replace_all(":","-")
}
left_join_safe <- function (x, y, by = NULL, winner = "y", ...)
{
if (is.null(by))
by = intersect(colnames(x), colnames(y))
if (winner == "y")
x = x %>% select(-intersect(colnames(x), colnames(y)),
by)
else y = y %>% select(-intersect(colnames(x), colnames(y)),
by)
for (i in seq_along(by)) {
y[, by[i]] <- coerceValue(unlist(y[, by[i]]), unlist(x[,
by[i]]))
}
left_join(x, y, by, ...)
}
# the deal with statement_id??
# source_id and statement_code which together define statement_id
# but there is little chance of them coming separated. they are never redefined.
# so we just save statement_id in the form source_id|statement_code.
# we also provide statement codes as a convenience
make_statement_id <- function(row){paste0(row$source_id," | ",row_statement_code)}
get_statement_code <- function(statement_ids){str_remove_all(statement_ids,"^.* \\| ")}
get_source_id <- function(statement_ids){str_remove_all(statement_ids," \\| .*$")}
# colours --------------------------------------------------------------
colorfun <- function(numvec,add_zero=T){
# browser()
((scales::rescale(numvec,to=c(0,1),from=c(max(numvec),if(add_zero)0 else min(numvec))))^.8) %>%
colorRamp(c(ordinary_color,"#FFFFFF"),bias=1)(.) %>% apply(1,function(x)rgb(x[1]/255,x[2]/255,x[3]/255))
# colorRamp(xc("#2f78bc white"),bias=1)(.) %>% apply(1,function(x)rgb(x[1]/255,x[2]/255,x[3]/255))
# map(~modCol("#0000ff",darken=-.,saturate=1-.)) %>% unlist
}
## from DT package
coerceValue <- function (val, old)
{
# old=unlist(old)
if (is.integer(old))
return(as.integer(val))
if (is.numeric(old))
return(as.numeric(val))
if (is.character(old))
return(as.character(val))
if (inherits(old, "Date"))
return(as.Date(val))
if (inherits(old, c("POSIXlt", "POSIXct"))) {
val = strptime(val, "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
if (inherits(old, "POSIXlt"))
return(val)
return(as.POSIXct(val))
}
if (is.factor(old)) {
i = val %in% levels(old)
if (all(i))
return(val)
warning("New value(s) \"", paste(val[!i], collapse = ", "),
"\" not in the original factor levels: \"",
paste(levels(old), collapse = ", "), "\"; will be coerced to NA.")
val[!i] = NA
return(val)
}
# warning("The data type is not supported: ", classes(old))
val
}
fromJSONsafe <- function(vec){
vec <- vec %>% str_replace_all("\"\"","\"")
map(vec,function(tx){
if(validate(tx))fromJSON(tx)else NA
})
}
len_un <- function(vec){
length(unique(vec))
}
clean_grv <- function(tx){
tx %>% str_replace_all("'","&rsquo;") %>%
str_replace_all("\"","&rsquo;") %>%
str_replace_all("‘","&rsquo;") %>%
str_replace_all("’","&rsquo;") %>%
# strip_symbols() %>%
str_replace_all("\"","'") %>%
simplify_unicode
}
simplify_unicode <- function(texvec){
texvec %>%
str_replace_all("\u008d","'") %>%
str_replace_all("\U008d","'") %>%
str_replace_all("\u0085","-") %>%
str_replace_all("\u0085","-") %>%
str_replace_all("\u008e","'") %>%
str_replace_all("\U008e","'") %>%
str_replace_all("\u0092","`") %>%
str_replace_all("\u008f","'") %>%
str_replace_all("\u008g","'") %>%
str_replace_all("\u2019","'") %>%
str_replace_all("\u0090","'") %>%
str_replace_all("\U0090","'") %>%
str_replace_all("\UFFFD","") %>% #that is the weird character
str_replace_all("\xc9v","")
}
row_index <- function(df)1:nrow(df)
## special functions -----------------------------------------------------------------
maxrn <-function(vec)max(vec,na.rm=T)
minrn <-function(vec)min(vec,na.rm=T)
keep_top_level <- function(vec) vec %>% str_remove_all(";.*")
drop_top_level <- function(vec) vec %>% map(~str_match(., ";.*") %>% replace_na(";") %>% str_remove("^;")) %>%
unlist
make_factor_list <- function(links){
main <-
links %>%
get_both_labels()
tops <-
main %>% keep_top_level() %>% unique
c(tops,main) %>% unique
}
get_hashtags <- function(links){
links$hashtags %>% map(uncollapc) %>% unlist %>% unique %>% c("plain_coding")
}
get_both_labels <- function(link){c(link$from_label,link$to_label) %>% unique}
make_factors_from_links <- function(links){
# browser()
from_links <-
links %>%
select(from_label,source_id) %>%
group_by(from_label) %>%
summarise(from_source_count=len_un(source_id),
from_sources=list(source_id %>% unique),
from_frequency=n()) %>%
ungroup
to_links <-
links %>%
select(to_label,source_id) %>%
group_by(to_label) %>%
summarise(to_source_count=len_un(source_id),
to_sources=list(source_id %>% unique),
to_frequency=n()) %>%
ungroup
bind_rows(from_links %>% rename(label=from_label),to_links %>% rename(label=to_label)) %>%
group_by(label) %>%
summarise(source_count=c(unlist(from_sources),unlist(to_sources)) %>% len_un ,
link_count=sum(from_frequency,to_frequency,na.rm=T),
in_degree=sum(from_frequency,na.rm=T),
out_degree=sum(to_frequency,na.rm=T),
outcomeness=signif(100*out_degree/link_count) %>% replace_na(0)
)
}
make_igraph_from_edgelist <- function(links){
links %>% select(from_label,to_label) %>%
filter(!is.na(from_label) & !is.na(to_label)) %>%
as.matrix %>%
igraph::graph_from_edgelist(directed = TRUE)
}
maxrn <-function(vec)max(vec,na.rm=T)
minrn <-function(vec)min(vec,na.rm=T)
make_factor_list <- function(links){
main <-
links %>%
get_both_labels()
tops <-
main %>% keep_top_level() %>% unique
c(tops,main) %>% unique
}
get_hashtags <- function(links){
links$hashtags %>% map(uncollapc) %>% unlist %>% unique %>% c("plain_coding")
}
get_both_labels <- function(link){c(link$from_label,link$to_label) %>% unique}
make_factors_from_links <- function(links){
# browser()
from_links <-
links %>%
select(from_label,source_id) %>%
group_by(from_label) %>%
summarise(from_source_count=len_un(source_id),
from_sources=list(source_id %>% unique),
from_frequency=n()) %>%
ungroup
to_links <-
links %>%
select(to_label,source_id) %>%
group_by(to_label) %>%
summarise(to_source_count=len_un(source_id),
to_sources=list(source_id %>% unique),
to_frequency=n()) %>%
ungroup
bind_rows(from_links %>% rename(label=from_label),to_links %>% rename(label=to_label)) %>%
group_by(label) %>%
summarise(source_count=c(unlist(from_sources),unlist(to_sources)) %>% len_un ,
link_count=sum(from_frequency,to_frequency,na.rm=T),
in_degree=sum(from_frequency,na.rm=T),
out_degree=sum(to_frequency,na.rm=T),
outcomeness=signif(100*out_degree/link_count) %>% replace_na(0)
)
}
make_factors_from_transformed_linksWITHRETAINED <- function(links,sampled_links,original_links){ # this is the transformed version where we need to add original data
from_links <-
links %>%
select(
label=from_label,
from_link_id=link_id,
from_link_count=link_count,
from_source_count=source_count,
from_sources=sources
)
to_links <-
links %>%
# filter(retained) %>%
select(
label=to_label,
to_link_id=link_id,
to_link_count=link_count,
to_source_count=source_count,
to_sources=sources
)
bind_rows(from_links,to_links) %>%
group_by(label,retained) %>%
mutate(
out_degree=sum(from_link_count,na.rm=T),
in_degree=sum(to_link_count,na.rm=T),
link_count=sum(from_link_count,to_link_count,na.rm=T),
source_count=sum(from_source_count,to_source_count,na.rm=T),
outcomeness=signif(100*from_link_count/link_count) %>% replace_na(0),
original_out_degree=sum(from_link_count*retained,na.rm=T) %>% replace_na(0),
original_in_degree=sum(to_link_count*retained,na.rm=T) %>% replace_na(0),
original_link_count=sum(from_link_count*retained,to_link_count*retained,na.rm=T) %>% replace_na(0),
original_source_count=sum(from_source_count*retained,to_source_count*retained,na.rm=T) %>% replace_na(0),
original_outcomeness=signif(100*from_link_count*retained/link_count*retained) %>% replace_na(0)
)%>%
ungroup %>%
filter(retained)
}
make_factors_from_transformed_links <- function(links){ # this is the transformed version where we need to add original data
from_links <-
links %>%
select(
label=from_label,
from_link_id=link_id,
from_link_count=link_count,
from_source_count=source_count,
from_sources=source_ID
)
to_links <-
links %>%
# filter(retained) %>%
select(
label=to_label,
to_link_id=link_id,
to_link_count=link_count,
to_source_count=source_count,
to_sources=sources
)
bind_rows(from_links,to_links) %>%
group_by(label) %>%
mutate(
out_degree=sum(from_link_count,na.rm=T),
in_degree=sum(to_link_count,na.rm=T),
link_count=sum(from_link_count,to_link_count,na.rm=T),
source_count=sum(from_source_count,to_source_count,na.rm=T),
outcomeness=signif(100*from_link_count/link_count) %>% replace_na(0)
)%>%
ungroup
}
make_igraph_from_edgelist <- function(links){
links %>% select(from_label,to_label) %>%
filter(!is.na(from_label) & !is.na(to_label)) %>%
as.matrix %>%
igraph::graph_from_edgelist(directed = TRUE)
}
get_initials <- function(lis){
# browser()
oldlen <- length(unique(lis))
nch <- 1
new <- str_sub(lis,1,nch)
# shorten so still unique
while(
length(unique(str_sub(lis,1,nch)))!=oldlen
){
nch <- nch+1
new <- str_sub(lis,1,nch)
}
oldlen <- length(unique(new))
# now strip any non-unique leading chars
# nch <- 1
# if(length(unique(str_sub(new,5)))==oldlen)return(new)
if(length(unique(new))==1) return(new)
# browser()
# strip identical leading chars
while(length(unique(str_sub(new,2)))==oldlen
&
length(unique(str_sub(new,1,1)))==1 # only strip leading chars if they are the same
){
# if(min(nchar(new))<4) return(new)
new <- str_sub(new,2)
}
new
}
get_chi_surprises <- function(shown,not_shown,field,from_label,to_label,get_initials=F){
# tibble(shown,not_shown)
# get_initials()
# message(from_label %>% length %>% paste0(" : ",field,"; \n"))
# if(from_label[1]=="Community groups/learning") browser()
dat <-
data.frame(shown,not_shown,field,from_label,to_label) %>%
filter(!is.na(field)) %>%
filter("N/A"!=(field)) %>%
# filter(shown>0) %>%
select(-from_label,-to_label) %>%
distinct() %>%
column_to_rownames("field")
res <- chisq.test(dat,correct = T)
# res <- chisq.test(dat,simulate.p.value = T)
dat <- dat %>% rownames_to_column("field") %>% rename(shown_n=shown,not_shown_n=not_shown)
#argh because of inconsistent output of chi
if(nrow(dat)>1){
stdres <- res$stdres
} else {
message("skipping chisq with no comparison")
stdres <- tibble(not_shown=res$stdres[1],shown=res$stdres[2])
1 <- res$p.value
}
p <- res$p.value %>% replace_na(1)
if(p<.1) {
#browser()
stdres %>%
as_tibble %>%
add_column(field=dat$field %>% keep(~!is.na(.)) %>% keep(.!="N/A") %>% as.character()) %>%
left_join(dat,by="field") %>%
arrange(desc(shown)) %>%
filter(shown>0) %>%
{if(get_initials)mutate(field=(get_initials(field))) else .} %>%
mutate(new=paste0((field)," (",shown_n,"/",shown_n+not_shown_n,")")) %>%
pull(new) %>%
collap(", ")
} else "notsig"#list("notsig","notsig")
}
get_top_words <- function(tx){
# browser()
ww2 <- tx %>% unlist%>% str_replace_all(" "," ") %>% str_split(" ") %>% unlist %>% table %>% sort
ww2[setdiff(names(ww2) , stopwords(source="snowball"))] %>% tail(15) %>% rev %>% names %>% collap(", ")
}
get_surprises <- function(links,field,tots,type="Surprise_links"){
message("Looking for surprises")
groups <- links[,field] %>% unique %>% na.omit()
bundles <- links[,c("from_label","to_label")] %>% distinct
complete <- cross_join(groups,bundles)
# complete is all the combinations
# browser()
if(type=="Surprise_links"){
res1 <-
links %>%
ungroup %>%
group_by(from_label,to_label,UQ(sym(field))) %>%
summarise(shown=n())
} else if(type=="Surprise_sources"){
res1 <-
links %>%
ungroup %>%
group_by(from_label,to_label,UQ(sym(field))) %>%
summarise(shown=len_un(source_id))
}
# if(res$shown==1)
# browser()
res <-
res1 %>%
left_join(complete,.,by=c(field,"from_label","to_label")) %>%
mutate(shown=ifelse(is.na(shown),0,shown)) %>%
left_join(tots,by=field) %>%
filter(!is.na(shown)) %>%
mutate(not_shown=tot-shown) %>%
select(-tot) %>%
mutate(not_shown=replace_na(not_shown,0)) %>%
group_by(from_label,to_label) %>%
mutate(stat=get_chi_surprises(shown,not_shown,UQ(sym(field)),from_label,to_label)) %>%
# filter(retained) %>%
group_by(from_label,to_label) %>%
mutate(overall=sum(shown)) %>%
ungroup
links %>%
# filter(retained) %>%
left_join(res) %>%
mutate(label=ifelse(stat=="notsig",overall,paste0(overall," ↗️ ",stat))) %>%
filter(shown!=0)
}
pipe_retain_current_statements <- function(links,current_statements){
# browser()
if(is.null(current_statements))current_statements <- Inf
links %>%
filter(statement_id %in% current_statements) #%>%
# retain(x) %>%
# select(-x)
}
pipe_discard <- function(links){
if("retained" %notin% colnames(links))return(links)
links %>%
filter(retained)
}
# for combining opposites
clarify_opposites <- function(vec){
str_replace_all(vec,"~","Worse / less / no / not -- ")
}
declarify_opposites <- function(vec){
str_replace_all(vec,"Worse / less / no / not -- ","~")
}
flip_vector <- function(tex,flipchar="~",sepchar=";"){
lapply(tex,function(x)flip_inner(x,flipchar=flipchar,sepchar=sepchar)) %>%
unlist(recursive=F)
}
flip_fix_vector <- function(tex,flipchar="~",sepchar=";"){ # to get always one space between sep and flip
tex %>%
str_replace_all(paste0(sepchar," *",flipchar),paste0(sepchar,flipchar)) %>%
str_replace_all(paste0(sepchar,flipchar," *"),paste0(sepchar,flipchar))
}
flip_inner_component <- function(tex,flipchar="~"){
if_else(str_detect(tex,paste0("^ *",flipchar)),str_remove(tex,paste0("^ *",flipchar)),paste0("~",tex))
}
flip_inner <- function(tex,flipchar="~",sepchar=";"){
tex %>%
str_split(sepchar) %>%
`[[`(1) %>%
str_trim %>%
flip_inner_component(flipchar=flipchar) %>%
paste0(collapse="; ")
}
color_combined_links <- function(links){
if("from_flipped" %notin% colnames(links)) return(links %>% mutate(color=ordinary_color))
links %>% mutate(
from_color = case_when(
from_flipped ~ contrary_color,
T ~ ordinary_color
)) %>%
mutate(
to_color = case_when(
to_flipped ~ contrary_color,
T ~ ordinary_color
)) %>%
mutate(
color=paste0(from_color,";0.5:",to_color)
)
}
# main --------------------------------------------------------------------
make_print_map2 <- function(
slinks,
original,
map_nodesep=.5,
map_ranksep=.5,
map_colour_opposites_red=F,
map_color_factors_column="none",
map_size_factors="source_count",
map_size_links="source_count",
map_label_factors="none",
# map_label_links="source_count",
map_wrap_factor_labels=22,
map_wrap_link_labels=22,
legend=""
){
# browser()
original_nrow <- nrow(original)
if("retained" %notin% colnames(slinks))slinks$retained=T
if("label" %notin% colnames(slinks)) slinks$label <- "-"
# browser()
labelled_links <-
slinks %>%
# add_link_counts() %>%
pipe_discard() %>%
# select(-retained) %>%
unite(tooltip,xc("source_id statement_id quote"),sep = ": ",remove = F) %>%
mutate(from_label= clean_grv(from_label)) %>%
mutate(to_label= clean_grv(to_label)) %>%
mutate(from_label=str_wrap(from_label,map_wrap_factor_labels)) %>%
mutate(to_label=str_wrap(to_label,map_wrap_factor_labels)) %>%
mutate(size_links=.data[[map_size_links]])
links <-
labelled_links %>%
# group_by(sources,size_links,retained,from_label,to_label,label,source_count,link_count,original_sources,original_source_count,original_link_count) %>%
group_by(size_links,retained,from_label,to_label,source_count,link_count,across(any_of(xc("from_flipped to_flipped")))) %>%
summarise(
link_id=link_id %>% collap(","),
label=label %>% unique %>% collap(", ") %>% ifelse(.=="","-",.),
tooltip=clean_grv(collap(tooltip)),
.groups="keep"
) %>%
ungroup() %>%
{if(map_wrap_link_labels!="Off") mutate(.,label=str_wrap(label,map_wrap_link_labels)) else .} %>%
{if(map_size_links!="none") mutate(.,penwidth=size_links) else mutate(.,penwidth=1)} %>%
color_combined_links
if(nrow(links)==0)return()
# or this could be links$label
links$penwidth <- as.character(links$penwidth %>% as.numeric %>% scales::rescale(.,to=c(1,7)))
# if(is.null(recodes))
recodes <- tibble(old=c(links$from_label,links$to_label) %>% unique) %>% mutate(new=old) %>% mutate(cluster=row_number())
# links <- links[1:2,]
tooltip_df <-
recodes %>%
ungroup %>%
group_by(new,cluster) %>%
#mutate(old=clean_grv(old)) %>%
#mutate(new=clean_grv(new)) %>%
mutate(new=str_wrap(new,map_wrap_factor_labels)) %>%
summarise(.groups = "keep",n_factors=n(),tooltip=paste0('"',str_replace_all(old,'\n',' '),'"',collapse="\n") %>% clean_grv()) %>%
rename(label2=new) %>%
summarise_all(first) ## FIXME this shouldn't be necessary but i think we can get duplicates because of grv cleaning
# %>%
# mutate(new=str_replace_all(new,"\n"," ")) %>%
# browser()
nodes_df <-
labelled_links %>%
make_factors_from_links() %>%
mutate(label2=label) %>%
# c(links$from_label,links$to_label) %>% clean_grv() %>% unique %>%
# tibble(label2=.) %>%
left_join(.,tooltip_df) %>%
ungroup %>%
{if(map_color_factors_column!="none") mutate(.,fillcolor=.data[[map_color_factors_column]]%>% colorfun ) else .} %>%
{if(map_size_factors!="none") mutate(.,fontsize=.data[[map_size_factors]]) else mutate(.,fontsize=2)} %>%
mutate(fontsize=fontsize %>% as.numeric %>% scales::rescale(.,to=c(12,20))) %>%
rename(cluster_number=cluster) %>%
{if(map_label_factors!="none") mutate(.,label3=paste0(label2," (",.data[[map_label_factors]],")")) else mutate(.,label3=label2)} %>%
mutate(fontcolor="#000000")
# browser()
# this does not yet work because make_factors_from_links does not reconstruct is_flipped
if(F & "is_flipped" %in% colnames(nodes_df)){
# browser()
if(
any(as.numeric(nodes_df$is_flipped)>0,na.rm=T) %>% replace_na(F)
&
"color.border" %notin% colnames(nodes_df)
){
nodes_df$color= scales::div_gradient_pal(ordinary_color,"#eeeeee",contrary_color)(nodes_df$is_flipped)
}
} else {
}
nodes_df$color= ordinary_color
graph_title <- glue::glue("\n---\nFilename: {slinks$file %>% unique}. Citation coverage {signif(100*sum(links$link_count)/original_nrow,1)}%: {sum(links$link_count)} of {original_nrow} total citations are shown here.{if_else(map_label_factors!='none',paste0('\nNumbers on factors show ',map_label_factors %>% str_replace_all('_',' ') %>% str_replace_all('link','citation')),'')}{if_else(map_size_factors!='none',paste0(', sizes show ',map_size_factors %>% str_replace_all('_',' ') %>% str_replace_all('link','citation')),'')}{if_else(map_color_factors_column!='none',paste0(', colours show ',map_color_factors_column %>% str_replace_all('_',' ') %>% str_replace_all('link','citation')),'')}.\n{legend}")
grv_layout <- "dot"
grv_splines <- "splines"
grv_overlap <- F
# nodesep <- 10
# ranksep <- 10
# browser()
# links <-
# links %>%
# select(from_label,to_label,label,tooltip,link_id,penwidth)
graf <-
create_graph() %>%
add_nodes_from_table(
table = nodes_df ,
label_col = label
) %>%
add_edges_from_table(
table = links %>% select(from_label,to_label,label,penwidth,tooltip,color),
from_col = from_label,
to_col = to_label,
from_to_map = label
) %>%
set_node_attrs(label,nodes_df$label3)# %>% clean_grv() )
tmp <-
graf %>%
add_global_graph_attrs("label", graph_title, "graph") %>%
add_global_graph_attrs("layout", grv_layout, "graph") %>%
add_global_graph_attrs("splines", grv_splines, "graph") %>%
add_global_graph_attrs("overlap", grv_overlap, "graph") %>%
add_global_graph_attrs("labelloc", "bottom", "graph") %>%
add_global_graph_attrs("labeljust", "c", "graph") %>%
add_global_graph_attrs("outputorder", "nodesfirst","graph") %>%
add_global_graph_attrs("tooltip", " ", "graph") %>%
add_global_graph_attrs("rankdir", "LR", "graph") %>%
add_global_graph_attrs("fontname", "Arial","graph")%>%
add_global_graph_attrs("forcelabels", T, "graph") %>%
add_global_graph_attrs("nodesep", map_nodesep,"graph") %>%
add_global_graph_attrs("ranksep", map_ranksep,"graph") %>%
add_global_graph_attrs("width", "0", "node") %>%
add_global_graph_attrs("height", "0", "node") %>%
add_global_graph_attrs("style", "rounded, filled","node") %>%
add_global_graph_attrs("penwidth", "0.5","node") %>%
add_global_graph_attrs("fixedsize", "false","node") %>%
add_global_graph_attrs("margin", "0.19","node") %>%
add_global_graph_attrs("shape", "box","node") %>%
add_global_graph_attrs("arrowtail","none", "edge") %>%
add_global_graph_attrs("dir", "both","edge") %>%
add_global_graph_attrs("style", "solid","edge") %>%
add_global_graph_attrs("fontsize", 12, "edge") %>%
render_graph()
attr(tmp,"factors") <- nodes_df
attr(tmp,"links") <- links
tmp
}
get_from_excel <- function(path){
preloaded <-
readxl::excel_sheets(path %>% str_replace_all("\\\\", "/")) %>%
set_names %>% map(~readxl::read_excel(path,sheet = .))
names(preloaded) <- tolower(names(preloaded))
preloaded <-
preloaded %>% "["(xc("factors links statements sources")) %>% compact
}
convert_to_cm2 <- function(table_list) {
links <- table_list$links%>% rename(old_id=statement_id)
statements <- table_list$statements %>% rename(old_id=statement_id) %>% mutate(statement_id=row_number())
sources <- table_list$sources
factors <- tibble(label=get_both_labels(links)) %>%
mutate(factor_id=row_number())
recodes <- (factors$factor_id %>% set_names(factors$label))
links$from <- links$from_label %>% recode(!!!recodes)
links$to <- links$to_label %>% recode(!!!recodes)
srecodes <- (statements$statement_id %>% set_names(statements$old_id))
links$statement_id <- links$old_id %>% recode(!!!srecodes)
# links$hashtags <-
# links$hashtags %>% map(~{fromJSON(replace_na(.,"[]")) %>% unlist %>% collap(",")}) %>% unlist
# browser()
list(
factors=factors,
links=links,
statements=statements,
sources=sources
)
}
convert_from_cm2 <- function(table_list,file_name) {
links <- NULL
statements <- NULL
sources <- NULL
if(is.null(table_list$statements)) {
notify("Your table has no statements, not importing, sorry",4)
return()
}
statements <- table_list$statements %>%
select(text, source_id, question_id) %>%
mutate(statement_code=row_number()) %>%
mutate(statement_id=paste0(source_id," | ",statement_code))
# browser()
if(!is.null(table_list$links))links <-
table_list$links %>%
select(statement_id, from, to, quote,hashtags) %>%
left_join(table_list$factors %>% select(from_label=label,from=factor_id)) %>%
left_join(table_list$factors %>% select(to_label=label,to=factor_id)) %>%
select(-from,-to,statement_code=statement_id) %>%
left_join(statements,by="statement_code") %>%
mutate(link_id=row_number()) %>%
# mutate(hashtags = (hashtags %>% str_split(",") %>% map(toJSON)) %>% unlist) %>%
select(-statement_code,-any_of("text"),-any_of("question_id")) %>%
mutate(created = time_stamp()) %>%
mutate(modified = time_stamp())
# %>%
# add_link_counts()
# %>%
# mutate(retained=T)
# %>%
# add_link_counts()
if(!is.null(table_list$sources))sources <-
table_list$sources
# %>%
# mutate_all(as.character) %>%
# pivot_longer(cols=-(source_id))
if(!is.null(table_list$statements))statements <- statements %>%
select(-statement_code)%>%
mutate(created = time_stamp()) %>%
mutate(modified = time_stamp())
files <-
row <- tibble(
file=file_name,
modified=time_stamp()
# edit=input$file_access_edit %>% c(sess$user) %>% unique %>% toJSON(), # you can't delete yourself. You have to add someone else and get them to remove you.
# copy=input$file_access_copy %>% toJSON(),
# view=input$file_access_view %>% toJSON(),
# description=input$file_access_description,
# archived=input$file_access_archived,
# locked=input$file_access_locked
)
# browser()
links <-
links %>%
add_link_sources(statements,sources)
res <-
list(
files=files,
links=links,
statements=statements,
sources=sources,
settings=tibble(setting="")
)
res %>%
map(~mutate(.,file=file_name))
}
keep_level <- function(vec,level){
vec %>%
str_split(";") %>% map(~head(.,level) %>%
paste0(collapse=";")) %>% unlist
}
make_mentions_tabl <- function(links){
# %>% browser()
# graf$factors <- graf$factors[,colnames(graf$factors)!=""]
# graf$links <- add_labels_to_links(graf$links,factors=graf$factors)
influence <- links %>% mutate(label=from_label,direction="influence")
consequence <- links %>% mutate(label=to_label,direction="consequence")
either_from <- influence %>% mutate(direction="either")
either_to <- consequence %>% mutate(direction="either")
both <- bind_rows(consequence,influence,either_from,either_to)
both %>% select(-from_label,-from_label) %>%
mutate(label=str_replace_all(label,"\n"," ")) %>%
mutate(mentions="any") %>% ## this is actually just a hack so we can use this field in the Mentions table
select(label,direction,mentions,link_id,everything())
}
retain <- function(links,condition){
# browser()
links <-
links %>%
mutate(xretained={{condition}})
if("retained" %notin% colnames(links))links$retained <- T
links$retained <-links$retained & links$xretained
links$xretained <-NULL
links
}
add_link_sources <- function(links,statements,sources){
links %>%
select(-any_of("source_id")) %>%
left_join(statements %>% select(statement_id,source_id),by="statement_id") %>%
left_join_safe(sources,by="source_id",winner="x") %>%
mutate(statement_code=get_statement_code(statement_id))
}
# important that this has to enforce treating any flipped citations as separate links
add_link_counts_simple <- function(links){
links %>%
group_by(from_label,to_label,across(any_of(xc("from_flipped to_flipped")))) %>%
mutate(source_count=len_un(source_id),link_count=n()) %>%
mutate(bundle=paste0(from_label," / ",to_label)) %>%
ungroup
}
add_link_counts <- function(links,original_links){
if(nrow(links)==0)return(links)
links <-
links %>%
add_link_counts_simple()
original_links <-
original_links %>%
rename(original_link_count=original_link_count) %>%
rename(original_source_count=original_source_count) %>%
select(link_id,asdfadsfasdf)
links %>%
left_join(original_links,by="link_id")
}
links <- function(lis){
lis$links
}
# pipe_link_count_limit <- function(links,link_count_limit,type="Sources"){
# # browser()
# if("retained" %notin% colnames(links))links$retained <- T
# counter <- ifelse(type=="Sources","source_count","link_count")
#
# links <-
# links %>%
# add_link_counts()
#
# # maxcount <- links[links$retained,counter] %>% max
#
# # browser()
# links %>%
# mutate(x=.data[[counter]]>=as.numeric(link_count_limit)) %>%
# retain(x)%>%
# select(-x)
# }
# pipe_factor_count_limit <- function(links,factor_count_limit,type="Sources"){
# if("retained" %notin% colnames(links))links$retained <- T
# # browser()
# counter <- ifelse(type=="Sources","source_count","link_count")
# slinks <-
# links %>% add_link_counts()
# factors <-
# slinks %>%
# make_factors_from_transformed_links() %>%
# filter(.data[[counter]]>=as.numeric(factor_count_limit))
#
# links %>%
# mutate(x=from_label %in% factors$label & to_label %in% factors$label) %>%
# retain(x) %>%
# select(-x)
#
# }
# formatting funs -------------------------------------------------------
pipe_label <- function(slinks,map_label_links=NULL,type="None"){
#xc("None Count_all Count_unique List_all List_unique Surprise_links Surprise_sources")
# slinks <-
# slinks %>%
# add_link_counts()
# browser()
if((map_label_links==""))return(slinks)
if(is.null(map_label_links))return(slinks)
if(map_label_links %notin% colnames(slinks)){
message("map_label_links not in table")
map_label_links <- "source_count"
}
# we want the background for the surprise to be everything not just this
# do we need to calculate surprise
is_surprise <-str_detect(type,"Surprise")
if(type=="Surprise_links"){
message("going to look for surprises")
tots <-
slinks %>%
group_by(UQ(sym(map_label_links))) %>%
summarise(tot=n())
} else if(type=="Surprise_sources"){
message("going to look for surprises")
tots <-
slinks %>%
group_by(UQ(sym(map_label_links))) %>%
summarise(tot=len_un(source_id))
}
message("map label links is " %>% paste0(map_label_links,"\n"))
slinks %>%
{if(is_surprise) get_surprises(.,map_label_links,tots,type=type) else mutate(.,label="") } %>%
{
if(map_label_links=="sources") mutate(.,label=sources %>% unlist %>% unique %>% collap(", ")) else
if(map_label_links=="original_sources") mutate(.,label=original_sources %>% unlist %>% unique %>% collap(", ")) else
if(map_label_links=="none") mutate(.,label="") else
if(!is_surprise) mutate(.,label= .data[[map_label_links]]) else
.
# if(map_label_links %in% colnames(sess$file$sources)) mutate(.,label=get_surprises(link_count,original_link_count))else
# mutate(.,label= .data[[map_label_links]])
}
}
# transform filters -------------------------------------------------------
pipe_top_factors <- function(links,top=10,type="Sources",which="Top"){
# browser()
counter <- ifelse(type=="Sources","source_count","link_count")
factors <-
links %>%
add_link_counts_simple() %>%
make_factors_from_links() %>%
arrange(desc(.data[[counter]]))
if(which=="Top"){
factors <-
factors %>%
slice(1:top)
}
else {
# browser()
factors <-
factors %>%
filter(.data[[counter]]>=as.numeric(top))
}
links %>%
group_by(from_label,to_label) %>%
filter(all(from_label %in% factors$label) & all(to_label %in% factors$label))
}
pipe_top_links <- function(links,top=10,type="Sources",which="Top"){
counter <- ifelse(type=="Sources","source_count","link_count")
links <-
links %>%
add_link_counts_simple() # note add link counts provides numbers for retained and nonretained separately
if(which=="Top"){
indx <-
links %>%
ungroup %>%
group_by(.data[[counter]],from_label,to_label) %>%
arrange(.data[[counter]]) %>%
summarise(group=max(.data[[counter]]),.groups="keep") %>%
ungroup %>%
arrange(desc(.data[[counter]])) %>%
filter(row_number()<=top) %>%
select(from_label,to_label)
indx %>%
left_join(links,by=xc("from_label to_label")) %>%
add_link_counts_simple()
}
else {
links %>%
filter(.data[[counter]]>=as.numeric(top)) %>% # some of these are already nonretained but it doesn't matter
add_link_counts_simple()
}
}
pipe_zoom <- function(links,level=1){
links %>%
mutate(.,from_label=keep_level(from_label,level),to_label=keep_level(to_label,level)) %>%
add_link_counts_simple()
}
pipe_combine_opposites <- function(links){
# browser()
factors <-
links %>%
make_factors_from_links() %>%
mutate(
unflipped_label=label,
is_flipped=str_detect(label,paste0("^ *",flipchar)),
try_label=if_else(is_flipped,flip_vector(label,flipchar = flipchar) %>% replace_null(""),label),
label=flip_fix_vector(try_label)
)
# browser()
if(nrow(factors)>0) links <-
links %>%
mutate(from_flipped=(recode(from_label,!!!(factors$is_flipped %>% set_names(factors$unflipped_label)))) %>% as.logical) %>%
mutate(to_flipped=(recode(to_label,!!!(factors$is_flipped %>% set_names(factors$unflipped_label)))) %>% as.logical) %>%
mutate(from_label=(recode(from_label,!!!(factors$label %>% set_names(factors$unflipped_label))))) %>%
mutate(to_label=(recode(to_label,!!!(factors$label %>% set_names(factors$unflipped_label))))) %>%
unite("flipped_bundle",from_flipped,to_flipped,sep = "|",remove=F)
# %>%
# {if(add_colors)color_combined_links(.) else .}
links %>%
add_link_counts_simple()
}
pipe_trace <- function(links,
sess_links,
from_labels=NULL,
to_labels=NULL,
steps=4,
transforms_tracing_strict=F,
transforms_tracing_threads=F
){
if(is.null(from_labels) & is.null(to_labels))return(links)
fromids <- list()
toids <- list()
# browser()
if(is.null(transforms_tracing_strict))transforms_tracing_strict <- F
# work out what are the starting labels in order to arrive at fromids[[stage]] for each stage
if(transforms_tracing_strict){
dlinks <-
sess_links %>%
add_link_counts_simple() %>%
filter(!is.na(from_label) & !is.na(to_label)) %>%
select(link_id,from_label,to_label,source_id) %>%
filter(link_id %in% links$link_id)
} else {
dlinks <-
links %>%
add_link_counts_simple() %>%
filter(!is.na(from_label) & !is.na(to_label)) %>%
select(link_id,from_label,to_label,source_id)
}
if(!is.null(from_labels)) {
tolinks <- dlinks %>% rename(common=from_label)# just all the links, with the receiving slots renamed as common
stage0 <-
dlinks %>%
filter(from_label %in% from_labels) %>% ## doesn't work #FIXME
rename(common=from_label,common_source_id=source_id)
tmps <- list()
tmp <- stage0
# browser()
for(stage in 1:steps){
message(stage %>% paste0("step: ",.))
fromids[[stage]] <- tmp$link_id
# tmps[[stage]] <- tmp
tmp <-
tmp %>%
select(common=to_label,common_source_id) %>% # note how we flip it round
distinct %>%
left_join(tolinks,by="common",relationship="many-to-many") %>% # this is where we join it to the next stage
filter(!is.na(to_label)) %>%
{if(transforms_tracing_threads) filter(.,common_source_id==source_id) else .}
}
}
if(!is.null(to_labels)) {
fromlinks <- dlinks %>% rename(common=to_label)# just all the links, with the receiving slots renamed as common
stage0 <-
dlinks %>%
filter(to_label %in% to_labels) %>%
rename(common=to_label,common_source_id=source_id)
tmps <- list()
tmp <- stage0
# browser()
for(stage in 1:steps){
message(stage %>% paste0("step: ",.))
toids[[stage]] <- tmp$link_id
tmps[[stage]] <- tmp
tmp <-
tmp %>%
select(common=from_label,common_source_id) %>%
distinct %>%
left_join(fromlinks,by="common",relationship="many-to-many") %>%
filter(!is.na(from_label)) %>%
{if(transforms_tracing_threads) filter(.,common_source_id==source_id) else .}
}
}
if(!is.null(from_labels) & !is.null(to_labels)) {
froms <- imap(fromids, ~ tibble(step=.y, link_id=.x)) %>% bind_rows %>% group_by(link_id) %>% summarise(step=min(step))
tos <- imap(toids, ~ tibble(step=.y, link_id=.x)) %>% bind_rows %>% group_by(link_id) %>% summarise(step=min(step))
common_ids <-
full_join(froms,tos,by="link_id") %>%
filter(`+`(step.x,step.y)<=(steps+1)) %>%
pull(link_id)
} else {
# browser()
if(is.null(to_labels)) common_ids <- fromids %>% unlist %>% unique
if(is.null(from_labels)) common_ids <- toids %>% unlist %>% unique
}
links %>%
filter(link_id %in% common_ids) %>%
add_link_counts_simple()
# links %>% mutate(x=link_id %in% common_ids) %>% retain(x)
}
pipe_remove_brackets <- function(links,square=F,round=F){
if(!square & !round)return(links)
# browser()
if(square) links <-
links %>%
mutate(from_label=str_remove_all(from_label," \\s*\\[[^\\]]+\\]"))
if(round) links <-
links %>%
mutate(from_label=str_remove_all(from_label," \\s*\\([^\\)]+\\)"))
if(square) links <-
links %>%
mutate(to_label=str_remove_all(to_label," \\s*\\[[^\\]]+\\]"))
if(round) links <-
links %>%
mutate(to_label=str_remove_all(to_label," \\s*\\([^\\)]+\\)"))
links %>% add_link_counts_simple()
}
pipe_retain_hashtags <- function(links,hashtags,keep=T){
# browser()
targets=hashtags
if(keep){
links %>%
filter(map(hashtags,function(x){any(targets %in% uncollapc(x)) })%>% unlist) %>%
add_link_counts_simple()
} else {
links %>%
filter(map(hashtags,function(x){all(targets %notin% uncollapc(x)) })%>% unlist) %>%
add_link_counts_simple()
}
}
pipe_focus <- function(links,focus,any=F){
# browser()
links <-
links %>% ungroup
if(any){
links %>%
filter(map(.$from_label,~{any(str_detect(.,focus))}) %>% unlist | map(.$to_label,~{any(str_detect(.,focus))}) %>% unlist) %>%
add_link_counts_simple()
} else {
links %>%
filter(map(.$from_label,~{any(. %in% focus)}) %>% unlist | map(.$to_label,~{any(. %in% focus)}) %>% unlist) %>%
add_link_counts_simple()
}
}
pipe_exclude <- function(links,exclude,any=F){
links <-
links %>% ungroup
# browser()
if(any){
links <-
links %>%
filter(!(map(.$from_label,~{any(str_detect(.,exclude))}) %>% unlist | map(.$to_label,~{any(str_detect(.,exclude))}) %>% unlist))
} else {
links <-
links %>%
filter(!(map(.$from_label,~{any(. %in% exclude)}) %>% unlist | map(.$to_label,~{any(. %in% exclude)}) %>% unlist))
}
links %>% add_link_counts_simple()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment