Skip to content

Instantly share code, notes, and snippets.

@joaovissoci
Last active August 29, 2017 03:25
Show Gist options
  • Save joaovissoci/5b4e7a0e4f228c9f37da to your computer and use it in GitHub Desktop.
Save joaovissoci/5b4e7a0e4f228c9f37da to your computer and use it in GitHub Desktop.
template_network_analysis
#####################################################################################
#TEMPLATE DE ANALISE - contexto_study_3.R
#####################################################################################
#
# Glossário
# $ = Pertence a = data$sexo, que dizer variável sexo pertencente ao banco data
#
#
#####################################################################################
#ORGANIZANDO O AMBIENTE DO R
#####################################################################################
#Exemplo de funçao para instalar pacotes
#install.packages("Hmisc")
#Exemplo de função para carregar os pacotes
#library(Hmisc)
#Carregar todos os pacotes ao mesmo tempo
#Substituir o termo em " " pelo nome do pacote que quer carregar
#Por exemplo, se eu queri inserir o pacote SEM pra analise do
#Modelo de equaçoes estruturais, eu inclui na função "sem"
source("http://bioconductor.org/biocLite.R")
biocLite("RBGL")
lapply(c("igraph","repmis","gdata","polycor","qgraph","pcalg","ggplot2","psych"),
library, character.only=T)
#####################################################################################
#ORGANIZANDO O BANCO DE DADOS
#####################################################################################
#Exemplo de como inserir o banco de dados direto de um arquivo físico do computador
#data<-read.csv("/Users/rpietro/Desktop/MDD_BIPD_Baseline.csv",sep=",")
#a informação entre " " são os caminhos para o diretório no seu computador
#no qual está o seu arquivo
#Carregando os dados do drpobox - devreria funcionar automaticamente
#Functions to pull the dara from the internet file
#see http://goo.gl/mQwxO on how to get this link
#Importar os dados do Dropbox, no formato .csv
#Instruções em http://goo.gl/Ofa7gQ
dados <- repmis::source_DropboxData("dat_study_2.csv",
"qcl23mway5u2xsj",
sep = ",",
header = TRUE)
#####################################################################################
#GENERATING NETWORK
#####################################################################################
#omitting 1 NA
dados<-na.omit(dados)
#removing vector (variable) from the dataset
data<-remove.vars(dados,c("Atleta"))
#Generating Adjacency Matrix
dataMatrix <- t(as.matrix(data)) %*% as.matrix(data)
#Specifying vector with names for each node
names<-rownames(dataMatrix)
#Generating Network based on an adjacency matrix with:
# frequency of 2 as cut, minimum pf 0.5 (to emphasize the colours - there is probably a better way to do it, just feeling lazy now)
# undirected in gray scale
Q3_atleta3 <- qgraph(dataMatrix, borders = FALSE, cut=2,
minimum = 1, labels=names,label.cex = 0.60, label.color="black",
layout = "spring",directed=FALSE,label.scale=FALSE,gray=TRUE)
#Calculate the same network bu based on a POLYCHLORIC CORRELATION MATRIX
#requires polycor package
dataMatrix2<-hetcor(data)
Q3_atleta3 <- qgraph(dados, borders = FALSE, cut=1,
minimum = 0.4,label.cex = 0.60, label.color="black",
layout = "spring",directed=FALSE,label.scale=FALSE,gray=FALSE)
#####################################################################################
#CALCULATING DESCRIPTIVES FROM THE NETWORK
#####################################################################################
#Transforming network into a IGRAPH object (created with the igraph package)
Q3<-as.igraph(qgraph(Q3_atleta3, DoNotPlot = TRUE))
#Calculating shortest path - association metric
#ADD A DEFINITION
shortest.paths(Q3)
#Calculating degree
#ADD A DEFINITION
degree(Q3)
#Calculating centrality
#ADD A DEFINITION
assortativity.degree(Q3)
#Calculating closeness
#ADD A DEFINITION
closeness(Q3)
#Calculating betweeness
#ADD A DEFINITION
betweenness(Q3)
#Calculating Eigenvalue centrality
#ADD A DEFINITION
evcent(Q3)
#Calculating Network density
#ADD A DEFINITION
graph.density(Q3)
#Calculating Network diameter
#ADD A DEFINITION
diameter(Q3)
### Clustering
#The local clustering coefficient, cl(v), gives for node n the proportion that the neighboors of v are also connected to each other.
#Maximum value is 1, value closer to 1 means higher clustering
transitivity(Q3, "local")
transitivity(Q3, "global")
transitivity(Q3, "average")
### Checking for Small World configuration of your network
#Function to calculate avareg length path in random graphs
#Just run every line without changing anything
APLr <- function(x) {
if ("qgraph" %in% class(x))
x <- as.igraph(x)
if ("igraph" %in% class(x))
x <- get.adjacency(x)
N = nrow(x)
p = sum(x/2)/sum(lower.tri(x))
eulers_constant <- 0.57721566490153
l = (log(N) - eulers_constant)/log(p * (N - 1)) + 0.5
l
}
#Run function
APLr(Q3)
#Function to calculate clustering in random graphs
#Just run every line without changing anything
Cr <- function(x) {
if ("qgraph" %in% class(x))
x <- as.igraph(x)
if ("igraph" %in% class(x))
x <- get.adjacency(x)
N = nrow(x)
p = sum(x/2)/sum(lower.tri(x))
t = (p * (N - 1)/N)
t
}
#Run function
Cr(Q3)
#Valiues above 3 means your network is a small world configured network
(transitivity(Q3) / Cr(Q3)) / (average.path.length(Q3) / APLr(Q3))
#Calculating all descriptives from a qgraph object (might take a while)
#ADD A DEFINITION
x<-centrality(Q3_atleta3) #Returns all descriptives
##############################################################
#NETWORK APPROACH
#############################################################
################# Importance ###################
sem_data<-data.frame(Import,outcome=bancocerto$Q13)#,outcome=bancocerto$Q13)
sem_data<-na.omit(sem_data)
cor_data<-cor_auto(sem_data)
#qsgc<-qsgc$rho
node_groups<-list(Import1=c(1,2,3,6,8,15),Import2=c(4,12),Import3=c(7,13),Other=c(5,9,10,11,14))
node_names<-c("Why is this study being done?", "What is involved in this study?", "Who is going to be my doctor in this study?","How many people will take part in this study?","How long will I be in this study?","What are the benefits of being in this study?","What about compensation?","What are the risks of being in this study?","What are the costs?","Will my information be kept confidential?","What about research related injuries?","What are the alternatives to being in this study?","What if I want decline participation or withdraw?","Whom do I call if I have questions or trouble?","Willingness to participate")
varNames<-c("Q1","Q2","Q3","Q4","Q5","Q6","Q7","Q8","Q9","Q10","Q11","Q12","Q13","Q14","WP")
mean_data<-sapply(sem_data,mean)
vSize<-c(mean_data[1:14]/min(mean_data[1:14]),1.81)
network_glasso<-qgraph(cor_data,layout="spring",vsize=6,esize=20,graph="glasso",sampleSize=nrow(sem_data),legend.cex = 0.5,GLratio=1.5)
network_pcor<-qgraph(cor_data,layout="spring",vsize=6,esize=20,graph="pcor",threshold="holm",sampleSize=nrow(sem_data),legend.cex = 0.5,GLratio=1.5)
network_cor<-qgraph(cor_data,layout="spring",vsize=6,esize=20,legend.cex = 0.5,GLratio=1.5)
layout1<-averageLayout(network_glasso,network_pcor,network_cor)
#qsgG1<-qgraph(qsgc,layout=Lqsg,vsize=6,esize=20,legend.cex = 0.3,cut = 0.3, maximum = 1, minimum = 0, esize = 20,vsize = 5, repulsion = 0.8,groups=qsggr,gray=TRUE,color=c("gray80","gray50"),legend=F)#nodeNames=nomesqsg,
#qsgG2<-qgraph(qsgc,layout=Lqsg,vsize=6,esize=20,graph="pcor",legend.cex = 0.3,cut = 0.1, maximum = 1, minimum = 0, esize = 20,vsize = 5, repulsion = 0.8,groups=qsggr,gray=TRUE,color=c("gray80","gray50"),legend=F)#,nodeNames=nomesqsg
#centralityPlot(qsgG3)
#clusteringPlot(qsgG3)
g<-as.igraph(qsgg3)
h<-walktrap.community(g)
#h<-spinglass.community(g)
plot(h,g)
h$membership
# Para identificar no qgraph o resultado do algortimo de comunidade, criar objeto de "groups"
# com o resultado de wcG1
predictors<-centrality(qsgg3)$ShortestPaths[,15]
predictors
#centralityPlot(qsgG3)
as.data.frame(predictors[[1]])[2,]
dim(as.data.frame(predictors[[1]]))[1]
qsgg3$Edgelist$from
qsgg3$Edgelist$to
qsgg3$Edgelist$weight
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==1 & qsgg3$Edgelist$to==15)
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==2 & qsgg3$Edgelist$to==15)
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==3 & qsgg3$Edgelist$to==15)
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==10 & qsgg3$Edgelist$to==15)
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==13 & qsgg3$Edgelist$to==15)
#####################################################################################
#CREATING A LINE GRAPH FOR APL
#####################################################################################
#Isolating only Shortest Path Lenghts
data_line_plot<-data.frame(x$ShortestPathLengths[3:64,1:2])
#Names for each variable
names_line_plot<-c("Prática por diversão",
"Percepção de futuro",
"Diferenças de maturidade",
"Futsal como um emprego",
"Modelo para Políticas de Identidade",
"Amizade com outros atletas",
"Vontade de acolher outros",
"Identificação com o grupo",
"Relacionamento entre os atletas",
"Treinadores positivos/negativo",
"Pouca estrutura para carreira",
"Início com futebol e futsal",
"Pouca estabilidade",
"Bom salário",
"Ascenção e mudança hábitos",
"Projeto de vida no esporte",
"Amadurecimento pessoal",
"Experiências estimulantes",
"Prática de diversos esporte",
"Foco no futebol",
"Pouco interesse em outras aulas",
"Estudo como segundo opção",
"Retorno aos estudos pós carreira",
"Percepção de suporte familiar",
"Influência familiar",
"Necessidade da presença familiar",
"Amadurecimento da relação",
"Família não ajuda",
"Família indiferente",
"Cultura da cidade/comunidade",
"Cobrança por estudos",
"Famílias de atletas interagindo",
"Prejuízo familiar",
"Família levou para o esporte",
"Jogando pelo bem da família",
"Família criando valores",
"Preocupação dos pais",
"Necessitando trabalho externo",
"Carreira futsal/estudos/trabalho",
"Opção autonoma pelo futsal",
"Esporte vs. drogas/criminalidade",
"Abandono da escola para jogar",
"Vontade de voltar aos estudos",
"Incentivo da equipe ao estudo",
"Interesse financeiro/diretoria",
"Desempenho atraindo contratos",
"Imagem midiática distorcida",
"Especulação midiática",
"Família dos colegas de equipe",
"Condição ruim dos pais",
"Preocupação com o prazer",
"Dificuldade com outra carreira",
"Importância ao clube",
"Prazer pela prática",
"Valorização da comunidade",
"Impacto da cobertura midiática",
"Pouco reconhecimento",
"Relevância cultural da modalidade",
"Diferença entre futebol e futsal",
"Reconhecimento apenas ao vencedor",
"Estrutura precária dos times",
"Contratos precários e incertos")
#Some organiztion needed for the codes
value<-with(data_line_plot,c(X1,X2))
Profile<-c(rep(c("Heteronomia"),62),rep(c("Autonomia"),62))
Themes<-rep(names_line_plot,2)
data_plot<-data.frame(value,Profile,Themes)
#using GGPLOT TO PLOT THE LINES (Still fixing variable names)
ggplot(data=data_plot, aes(y=value, x=Themes, group=Profile,
color=Profile)) +
geom_line(size=1.5) +
geom_point(size=3,fill="white") +
ylab("") + xlab("") +
theme_bw()+
scale_colour_manual(values=c("#999999","darkred"))+
theme(axis.text.x = element_text(angle= 270,
hjust = 0, colour = "black",size=14))
#+
#scale_colour_manual(values=c("#999999", "#E69F00","darkred"),
# name="Latent Profiles",
# breaks=c(1,2,3),
# labels=c("Introversive", "Extroversive","BFI Norm"))
#####################################################################################
#CREATING A WORDCLOUD GRAPH FOR BETWEENESS AND CLOSENESS
#####################################################################################
### WORD CLOUD
library(RXKCD)
library(tm)
library(wordcloud)
library(RColorBrewer)
#path <- system.file("xkcd", package = "RXKCD")
#datafiles <- list.files(path)
#xkcd.df <- read.csv(file.path(path, datafiles))
#xkcd.corpus <- Corpus(DataframeSource(data.frame(xkcd.df[, 3])))
#xkcd.corpus <- tm_map(xkcd.corpus, removePunctuation)
#xkcd.corpus <- tm_map(xkcd.corpus, tolower)
#xkcd.corpus <- tm_map(xkcd.corpus, function(x) removeWords(x, stopwords("english")))
#tdm <- TermDocumentMatrix(xkcd.corpus)
m <- as.matrix(x$Betweenness)
v <- sort(rowSums(m),decreasing=TRUE)
d <- data.frame(word = names(v),freq=v)
pal <- brewer.pal(8, "Dark2")
pal <- pal[-(1:2)]
png("wordcloud.png", width=1280,height=800)
wordcloud(d$word,d$freq, scale=c(8,.1),min.freq=0,max.words=Inf,
random.order=FALSE, rot.per=.15, colors=pal,
vfont=c("sans serif","plain"))
dev.off()
#####################################################################################
#CAUSATION ANALYSIS
#####################################################################################
#This network, for my data, still sucks. Trying to arrange it better
###Causation Analysis
#requisres package "pcalg"
n <- nrow(dataMatrix)
p <- ncol(dataMatrix)
#names<-as.character(c(1:8))
## define independence test (partial correlations)
indepTest <- gaussCItest
C<-polychoric(dataMatrix)
## define sufficient statistics
suffStat <- list(C = C$correlations, n = n)
## estimate CPDAG
alpha <- 0.01
pc.fit <- pc(suffStat, indepTest, alpha, labels=names)
qgraph(pc.fit)
tetrachoric(data)
setwd("/Users/rpietro/Google Drive/ToDos")
#lendo o arquivo "noname"
#file <- file.choose()
dados <- read.csv("MHCpolymatrix.csv", sep=";")
dados<-as.matrix(dados)
dados[upper.tri(dados,diag=FALSE)] <- t(dados)[upper.tri(dados)]
x<-as.igraph(qgraph(dados, layout="spring",minimum=0.50))
assortativity.degree(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment