Last active
August 29, 2017 03:25
-
-
Save joaovissoci/5b4e7a0e4f228c9f37da to your computer and use it in GitHub Desktop.
template_network_analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################################################### | |
#TEMPLATE DE ANALISE - contexto_study_3.R | |
##################################################################################### | |
# | |
# Glossário | |
# $ = Pertence a = data$sexo, que dizer variável sexo pertencente ao banco data | |
# | |
# | |
##################################################################################### | |
#ORGANIZANDO O AMBIENTE DO R | |
##################################################################################### | |
#Exemplo de funçao para instalar pacotes | |
#install.packages("Hmisc") | |
#Exemplo de função para carregar os pacotes | |
#library(Hmisc) | |
#Carregar todos os pacotes ao mesmo tempo | |
#Substituir o termo em " " pelo nome do pacote que quer carregar | |
#Por exemplo, se eu queri inserir o pacote SEM pra analise do | |
#Modelo de equaçoes estruturais, eu inclui na função "sem" | |
source("http://bioconductor.org/biocLite.R") | |
biocLite("RBGL") | |
lapply(c("igraph","repmis","gdata","polycor","qgraph","pcalg","ggplot2","psych"), | |
library, character.only=T) | |
##################################################################################### | |
#ORGANIZANDO O BANCO DE DADOS | |
##################################################################################### | |
#Exemplo de como inserir o banco de dados direto de um arquivo físico do computador | |
#data<-read.csv("/Users/rpietro/Desktop/MDD_BIPD_Baseline.csv",sep=",") | |
#a informação entre " " são os caminhos para o diretório no seu computador | |
#no qual está o seu arquivo | |
#Carregando os dados do drpobox - devreria funcionar automaticamente | |
#Functions to pull the dara from the internet file | |
#see http://goo.gl/mQwxO on how to get this link | |
#Importar os dados do Dropbox, no formato .csv | |
#Instruções em http://goo.gl/Ofa7gQ | |
dados <- repmis::source_DropboxData("dat_study_2.csv", | |
"qcl23mway5u2xsj", | |
sep = ",", | |
header = TRUE) | |
##################################################################################### | |
#GENERATING NETWORK | |
##################################################################################### | |
#omitting 1 NA | |
dados<-na.omit(dados) | |
#removing vector (variable) from the dataset | |
data<-remove.vars(dados,c("Atleta")) | |
#Generating Adjacency Matrix | |
dataMatrix <- t(as.matrix(data)) %*% as.matrix(data) | |
#Specifying vector with names for each node | |
names<-rownames(dataMatrix) | |
#Generating Network based on an adjacency matrix with: | |
# frequency of 2 as cut, minimum pf 0.5 (to emphasize the colours - there is probably a better way to do it, just feeling lazy now) | |
# undirected in gray scale | |
Q3_atleta3 <- qgraph(dataMatrix, borders = FALSE, cut=2, | |
minimum = 1, labels=names,label.cex = 0.60, label.color="black", | |
layout = "spring",directed=FALSE,label.scale=FALSE,gray=TRUE) | |
#Calculate the same network bu based on a POLYCHLORIC CORRELATION MATRIX | |
#requires polycor package | |
dataMatrix2<-hetcor(data) | |
Q3_atleta3 <- qgraph(dados, borders = FALSE, cut=1, | |
minimum = 0.4,label.cex = 0.60, label.color="black", | |
layout = "spring",directed=FALSE,label.scale=FALSE,gray=FALSE) | |
##################################################################################### | |
#CALCULATING DESCRIPTIVES FROM THE NETWORK | |
##################################################################################### | |
#Transforming network into a IGRAPH object (created with the igraph package) | |
Q3<-as.igraph(qgraph(Q3_atleta3, DoNotPlot = TRUE)) | |
#Calculating shortest path - association metric | |
#ADD A DEFINITION | |
shortest.paths(Q3) | |
#Calculating degree | |
#ADD A DEFINITION | |
degree(Q3) | |
#Calculating centrality | |
#ADD A DEFINITION | |
assortativity.degree(Q3) | |
#Calculating closeness | |
#ADD A DEFINITION | |
closeness(Q3) | |
#Calculating betweeness | |
#ADD A DEFINITION | |
betweenness(Q3) | |
#Calculating Eigenvalue centrality | |
#ADD A DEFINITION | |
evcent(Q3) | |
#Calculating Network density | |
#ADD A DEFINITION | |
graph.density(Q3) | |
#Calculating Network diameter | |
#ADD A DEFINITION | |
diameter(Q3) | |
### Clustering | |
#The local clustering coefficient, cl(v), gives for node n the proportion that the neighboors of v are also connected to each other. | |
#Maximum value is 1, value closer to 1 means higher clustering | |
transitivity(Q3, "local") | |
transitivity(Q3, "global") | |
transitivity(Q3, "average") | |
### Checking for Small World configuration of your network | |
#Function to calculate avareg length path in random graphs | |
#Just run every line without changing anything | |
APLr <- function(x) { | |
if ("qgraph" %in% class(x)) | |
x <- as.igraph(x) | |
if ("igraph" %in% class(x)) | |
x <- get.adjacency(x) | |
N = nrow(x) | |
p = sum(x/2)/sum(lower.tri(x)) | |
eulers_constant <- 0.57721566490153 | |
l = (log(N) - eulers_constant)/log(p * (N - 1)) + 0.5 | |
l | |
} | |
#Run function | |
APLr(Q3) | |
#Function to calculate clustering in random graphs | |
#Just run every line without changing anything | |
Cr <- function(x) { | |
if ("qgraph" %in% class(x)) | |
x <- as.igraph(x) | |
if ("igraph" %in% class(x)) | |
x <- get.adjacency(x) | |
N = nrow(x) | |
p = sum(x/2)/sum(lower.tri(x)) | |
t = (p * (N - 1)/N) | |
t | |
} | |
#Run function | |
Cr(Q3) | |
#Valiues above 3 means your network is a small world configured network | |
(transitivity(Q3) / Cr(Q3)) / (average.path.length(Q3) / APLr(Q3)) | |
#Calculating all descriptives from a qgraph object (might take a while) | |
#ADD A DEFINITION | |
x<-centrality(Q3_atleta3) #Returns all descriptives | |
############################################################## | |
#NETWORK APPROACH | |
############################################################# | |
################# Importance ################### | |
sem_data<-data.frame(Import,outcome=bancocerto$Q13)#,outcome=bancocerto$Q13) | |
sem_data<-na.omit(sem_data) | |
cor_data<-cor_auto(sem_data) | |
#qsgc<-qsgc$rho | |
node_groups<-list(Import1=c(1,2,3,6,8,15),Import2=c(4,12),Import3=c(7,13),Other=c(5,9,10,11,14)) | |
node_names<-c("Why is this study being done?", "What is involved in this study?", "Who is going to be my doctor in this study?","How many people will take part in this study?","How long will I be in this study?","What are the benefits of being in this study?","What about compensation?","What are the risks of being in this study?","What are the costs?","Will my information be kept confidential?","What about research related injuries?","What are the alternatives to being in this study?","What if I want decline participation or withdraw?","Whom do I call if I have questions or trouble?","Willingness to participate") | |
varNames<-c("Q1","Q2","Q3","Q4","Q5","Q6","Q7","Q8","Q9","Q10","Q11","Q12","Q13","Q14","WP") | |
mean_data<-sapply(sem_data,mean) | |
vSize<-c(mean_data[1:14]/min(mean_data[1:14]),1.81) | |
network_glasso<-qgraph(cor_data,layout="spring",vsize=6,esize=20,graph="glasso",sampleSize=nrow(sem_data),legend.cex = 0.5,GLratio=1.5) | |
network_pcor<-qgraph(cor_data,layout="spring",vsize=6,esize=20,graph="pcor",threshold="holm",sampleSize=nrow(sem_data),legend.cex = 0.5,GLratio=1.5) | |
network_cor<-qgraph(cor_data,layout="spring",vsize=6,esize=20,legend.cex = 0.5,GLratio=1.5) | |
layout1<-averageLayout(network_glasso,network_pcor,network_cor) | |
#qsgG1<-qgraph(qsgc,layout=Lqsg,vsize=6,esize=20,legend.cex = 0.3,cut = 0.3, maximum = 1, minimum = 0, esize = 20,vsize = 5, repulsion = 0.8,groups=qsggr,gray=TRUE,color=c("gray80","gray50"),legend=F)#nodeNames=nomesqsg, | |
#qsgG2<-qgraph(qsgc,layout=Lqsg,vsize=6,esize=20,graph="pcor",legend.cex = 0.3,cut = 0.1, maximum = 1, minimum = 0, esize = 20,vsize = 5, repulsion = 0.8,groups=qsggr,gray=TRUE,color=c("gray80","gray50"),legend=F)#,nodeNames=nomesqsg | |
#centralityPlot(qsgG3) | |
#clusteringPlot(qsgG3) | |
g<-as.igraph(qsgg3) | |
h<-walktrap.community(g) | |
#h<-spinglass.community(g) | |
plot(h,g) | |
h$membership | |
# Para identificar no qgraph o resultado do algortimo de comunidade, criar objeto de "groups" | |
# com o resultado de wcG1 | |
predictors<-centrality(qsgg3)$ShortestPaths[,15] | |
predictors | |
#centralityPlot(qsgG3) | |
as.data.frame(predictors[[1]])[2,] | |
dim(as.data.frame(predictors[[1]]))[1] | |
qsgg3$Edgelist$from | |
qsgg3$Edgelist$to | |
qsgg3$Edgelist$weight | |
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==1 & qsgg3$Edgelist$to==15) | |
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==2 & qsgg3$Edgelist$to==15) | |
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==3 & qsgg3$Edgelist$to==15) | |
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==10 & qsgg3$Edgelist$to==15) | |
subset(qsgg3$Edgelist$weight,qsgg3$Edgelist$from==13 & qsgg3$Edgelist$to==15) | |
##################################################################################### | |
#CREATING A LINE GRAPH FOR APL | |
##################################################################################### | |
#Isolating only Shortest Path Lenghts | |
data_line_plot<-data.frame(x$ShortestPathLengths[3:64,1:2]) | |
#Names for each variable | |
names_line_plot<-c("Prática por diversão", | |
"Percepção de futuro", | |
"Diferenças de maturidade", | |
"Futsal como um emprego", | |
"Modelo para Políticas de Identidade", | |
"Amizade com outros atletas", | |
"Vontade de acolher outros", | |
"Identificação com o grupo", | |
"Relacionamento entre os atletas", | |
"Treinadores positivos/negativo", | |
"Pouca estrutura para carreira", | |
"Início com futebol e futsal", | |
"Pouca estabilidade", | |
"Bom salário", | |
"Ascenção e mudança hábitos", | |
"Projeto de vida no esporte", | |
"Amadurecimento pessoal", | |
"Experiências estimulantes", | |
"Prática de diversos esporte", | |
"Foco no futebol", | |
"Pouco interesse em outras aulas", | |
"Estudo como segundo opção", | |
"Retorno aos estudos pós carreira", | |
"Percepção de suporte familiar", | |
"Influência familiar", | |
"Necessidade da presença familiar", | |
"Amadurecimento da relação", | |
"Família não ajuda", | |
"Família indiferente", | |
"Cultura da cidade/comunidade", | |
"Cobrança por estudos", | |
"Famílias de atletas interagindo", | |
"Prejuízo familiar", | |
"Família levou para o esporte", | |
"Jogando pelo bem da família", | |
"Família criando valores", | |
"Preocupação dos pais", | |
"Necessitando trabalho externo", | |
"Carreira futsal/estudos/trabalho", | |
"Opção autonoma pelo futsal", | |
"Esporte vs. drogas/criminalidade", | |
"Abandono da escola para jogar", | |
"Vontade de voltar aos estudos", | |
"Incentivo da equipe ao estudo", | |
"Interesse financeiro/diretoria", | |
"Desempenho atraindo contratos", | |
"Imagem midiática distorcida", | |
"Especulação midiática", | |
"Família dos colegas de equipe", | |
"Condição ruim dos pais", | |
"Preocupação com o prazer", | |
"Dificuldade com outra carreira", | |
"Importância ao clube", | |
"Prazer pela prática", | |
"Valorização da comunidade", | |
"Impacto da cobertura midiática", | |
"Pouco reconhecimento", | |
"Relevância cultural da modalidade", | |
"Diferença entre futebol e futsal", | |
"Reconhecimento apenas ao vencedor", | |
"Estrutura precária dos times", | |
"Contratos precários e incertos") | |
#Some organiztion needed for the codes | |
value<-with(data_line_plot,c(X1,X2)) | |
Profile<-c(rep(c("Heteronomia"),62),rep(c("Autonomia"),62)) | |
Themes<-rep(names_line_plot,2) | |
data_plot<-data.frame(value,Profile,Themes) | |
#using GGPLOT TO PLOT THE LINES (Still fixing variable names) | |
ggplot(data=data_plot, aes(y=value, x=Themes, group=Profile, | |
color=Profile)) + | |
geom_line(size=1.5) + | |
geom_point(size=3,fill="white") + | |
ylab("") + xlab("") + | |
theme_bw()+ | |
scale_colour_manual(values=c("#999999","darkred"))+ | |
theme(axis.text.x = element_text(angle= 270, | |
hjust = 0, colour = "black",size=14)) | |
#+ | |
#scale_colour_manual(values=c("#999999", "#E69F00","darkred"), | |
# name="Latent Profiles", | |
# breaks=c(1,2,3), | |
# labels=c("Introversive", "Extroversive","BFI Norm")) | |
##################################################################################### | |
#CREATING A WORDCLOUD GRAPH FOR BETWEENESS AND CLOSENESS | |
##################################################################################### | |
### WORD CLOUD | |
library(RXKCD) | |
library(tm) | |
library(wordcloud) | |
library(RColorBrewer) | |
#path <- system.file("xkcd", package = "RXKCD") | |
#datafiles <- list.files(path) | |
#xkcd.df <- read.csv(file.path(path, datafiles)) | |
#xkcd.corpus <- Corpus(DataframeSource(data.frame(xkcd.df[, 3]))) | |
#xkcd.corpus <- tm_map(xkcd.corpus, removePunctuation) | |
#xkcd.corpus <- tm_map(xkcd.corpus, tolower) | |
#xkcd.corpus <- tm_map(xkcd.corpus, function(x) removeWords(x, stopwords("english"))) | |
#tdm <- TermDocumentMatrix(xkcd.corpus) | |
m <- as.matrix(x$Betweenness) | |
v <- sort(rowSums(m),decreasing=TRUE) | |
d <- data.frame(word = names(v),freq=v) | |
pal <- brewer.pal(8, "Dark2") | |
pal <- pal[-(1:2)] | |
png("wordcloud.png", width=1280,height=800) | |
wordcloud(d$word,d$freq, scale=c(8,.1),min.freq=0,max.words=Inf, | |
random.order=FALSE, rot.per=.15, colors=pal, | |
vfont=c("sans serif","plain")) | |
dev.off() | |
##################################################################################### | |
#CAUSATION ANALYSIS | |
##################################################################################### | |
#This network, for my data, still sucks. Trying to arrange it better | |
###Causation Analysis | |
#requisres package "pcalg" | |
n <- nrow(dataMatrix) | |
p <- ncol(dataMatrix) | |
#names<-as.character(c(1:8)) | |
## define independence test (partial correlations) | |
indepTest <- gaussCItest | |
C<-polychoric(dataMatrix) | |
## define sufficient statistics | |
suffStat <- list(C = C$correlations, n = n) | |
## estimate CPDAG | |
alpha <- 0.01 | |
pc.fit <- pc(suffStat, indepTest, alpha, labels=names) | |
qgraph(pc.fit) | |
tetrachoric(data) | |
setwd("/Users/rpietro/Google Drive/ToDos") | |
#lendo o arquivo "noname" | |
#file <- file.choose() | |
dados <- read.csv("MHCpolymatrix.csv", sep=";") | |
dados<-as.matrix(dados) | |
dados[upper.tri(dados,diag=FALSE)] <- t(dados)[upper.tri(dados)] | |
x<-as.igraph(qgraph(dados, layout="spring",minimum=0.50)) | |
assortativity.degree(x) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment