mhawksey/gist:3799483

## gistfile1.rebol
# compdata week 1 practice
# Script reads a NodeXL Twitter search for the #compdata hashtag that has been uploaded to a Google Spreadsheet
# Data is reshaped using subsetting to get a slice of rows and columns fitting a certain condition

# Read the vertices list (sheet gid=1) of the published Google Spreadsheet as CSV.
# skip = 1 drops the first row so the column headers are taken from row 2.
vertices <- read.csv(
  "https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=1&output=csv",
  header = TRUE,
  skip = 1
)

# Show how many rows were read
nrow(vertices)


# Read the edges list (sheet gid=0) of the published Google Spreadsheet as CSV.
# skip = 1 drops the first row so the column headers are taken from row 2.
edges <- read.csv(
  "https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=0&output=csv",
  header = TRUE,
  skip = 1
)

# Inspect the structure of the data
str(edges)

# The original run reported: $ Relationship : Factor w/ 4 levels "Followed","Mentions",...
# NOTE(review): on R >= 4.0 read.csv() keeps strings as character by default,
# so str() may show chr instead of Factor - confirm on your R version.
# Count the rows for each value of $Relationship
table(edges$Relationship)

# Count the rows whose $Tweet starts with
# 'I just signed up for Computing for Data Analysis' (the ^ anchors the match
# at the beginning of the tweet text)
iJust <- grepl(
  pattern = "^I just signed up for Computing for Data Analysis",
  x = edges$Tweet
)
table(iJust)

# Goal: keep only $Vertex.1 and $Vertex.2 for the rows whose $Relationship is
# 'Followed'.

# Logical mask of the 'Followed' rows. %in% is used instead of == so that a
# missing Relationship yields FALSE rather than NA (an NA in a row index would
# add an all-NA row to the subset).
followed <- edges$Relationship %in% "Followed"

# New data.frame: the first two columns of edges ($Vertex.1 and $Vertex.2),
# restricted to the rows selected by the mask
edgeList <- edges[followed, 1:2]
str(edgeList)

# The mask and the subsetting step above can be combined into one expression
edgeList <- edges[edges$Relationship %in% "Followed", 1:2]

# Look at the new data
str(edgeList)

# Find the most frequent $Vertex.1 values in edges

# table() counts each distinct value; data.frame() turns the counts into rows
# with a Freq column
topInVert1 <- data.frame(table(edges$Vertex.1))

# Reorder the rows by descending frequency
topInVert1 <- topInVert1[order(-topInVert1$Freq), ]

# Print the top 10 results. head() returns fewer rows when there are fewer than
# 10 distinct values, whereas indexing with 1:10 would pad with NA rows.
head(topInVert1, 10)
	# compdata week 1 practice
	# Script reads a NodeXL Twitter search for the #compdata hashtag that has been uploaded to a Google Spreadsheet
	# Data is reshaped using subsetting to get a slice of rows and columns fitting a certain condition

	# Read the vertices list (sheet gid=1) of the published Google Spreadsheet as CSV.
	# skip = 1 drops the first row so the column headers are taken from row 2.
	vertices <- read.csv(
		"https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=1&output=csv",
		header = TRUE,
		skip = 1
	)

	# Show how many rows were read
	nrow(vertices)


	# Read the edges list (sheet gid=0) of the published Google Spreadsheet as CSV.
	# skip = 1 drops the first row so the column headers are taken from row 2.
	edges <- read.csv(
		"https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=0&output=csv",
		header = TRUE,
		skip = 1
	)

	# Inspect the structure of the data
	str(edges)

	# The original run reported: $ Relationship : Factor w/ 4 levels "Followed","Mentions",...
	# NOTE(review): on R >= 4.0 read.csv() keeps strings as character by default,
	# so str() may show chr instead of Factor - confirm on your R version.
	# Count the rows for each value of $Relationship
	table(edges$Relationship)

	# Count the rows whose $Tweet starts with
	# 'I just signed up for Computing for Data Analysis' (the ^ anchors the match
	# at the beginning of the tweet text)
	iJust <- grepl(
		pattern = "^I just signed up for Computing for Data Analysis",
		x = edges$Tweet
	)
	table(iJust)

	# Goal: keep only $Vertex.1 and $Vertex.2 for the rows whose $Relationship is
	# 'Followed'.

	# Logical mask of the 'Followed' rows. %in% is used instead of == so that a
	# missing Relationship yields FALSE rather than NA (an NA in a row index would
	# add an all-NA row to the subset).
	followed <- edges$Relationship %in% "Followed"

	# New data.frame: the first two columns of edges ($Vertex.1 and $Vertex.2),
	# restricted to the rows selected by the mask
	edgeList <- edges[followed, 1:2]
	str(edgeList)

	# The mask and the subsetting step above can be combined into one expression
	edgeList <- edges[edges$Relationship %in% "Followed", 1:2]

	# Look at the new data
	str(edgeList)

	# Find the most frequent $Vertex.1 values in edges

	# table() counts each distinct value; data.frame() turns the counts into rows
	# with a Freq column
	topInVert1 <- data.frame(table(edges$Vertex.1))

	# Reorder the rows by descending frequency
	topInVert1 <- topInVert1[order(-topInVert1$Freq), ]

	# Print the top 10 results. head() returns fewer rows when there are fewer than
	# 10 distinct values, whereas indexing with 1:10 would pad with NA rows.
	head(topInVert1, 10)