Skip to content

Instantly share code, notes, and snippets.

@Btibert3
Created November 4, 2012 22:49
Show Gist options
  • Save Btibert3/4014174 to your computer and use it in GitHub Desktop.
Save Btibert3/4014174 to your computer and use it in GitHub Desktop.
Collect Twitter and Insert into MongoDB
## load the packages
require(XML)
require(RCurl)
require(rjson)
require(plyr)
## Builds requests against the twitter search API - the response is JSON
## https://dev.twitter.com/docs/api/1/get/search
## max combo can be 1500 tweets (100 per page * 15 pages)
## API limit = 150 requests / hour for requests that are not authenticated
TAG <- "#nfl"
## reserved = TRUE so that "#" is percent-encoded ("%23"); a literal "#" would
## start the URL fragment and none of the query parameters would be sent
HASHTAG <- URLencode(TAG, reserved = TRUE)
BASE_URL <- "http://search.twitter.com/search.json"
RESULTS <- 10 # results requested per page (the "rpp" query parameter)
LANG <- "en"
TYPE <- "recent"
PAGES <- 1
## should make this a function, but use a for loop for basic example
## one slot per page, so the results of every page are kept (not just the last)
page_results <- vector("list", PAGES)
for (page in seq_len(PAGES)) {
  URL <- paste0(BASE_URL,
                "?q=", HASHTAG,
                "&rpp=", RESULTS,
                "&lang=", LANG,
                "&result_type=", TYPE,
                "&page=", page)
  ## a failed request is reported and the page skipped, instead of handing an
  ## error object to the JSON parser
  out <- tryCatch(
    getURL(URL),
    error = function(e) {
      warning("Request for page ", page, " failed: ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(out)) {
    next
  }
  ## `[<-` with list() keeps the slot even when `results` is NULL
  ## (`[[<-` with NULL would delete the slot and shift the later pages)
  page_results[page] <- list(fromJSON(out)$results)
}
## flatten one level: list of pages -> list of tweets (NULL when nothing came back)
twitter_data <- unlist(page_results, recursive = FALSE)
## load the R mongo library -- assumes that Mongo is running in the terminal
# ## help(mongo)
library(rmongodb)
## great help resource for rmongodb
## http://goo.gl/B6tYz
## nothing to insert when the search returned no tweets; check this before
## opening a connection so that no connection is left behind
if (length(twitter_data) == 0) {
  stop("twitter_data is empty -- there is no tweet to insert", call. = FALSE)
}
## follow the basics -- db is the database, ns = the collection of documents
db <- "twitter"
ns <- "brock_test"
## "database.collection" name that is passed to the mongo.* calls below
dbns <- paste(db, ns, sep = ".")
# mongo <- mongo.create(db=db)
mongo <- mongo.create()
## stop right away when the connection could not be made, instead of failing
## later inside drop/insert
if (!mongo.is.connected(mongo)) {
  stop("Could not connect to MongoDB -- is the server running?", call. = FALSE)
}
## @ interactive shell for mongo, type "dbs" to show "twitter" was created
## create an entry from the first response in the list from twitter search
## needs to be a bson doc -- need to check that list converts ok
tmp <- twitter_data[[1]]
## the outer parentheses also print the converted document for inspection
(tmp.bson <- mongo.bson.from.list(tmp))
## Make sure we start with an empty collection
## ensure there is nothing there - TRUE = was dropped, FALSE may mean it didn't exist
mongo.drop(mongo, dbns)
mongo.count(mongo, dbns)
## insert the record -- only 1 twitter response -- TRUE = successful!!
mongo.insert(mongo, dbns, tmp.bson)
## did the record stay? -- should show 1, not 0 from above
mongo.count(mongo, dbns)
## query the record -- TODO: no query is performed in this example yet
## disconnect
mongo.disconnect(mongo)
## note the comments below -- I couldn't get RMongo to play nicely,
## but it appears to have a compact syntax.
## NB: rmongodb was produced by 10gen, the same as MongoDB
##
## another Mongo package -- need to install from github
## mongodb needs to be running
# library(devtools)
# install_github("RMongo", "tc")
# library(RMongo)
# mongo <- mongoDbConnect("test", "localhost", 27017)
# str(mongo)
#
# ## connect to a database, even if not connected
# mongo <- mongoDbConnect("twitter", "localhost", 27017)
# dbShowCollections(mongo)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment