This gist contains Java code for archiving tweets in MongoDB, and R code for retrieving tweets for analysis. Please note that there are a couple of dependencies: (1) You should set up your MongoDB (I used the free plan at; (2) You should have set up your Twitter API (check; (3) Java code: include twi…
## Retrieve archived tweets from MongoDB for analysis.
# Each MongoDB collection holds one Twitter archive and is named after the
# search keyword. The script counts tweets, pulls the tweets of one user, and
# loads a whole collection into a data frame.

## Load required packages
# rmongodb supplies the mongo.* functions; plyr supplies rbind.fill
library(rmongodb)
library(plyr)

## Host info and credentials
host <- ""  # fill in as "hostname:port" of your MongoDB server
username <- "your_username"
password <- "your_pass"
db <- "your_db"

## Connect to mongodb
mongo <- mongo.create(host=host, db=db,
                      username=username, password=password)
# fail early instead of letting every later call error out one by one
if (!mongo.is.connected(mongo)) {
  stop("Could not connect to MongoDB; check host and credentials")
}

## Get a list of collections within our namespace
# here I used each collection for a twitter archive
mongo.get.database.collections(mongo, db)

## Create a string that points to the namespace
# the collection I'm interested in is "#mri13"
collection <- "#mri13"
namespace <- paste(db, collection, sep=".")

## Check the total number of tweets in "#mri13"
mongo.count(mongo, namespace, mongo.bson.empty())

## Build a query to find how many tweets were posted by me
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.append(buf, "user_name", "bodongchen")
query <- mongo.bson.from.buffer(buf)
# get the count
count <- mongo.count(mongo, namespace, query)

## Get all tweets posted by me
tweets <- list()
cursor <- mongo.find(mongo, namespace, query)
# mongo.cursor.next() advances the cursor and returns FALSE when exhausted
while (mongo.cursor.next(cursor)) {
  val <- mongo.cursor.value(cursor)
  tweets[[length(tweets)+1]] <- mongo.bson.value(val, "tweet_text")
}
mongo.cursor.destroy(cursor)

## Retrieve all tweets and put into a dataframe
# Rows are collected in a list and bound once at the end; calling rbind.fill
# inside the loop copies the growing data frame on every iteration, which
# becomes very slow on large collections.
rows <- list()
cursor <- mongo.find(mongo, namespace)
while (mongo.cursor.next(cursor)) {
  # iterate and grab the next record
  tmp <- mongo.bson.to.list(mongo.cursor.value(cursor))
  # make it a one-row dataframe
  rows[[length(rows)+1]] <- as.data.frame(t(unlist(tmp)), stringsAsFactors = F)
}
mongo.cursor.destroy(cursor)
# bind to the master dataframe (stays an empty data frame if nothing matched)
df_arch1 <- data.frame(stringsAsFactors = FALSE)
if (length(rows) > 0) {
  df_arch1 <- rbind.fill(rows)
}

### Try with another collection
collection2 <- "#edtechchat"
namespace2 <- paste(db, collection2, sep=".")
mongo.count(mongo, namespace2, mongo.bson.empty())
package twitter_loop;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Scanner;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.UserMentionEntity;
import twitter4j.conf.ConfigurationBuilder;
public class Twitter_loop {
* Settings before running the program
// Mongodb info
private String host = "";
private int port = 53858;
private String db_name = "your_db";
private String username = "your_username";
private char[] password = "your_password".toCharArray();
// Twitter api
private String consumerKey = "yourkey";
private String consumerSecret = "yoursecret";
private String accessToken = "yourtoken";
private String accessTokenSecret = "yourtokensecret";
// Time interval
private int seconds = 60;
// Number of tweets to get each time
private int count = 100;
private ConfigurationBuilder cb;
private DB db;
private DBCollection items;
* static block used to construct a connection with tweeter with twitter4j
* configuration with provided settings. This configuration builder will be
* used for next search action to fetch the tweets from
public static void main(String[] args) throws InterruptedException {
Twitter_loop taskObj = new Twitter_loop();
public void loadMenu() throws InterruptedException {
System.out.print("Please choose your Keyword:\t");
Scanner input = new Scanner(;
String keyword = input.nextLine();
int i = 0;
while (i < 1) {
cb = new ConfigurationBuilder();
getTweetByQuery(true, keyword);
cb = null;
Thread.sleep(seconds * 1000); // wait
public void connectdb(String keyword) {
try {
// on constructor load initialize MongoDB and load collection
items = db.getCollection(keyword);
//make the tweet_ID unique in the database
BasicDBObject index = new BasicDBObject("tweet_ID", 1);
items.ensureIndex(index, new BasicDBObject("unique", true));
} catch (MongoException ex) {
System.out.println("MongoException :" + ex.getMessage());
* initMongoDB been called in constructor so every object creation this
* initialize MongoDB.
public void initMongoDB() throws MongoException {
try {
System.out.println("Connecting to Mongo DB..");
Mongo mongo;
// mongo = new Mongo("");
// db = mongo.getDB("tweetDB2");
mongo = new Mongo(host, port);
db = mongo.getDB(db_name);
db.authenticate(username, password);
} catch (UnknownHostException ex) {
System.out.println("MongoDB Connection Error :" + ex.getMessage());
public void getTweetByQuery(boolean loadRecords, String keyword) throws InterruptedException {
TwitterFactory tf = new TwitterFactory(;
Twitter twitter = tf.getInstance();
if (cb != null) {
try {
Query query = new Query(keyword);
QueryResult result;
result =;
System.out.println("Getting Tweets...");
List<Status> tweets = result.getTweets();
for (Status tweet : tweets) {
BasicDBObject basicObj = new BasicDBObject();
basicObj.put("user_name", tweet.getUser().getScreenName());
basicObj.put("retweet_count", tweet.getRetweetCount());
basicObj.put("tweet_followers_count", tweet.getUser().getFollowersCount());
basicObj.put("source", tweet.getSource());
UserMentionEntity[] mentioned = tweet.getUserMentionEntities();
basicObj.put("tweet_mentioned_count", mentioned.length);
basicObj.put("tweet_ID", tweet.getId());
basicObj.put("tweet_text", tweet.getText());
try {
} catch (Exception e) {
System.out.println("MongoDB Connection Error : " + e.getMessage());
// Printing fetched records from DB.
if (loadRecords) {
} catch (TwitterException te) {
System.out.println("te.getErrorCode() " + te.getErrorCode());
System.out.println("te.getExceptionCode() " + te.getExceptionCode());
System.out.println("te.getStatusCode() " + te.getStatusCode());
if (te.getStatusCode() == 401) {
System.out.println("Twitter Error : \nAuthentication credentials ( were missing or incorrect.\nEnsure that you have set valid consumer key/secret, access token/secret, and the system clock is in sync.");
} else {
System.out.println("Twitter Error : " + te.getMessage());
} else {
System.out.println("MongoDB is not Connected! Please check mongoDB intance running..");
public void getTweetsRecords() throws InterruptedException {
BasicDBObject fields = new BasicDBObject("_id", true).append("user_name", true).append("tweet_text", true);
DBCursor cursor = items.find(new BasicDBObject(), fields);
while (cursor.hasNext()) {
SONEINT commented Mar 22, 2014

Dear dirkchen, I'm currently working with the same code to transfer my collections of tweets from my MongoDB databases to R for text mining and social network analysis.
I did not understand the conversion of your collection with the paste() function, which gives a NULL result for me.
In addition, I am running into limitations with the rmongodb and plyr packages on large tweet collections, and things do not work properly. I have posted a Stack Overflow question here :
Any help would be greatly appreciated.


