Skip to content

Instantly share code, notes, and snippets.

@wingchen
Last active January 4, 2016 12:19
Show Gist options
  • Save wingchen/8621215 to your computer and use it in GitHub Desktop.
Save wingchen/8621215 to your computer and use it in GitHub Desktop.
Converting MongoDB query results into an R data.frame. This code example accompanies my blog post "Building up an easy data analysis platform with RStudio server on top of your MongoDB": http://winston.attlin.com/2014/01/building-up-easy-data-analysis-platform.html
## The collection structure looks like this:
##{
## "col1": "some value 1",
## "col2": 1234,
## "col3": "somevalue 3",
## "col4": "some value 4",
## "col5": "some value 5",
## "col6": [
## {
## "some": 1,
## "other": "other value 1"
## },
## {
## "some": 0,
## "other": "other value "
## }
## ],
## "col6_1": 456,
## "col6_2": 6789,
## "col7": "some value 7"
##}
## Here is how to run a query and get the result as a data frame:
##
## my_data_frame <- queryIntoDataFrame('col2', 1234)
library(rmongodb)
# Count the documents in my_mongodb.my_mongodb_collection whose field
# `column_name` equals `column_value`.
#
# Args:
#   column_name:  name of the document field to match on.
#   column_value: value the field must equal.
#
# Returns:
#   The number of matching documents. When mongo.count() reports a negative
#   count, the connection's error code is printed and 0 is returned.
getCountsByMatch <- function(column_name, column_value){
  mongo <- mongo.create(host='localhost:27018' , db='my_mongodb')
  # Release the connection on every exit path, including errors. Cleanup is
  # deferred so that mongo.get.err() below still sees a live connection
  # object (previously it was called after mongo.destroy()).
  on.exit({
    mongo.disconnect(mongo)
    mongo.destroy(mongo)
  }, add = TRUE)

  # Build the equality query { <column_name>: <column_value> }.
  buf <- mongo.bson.buffer.create()
  mongo.bson.buffer.append(buf, column_name, column_value)
  query <- mongo.bson.from.buffer(buf)

  count <- mongo.count(mongo, "my_mongodb.my_mongodb_collection", query)

  if (count < 0) {
    # Read the error code while the connection has not been destroyed yet.
    print(paste('mongo.get.err code: ', mongo.get.err(mongo)))
    return(0)
  }
  count
}
# Run an equality query against my_mongodb.my_mongodb_collection and return
# the matching documents as a data frame.
#
# Args:
#   column_name:  name of the document field to match on.
#   column_value: value the field must equal.
#
# Returns:
#   A data frame with one row per document read from the cursor and the
#   columns col1, col2, col3, col4, col5, col6_1, col6_2, col7.
#   Missing character fields become '', a missing col2 becomes NA, and
#   col6_1 / col6_2 are -1 when the "col6" array is absent or -2 when the
#   array is present but holds no entry for that column.
queryIntoDataFrame <- function(column_name, column_value){
  # Fetch a field from a BSON document, falling back to `default` when the
  # field is absent (mongo.bson.value() returns NULL in that case).
  value_or <- function(doc, field, default) {
    value <- mongo.bson.value(doc, field)
    if (is.null(value)) default else value
  }

  total_count <- getCountsByMatch(column_name, column_value)

  # Preallocate the result columns from the expected row count.
  col1 <- vector("character", total_count)
  col2 <- vector("numeric", total_count)
  col3 <- vector("character", total_count)
  col4 <- vector("character", total_count)
  col5 <- vector("character", total_count)
  col6_1 <- vector("numeric", total_count)
  col6_2 <- vector("numeric", total_count)
  col7 <- vector("character", total_count)

  mongo <- mongo.create(host='localhost:27018' , db='my_mongodb')
  cursor <- NULL
  # Release the cursor (if one was opened) and the connection on every exit
  # path, including errors raised while iterating.
  on.exit({
    if (!is.null(cursor)) mongo.cursor.destroy(cursor)
    mongo.disconnect(mongo)
    mongo.destroy(mongo)
  }, add = TRUE)

  # Build the equality query { <column_name>: <column_value> }.
  buf <- mongo.bson.buffer.create()
  mongo.bson.buffer.append(buf, column_name, column_value)
  query <- mongo.bson.from.buffer(buf)

  # queries are faster with mongo.find.exhaust in the options
  cursor <- mongo.find(mongo, "my_mongodb.my_mongodb_collection", query,
                       options = mongo.find.exhaust)

  n_rows <- 0L
  while (mongo.cursor.next(cursor)) {
    n_rows <- n_rows + 1L
    cval <- mongo.cursor.value(cursor)

    col1[n_rows] <- value_or(cval, "col1", '')
    # col2 is numeric: use NA rather than '' so one missing value does not
    # coerce the whole column to character.
    col2[n_rows] <- value_or(cval, "col2", NA_real_)
    col3[n_rows] <- value_or(cval, "col3", '')
    col4[n_rows] <- value_or(cval, "col4", '')
    col5[n_rows] <- value_or(cval, "col5", '')

    # Example of extracting data from a BSON array, keyed on a value stored
    # in each array entry.
    entries <- mongo.bson.value(cval, "col6")
    if (is.null(entries)) {
      # Array absent: default -1.
      col6_1[n_rows] <- -1
      col6_2[n_rows] <- -1
    } else {
      # Default -2 marks "array present but no matching entry". An element
      # of an atomic vector is never NULL, so the default is set up front
      # and overwritten when a matching entry is found.
      col6_1[n_rows] <- -2
      col6_2[n_rows] <- -2
      for (item in entries) {
        # item$some selects the target column; item$other is the value.
        # NOTE(review): item$other is assumed to be numeric here - confirm
        # against the real collection.
        if (item$some == 0) {
          col6_1[n_rows] <- item$other
        } else if (item$some == 1) {
          col6_2[n_rows] <- item$other
        }
      }
    }

    col7[n_rows] <- value_or(cval, "col7", '')
  }

  # The count and the find are separate round-trips, so keep exactly the
  # rows that were actually read from the cursor.
  keep <- seq_len(n_rows)
  as.data.frame(list(
    col1 = col1[keep],
    col2 = col2[keep],
    col3 = col3[keep],
    col4 = col4[keep],
    col5 = col5[keep],
    col6_1 = col6_1[keep],
    col6_2 = col6_2[keep],
    col7 = col7[keep]
  ))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment