Last active
January 4, 2016 12:19
-
-
Save wingchen/8621215 to your computer and use it in GitHub Desktop.
Turn the results of a MongoDB query into an R data.frame. This code example was written for my blog post "Building up an easy data analysis platform with RStudio server on top of your MongoDB": http://winston.attlin.com/2014/01/building-up-easy-data-analysis-platform.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## The collection structure looks like this: | |
| ##{ | |
| ## "col1": "some value 1", | |
| ## "col2": 1234, | |
| ## "col3": "somevalue 3", | |
| ## "col4": "some value 4", | |
| ## "col5": "some value 5", | |
| ## "col6": [ | |
| ## { | |
| ## "some": 1, | |
| ## "other": "other value 1" | |
| ## }, | |
| ## { | |
| ## "some": 0, | |
| ## "other": "other value " | |
| ## } | |
| ## ], | |
| ## "col6_1": 456, | |
| ## "col6_2": 6789, | |
| ## "col7": "some value 7" | |
| ##} | |
| ## Here comes the way to conduct your query: | |
| ## | |
| ## my_data_frame <- queryIntoDataFrame('col2', 1234) | |
| library(rmongodb) | |
# Count the documents in "my_mongodb.my_mongodb_collection" whose field
# `column_name` equals `column_value`.
#
# Args:
#   column_name:  name of the field to match on.
#   column_value: value the field must equal.
#
# Returns:
#   The count reported by mongo.count(), or 0 when that count is negative
#   (treated as a failure; the driver error code is printed in that case).
getCountsByMatch <- function(column_name, column_value) {
  mongo <- mongo.create(host = "localhost:27018", db = "my_mongodb")
  # Release the connection on every exit path, including errors raised below.
  on.exit({
    mongo.disconnect(mongo)
    mongo.destroy(mongo)
  }, add = TRUE)

  # Build the single-field equality query { column_name: column_value }.
  buf <- mongo.bson.buffer.create()
  mongo.bson.buffer.append(buf, column_name, column_value)
  query <- mongo.bson.from.buffer(buf)

  count <- mongo.count(mongo, "my_mongodb.my_mongodb_collection", query)

  if (count < 0) {
    # Read the error code while the connection object is still alive; the
    # cleanup registered with on.exit() only runs after this returns.
    print(paste("mongo.get.err code: ", mongo.get.err(mongo)))
    return(0)
  }
  count
}
# Run an equality query against "my_mongodb.my_mongodb_collection" and return
# the matching documents as a data.frame with the columns
# col1, col2, col3, col4, col5, col6_1, col6_2 and col7.
#
# Args:
#   column_name:  name of the field to match on.
#   column_value: value the field must equal.
#
# Returns:
#   A data.frame with one row per document read from the cursor.
#   Missing character fields become "", a missing col2 becomes NA.
#   col6_1 / col6_2 are taken from the `other` value of the col6 entries whose
#   `some` is 0 / 1 respectively; they are -1 when col6 is absent and -2 when
#   col6 is present but holds no matching entry.
queryIntoDataFrame <- function(column_name, column_value) {
  total_count <- getCountsByMatch(column_name, column_value)

  # Preallocate the result columns from the count obtained above.
  col1 <- vector("character", total_count)
  col2 <- vector("numeric", total_count)
  col3 <- vector("character", total_count)
  col4 <- vector("character", total_count)
  col5 <- vector("character", total_count)
  col6_1 <- vector("numeric", total_count)
  col6_2 <- vector("numeric", total_count)
  col7 <- vector("character", total_count)

  # Fetch a field once and substitute `default` when the document lacks it.
  field_or <- function(doc, name, default) {
    value <- mongo.bson.value(doc, name)
    if (is.null(value)) default else value
  }

  mongo <- mongo.create(host = "localhost:27018", db = "my_mongodb")
  cursor <- NULL
  # Release the cursor (if one was opened) and then the connection on every
  # exit path, including errors raised while reading documents.
  on.exit({
    if (!is.null(cursor)) mongo.cursor.destroy(cursor)
    mongo.disconnect(mongo)
    mongo.destroy(mongo)
  }, add = TRUE)

  buf <- mongo.bson.buffer.create()
  mongo.bson.buffer.append(buf, column_name, column_value)
  query <- mongo.bson.from.buffer(buf)

  # queries are faster with mongo.find.exhaust in the options
  cursor <- mongo.find(mongo, "my_mongodb.my_mongodb_collection", query,
                       options = mongo.find.exhaust)

  i <- 1
  while (mongo.cursor.next(cursor)) {
    cval <- mongo.cursor.value(cursor)

    col1[i] <- field_or(cval, "col1", "")
    # NA rather than "" so one missing value does not turn the whole numeric
    # column into character.
    col2[i] <- field_or(cval, "col2", NA_real_)
    col3[i] <- field_or(cval, "col3", "")
    col4[i] <- field_or(cval, "col4", "")
    col5[i] <- field_or(cval, "col5", "")

    # Example of extracting data from a bson list, based on the value stored
    # in each list entry.
    items <- mongo.bson.value(cval, "col6")
    if (is.null(items)) {
      # col6 is absent: default -1
      col6_1[i] <- -1
      col6_2[i] <- -1
    } else {
      # Track matches in locals: an element of an atomic vector is never
      # NULL, so "was anything found?" cannot be asked of col6_1[i] itself.
      other_for_0 <- NULL
      other_for_1 <- NULL
      for (item in items) {
        # Skip entries that are not sub-documents or have no usable `some`.
        if (!is.list(item)) next
        if (isTRUE(item$some == 0)) {
          other_for_0 <- item$other
        } else if (isTRUE(item$some == 1)) {
          other_for_1 <- item$other
        }
      }
      # col6 is present but had no matching entry: default -2, just to show
      # the difference from the "col6 absent" case.
      # NOTE(review): if `other` holds strings, these assignments coerce the
      # column to character - confirm the intended type.
      col6_1[i] <- if (is.null(other_for_0)) -2 else other_for_0
      col6_2[i] <- if (is.null(other_for_1)) -2 else other_for_1
    }

    col7[i] <- field_or(cval, "col7", "")
    i <- i + 1
  }

  # Keep only the rows actually read: the collection may have changed between
  # the count and the find, and the count falls back to 0 on failure.
  rows <- seq_len(i - 1)
  df <- as.data.frame(list(
    col1 = col1[rows],
    col2 = col2[rows],
    col3 = col3[rows],
    col4 = col4[rows],
    col5 = col5[rows],
    col6_1 = col6_1[rows],
    col6_2 = col6_2[rows],
    col7 = col7[rows]
  ))

  # return the data frame
  df
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment