Last active
January 5, 2022 09:11
-
-
Save ilantoren/6c21b571d1595f2d7255f008c0cecb58 to your computer and use it in GitHub Desktop.
Mongo Loves Data post 1 - From Atlas to Compass to Graphs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# SETUP: | |
# python: | |
# python-example.py has the following dependencies | |
# pymongo, pandas, dnspython | |
# R:
# reticulate, ggplot2, dplyr, ggmap, leaflet, sp, broom
# Packages: reticulate bridges R to Python; ggplot2 and dplyr are used for
# the bar chart built further down.
library(reticulate)
library(ggplot2)
library(dplyr)

# Centre of the search (longitude, latitude) and the search radius that is
# handed to the Python query.
LONG <- -73.730564
LAT <- 40.6737
DISTANCE <- 1500

# Bind reticulate to a specific interpreter, then load python-example.py so
# its python_script() function becomes callable from R.
use_python("/usr/local/bin/python3")
source_python("python-example.py")

# python_script() returns two data frames: [1] summary, [2] listings.
data <- python_script(c(LONG, LAT), DISTANCE)
#
# Bar chart of restaurant counts per cuisine.
# Use dplyr to add columns with displayable names, then plot with ggplot.
# data[1] is the summary frame; its `_id` column (the cuisine) is renamed to
# `X_id` by data.frame()'s name checking.
#
grph <- data.frame(data[1]) %>%
  mutate(Cuisine = X_id, Count = count) %>%
  ggplot(aes(Cuisine, Count)) +
  geom_col(aes(fill = Cuisine)) +
  labs(
    # Derive the radius from DISTANCE so the title cannot drift from the query.
    title = paste0("Restaurants within ", DISTANCE / 1000, " km"),
    x = "Cuisine",
    y = "Count"
  ) +
  # The legend already identifies each cuisine, so hide the crowded x labels.
  theme(axis.text.x = element_blank())
# Spatial graph see https://rpubs.com/jhofman/nycma
library(leaflet)
library(sp)
library(ggmap)
library(broom)

# Base map centred on the search point.
m <- leaflet() %>%
  addTiles() %>%
  setView(LONG, LAT, zoom = 13)

# data is an array, second member is a list of restaurants with coords.
# The Python $project stage names the longitude column `long` (not `lng`).
d <- data.frame(data[2]) %>%
  select(long, lat, name)

# Pass the frame as `data =`; a bare positional argument after the map would
# be matched to `layerId` once lng/lat/popup are supplied by name.
m %>%
  addMarkers(data = d, lng = ~long, lat = ~lat, popup = ~name) %>%
  # Yellow ring: the search radius. Red dot: the search origin.
  addCircles(LONG, LAT, radius = DISTANCE, color = "yellow") %>%
  addCircles(LONG, LAT, radius = 30, color = "red", popup = "YOU ARE HERE")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# make sure you have pymongo and pandas installed - I used pip | |
from pymongo import MongoClient | |
import pandas as pd | |
from os import environ | |
# Atlas credentials come from the process environment and are read when the
# module is loaded; a missing variable raises KeyError at that point.
SECRET = environ['secret_pwd']
USER = environ['secret_user']
# set defaults on the function so it can be called without arguments
def python_script(coord=[-73.730564, 40.67372], distance = 1500):
    """Find restaurants near a point and return a summary plus the listings.

    Runs a ``$geoNear`` + ``$facet`` aggregation against
    ``sample_restaurants.restaurants``.

    Parameters
    ----------
    coord : sequence of two numbers
        ``[longitude, latitude]`` of the search centre. The default list is
        only read, never mutated.
    distance : number
        Passed to ``$geoNear`` as ``maxDistance``.

    Returns
    -------
    list of two pandas.DataFrame
        ``[summary, listings]``. ``summary`` has one row per cuisine with
        ``_id`` (the cuisine), ``closest``, ``count`` and ``farthest``.
        ``listings`` has one row per restaurant with ``name``, ``long``,
        ``lat``, ``address``, ``cuisine``, ``last_grade`` and ``distance``.

    Notes
    -----
    USER and SECRET are expected to hold the *raw* credentials; they are
    percent-escaped here before being placed in the connection string.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    # geoNear takes a point and finds all data records within a given distance
    # https://docs.mongodb.com/manual/reference/operator/aggregation/geoNear/#mongodb-pipeline-pipe.-geoNear
    # facet creates two or more intermediate pipelines, in this case a summary
    # and a listing of the matched objects.
    pipeline = [
        {
            '$geoNear': {
                'near': {
                    'type': 'Point',
                    'coordinates': coord
                },
                'distanceField': 'dist',
                'maxDistance': distance,
                # NOTE(review): assumes the collection carries a geospatial
                # index on this path -- confirm against the Atlas cluster.
                'key': 'geometry.coordinates',
                'spherical': True
            }
        }, {
            '$facet': {
                'summary': [
                    {
                        '$group': {
                            '_id': '$cuisine',
                            'closest': {
                                '$min': {
                                    '$round': '$dist'
                                }
                            },
                            'count': {
                                '$sum': 1
                            },
                            'farthest': {
                                '$max': {
                                    '$round': '$dist'
                                }
                            }
                        }
                    }
                ],
                'listings': [
                    {
                        '$project': {
                            'name': 1,
                            'long': {
                                '$arrayElemAt': [
                                    '$address.coord', 0
                                ]
                            },
                            'lat': {
                                '$arrayElemAt': [
                                    '$address.coord', 1
                                ]
                            },
                            'address': {
                                '$concat': [
                                    '$address.building', ' ', '$address.street'
                                ]
                            },
                            'cuisine': 1,
                            'last_grade': {
                                '$arrayElemAt': [
                                    '$grades', 0
                                ]
                            },
                            # distanceField above is 'dist', so the distance
                            # lives in '$dist' ('$dist.calculated' is unset
                            # and would always round to null).
                            'distance': {
                                '$round': '$dist'
                            }
                        }
                    }
                ]
            }
        }
    ]

    # Requires the PyMongo package.
    # https://api.mongodb.com/python/current
    # Credentials must be percent-escaped, otherwise characters such as
    # '@', ':' or '/' in the password corrupt the URI.
    uri = (
        'mongodb+srv://' + quote_plus(USER) + ':' + quote_plus(SECRET)
        + '@mflix.beal2.mongodb.net/test?authSource=admin&replicaSet=atlas-a7tqy4-shard-0&readPreference=primary&appname=MongoDB%20Compass&ssl=true'
    )

    # The context manager closes the client (and its connections) even if the
    # aggregation raises; the cursor is drained before leaving the block.
    with MongoClient(uri) as client:
        result = client['sample_restaurants']['restaurants'].aggregate(pipeline)
        # pandas can create a DataFrame from the result, but in this case
        # there are a couple of manipulations before it is ready to pass to R
        df = pd.DataFrame(list(result))

    # $facet yields a single document whose 'summary' and 'listings' fields
    # are arrays; turn each array into its own DataFrame.
    summary = df.pop('summary')
    df2 = pd.DataFrame(summary.iloc[0])
    listings = df.pop('listings')
    df3 = pd.DataFrame(listings.iloc[0])
    return [df2, df3]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment