Skip to content

Instantly share code, notes, and snippets.

@ilantoren
Last active January 5, 2022 09:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ilantoren/6c21b571d1595f2d7255f008c0cecb58 to your computer and use it in GitHub Desktop.
Save ilantoren/6c21b571d1595f2d7255f008c0cecb58 to your computer and use it in GitHub Desktop.
Mongo Loves Data post 1 - From Atlas to Compass to Graphs
#
# SETUP:
# Python:
#   python-example.py depends on pymongo, pandas, dnspython
# R:
#   this script loads reticulate, ggplot2, dplyr, leaflet, sp, ggmap, broom
# Query centre (longitude, latitude) and search radius in metres.
LONG <- -73.730564
LAT <- 40.6737
DISTANCE <- 1500

library(reticulate)
library(ggplot2)
library(dplyr)

# python_script() is defined in python-example.py. It returns a list of two
# data frames: [[1]] the per-cuisine summary, [[2]] the restaurant listings.
use_python("/usr/local/bin/python3")
source_python("python-example.py")
data <- python_script(c(LONG, LAT), DISTANCE)

#
# Use dplyr to add columns with displayable names,
# then plot with ggplot.
#
# data.frame() turns the Mongo `_id` column name into the syntactic `X_id`.
# The title is built from DISTANCE so it stays correct when the radius changes.
# Tick labels on the x axis are hidden; the fill legend identifies each bar.
grph <- data.frame(data[[1]]) %>%
  mutate(Cuisine = X_id, Count = count) %>%
  ggplot(aes(Cuisine, Count)) +
  geom_col(aes(fill = Cuisine)) +
  labs(
    title = paste("Restaurants within", format(DISTANCE / 1000), "km"),
    x = "Cuisine",
    y = "Count"
  ) +
  theme(axis.text.x = element_blank())
# Spatial graph see https://rpubs.com/jhofman/nycma
library(leaflet)
library(sp)
library(ggmap)
library(broom)

# Base map centred on the query point.
m <- leaflet() %>%
  addTiles() %>%
  setView(LONG, LAT, zoom = 13)

# data is a list; its second member holds the restaurants with coordinates.
# The Python pipeline projects the longitude as `long`, so rename it to `lng`
# here instead of selecting a column that does not exist.
d <- data.frame(data[[2]]) %>%
  select(lng = long, lat, name)

# Pass the coordinates by name only: an extra positional argument after the
# map would be matched to addMarkers()'s `layerId`, not to `data`.
# The yellow circle shows the search radius; the red one marks the centre.
m %>%
  addMarkers(lng = d$lng, lat = d$lat, popup = d$name) %>%
  addCircles(LONG, LAT, radius = DISTANCE, color = "yellow") %>%
  addCircles(LONG, LAT, radius = 30, color = "red", popup = "YOU ARE HERE")
# make sure you have pymongo and pandas installed - I used pip
from pymongo import MongoClient
import pandas as pd
from os import environ
# Atlas credentials are read from the environment when the module is loaded;
# a missing variable raises KeyError.
SECRET, USER = (environ[name] for name in ("secret_pwd", "secret_user"))
# Both arguments of the function have defaults, so it can be called with no arguments.
def python_script(coord=[-73.730564, 40.67372], distance=1500):
    """Find restaurants near a point and return a summary plus the listings.

    Runs a two-stage aggregation on ``sample_restaurants.restaurants``:

    * ``$geoNear`` takes a point and finds all records within a given
      distance, storing each record's distance in the ``dist`` field.
      https://docs.mongodb.com/manual/reference/operator/aggregation/geoNear/#mongodb-pipeline-pipe.-geoNear
    * ``$facet`` creates two intermediate pipelines over those records:
      ``summary`` (one row per cuisine with ``closest``, ``count`` and
      ``farthest``) and ``listings`` (one row per restaurant with ``name``,
      ``long``, ``lat``, ``address``, ``cuisine``, ``last_grade`` and
      ``distance``).

    Requires the PyMongo package.
    https://api.mongodb.com/python/current

    Args:
        coord: ``[longitude, latitude]`` of the search centre.
        distance: Search radius passed to ``$geoNear`` as ``maxDistance``.

    Returns:
        A two-element list ``[summary_df, listings_df]`` of pandas DataFrames.
    """
    # Local import keeps the module's top-level import block unchanged.
    from urllib.parse import quote_plus

    # Credentials are percent-escaped so that characters such as '@', ':' or
    # '/' in the user name or password cannot corrupt the connection string.
    # this isn't a valid user/pwd combo
    uri = (
        'mongodb+srv://' + quote_plus(USER) + ':' + quote_plus(SECRET)
        + '@mflix.beal2.mongodb.net/test?authSource=admin&replicaSet=atlas-a7tqy4-shard-0&readPreference=primary&appname=MongoDB%20Compass&ssl=true'
    )

    pipeline = [
        {
            '$geoNear': {
                'near': {
                    'type': 'Point',
                    # Copy so the shared default list is never handed out.
                    'coordinates': list(coord)
                },
                'distanceField': 'dist',
                'maxDistance': distance,
                # NOTE(review): the listings projection below reads the
                # coordinates from address.coord, while this key points at
                # geometry.coordinates - confirm that field and its
                # geospatial index exist in the collection.
                'key': 'geometry.coordinates',
                'spherical': True
            }
        }, {
            '$facet': {
                'summary': [
                    {
                        '$group': {
                            '_id': '$cuisine',
                            'closest': {
                                '$min': {
                                    '$round': '$dist'
                                }
                            },
                            'count': {
                                '$sum': 1
                            },
                            'farthest': {
                                '$max': {
                                    '$round': '$dist'
                                }
                            }
                        }
                    }
                ],
                'listings': [
                    {
                        '$project': {
                            'name': 1,
                            'long': {
                                '$arrayElemAt': [
                                    '$address.coord', 0
                                ]
                            },
                            'lat': {
                                '$arrayElemAt': [
                                    '$address.coord', 1
                                ]
                            },
                            'address': {
                                '$concat': [
                                    '$address.building', ' ', '$address.street'
                                ]
                            },
                            'cuisine': 1,
                            'last_grade': {
                                '$arrayElemAt': [
                                    '$grades', 0
                                ]
                            },
                            # distanceField stores a plain number in 'dist';
                            # there is no 'calculated' sub-field to read.
                            'distance': {
                                '$round': '$dist'
                            }
                        }
                    }
                ]
            }
        }
    ]

    # The context manager closes the client; the cursor is drained first
    # because it cannot be read after the connection is gone.
    with MongoClient(uri) as client:
        restaurants = client['sample_restaurants']['restaurants']
        facet_docs = list(restaurants.aggregate(pipeline))

    # The $facet stage yields one document whose 'summary' and 'listings'
    # values are lists of records; pandas builds one DataFrame from each
    # before they are passed on to R.
    facets = facet_docs[0]
    df2 = pd.DataFrame(facets['summary'])
    df3 = pd.DataFrame(facets['listings'])
    return [df2, df3]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment