# fordhamedu/get-wework-office-locations.py

## get-wework-office-locations.py
import urllib2
import json
import csv
import pandas as pd
import numpy as np
import decimal
D = decimal.Decimal

# Download the WeWork location listing, keep a raw JSON snapshot of it, and
# export a flattened CSV that adds a full URL column plus numeric
# "*_us" desk/office price columns.

url = "https://www.wework.com/locations/all"

# urllib2 responses are not context managers, so close the connection
# explicitly even if reading or decoding fails.
response = urllib2.urlopen(url)
try:
    data = json.loads(response.read())
finally:
    response.close()

# Raw snapshot of exactly what the endpoint returned.
ouJSON = 'data/wework.json'
with open(ouJSON, 'w') as f:
    json.dump(data, f)

# Fields copied from every location record, in CSV column order.
varsList = [
    'uuid', 'zip', 'line1', 'location_path', 'city', 'latitude', 'longitude',
    'path', 'slug', 'open_for_touring', 'name', 'phone', 'image_url',
    'location_address', 'format_desk_pricing', 'format_office_pricing',
]

# One row per location; a record missing any field still raises KeyError.
dataList = [[location[key] for key in varsList] for location in data]

df = pd.DataFrame(dataList, columns=varsList)
df['url'] = 'https://www.wework.com' + df['path'].map(str)

# (formatted price column, ceiling) pairs. Per the original author this cap is
# a lazy hack to remove foreign values higher than the highest US values.
priceCeilings = [
    ('format_desk_pricing', 800),
    ('format_office_pricing', 1400),
]

for priceColumn, _ in priceCeilings:
    # Strip everything except sign, digits and the decimal point.
    stripped = df[priceColumn].str.replace(r'[^-+\d.]', '')
    # errors='coerce' turns empty/unparseable strings into NaN instead of
    # aborting the export; astype(float) keeps the column float64 even when
    # every value happens to be an integer.
    df[priceColumn + '_us'] = pd.to_numeric(stripped, errors='coerce').astype(float)

print(df.dtypes)

for priceColumn, ceiling in priceCeilings:
    usColumn = priceColumn + '_us'
    # NaN compares False against the ceiling, so missing prices become 0 too.
    df[usColumn] = np.where(df[usColumn] <= ceiling, df[usColumn], 0)

df.to_csv('data/weworklocations_20161019.csv', index=False, encoding='utf-8')