Skip to content

Instantly share code, notes, and snippets.

@sven4all
Last active October 25, 2018 21:59
Show Gist options
  • Save sven4all/56bf566e3df3837098962accc2e7dc2a to your computer and use it in GitHub Desktop.
Save sven4all/56bf566e3df3837098962accc2e7dc2a to your computer and use it in GitHub Desktop.
This scripts was written for a Master Thesis research at the TU Delft to collect data about the usage characteristics of Mobike and derive trips of that data, for a movie that was made with help of the data collection performed with this script see https://www.youtube.com/watch?v=MVqJtJA6_wg
-- Database structure that was used to store the data
-- Postgresql 10.5 + PostGis 2.4.4
CREATE TABLE cycle_measurement (
sample_id INT,
bike_id VARCHAR(255),
location GEOGRAPHY,
bike_type VARCHAR(10)
);
CREATE TABLE sample (
sample_id SERIAL,
start_time timestamp,
end_time timestamp,
number_of_bicycles INT
);
import psycopg2
import csv
conn = psycopg2.connect("dbname=mobike")
cur = conn.cursor()
# Query all bikes that appeared at least once in the data.
stmt1= """SELECT DISTINCT(bike_id) FROM cycle_measurement"""
cur.execute(stmt1)
# Bike that has made the most trips
max_number_of_trips = 0
bike_id_most_trips = ''
# Indicates number of trips within time period.
active_bike_counter = 0
# Array that contains all the trips that are found with the query
all_trips = []
index_bike = 0
colnames = None
bike_ids = cur.fetchall()
# Execute for every bike_id
for idx, bike_id in enumerate(bike_ids):
# Returns all the trips bike made within time frame.
stmt2 = """
SELECT bike_id, bike_type, start_x, start_y, end_x, end_y, s2.start_time + '2 HOURS' as trip_start_time, s1.start_time + '2 HOURS' as trip_end_time
FROM
(SELECT bike_id,
bike_type,
ST_DISTANCE(LOCATION, prev_geom) AS distance,
ST_X(LOCATION::geometry) AS end_x,
ST_Y(LOCATION::geometry) AS end_y,
ST_X(prev_geom::geometry) AS start_x,
ST_Y(prev_geom::geometry) AS start_y,
sample_id,
prev_sample_id,
sample_id - prev_sample_id as sample_difference
FROM
(SELECT bike_id,
bike_type,
LOCATION,
sample_id,
lag(LOCATION) OVER (
ORDER BY sample_id) AS prev_geom,
lag(sample_id) OVER (
ORDER BY sample_id) AS prev_sample_id
FROM cycle_measurement
WHERE bike_id = '%s') AS q1) AS q2
JOIN sample s1
ON q2.sample_id = s1.sample_id
JOIN sample s2
ON prev_sample_id = s2.sample_id
WHERE distance > 200 and (s2.start_time + '2 HOURS') >= '2018-10-23' and (s2.end_time + '2 HOURS') < '2018-10-24';
""" % bike_id
cur.execute(stmt2)
trips = cur.fetchall()
if not colnames:
colnames = [desc[0] for desc in cur.description]
if len(trips) > 0:
active_bike_counter += 1
if len(trips) > max_number_of_trips:
max_number_of_trips= len(trips)
bike_id_most_trips = bike_id
# append all trips at end of list
all_trips.extend(trips)
print("Progress {} / {} ".format(idx , len(bike_ids)), end="\r")
csvfile = "trips.csv"
with open(csvfile, "w") as output:
writer = csv.writer(output, lineterminator='\n')
writer.writerow(colnames)
writer.writerows(all_trips)
print("Most trips %s, with bike_id %s." % (max_number_of_trips, bike_id_most_trips))
print("Total number of trips: %s" % len(all_trips))
print("Number of active bikes: %s" % active_bike_counter)
import requests
import psycopg2
import logging
import time
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
class MobikeLocation():
def __init__(self, bike_id, lat, lng, bike_type):
self.bike_id = bike_id
self.lat = lat
self.lng = lng
self.bike_type = bike_type
def save(self, cur, measurement_id):
cur.execute("""
INSERT INTO cycle_measurement
(sample_id, bike_id, location, bike_type) VALUES (%s, %s, ST_SetSRID(ST_MakePoint(%s, %s),4326), %s)
""", (measurement_id, self.bike_id, self.lng, self.lat, self.bike_type))
class MobikeSniffer():
def __init__(self, start_lat, start_lng, end_lat, end_lng):
self.start_lat = start_lat
self.start_lng = start_lng
self.end_lat = end_lat
self.end_lng = end_lng
# This is hardcoded for the Delft situation, adjust for your own situation.
self.diff_lat = 0.005
self.diff_lng = 0.004
def snif_bikes(self):
bikes = {}
lat = self.start_lat
while lat > self.end_lat:
lat = lat - self.diff_lat
lng = self.start_lng
while lng < self.end_lng:
lng = lng + self.diff_lng
tmp_bikes = self.sample(lat, lng, 0)
for cycle in tmp_bikes:
bikes[cycle["distId"]] = MobikeLocation(cycle["distId"], cycle["distY"], cycle["distX"], cycle["biketype"])
return bikes
def sample(self, lat, lng, level):
if level > 1:
logging.info("Level %s, stop recursion", level)
return []
url = "https://app.mobike.com/api/nearby/v4/nearbyBikeInfo?latitude=%s&longitude=%s&sign=132d4d239a9f6923190562d0a77c0b39&bikenum=50&scope=500" % (lat,lng)
r = requests.get(url)
if r.status_code != 200:
logging.warn(r.status_code)
logging.warn(r.text)
return []
bikes = r.json()["bike"]
if len(bikes) >= 50:
bikes += self.sample(lat + 0.0014, lng + 0.0014, level + 1)
bikes += self.sample(lat + 0.0014, lng - 0.0014, level + 1)
bikes += self.sample(lat - 0.0014, lng + 0.0014, level + 1)
bikes += self.sample(lat - 0.0014, lng - 0.0014, level + 1)
return bikes
class MobikeImporter():
def __init__(self):
# The 'box' for where data was collected.
self.sniffer = MobikeSniffer(52.030233, 4.309318, 51.983844, 4.411370)
self.conn = psycopg2.connect("dbname=mobike")
def import_bikes(self):
cur = self.conn.cursor()
cur.execute("INSERT INTO sample (start_time) VALUES (NOW()) returning sample_id")
measurement_id = cur.fetchone()[0]
self.conn.commit()
bikes = self.sniffer.snif_bikes()
for _, bike in bikes.items():
bike.save(cur, measurement_id)
cur.execute("""UPDATE sample
SET end_time = NOW(),
number_of_bicycles = %s
WHERE sample_id = %s""", (len(bikes), measurement_id,))
self.conn.commit()
return bikes
importer = MobikeImporter()
while True:
start = time.time()
logging.info("Start import.")
number_of_bikes = len(importer.import_bikes())
logging.info("Completed import in %.2f, imported %s bikes." % ((time.time() - start), number_of_bikes))
time.sleep(60 * 5 - (time.time() - start) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment