Last active
March 29, 2020 04:01
-
-
Save RustingSword/5963008 to your computer and use it in GitHub Desktop.
extract stay point from Geolife
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# extract stay points from a GPS log file | |
# implementation of algorithm in | |
# [1] Q. Li, Y. Zheng, X. Xie, Y. Chen, W. Liu, and W.-Y. Ma, "Mining user similarity based on location history", in Proceedings of the 16th ACM SIGSPATIAL international conference on Advances in geographic information systems, New York, NY, USA, 2008, pp. 34:1--34:10. | |
import time | |
import os | |
import sys | |
from ctypes import * | |
from math import radians, cos, sin, asin, sqrt | |
time_format = '%Y-%m-%d,%H:%M:%S' | |
# structure of stay point | |
class stayPoint(Structure): | |
_fields_ = [ | |
("longitude", c_double), | |
("laltitude", c_double), | |
("arrivTime", c_uint64), | |
("leaveTime", c_uint64) | |
] | |
# calculate distance between two points from their coordinate | |
def getDistance(lon1, lat1, lon2, lat2): | |
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) | |
dlon = lon2 - lon1 | |
dlat = lat2 - lat1 | |
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 | |
c = 2 * asin(sqrt(a)) | |
m = 6371000 * c | |
return m | |
def computMeanCoord(gpsPoints): | |
lon = 0.0 | |
lat = 0.0 | |
for point in gpsPoints: | |
fields = point.rstrip().split(',') | |
lon += float(fields[0]) | |
lat += float(fields[1]) | |
return (lon/len(gpsPoints), lat/len(gpsPoints)) | |
# extract stay points from a GPS log file | |
# input: | |
# file: the name of a GPS log file | |
# distThres: distance threshold | |
# timeThres: time span threshold | |
# default values of distThres and timeThres are 200 m and 30 min respectively, according to [1] | |
def stayPointExtraction(file, distThres = 200, timeThres = 20*60): | |
stayPointList = [] | |
log = open(file, 'r') | |
points = log.readlines()[6:] # first 6 lines are useless | |
pointNum = len(points) | |
i = 0 | |
while i < pointNum-1: | |
j = i+1 | |
while j < pointNum: | |
field_pointi = points[i].rstrip().split(',') | |
field_pointj = points[j].rstrip().split(',') | |
dist = getDistance(float(field_pointi[0]),float(field_pointi[1]), | |
float(field_pointj[0]),float(field_pointj[1])) | |
if dist > distThres: | |
t_i = time.mktime(time.strptime(field_pointi[-2]+','+field_pointi[-1],time_format)) | |
t_j = time.mktime(time.strptime(field_pointj[-2]+','+field_pointj[-1],time_format)) | |
deltaT = t_j - t_i | |
if deltaT > timeThres: | |
sp = stayPoint() | |
sp.laltitude, sp.longitude = computMeanCoord(points[i:j+1]) | |
sp.arrivTime, sp.leaveTime = int(t_i), int(t_j) | |
stayPointList.append(sp) | |
i = j | |
break | |
j += 1 | |
# Algorithm in [1] lacks following line | |
i += 1 | |
return stayPointList | |
def main(): | |
for dirname, dirnames, filenames in os.walk('Data'): | |
filenum = len(filenames) | |
for filename in filenames: | |
if filename.endswith('plt'): | |
gpsfile = os.path.join(dirname, filename) | |
spt = stayPointExtraction(gpsfile) | |
if len(spt) > 0: | |
spfile = gpsfile.replace('Data', 'StayPoint') | |
if not os.path.exists(os.path.dirname(spfile)): | |
os.makedirs(os.path.dirname(spfile)) | |
spfile_handle = open(spfile, 'w+') | |
print >> spfile_handle, 'Extracted stay points:\nlongitude\tlaltitude\tarriving time\tleaving time' | |
for sp in spt: | |
print >> spfile_handle, sp.laltitude, sp.longitude, time.strftime(time_format, time.localtime(sp.arrivTime)), time.strftime(time_format, time.localtime(sp.leaveTime)) | |
spfile_handle.close() | |
if __name__ == '__main__': | |
main() |
(a) line 79 should be removed,
@gunjanthesystem I think line 79 should be changed to i=j
and line 75 needs to be removed.
So the stayPointExtraction function after the modification looks like the following:
def stayPointExtraction(file, distThres = 200, timeThres = 20*60):
stayPointList = []
log = open(file, 'r')
points = log.readlines()[6:] # first 6 lines are useless
pointNum = len(points)
i = 0
while i < pointNum:
j = i+1
while j < pointNum:
field_pointi = points[i].rstrip().split(',')
field_pointj = points[j].rstrip().split(',')
dist = getDistance(float(field_pointi[0]),float(field_pointi[1]),
float(field_pointj[0]),float(field_pointj[1]))
if dist > distThres:
t_i = time.mktime(time.strptime(field_pointi[-2]+','+field_pointi[-1],time_format))
t_j = time.mktime(time.strptime(field_pointj[-2]+','+field_pointj[-1],time_format))
deltaT = t_j - t_i
if deltaT > timeThres:
sp = stayPoint()
sp.laltitude, sp.longitude = computMeanCoord(points[i:j+1])
sp.arrivTime, sp.leaveTime = int(t_i), int(t_j)
stayPointList.append(sp)
break
j += 1
# Algorithm in [1] lacks following line
i = j
return stayPointList
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
i am implementing the stay point detection in java and i saw your python code. IMHO: (a) line 79 should be removed, (b) line 72-73, points from i to j-1 should be merged and not i to j (i suspect this to be an error in the pseudo code in the paper). Cheers.