# Scraped 2017-07-22 21:00:56 +02:00 -- 400 lines, 18 KiB, Python

# -*- coding: utf-8 -*-
import numpy as np
import utilities as ut
from math import cos
from osgeo import ogr, osr
import pytess, operator
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
# gdal.SetConfigOption('OGR_ENABLE_PARTIAL_REPROJECTION', 'YES')
##############################################################################
# Boot Up Phase: build the global GDAL/OGR objects used by every function.
print 'initialising geographical toolchain'
WGS84 = osr.SpatialReference()
WorldMercator = osr.SpatialReference()
WGS84.ImportFromEPSG(4326)            # geographic lon/lat in degrees
WorldMercator.ImportFromEPSG(3395)    # projected World Mercator in metres
# Transformations between degree and metre space (metres are needed for
# area/length computations, e.g. the minPts density in apply_dbscan).
trans_to_m = osr.CoordinateTransformation(WGS84, WorldMercator)
trans_to_degree = osr.CoordinateTransformation(WorldMercator, WGS84)
esriDriver = ogr.GetDriverByName('ESRI Shapefile')
print 'create an temporal datasource in memory'
# Shared in-memory datasource that owns every intermediate layer below.
memDriver = ogr.GetDriverByName('MEMORY')
mem_source = memDriver.CreateDataSource('memdrive')
def load_from_textfile(filename):
    """Load comma separated point records from *filename* into a new
    in-memory point layer (WGS84) and return that layer.

    Expected column order per line:
        ID, userID, lon, lat, info1, info2, dtime_str, status, info3, info4

    :param filename: path of the text file to read.
    :return: the populated in-memory OGR point layer.
    """
    memoryLayer = mem_source.CreateLayer('textfile', WGS84, ogr.wkbPoint)
    # All attributes are stored as plain strings, in file column order.
    for fieldName in ('ID', 'userID', 'lon', 'lat', 'info1', 'info2',
                      'dtime_str', 'status', 'info3', 'info4'):
        memoryLayer.CreateField(ogr.FieldDefn(fieldName, ogr.OFTString))
    with open(filename) as f:
        for line in f:
            # BUG FIX: the old check tested the file object ("f") instead of
            # the current line and was therefore always true; skip blank
            # lines, which would crash on float(attrList[2]) below.
            if line.strip():
                attrList = line.split(',')
                textFeature = ogr.Feature(memoryLayer.GetLayerDefn())
                geometry = ogr.Geometry(ogr.wkbPoint)
                # Columns 2 and 3 hold lon/lat as text.
                geometry.AddPoint_2D(float(attrList[2]), float(attrList[3]))
                textFeature.SetGeometry(geometry)
                for i, value in enumerate(attrList):
                    textFeature.SetField(i, value)
                memoryLayer.CreateFeature(textFeature)
    return memoryLayer
def load_to_memory(ShapeName):
print 'loading local files'
ShapeName = ShapeName if ShapeName.endswith('.shp') else '%s%s' % (ShapeName, '.shp')
shapefile = esriDriver.Open(ShapeName, 0)
layer = mem_source.CreateLayer('layer', WGS84, ogr.wkbPoint)
inputLayer = shapefile.GetLayer()
OLDDefn = inputLayer.GetLayerDefn()
print 'copying to memory'
for i in range(OLDDefn.GetFieldCount()):
layer.CreateField(OLDDefn.GetFieldDefn(i))
for feature in inputLayer:
layer.CreateFeature(feature)
print 'finished loading - closing Input - cleanup'
layer.ResetReading()
return layer
def transform_to_array(layer):
print 'transfering shapefile to numpy Array'
array = np.array([[feature.GetGeometryRef().GetX(), feature.GetGeometryRef().GetY()] for feature in layer])
layer.ResetReading()
return array
def apply_dbscan(layer, epsFactor, minpts=0, eps=0, show=False, plot=None):
def plot_results():
# Plot result
# Black removed and is used for noise instead.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
class_member_mask = (labels == k)
xy = array[class_member_mask & core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
markeredgecolor='k', markersize=14)
xy = array[class_member_mask & ~core_samples_mask]
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
markeredgecolor='k', markersize=0.2)
plt.title('Estimated number of clusters: %d' % n_clusters_)
if show:
plt.show()
if plot is not None:
plt.savefig(plot, bbox_inches='tight')
if minpts <= 0:
print 'calculate minPts'
geomcol = ogr.Geometry(ogr.wkbGeometryCollection)
for feature in layer:
geomcol.AddGeometry(feature.GetGeometryRef())
layer.ResetReading()
convexhull = geomcol.ConvexHull()
convexhull.Transform(trans_to_m)
minpts = (convexhull.GetArea() / layer.GetFeatureCount()) / 4000 # per 5km**2
print 'automatic minPts at', minpts
else:
print 'determined minPts =', minpts
if eps <= 0:
print 'calculate eps'
minX, maxX, minY, maxY = layer.GetExtent()
eps = epsFactor * cos(minY + (maxY - minY)/2) # Change the factor for a variation in search Radius
print 'eps at', eps
else:
print 'determined eps =', eps
# Compute DBSCAN
array = transform_to_array(layer)
print 'starting DBSCAN'
db = DBSCAN(eps=eps, min_samples=minpts).fit(array)
labels = db.labels_
# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
if plot or show:
plot_results()
return labels
def build_cluster_layer(labels, array, shapename=None):
print 'creating cluster layer'
xValues, yValues = [], []
# Create an new Layer with Cluster centroids
layer = mem_source.CreateLayer('ClusterLayer', WGS84, ogr.wkbPoint)
layer.CreateField(ogr.FieldDefn('lon', ogr.OFTReal))
layer.CreateField(ogr.FieldDefn('lat', ogr.OFTReal))
layer.CreateField(ogr.FieldDefn('cluster', ogr.OFTInteger64))
print 'sorting points to clusters'
geomColDict = dict()
for num, label in enumerate(labels):
if label != -1:
if geomColDict.get(label, None) is None:
geomColDict[label] = ogr.Geometry(ogr.wkbGeometryCollection)
point = ogr.Geometry(ogr.wkbPoint)
point.AddPoint(array[num, 0], array[num, 1])
geomColDict[label].AddGeometry(point)
print 'create clusters centroid features'
for label in set(labels):
if label != -1:
feature = ogr.Feature(layer.GetLayerDefn())
centre = geomColDict[label].Centroid()
feature.SetGeometry(centre)
feature.SetField('lon', centre.GetX())
xValues.append(centre.GetX())
feature.SetField('lat', centre.GetY())
yValues.append(centre.GetY())
feature.SetField('cluster', label)
layer.SetFeature(feature)
print 'Cluster Centroids Created'
if shapename is not None:
ut.CreateShapefile(shapename, ogr.wkbPoint, layer)
return [layer, xValues, yValues]
def build_voronois(xValues, yValues, shapename=None):
"""
Create an new Layer with Voronoi Polygons from clusters
:return:
"""
print 'building Voronoi Polygons from Cluster Centroids'
layer = mem_source.CreateLayer('VoronoiLayer', WGS84, ogr.wkbPoint)
layer.CreateField(ogr.FieldDefn('lon', ogr.OFTReal))
layer.CreateField(ogr.FieldDefn('lat', ogr.OFTReal))
layer.CreateField(ogr.FieldDefn('cluster', ogr.OFTInteger64))
voronois = pytess.voronoi(zip(xValues, yValues))
print 'wirte features in Voronoi Layer'
for label, (centre, coordinates) in enumerate(voronois):
wkt = 'Polygon (('
coordinates.append(coordinates[0])
for x, y in coordinates:
wkt = '%s %f %f,' % (wkt, x, y)
wkt = '%s))' % wkt[:-1]
feature = ogr.Feature(layer.GetLayerDefn())
feature.SetGeometry(ogr.CreateGeometryFromWkt(wkt))
feature.SetField('lon', centre[0] if centre is not None else 0)
feature.SetField('lat', centre[1] if centre is not None else 0)
feature.SetField('cluster', label)
layer.CreateFeature(feature)
if shapename is not None:
ut.CreateShapefile(shapename, ogr.wkbPolygon, layer)
print 'Voronois Done'
return layer
def build_trajectorys(inputLayer, inputSource, voronoiLayer, shapename=None, initial=False, generalized=False, final=False):
print 'Preparing User Dictionaries and setting Layers'
layerDefn = inputLayer.GetLayerDefn()
memInitialTrajectories = mem_source.CreateLayer('InitialTrajecLayer', WGS84, ogr.wkbPoint)
for i in range(layerDefn.GetFieldCount()):
memInitialTrajectories.CreateField(layerDefn.GetFieldDefn(i))
memTrajectoryLayer = mem_source.CreateLayer('TrajectoryLayer', WGS84, ogr.wkbPoint)
for i in range(layerDefn.GetFieldCount()):
memTrajectoryLayer.CreateField(layerDefn.GetFieldDefn(i))
trajectoryDict, featureDict = dict(), dict()
print 'Creating All initial Trajectories'
for i in range(0, inputLayer.GetFeatureCount()):
feature = inputLayer.GetFeature(i)
featureGeom = feature.GetGeometryRef()
pointX = pointY = None
for v in range(0, voronoiLayer.GetFeatureCount()):
areaFeature = voronoiLayer.GetFeature(v)
areaGeom = areaFeature.GetGeometryRef()
if featureGeom.Intersects(areaGeom):
pointX, pointY = areaFeature.GetField('lon'), areaFeature.GetField('lat')
# featureGeom = areaGeom.Centroid() # If u want to use the geometric center of a Vornonoi Shape
break
if pointX and pointY is not None:
userID = feature.GetFieldAsDouble('userID')
if trajectoryDict.get(userID, None) is None:
trajectoryDict[userID] = ogr.Geometry(ogr.wkbLineString)
trajectoryDict[userID].AddPoint(pointX, pointY)
if featureDict.get(userID, None) is None:
featureDict[userID] = feature
fieldID = 0
for trajectory, userID in zip(trajectoryDict.values(), trajectoryDict):
if trajectory.GetPointCount() >= 2 and trajectory.Length() >= 0.0:
trajectoryFeature = featureDict[userID]
trajectoryFeature.SetGeometry(trajectory)
trajectoryFeature.SetFID(fieldID)
fieldID += 1
memInitialTrajectories.CreateFeature(trajectoryFeature)
if generalized or final:
percentageBorder, initalCount, similarityDict, FID, deletedFeature = \
0, memInitialTrajectories.GetFeatureCount(), dict(), 0, 0
print 'mapping Trajectories to Cluster centroids'
# This is just for visualization of progress
for i in range(0, initalCount):
trajectoryFeature = memInitialTrajectories.GetFeature(i)
if i >= percentageBorder:
print 'processing %i of %i' % (i, initalCount)
print str(i / float(initalCount) * 100)[:2], '%'
percentageBorder += (initalCount/20.0)
newTrajectoryGeom = ogr.Geometry(ogr.wkbLineString)
trajectoryGeom = trajectoryFeature.GetGeometryRef()
pointList = [trajectoryGeom.GetPoint(x) for x in range(0, trajectoryGeom.GetPointCount())]
for ((x1, y1, z1), (x2, y2, z2)) in zip(pointList[:-1], pointList[1:]):
wkt = 'LINESTRING (%f %f, %f %f)' % (x1, y1, x2, y2)
segmentGeom = ogr.CreateGeometryFromWkt(wkt)
pointAndDistList = []
start = ogr.Geometry(ogr.wkbPoint)
start.AddPoint(x1, y1)
for v in range(0, voronoiLayer.GetFeatureCount()):
areaFeature = voronoiLayer.GetFeature(v)
areaGeom = areaFeature.GetGeometryRef()
if segmentGeom.Intersects(areaGeom):
point = ogr.Geometry(ogr.wkbPoint)
point.AddPoint(areaFeature.GetField('lon'), areaFeature.GetField('lat'))
segmentCentre = point
# segmentCentre = areaGeom.Centroid()
pointAndDistList.append((segmentCentre, start.Distance(segmentCentre),
areaFeature.GetField('lon'), areaFeature.GetField('lat')))
pointAndDistList.sort(key=operator.itemgetter(1))
for point, distance, lon, lat in pointAndDistList:
newTrajectoryGeom.AddPoint(lon, lat)
if newTrajectoryGeom.Length() == 0.0:
memInitialTrajectories.DeleteFeature(i)
deletedFeature += 1
continue # TODO: Find a better solution for just deleting the specific Feature, e.g. Count this occurence
else:
pointList = [newTrajectoryGeom.GetPoint(i) for i in range(0, newTrajectoryGeom.GetPointCount())]
for ((x1, y1, z1), (x2, y2, z2)) in zip(pointList[:-1], pointList[1:]):
if x1 != x2 and y1 != y2:
wkt = 'LINESTRING (%s %s, %s %s)' % (x1, y1, x2, y2)
splitLine = ogr.CreateGeometryFromWkt(wkt)
trajectoryFeature.SetGeometry(splitLine)
trajectoryFeature.SetFID(FID)
FID += 1
memTrajectoryLayer.CreateFeature(trajectoryFeature)
if similarityDict.get(wkt, None) is None:
similarityDict[wkt] = [trajectoryFeature.Clone()]
else:
similarityDict[wkt].append(trajectoryFeature.Clone())
print deletedFeature, 'Features were deleted due to inappropiated Lengths'
if final:
userfollow, userfriend, userstatus = 0, 0, 0
for i in range(0, memTrajectoryLayer.GetFeatureCount()):
feature = memTrajectoryLayer.GetFeature(i)
userfollow += feature.GetFieldAsDouble('userfollow')
userfriend += feature.GetFieldAsDouble('userfriend')
userstatus += feature.GetFieldAsDouble('userstatus')
pass
userfollow /= memTrajectoryLayer.GetFeatureCount()
userfriend /= memTrajectoryLayer.GetFeatureCount()
userstatus /= memTrajectoryLayer.GetFeatureCount()
finalTrajectories = mem_source.CreateLayer('FinalTrajectoriLayer', WGS84, ogr.wkbPoint)
finalTrajectories.CreateField(ogr.FieldDefn('MaxFollow', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MaxFriend', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MaxStatus', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MinFollow', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MinFriend', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MinStatus', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MeanFollow', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MeanFriend', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('MeanStatus', ogr.OFTReal))
finalTrajectories.CreateField(ogr.FieldDefn('Group', ogr.OFTInteger64))
finalTrajectories.CreateField(ogr.FieldDefn('Size', ogr.OFTInteger64))
groupDict = dict()
for num, key in enumerate([(True, True, True), (True, True, False), (True, False, True), (True, False, False),
(False, True, True), (False, True, False), (False, False, True), (False, False, False)],
start=1):
groupDict[key] = num
for featureList in similarityDict.values():
summary = np.array(zip(*[
(feat.GetFieldAsDouble('userfollow'),
feat.GetFieldAsDouble('userfriend'),
feat.GetFieldAsDouble('userstatus')) for feat in featureList]))
Max, Min, Mean = [max(summary[0, ]), max(summary[1, ]), max(summary[2, ])], \
[min(summary[0, ]), min(summary[1, ]), min(summary[2, ])],\
[sum(summary[0, ]) / float(len(summary[0, ])),
sum(summary[1, ]) / float(len(summary[1, ])),
sum(summary[2, ]) / float(len(summary[2, ]))]
key = (Mean[0] >= userfollow, Mean[1] >= userfriend, Mean[2] >= userstatus)
group = groupDict[key]
allLists = []
[allLists.extend(el) for el in [Max, Min, Mean, [group], [len(featureList)]]]
finalFeature = ogr.Feature(finalTrajectories.GetLayerDefn())
finalFeature.SetGeometry(featureList[0].GetGeometryRef())
for i, value in enumerate(allLists):
finalFeature.SetField(i, value)
finalTrajectories.CreateFeature(finalFeature)
if shapename is not None:
if initial:
ut.CreateShapefile('%s_initial' % shapename, ogr.wkbLineString, memInitialTrajectories)
if generalized:
ut.CreateShapefile('%s_general' % shapename, ogr.wkbLineString, memTrajectoryLayer)
if final:
ut.CreateShapefile('%s_final' % shapename, ogr.wkbLineString, finalTrajectories)
print len(similarityDict), 'was the length of the dict'
if __name__ == '__main__':
    # Pipeline: load points -> DBSCAN labels -> cluster centroid layer ->
    # voronoi regions -> per-user trajectories (generalised export).
    shanghai_all = load_from_textfile('sjtu_taxigps20070201.txt')
    #weibo_all = load_to_memory('weibo_all.shp')
    # NOTE(review): despite its name, weibo_array holds the shanghai points.
    weibo_array = transform_to_array(shanghai_all)
    cluster_list = apply_dbscan(shanghai_all, 0.003, show=False)
    #cluster_list = apply_dbscan(weibo_all, 0.010, minpts=20, plot='smaller_cluster_minpts_20_%f.png' % 0.010)
    cluster_layer, xs, ys = build_cluster_layer(cluster_list, weibo_array)
    voronoi_layer = build_voronois(xs, ys)
    # Test Algorithm Mode
    #shapefileName = 'weibo_test.shp'
    #shapefile = esriDriver.Open(shapefileName, 0)
    #testlayer = shapefile.GetLayer()
    #build_trajectorys(testlayer, shapefile, voronoi_layer,
    #                  shapename='test', initial=False, generalized=False, final=True)
    #Full Mode
    build_trajectorys(shanghai_all, mem_source, voronoi_layer, 'trajec', generalized=True, final=False)