# -*- coding: utf-8 -*-
import numpy as np
import utilities as ut
from math import cos, radians
from osgeo import ogr, osr
import operator
import pytess
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
# gdal.SetConfigOption('OGR_ENABLE_PARTIAL_REPROJECTION', 'YES')

##############################################################################
# Boot Up Phase
print 'initialising geographical toolchain'
WGS84 = osr.SpatialReference()
WorldMercator = osr.SpatialReference()
WGS84.ImportFromEPSG(4326)
WorldMercator.ImportFromEPSG(3395)
trans_to_m = osr.CoordinateTransformation(WGS84, WorldMercator)
trans_to_degree = osr.CoordinateTransformation(WorldMercator, WGS84)
esriDriver = ogr.GetDriverByName('ESRI Shapefile')
print 'creating a temporary datasource in memory'
memDriver = ogr.GetDriverByName('MEMORY')
mem_source = memDriver.CreateDataSource('memdrive')


def load_from_textfile(filename):
    """Load comma-separated point records from a text file into an in-memory point layer."""
    memoryLayer = mem_source.CreateLayer('textfile', WGS84, ogr.wkbPoint)
    memoryLayer.CreateField(ogr.FieldDefn('ID', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('userID', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('lon', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('lat', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('info1', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('info2', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('dtime_str', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('status', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('info3', ogr.OFTString))
    memoryLayer.CreateField(ogr.FieldDefn('info4', ogr.OFTString))
    with open(filename) as f:
        for line in f:
            if line.strip():  # skip empty lines
                attrList = line.split(',')
                textFeature = ogr.Feature(memoryLayer.GetLayerDefn())
                geometry = ogr.Geometry(ogr.wkbPoint)
                geometry.AddPoint_2D(float(attrList[2]), float(attrList[3]))
                textFeature.SetGeometry(geometry)
                for i, value in enumerate(attrList):
                    textFeature.SetField(i, value)
                memoryLayer.CreateFeature(textFeature)
    return memoryLayer


def load_to_memory(ShapeName):
    """Copy an existing ESRI shapefile into an in-memory point layer."""
    print 'loading local files'
    ShapeName = ShapeName if ShapeName.endswith('.shp') else '%s%s' % (ShapeName, '.shp')
    shapefile = esriDriver.Open(ShapeName, 0)
    layer = mem_source.CreateLayer('layer', WGS84, ogr.wkbPoint)
    inputLayer = shapefile.GetLayer()
    OLDDefn = inputLayer.GetLayerDefn()
    print 'copying to memory'
    for i in range(OLDDefn.GetFieldCount()):
        layer.CreateField(OLDDefn.GetFieldDefn(i))
    for feature in inputLayer:
        layer.CreateFeature(feature)
    print 'finished loading - closing input - cleanup'
    layer.ResetReading()
    return layer


def transform_to_array(layer):
    """Return the point coordinates of a layer as an (n, 2) numpy array."""
    print 'transferring shapefile to numpy array'
    array = np.array([[feature.GetGeometryRef().GetX(), feature.GetGeometryRef().GetY()]
                      for feature in layer])
    layer.ResetReading()
    return array


def apply_dbscan(layer, epsFactor, minpts=0, eps=0, show=False, plot=None):
    """Cluster the layer's points with DBSCAN; minPts and eps are derived automatically if not given."""
    def plot_results():
        # Plot result
        # Black removed and is used for noise instead.
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        unique_labels = set(labels)
        colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
        for k, col in zip(unique_labels, colors):
            if k == -1:
                # Black used for noise.
                col = 'k'
            class_member_mask = (labels == k)
            xy = array[class_member_mask & core_samples_mask]
            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=14)
            xy = array[class_member_mask & ~core_samples_mask]
            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=0.2)
        plt.title('Estimated number of clusters: %d' % n_clusters_)
        if show:
            plt.show()
        if plot is not None:
            plt.savefig(plot, bbox_inches='tight')

    if minpts <= 0:
        print 'calculate minPts'
        geomcol = ogr.Geometry(ogr.wkbGeometryCollection)
        for feature in layer:
            geomcol.AddGeometry(feature.GetGeometryRef())
        layer.ResetReading()
        convexhull = geomcol.ConvexHull()
        convexhull.Transform(trans_to_m)
        minpts = (convexhull.GetArea() / layer.GetFeatureCount()) / 4000  # per 5km**2
        print 'automatic minPts at', minpts
    else:
        print 'determined minPts =', minpts
    if eps <= 0:
        print 'calculate eps'
        minX, maxX, minY, maxY = layer.GetExtent()
        # Scale the search radius by the cosine of the mid latitude (converted to radians).
        # Change epsFactor for a variation in the search radius.
        eps = epsFactor * cos(radians(minY + (maxY - minY) / 2))
        print 'eps at', eps
    else:
        print 'determined eps =', eps

    # Compute DBSCAN
    array = transform_to_array(layer)
    print 'starting DBSCAN'
    db = DBSCAN(eps=eps, min_samples=minpts).fit(array)
    labels = db.labels_
    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print 'Estimated number of clusters: %d' % n_clusters_
    if plot or show:
        plot_results()
    return labels


def build_cluster_layer(labels, array, shapename=None):
    """Create a point layer holding one centroid feature per DBSCAN cluster."""
    print 'creating cluster layer'
    xValues, yValues = [], []
    # Create a new layer with cluster centroids
    layer = mem_source.CreateLayer('ClusterLayer', WGS84, ogr.wkbPoint)
    layer.CreateField(ogr.FieldDefn('lon', ogr.OFTReal))
    layer.CreateField(ogr.FieldDefn('lat', ogr.OFTReal))
    layer.CreateField(ogr.FieldDefn('cluster', ogr.OFTInteger64))
    print 'sorting points to clusters'
    geomColDict = dict()
    for num, label in enumerate(labels):
        if label != -1:
            if geomColDict.get(label, None) is None:
                geomColDict[label] = ogr.Geometry(ogr.wkbGeometryCollection)
            point = ogr.Geometry(ogr.wkbPoint)
            point.AddPoint(array[num, 0], array[num, 1])
            geomColDict[label].AddGeometry(point)
    print 'creating cluster centroid features'
    for label in set(labels):
        if label != -1:
            feature = ogr.Feature(layer.GetLayerDefn())
            centre = geomColDict[label].Centroid()
            feature.SetGeometry(centre)
            feature.SetField('lon', centre.GetX())
            xValues.append(centre.GetX())
            feature.SetField('lat', centre.GetY())
            yValues.append(centre.GetY())
            feature.SetField('cluster', int(label))
            layer.CreateFeature(feature)
    print 'Cluster Centroids Created'
    if shapename is not None:
        ut.CreateShapefile(shapename, ogr.wkbPoint, layer)
    return [layer, xValues, yValues]


def build_voronois(xValues, yValues, shapename=None):
    """Create a new layer with Voronoi polygons built around the cluster centroids."""
    print 'building Voronoi Polygons from Cluster Centroids'
    layer = mem_source.CreateLayer('VoronoiLayer', WGS84, ogr.wkbPolygon)
    layer.CreateField(ogr.FieldDefn('lon', ogr.OFTReal))
    layer.CreateField(ogr.FieldDefn('lat', ogr.OFTReal))
    layer.CreateField(ogr.FieldDefn('cluster', ogr.OFTInteger64))
    voronois = pytess.voronoi(zip(xValues, yValues))
    print 'writing features into Voronoi layer'
    for label, (centre, coordinates) in enumerate(voronois):
        # Build the polygon WKT from the cell's coordinate ring (closed by repeating the first point).
        wkt = 'Polygon (('
        coordinates.append(coordinates[0])
        for x, y in coordinates:
            wkt = '%s %f %f,' % (wkt, x, y)
        wkt = '%s))' % wkt[:-1]
        feature = ogr.Feature(layer.GetLayerDefn())
        feature.SetGeometry(ogr.CreateGeometryFromWkt(wkt))
        feature.SetField('lon', centre[0] if centre is not None else 0)
        feature.SetField('lat', centre[1] if centre is not None else 0)
        feature.SetField('cluster', label)
        layer.CreateFeature(feature)
    if shapename is not None:
        ut.CreateShapefile(shapename, ogr.wkbPolygon, layer)
    print 'Voronois Done'
    return layer


def build_trajectorys(inputLayer, inputSource, voronoiLayer, shapename=None,
                      initial=False, generalized=False, final=False):
    """Build per-user trajectories, optionally generalised onto cluster centroids and aggregated per segment."""
    print 'Preparing User Dictionaries and setting Layers'
    layerDefn = inputLayer.GetLayerDefn()
    memInitialTrajectories = mem_source.CreateLayer('InitialTrajecLayer', WGS84, ogr.wkbLineString)
    for i in range(layerDefn.GetFieldCount()):
        memInitialTrajectories.CreateField(layerDefn.GetFieldDefn(i))
    memTrajectoryLayer = mem_source.CreateLayer('TrajectoryLayer', WGS84, ogr.wkbLineString)
    for i in range(layerDefn.GetFieldCount()):
        memTrajectoryLayer.CreateField(layerDefn.GetFieldDefn(i))
    trajectoryDict, featureDict = dict(), dict()
    print 'Creating All initial Trajectories'
    for i in range(0, inputLayer.GetFeatureCount()):
        feature = inputLayer.GetFeature(i)
        featureGeom = feature.GetGeometryRef()
        pointX = pointY = None
        for v in range(0, voronoiLayer.GetFeatureCount()):
            areaFeature = voronoiLayer.GetFeature(v)
            areaGeom = areaFeature.GetGeometryRef()
            if featureGeom.Intersects(areaGeom):
                pointX, pointY = areaFeature.GetField('lon'), areaFeature.GetField('lat')
                # featureGeom = areaGeom.Centroid()  # use this if you want the geometric centre of a Voronoi shape
                break
        if pointX is not None and pointY is not None:
            userID = feature.GetFieldAsDouble('userID')
            if trajectoryDict.get(userID, None) is None:
                trajectoryDict[userID] = ogr.Geometry(ogr.wkbLineString)
            trajectoryDict[userID].AddPoint(pointX, pointY)
            if featureDict.get(userID, None) is None:
                featureDict[userID] = feature
    fieldID = 0
    for userID, trajectory in trajectoryDict.items():
        if trajectory.GetPointCount() >= 2 and trajectory.Length() > 0.0:
            trajectoryFeature = featureDict[userID]
            trajectoryFeature.SetGeometry(trajectory)
            trajectoryFeature.SetFID(fieldID)
            fieldID += 1
            memInitialTrajectories.CreateFeature(trajectoryFeature)

    if generalized or final:
        percentageBorder, initalCount, similarityDict, FID, deletedFeature = \
            0, memInitialTrajectories.GetFeatureCount(), dict(), 0, 0
        print 'mapping Trajectories to Cluster centroids'
        # This is just for visualization of progress
        for i in range(0, initalCount):
            trajectoryFeature = memInitialTrajectories.GetFeature(i)
            if i >= percentageBorder:
                print 'processing %i of %i' % (i, initalCount)
                print str(i / float(initalCount) * 100)[:2], '%'
                percentageBorder += (initalCount / 20.0)
            newTrajectoryGeom = ogr.Geometry(ogr.wkbLineString)
            trajectoryGeom = trajectoryFeature.GetGeometryRef()
            pointList = [trajectoryGeom.GetPoint(x) for x in range(0, trajectoryGeom.GetPointCount())]
            for ((x1, y1, z1), (x2, y2, z2)) in zip(pointList[:-1], pointList[1:]):
                wkt = 'LINESTRING (%f %f, %f %f)' % (x1, y1, x2, y2)
                segmentGeom = ogr.CreateGeometryFromWkt(wkt)
                pointAndDistList = []
                start = ogr.Geometry(ogr.wkbPoint)
                start.AddPoint(x1, y1)
                for v in range(0, voronoiLayer.GetFeatureCount()):
                    areaFeature = voronoiLayer.GetFeature(v)
                    areaGeom = areaFeature.GetGeometryRef()
                    if segmentGeom.Intersects(areaGeom):
                        point = ogr.Geometry(ogr.wkbPoint)
                        point.AddPoint(areaFeature.GetField('lon'), areaFeature.GetField('lat'))
                        segmentCentre = point
                        # segmentCentre = areaGeom.Centroid()
                        pointAndDistList.append((segmentCentre, start.Distance(segmentCentre),
                                                 areaFeature.GetField('lon'), areaFeature.GetField('lat')))
                pointAndDistList.sort(key=operator.itemgetter(1))
                for point, distance, lon, lat in pointAndDistList:
                    newTrajectoryGeom.AddPoint(lon, lat)
            if newTrajectoryGeom.Length() == 0.0:
                memInitialTrajectories.DeleteFeature(i)
                deletedFeature += 1
                continue  # TODO: Find a better solution than just deleting the specific feature, e.g. count this occurrence
            else:
                pointList = [newTrajectoryGeom.GetPoint(p) for p in range(0, newTrajectoryGeom.GetPointCount())]
                for ((x1, y1, z1), (x2, y2, z2)) in zip(pointList[:-1], pointList[1:]):
                    if x1 != x2 or y1 != y2:  # skip zero-length segments between identical consecutive points
                        wkt = 'LINESTRING (%s %s, %s %s)' % (x1, y1, x2, y2)
                        splitLine = ogr.CreateGeometryFromWkt(wkt)
                        trajectoryFeature.SetGeometry(splitLine)
                        trajectoryFeature.SetFID(FID)
                        FID += 1
                        memTrajectoryLayer.CreateFeature(trajectoryFeature)
                        if similarityDict.get(wkt, None) is None:
                            similarityDict[wkt] = [trajectoryFeature.Clone()]
                        else:
                            similarityDict[wkt].append(trajectoryFeature.Clone())
        print deletedFeature, 'features were deleted due to inappropriate lengths'

    if final:
        userfollow, userfriend, userstatus = 0, 0, 0
        for i in range(0, memTrajectoryLayer.GetFeatureCount()):
            feature = memTrajectoryLayer.GetFeature(i)
            userfollow += feature.GetFieldAsDouble('userfollow')
            userfriend += feature.GetFieldAsDouble('userfriend')
            userstatus += feature.GetFieldAsDouble('userstatus')
        userfollow /= memTrajectoryLayer.GetFeatureCount()
        userfriend /= memTrajectoryLayer.GetFeatureCount()
        userstatus /= memTrajectoryLayer.GetFeatureCount()
        finalTrajectories = mem_source.CreateLayer('FinalTrajectoriLayer', WGS84, ogr.wkbLineString)
        finalTrajectories.CreateField(ogr.FieldDefn('MaxFollow', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MaxFriend', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MaxStatus', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MinFollow', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MinFriend', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MinStatus', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MeanFollow', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MeanFriend', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('MeanStatus', ogr.OFTReal))
        finalTrajectories.CreateField(ogr.FieldDefn('Group', ogr.OFTInteger64))
        finalTrajectories.CreateField(ogr.FieldDefn('Size', ogr.OFTInteger64))
        # Map every combination of (follow, friend, status above/below average) to a group number 1-8.
        groupDict = dict()
        for num, key in enumerate([(True, True, True), (True, True, False), (True, False, True), (True, False, False),
                                   (False, True, True), (False, True, False), (False, False, True),
                                   (False, False, False)], start=1):
            groupDict[key] = num
        for featureList in similarityDict.values():
            summary = np.array(zip(*[(feat.GetFieldAsDouble('userfollow'),
                                      feat.GetFieldAsDouble('userfriend'),
                                      feat.GetFieldAsDouble('userstatus')) for feat in featureList]))
            Max = [max(summary[0]), max(summary[1]), max(summary[2])]
            Min = [min(summary[0]), min(summary[1]), min(summary[2])]
            Mean = [sum(summary[0]) / float(len(summary[0])),
                    sum(summary[1]) / float(len(summary[1])),
                    sum(summary[2]) / float(len(summary[2]))]
            key = (Mean[0] >= userfollow, Mean[1] >= userfriend, Mean[2] >= userstatus)
            group = groupDict[key]
            allLists = []
            for el in [Max, Min, Mean, [group], [len(featureList)]]:
                allLists.extend(el)
            finalFeature = ogr.Feature(finalTrajectories.GetLayerDefn())
            finalFeature.SetGeometry(featureList[0].GetGeometryRef())
            for i, value in enumerate(allLists):
                finalFeature.SetField(i, value)
            finalTrajectories.CreateFeature(finalFeature)

    if shapename is not None:
        if initial:
            ut.CreateShapefile('%s_initial' % shapename, ogr.wkbLineString, memInitialTrajectories)
        if generalized:
            ut.CreateShapefile('%s_general' % shapename, ogr.wkbLineString, memTrajectoryLayer)
        if final:
            ut.CreateShapefile('%s_final' % shapename, ogr.wkbLineString, finalTrajectories)
            print len(similarityDict), 'was the length of the dict'


if __name__ == '__main__':
    shanghai_all = load_from_textfile('sjtu_taxigps20070201.txt')
    # weibo_all = load_to_memory('weibo_all.shp')
    weibo_array = transform_to_array(shanghai_all)
    cluster_list = apply_dbscan(shanghai_all, 0.003, show=False)
    # cluster_list = apply_dbscan(weibo_all, 0.010, minpts=20, plot='smaller_cluster_minpts_20_%f.png' % 0.010)
    cluster_layer, xs, ys = build_cluster_layer(cluster_list, weibo_array)
    voronoi_layer = build_voronois(xs, ys)

    # Test Algorithm Mode
    # shapefileName = 'weibo_test.shp'
    # shapefile = esriDriver.Open(shapefileName, 0)
    # testlayer = shapefile.GetLayer()
    # build_trajectorys(testlayer, shapefile, voronoi_layer,
    #                   shapename='test', initial=False, generalized=False, final=True)

    # Full Mode
    build_trajectorys(shanghai_all, mem_source, voronoi_layer, 'trajec', generalized=True, final=False)
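
# ---------------------------------------------------------------------------
# Note: the functions above hand their results to utilities.CreateShapefile(
# name, geom_type, layer), which is not part of this file. The commented-out
# sketch below only illustrates what such a helper might look like, inferred
# from the call sites; it is an assumption, not the original implementation.
#
# def CreateShapefile(shapename, geom_type, layer):
#     name = shapename if shapename.endswith('.shp') else '%s.shp' % shapename
#     driver = ogr.GetDriverByName('ESRI Shapefile')
#     source = driver.CreateDataSource(name)
#     outLayer = source.CreateLayer(shapename, layer.GetSpatialRef(), geom_type)
#     layerDefn = layer.GetLayerDefn()
#     for i in range(layerDefn.GetFieldCount()):
#         outLayer.CreateField(layerDefn.GetFieldDefn(i))
#     layer.ResetReading()
#     for feature in layer:
#         outLayer.CreateFeature(feature)
#     source = None  # close the datasource to flush features to disk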