Commits

Dmitri Lebedev committed b99a809 Merge

Merge with refactoring

  • Participants
  • Parent commits be3985f, 92b2a81

Comments (0)

Files changed (8)

+syntax: glob
+**/\#*\#
+.*
+*.app
+*.orig
+*.py[co]
+ipython_log.py
+*~
+
+pypy
+*.mp
+python-osm = [git]https://github.com/werner2101/python-osm.git
+fedbca0e514b12f2c76a84f42e1549c8e1644096 python-osm
+from collections import defaultdict
+from itertools import izip, ifilter, chain, starmap
+from math import radians, sin, cos
+
+from numpy import array, sum as npsum
+
+from util import concurrent_map, merge, pairs
+
+
+class Vertex(object):
+	"""
+	A vertex that is connected to other vertices with edges.
+	In a single coordinate, only 1 vertex instance can exist. If you
+	try to create a new vertex at the same co-ordinate,
+	the existing vertex will be returned:
+
+	> a = Vertex(1, 2)
+	> b = Vertex(1, 2)
+	> a == b
+	True
+
+	Object a will be equal to object b.
+
+	Also stores 3-dimensional co-ordinates to simplify distance
+	calculations.
+	"""
+	_instances = {}
+
+	def __new__(cls, lat, lon):
+		c = (lat, lon)
+
+		if c not in cls._instances:  # create a new instance only if none exists in this place
+			self = super(Vertex, cls).__new__(cls)
+			self.coords = lat, lon
+			cls._instances[c] = self
+
+		return cls._instances[c]
+
+	@staticmethod
+	def get_3d(lat, lon):
+		"""
+		Returns the 3-dimensional co-ordinates of the point specified by
+		latitude and longtitude.
+
+		If you look at (0, 0), X axis goes to the left (Indian ocean),
+		Y axis is directed at you (Greenwich meridian x Equator),
+		Z axis is directed at the North pole.
+		"""
+
+		la, lo = map(radians, (lat, lon))
+		r = 6371  # earth mean radius
+		return array((sin(lo) * cos(la) * r, cos(lo) * cos(la) * r, sin(la) * r))
+
+	def __init__(self, lat, lon):
+		# store co-ordinates in 3d space to ease distance calculations
+		self.xyz = self.get_3d(lat, lon)
+
+	def __repr__(self):
+		return '%s%s' % (self.__class__.__name__, self.coords)
+
+
+class Edge(object):
+	"""
+	A connection between two Vertices.
+	"""
+	VertexClass = Vertex
+
+	def __init__(self, v1, v2):
+		self.vertices = v1, v2
+
+	@classmethod
+	def from_coords(cls, coords1, coords2):
+		return cls(*starmap(cls.VertexClass, (coords1, coords2)))
+
+	def __repr__(self):
+		return '%s%s' % (self.__class__.__name__, self.vertices)
+
+	@staticmethod
+	def combine_lists(l1, l2):
+		return l1 + l2[1:]
+
+
+class Graph(object):
+	EdgeClass = Edge
+
+	def __init__(self):
+		self.vertices = defaultdict(list)
+
+	def add_edge(self, edge):
+		for v in edge.vertices:
+			self.vertices[v].append(edge)
+
+	def remove_edge(self, edge):
+		for v in edge.vertices:
+			self.vertices[v].remove(edge)
+
+	def compact(self):
+		print len(self.vertices), 'initial vertices'
+
+		# remove vertices that have only 2 edges connected (== not intersections)
+		for vertex, edges in self.vertices.items():
+			if len(edges) == 2:
+				self.add_edge(self.EdgeClass.glue(*edges))  # glue and add the new edge
+				map(self.remove_edge, edges)  # must glue and add before removing
+
+		keys = [k for k, v in self.vertices.items() if not v]  # empty lists
+		for k in keys:
+			del self.vertices[k]
+
+		print sum(map(len, self.vertices.values())), 'edges'
+		print len(self.vertices), 'vertices after compacting'
+
+	def crossings(self):
+		clusters = [set(self.vertices.keys())]
+		print len(clusters[0]), 'intersections in graph'
+
+		key = lambda x: x.xyz[dim]
+		grouper = lambda (i, (x, y)): (key(x) - key(y)) > self.threshold
+
+		def run(cluster):
+			sorted_vertices = sorted(cluster, key=key)
+
+			indexed_pairs = enumerate(pairs(sorted_vertices), 1)  # enumerate from 1, since a gap between x1 & x0 is a split at position 1
+			split_indices = [i for i, pair in ifilter(grouper, indexed_pairs)]
+
+			index_pairs = izip(chain([0], split_indices), chain(split_indices, [None]))
+			return [sorted_vertices[i:j] for i, j in index_pairs]
+
+		for dim in range(3) + range(3):
+			clusters = chain(*map(run, clusters))
+
+		return list(clusters)
+
+	def cluster_vertices(self, nvertices):
+		coords = array([v.xyz for v in nvertices])
+		result = []
+		while len(nvertices) > 1:
+			v = nvertices.pop()
+			coords = coords[:-1]
+
+			tests = npsum(array(coords - v.xyz) ** 2, 1) < self.thr_square
+			closest = [d for d, t in izip(nvertices, tests) if t]
+			if closest:
+				result.append(closest + [v])
+
+		return result
+
+	def outer_vertices(self, clusters):
+		outer_vertices = defaultdict(list)
+		for cluster in clusters:
+			inner_set = cluster
+			key = tuple(cluster)
+			for vertex in cluster:
+				if not set(v for edge in self.vertices[vertex] for v in edge.vertices).issubset(inner_set):
+					outer_vertices[key].append(vertex)
+
+		return dict(outer_vertices)
+
+	def subgraphs(self, threshold):
+		"""
+		Groups the vertices in the graph into clusters not larger than threshold (in metres).
+		Returns a list of clusters, that have multiple points, which are lists of Vertices.
+		"""
+		self.threshold = threshold / 1000.
+		self.thr_square = self.threshold ** 2
+
+		results = concurrent_map(self.cluster_vertices, self.crossings())
+		results = map(merge, results)
+		results = [i for c in ifilter(None, results) for i in c]
+		return results
+
+	def do_all(self, threshold):
+		clusters = self.subgraphs(threshold)
+		print len(clusters), sum(map(len, clusters)), map(len, clusters)
+		compact_data = self.outer_vertices(clusters)
+		print len(compact_data), sum(map(len, compact_data.values())), map(len, compact_data.values())
+from collections import defaultdict
+from itertools import izip, ifilter, starmap
+
+from classes import Edge, Graph
+
+class MpEdge(Edge):
+	@classmethod
+	def from_points(cls, points, ids, data=None):
+		self = cls.from_coords(points[0], points[-1])
+
+		self.points = points
+		self.ids = ids
+		self.data = data  # list of dictionaries
+
+		return self
+
+	@classmethod
+	def glue(cls, e1, e2):
+		if e1.ids[0] == e2.ids[1]:
+			e1, e2 = e2, e1
+
+		points = cls.combine_lists(e1.points, e2.points)
+		ids = (e1.ids[0], e2.ids[1])
+		data = cls.combine_lists(e1.data, e2.data)
+		return cls.from_points(points, ids, data)
+
+
+class MpGraph(Graph):
+	EdgeClass = MpEdge
+
+	def __init__(self, handle=()):
+		super(MpGraph, self).__init__()
+		clean_handle = (line.strip() for line in handle)
+
+		polyline = None
+		is_road = False
+
+		for line in clean_handle:
+			if '[POLYLINE]' in line:  # start recording lines when polyline starts
+				polyline = []
+				is_road = False
+
+			elif '[END]' in line and polyline:  # stop recording at [END]
+				if is_road:
+					self.add_polyline(*self.parse_polyline(polyline))  # if it was a road, add to the graph
+				polyline = None
+				is_road = False
+
+			elif polyline is not None:
+				polyline.append(line)
+				if line.startswith('RoadID'):
+					is_road = True  # mark this as a routable road only if it has RoadID
+
+		self.compact()
+
+	@staticmethod
+	def parse_polyline(lines):
+		points = []
+		ids = []
+		data = defaultdict(list)
+		for l in lines:
+			k, v = l.split('=')
+
+			if k == 'Data0':
+				arr = map(float, v.replace(')', '').replace('(', '').split(','))
+				points += zip(arr[::2], arr[1::2])
+
+			elif k.startswith('Nod'):
+				ids.append(map(int, v.split(',')))
+
+			else:
+				data[k].append(v)  # save the other data to re-create polylines in the end of the process
+
+		return points, ids, [dict(data)]
+
+	def add_polyline(self, points, ids, data=None):
+		"""
+		Splits the polyline into edges and adds them to the graph.
+		Polylines are specified by points (lat, lon),
+		vertices are specified in ids tuples (point index, id, 0), where
+		point index is the number of the point in `points` list.
+		"""
+		for (idx1, id1, a), (idx2, id2, b) in izip(ids[:-1], ids[1:]):
+			self.add_edge(self.EdgeClass.from_points(points[idx1:idx2 + 1], (id1, id2), data))
+
+python-osm/src/osm
+import cProfile
+import sys
+
+from mp import MpGraph
+
+if __name__ == '__main__':
+	threshold = 300
+
+	"""
+	for test data, use http://dl.dropbox.com/u/6721960/2012-03-11/test.mp
+	to run with profiler, do
+	$ python structure.py test.mp x
+	(where x is a second parameter that turns profiling on)
+	"""
+
+	if len(sys.argv) > 2:
+		if sys.argv[2] == 'test':
+			line = [(0, 0), (0, 1), (0, 2), (0, 3), (0, 5)]
+			g = MpGraph()
+			g.add_polyline(line, [(i,0,0) for i in range(len(line))])
+			threshold = 150000
+		else:
+			g = MpGraph(open(sys.argv[1]))
+
+		p = cProfile.Profile()
+		stat = p.runctx('g.do_all(threshold)', {'g': g, 'threshold': threshold}, {})
+		stat.print_stats()
+
+	else:
+		g = MpGraph(open(sys.argv[1]))
+		g.do_all(threshold)

structure.py

-import cProfile
-import sys
-
-from collections import defaultdict
-from itertools import izip, ifilter, chain
-from math import radians, sin, cos
-
-from numpy import array, sum as npsum
-
-from util import concurrent_map, merge, pairs
-
-
-class Vertex(object):
-	"""
-	A vertex that is connected to other vertices with edges.
-	In a single coordinate, only 1 vertex instance can exist. If you
-	try to create a new vertex at the same co-ordinate,
-	the existing vertex will be returned:
-
-	> a = Vertex(1, 2)
-	> b = Vertex(1, 2)
-	> a == b
-	True
-
-	Object a will be equal to object b.
-
-	Also stores 3-dimensional co-ordinates to simplify distance
-	calculations.
-	"""
-	_instances = {}
-
-	def __new__(cls, lat, lon):
-		c = (lat, lon)
-
-		if c not in cls._instances:  # create a new instance only if none exists in this place
-			self = super(Vertex, cls).__new__(cls)
-			self.coords = lat, lon
-			self.edges = []
-			self.closest = []
-			cls._instances[c] = self
-
-		return cls._instances[c]
-
-	@staticmethod
-	def get_3d(lat, lon):
-		"""
-		Returns the 3-dimensional co-ordinates of the point specified by
-		latitude and longtitude.
-
-		If you look at (0, 0), X axis goes to the left (Indian ocean),
-		Y axis is directed at you (Greenwich meridian x Equator),
-		Z axis is directed at the North pole.
-		"""
-
-		la, lo = map(radians, (lat, lon))
-		r = 6371  # earth mean radius
-		return array((sin(lo) * cos(la) * r, cos(lo) * cos(la) * r, sin(la) * r))
-
-	def __init__(self, lat, lon):
-
-		# store co-ordinates in 3d space to ease distance calculations
-		self.xyz = self.get_3d(lat, lon)
-
-	def __repr__(self):
-		return 'Vertex(%s,%s)' % self.coords
-
-
-class Edge(object):
-	"""
-	A connection between two Vertices.
-	"""
-	def __init__(self, points, id1, id2, data=None):
-		self.points = points
-
-		# vertex id are for working with mp only
-		self.ids = id1, id2
-		self.data = data
-		self.vertices = Vertex(*points[0]), Vertex(*points[-1])
-
-		# vertex instances should know which edges are connected to them
-		for v in self.vertices:
-			v.edges.append(self)
-
-	def __repr__(self):
-		return 'Edge (%s, %s)' % self.ids
-
-
-class Graph(object):
-	def __init__(self):
-		self.edges = []
-
-	def add_polyline(self, points, ids, data=None):
-		"""
-		Splits the polyline into edges and adds them to the graph.
-		Polylines are specified by points (lat, lon),
-		vertices are specified in ids tuples (point index, id, 0), where
-		point index is the number of the point in `points` list.
-		"""
-		for (idx1, id1, a), (idx2, id2, b) in izip(ids[:-1], ids[1:]):
-			self.edges.append(Edge(points[idx1:idx2 + 1], id1, id2, data))
-
-	def parse_polyline(self, lines):
-		points = []
-		ids = []
-		data = defaultdict(list)
-		for l in lines:
-			k, v = l.split('=')
-
-			if k == 'Data0':
-				arr = map(float, v.replace(')', '').replace('(', '').split(','))
-				points += zip(arr[::2], arr[1::2])
-
-			elif k.startswith('Nod'):
-				ids.append(map(int, v.split(',')))
-
-			else:
-				data[k].append(v)  # save the other data to re-create polylines in the end of the process
-
-		self.add_polyline(points, ids, dict(data))
-
-	def from_mp(self, handle):
-		clean_handle = (line.strip() for line in handle)
-
-		polyline = None
-		is_road = False
-
-		for line in clean_handle:
-			if '[POLYLINE]' in line:  # start recording lines when polyline starts
-				polyline = []
-				is_road = False
-
-			elif '[END]' in line and polyline:  # stop recording at [END]
-				if is_road:
-					self.parse_polyline(polyline)  # if it was a road, add to the graph
-				polyline = None
-				is_road = False
-
-			elif polyline is not None:
-				polyline.append(line)
-				if line.startswith('RoadID'):
-					is_road = True  # mark this as a routable road only if it has RoadID
-
-	def crossings(self):
-		vertices = set([v for e in self.edges for v in e.vertices if len(v.edges) > 2])
-		clusters = [vertices]
-
-		key = lambda x: x.xyz[dim]
-		grouper = lambda (i, (x, y)): (key(x) - key(y)) > self.threshold
-
-		def run(cluster):
-			sorted_vertices = sorted(cluster, key=key)
-
-			indexed_pairs = enumerate(pairs(sorted_vertices), 1)  # enumerate from 1, since a gap between x1 & x0 is a split at position 1
-			split_indices = [i for i, pair in ifilter(grouper, indexed_pairs)]
-
-			index_pairs = izip(chain([0], split_indices), chain(split_indices, [None]))
-			return [sorted_vertices[i:j] for i, j in index_pairs]
-
-		for dim in range(3) + range(3):
-			clusters = chain(*map(run, clusters))
-
-		return list(clusters)
-
-	def clusterize(self):
-		"""
-		Groups the vertices in the graph into clusters not larger than threshold (in metres).
-		Returns a list of clusters, that have multiple points, which are lists of Vertices.
-		"""
-		crossings = self.crossings()
-
-		results = concurrent_map(self.cluster_vertices, crossings)
-		results = map(merge, results)
-		print sum(map(len, results)), map(len, results)
-		return results
-
-	def cluster_vertices(self, nvertices):
-		coords = array([v.xyz for v in nvertices])
-		result = []
-		while len(nvertices) > 1:
-			v = nvertices.pop()
-			coords = coords[:-1]
-
-			tests = npsum(array(coords - v.xyz) ** 2, 1) < self.thr_square
-			closest = [d for d, t in izip(nvertices, tests) if t]
-			if closest:
-				result.append(closest + [v])
-
-		return result
-
-	def subgraphs(self, threshold):
-		self.threshold = threshold / 1000.
-		self.thr_square = self.threshold ** 2
-		clusters = concurrent_map(self.grow_cluster, self.clusterize())
-		return merge([e.vertices for c in clusters for v in c for e in v.edges])
-
-	def grow_cluster(self, cluster):
-		return []
-		added = True
-		cl = set(cluster)
-
-		while added:
-			added = False
-			new = set([])
-			for vertex in cl:
-				new |= set([v for edge in vertex.edges for v in edge.vertices])
-			new = set([v for v in new if len(v.edges) == 2]) - cl
-
-			if new:
-				cl |= new
-				added = True
-
-		return cl
-
-	def contract(self, subgraph):
-		# coords = array([v.xyz for v in subgraph if len(v.edges) > 2])
-		# average = sum(coords) / 1. / len(coords)
-		subgraph_set = set(subgraph)
-
-		# remove edges completely in the subgraph
-		edges_to_remove = [e for v in subgraph for e in v.edges if not set(e.vertices) - subgraph_set]
-		for e in edges_to_remove:
-			self.edges.remove(e)
-			for v in e.vertices:
-				v.edges.remove(e)
-
-
-
-if __name__ == '__main__':
-	threshold = 300
-
-	"""
-	for test data, use http://dl.dropbox.com/u/6721960/2012-03-11/test.mp
-	to run with profiler, do
-	$ python structure.py test.mp x
-	(where x is a second parameter that turns profiling on)
-	"""
-
-	g = Graph()
-
-	if len(sys.argv) > 2:
-		if sys.argv[2] == 'test':
-			line = [(0, 0), (0, 1), (0, 2), (0, 3), (0, 5)]
-			g.add_polyline(line, [(i,0,0) for i in range(len(line))])
-			threshold = 150000
-		else:
-			g.from_mp(open(sys.argv[1]))
-
-		p = cProfile.Profile()
-		stat = p.runctx('g.subgraphs(threshold)', {'g': g, 'threshold': threshold}, {})
-		stat.print_stats()
-
-	else:
-		g.from_mp(open(sys.argv[1]))
-		g.subgraphs(threshold)