Commits

Anonymous committed 9f88c93

kml generator

  • Parent commits a010cb1

Files changed (2)

+#!/usr/bin/env python
+# coding: utf-8
+
+
+import urllib
+import cPickle as pickle
+import pprint
+
+import simplekml
+
+DOC_ROOT = 'http://stredovek.com/'
+
+with open('places.pkl', 'rb') as f:
+    places = pickle.load(f)
+
+
+#places = places[:5]
+#pprint.pprint(places)
+
+def fixurl(url):
+    # Percent-encode the value after the last '=' (place names may contain
+    # spaces or diacritics that would otherwise break the URL).
+    idx = url.rfind('=')
+    return url[:idx] + '=' + urllib.quote(url[idx + 1:])
+
+def format_desc(place):
+    # Build the HTML balloon description: the place photo followed by its text.
+    return '<img src="%s%s"><p>%s</p>' % (DOC_ROOT, place['image'], place['desc'])
+
+kml = simplekml.Kml()
+for place in places[5:10]:  # just a small slice of places for now
+    # Escape '&' so the link survives inside the KML/XML document.
+    url = fixurl(place['url']).replace('&', '&amp;')
+    kml.newpoint(
+        name=place['title'],
+        coords=[(place['lon'], place['lat'])],  # KML expects (lon, lat)
+        description=format_desc(place),
+        atomlink=DOC_ROOT + url,
+    )
+kml.save("stredovek.kml")
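
That is the whole of the new generator: it loads the scraped places from places.pkl and writes one KML point per place. The only non-obvious step is fixurl, which percent-encodes whatever follows the last '=' in a place URL. A minimal sketch of just that transformation on a made-up URL (the query value is hypothetical; only the quoting logic mirrors the script):

    import urllib

    # Hypothetical place URL; the value after '=' may contain spaces or diacritics.
    url = 'detail.php?name=Some Place'
    idx = url.rfind('=')
    print url[:idx] + '=' + urllib.quote(url[idx + 1:])
    # detail.php?name=Some%20Place

Note that simplekml takes coordinates as (lon, lat) pairs, which is what the generator passes. The second file changed is the scraper that produces places.pkl:
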
 #!/usr/bin/env python
+# coding: utf-8
 
-import requests
 import codecs
+import re
+import time
+import cPickle as pickle
+
+import requests
 from pyquery import PyQuery as pq
 
+DOC_ROOT = 'http://stredovek.com/'
 SOURCES = [
-    {
-        'url': 'http://stredovek.com/list_directory.php?category=hradiste'
+    {
+        'url': 'list_directory.php?category=hradiste'
     }
 ]
 
+# GPS coordinates on the detail page: degrees, minutes, seconds for latitude,
+# then degrees, minutes, seconds for longitude.
+re_gps = re.compile(r'GPS:[^\d]*(\d+)[^\d]*(\d+)[^\d]*([\d\.]+)[^\d]*(\d+)[^\d]*(\d+)[^\d]*([\d\.]+)')
 
-def process_source(html):
+def process_index(html):
     d = pq(html)
     places = []
 
     return places
 
 
-#for source in SOURCES:
-    #req = requests.get(source['url'])
-    #places = process_source(req.txt)
-    #print len(places)
+def process_detail(html, place):
+    d = pq(html)
+    # The first paragraph of the first table holds the description text.
+    place['desc'] = d('table').eq(0).find('p').eq(0).text()
+    m = re_gps.search(html)
+
+    # Convert degrees/minutes/seconds to decimal degrees.
+    place['lat'] = int(m.group(1)) + int(m.group(2))/60.0 + float(m.group(3))/3600.0
+    place['lon'] = int(m.group(4)) + int(m.group(5))/60.0 + float(m.group(6))/3600.0
+
+all_places = []
+
+for source in SOURCES:
+    index = requests.get(DOC_ROOT + source['url'])
+    index.encoding = 'windows-1250'  # the site serves windows-1250 pages
+    places = process_index(index.text)
+
+    for place in places:
+        print place['title']
+        time.sleep(0.2)  # small pause between requests
+        detail = requests.get(DOC_ROOT + place['url'])
+        if detail.status_code != 200:
+            print DOC_ROOT + place['url'], detail.status_code
+            continue
+        detail.encoding = 'windows-1250'
+        process_detail(detail.text, place)
+
+    all_places.extend(places)
+
+with open('places.pkl', 'wb') as f:
+    pickle.dump(all_places, f)
 
-with codecs.open("sample.html", "r", 'windows-1250') as f:
-    places = process_source(f.read())
-    print places[:3]
-    for p in places[:10]:
-        pass
-        #print u"%s %s" % (p['title'], p['url'])
+# with codecs.open("sample.html", "r", 'windows-1250') as f:
+#     places = process_index(f.read())
+#     print places[:3]
+
+# with codecs.open("sample-detail.html", "r", 'windows-1250') as f:
+#     process_detail(f.read(), {})
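
The coordinate handling in process_detail assumes the detail page lists GPS as degrees, minutes and seconds for latitude and then longitude; the conversion to decimal degrees is the usual D + M/60 + S/3600. A quick self-contained check of that arithmetic on a made-up reading (the sample string below is illustrative, not taken from the site):

    import re

    re_gps = re.compile(r'GPS:[^\d]*(\d+)[^\d]*(\d+)[^\d]*([\d\.]+)'
                        r'[^\d]*(\d+)[^\d]*(\d+)[^\d]*([\d\.]+)')

    # Made-up reading in the same "GPS: deg min sec, deg min sec" shape.
    sample = 'GPS: 50 st. 5 min. 6.0 s, 14 st. 25 min. 12.0 s'
    m = re_gps.search(sample)

    lat = int(m.group(1)) + int(m.group(2)) / 60.0 + float(m.group(3)) / 3600.0
    lon = int(m.group(4)) + int(m.group(5)) / 60.0 + float(m.group(6)) / 3600.0
    print lat, lon  # 50.085 14.42
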