zuroc avatar zuroc committed baa72f3

f

Comments (0)

Files changed (2)

-../misc/config/_env.py
+
+import sys
+
+if sys.getdefaultencoding() != 'utf-8':
+    reload(sys)
+    sys.setdefaultencoding('utf-8')
 from operator import itemgetter
 from html2txt import html2txt
 
-
-@route('/archives')
-@route('/archives/P\d+')
+@route('/(\d+)')
 class _(Handler):
     def get(self):
-        ##<article class="listing-article clearfix"><h3><a href="http://teamspeed.com/forums/showthread.php?p=1400215#post1400215">First Drive: 2013 Aston Martin Vanquish</a></h3>
-        for link in self.extract_all(
-            '<article class="listing-article clearfix">', '</a></h3>'
-        ):
-            link = extract('href="','"',link)
-            print link
+        pass
 
-        next_link = self.extract_all(
-            'href="http://teamspeed.com/archives/P','"'
-        )[-1]
-        next_link = "http://teamspeed.com/archives/P"+next_link
-        spider.put(next_link)
-
-
+@route('/find/recommend')
+class _(Handler):
+    def get(self):
+        now_id = int(self.get_argument("id", 0))
+        page = int(self.get_argument("pi", 0))
+        if now_id:
+            for link in self.extract_all('<h3 class="nickname">','</h3>'):
+                spider.put(link)
+            if page == 0:
+                page_list = set(self.extract_all("href=\"/find/recommend?pi=","&"))
+                for i in map(int,page_list):
+                    if page:
+                        spider.put("http://xianguo.com/find/recommend?id=%s&pi=%s"%(now_id,page))
+        else:
+            for id in self.extract_all(
+                'href="/find/recommend?id=', '"'
+            ):
+                spider.put("http://xianguo.com/find/recommend?id=%s&pi=0"%id)
 
 if __name__ == '__main__':
 
-    URL = "http://teamspeed.com/archives"
+    URL = 'http://xianguo.com/find/recommend'
     spider.put(URL)
 
     # 10 concurrent crawler threads; page read timeout is 30 seconds
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.