zuroc avatar zuroc committed fac5162

f

Comments (0)

Files changed (2)

teamspeed_com.py

-#coding:utf-8
-from spider.spider import route, Handler, spider, extract
-import _env
-from os.path import abspath, dirname, join
-from operator import itemgetter
-from html2txt import html2txt
-
-@route('/(\d+)')
-class _(Handler):
-    def get(self):
-        pass
-
-@route('/find/recommend')
-class _(Handler):
-    def get(self):
-        now_id = int(self.get_argument("id", 0))
-        page = int(self.get_argument("pi", 0))
-        if now_id:
-            for link in self.extract_all('<h3 class="nickname">','</h3>'):
-                spider.put(link)
-            if page == 0:
-                page_list = set(self.extract_all("href=\"/find/recommend?pi=","&"))
-                for i in map(int,page_list):
-                    if page:
-                        spider.put("http://xianguo.com/find/recommend?id=%s&pi=%s"%(now_id,page))
-        else:
-            for id in self.extract_all(
-                'href="/find/recommend?id=', '"'
-            ):
-                spider.put("http://xianguo.com/find/recommend?id=%s&pi=0"%id)
-
-if __name__ == '__main__':
-
-    URL = 'http://xianguo.com/find/recommend'
-    spider.put(URL)
-
-    #10个并发抓取线程 , 网页读取超时时间为30秒
-    spider.run(10, 30)
-
+#coding:utf-8
+from spider.spider import route, Handler, spider, extract
+import _env
+from os.path import abspath, dirname, join
+from operator import itemgetter
+from html2txt import html2txt, unescape
+
+@route('/(\d+)')
+class _(Handler):
+    """Handler for pages whose URL matches the numeric route pattern above."""
+
+    def get(self, id):
+        """Print the id, avatar image URL, website link and name from the page.
+
+        ``id`` is presumably the digit group captured by the route pattern;
+        it is only printed here, never parsed.
+        """
+        # Contents of the "beings-name" div, passed through html2txt's unescape.
+        name_html = self.extract('<div class="beings-name">', '</div>')
+        title = unescape(name_html)
+
+        # href of the first anchor inside the "beings-website" div.
+        link = self.extract('<div class="beings-website"><a href="', '"')
+
+        # The avatar anchor wraps the image tag; take the src value from it.
+        avatar_html = self.extract('<a class="avatar" href="/', '</a>')
+        img = extract('src="', '"', avatar_html)
+
+        print id, img, link, title
+
+
+@route('/find/recommend')
+class _(Handler):
+    """Handler for the recommendation listing; enqueues the pages it links to."""
+
+    def get(self):
+        """Enqueue follow-up URLs found on a recommendation page.
+
+        Reads the ``id`` and ``pi`` query arguments (both default to 0):
+
+        * without an ``id``, every ``/find/recommend?id=...`` link on the page
+          is enqueued at ``pi=0``;
+        * with an ``id``, every link found inside an ``<h3 class="nickname">``
+          element is enqueued, and - on page 0 only - every other page number
+          advertised by the pagination links is enqueued as well.
+        """
+        now_id = int(self.get_argument("id", 0))
+        page = int(self.get_argument("pi", 0))
+
+        if not now_id:
+            # Index page: fan out to each listing, starting at its first page.
+            for rec_id in self.extract_all('href="/find/recommend?id=', '"'):
+                spider.put("http://xianguo.com/find/recommend?id=%s&pi=0" % rec_id)
+            return
+
+        # Listing page: follow the link inside every nickname heading.
+        for link in self.extract_all('<h3 class="nickname">', '</h3>'):
+            link = extract('"/', '"', link)
+            spider.put("http://xianguo.com/" + link)
+
+        if page == 0:
+            # Pagination is only expanded from page 0, so the other pages do
+            # not enqueue each other again.
+            page_list = set(self.extract_all("href=\"/find/recommend?pi=", "&"))
+            for i in map(int, page_list):
+                # Use the page number taken from the link. The previous code
+                # tested and formatted ``page``, which is always 0 in this
+                # branch, so no further page was ever enqueued.
+                if i:
+                    spider.put("http://xianguo.com/find/recommend?id=%s&pi=%s" % (now_id, i))
+
+if __name__ == '__main__':
+    # Seed the crawl with the recommendation index page.
+    URL = 'http://xianguo.com/find/recommend'
+    spider.put(URL)
+
+    # Per the original note: 10 concurrent fetch threads and a 30-second
+    # page read timeout.
+    WORKERS, TIMEOUT = 10, 30
+    spider.run(WORKERS, TIMEOUT)
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.