Commits

Sai Krishna K  committed 275e0f4

added link parser

  • Participants
  • Parent commits 00d119c

Comments (0)

Files changed (5)

-Usage python get_page.py .. make sure to excute the script in a seperate folder
+Usage
+	 python get_page.py 
+Make sure to execute the script in a separate folder.
+
+Use
+	python link_parser.py
+to get all the links in the page. Use an online JSON viewer to view the links, e.g. http://jsonviewer.stack.hu/
+
+
+

File base_server.py

+import time
+import BaseHTTPServer
+
+HOST_NAME='w4rlock'
+PORT_NUMBER = 8084
+
+class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler):  # Request handler that answers HEAD and GET with a fixed HTML test page.
+	def do_HEAD(s):  # `s` is the handler instance (used in place of the conventional `self`).
+		s.send_response(200)  # Always replies 200; the requested path is not inspected here.
+		s.send_header("Content-type", "text/html")
+		s.end_headers()
+	def do_GET(s):  # Sends the same status and header as do_HEAD, followed by an HTML body.
+		s.send_response(200)
+		s.send_header("Content-type", "text/html")
+		s.end_headers()
+		s.wfile.write("<html><head><title>Title goes here.</title></head>")
+		s.wfile.write("<body><p>This is a test.</p>")
+
+		s.wfile.write("<p>You accessed path: %s</p>" % s.path)  # NOTE(review): s.path is interpolated into the HTML without escaping.
+		s.wfile.write("</body></html>")
+
+if __name__ == '__main__':  # Start the test server only when this file is executed as a script.
+	server_class = BaseHTTPServer.HTTPServer
+	httpd = server_class((HOST_NAME, PORT_NUMBER), MyHandler)  # Bind to the module-level host/port, with MyHandler serving requests.
+	print time.asctime(), "Server Starts - %s:%s" % (HOST_NAME, PORT_NUMBER)
+	try:
+	    httpd.serve_forever()  # Serve requests until interrupted.
+	except KeyboardInterrupt:
+	    pass  # Swallow the interrupt (e.g. Ctrl-C) so the cleanup below still runs.
+	httpd.server_close()
+	print time.asctime(), "Server Stops - %s:%s" % (HOST_NAME, PORT_NUMBER)
 
 universe_page_urls = []
 
-
-get_page_data(url,universe_domain)
+root_file_name = "index.html"
+get_page_data(url,universe_domain,root_file_name)
 
 
 

File page_data.py

 
 #universe_page_urls = []
 
-def get_page_data(url,universe_domain):
-
-	root_file_name = "index.html"
+def get_page_data(url,universe_domain,root_file_name):
 	try:
 		opener = urllib2.build_opener()
 		opener.addheaders = BROWSER_HEADERS