Commits

Tim Tomes committed 62c2aa7

initial import.

Comments (0)

Files changed (5)

+# OS generated files #
+######################
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+Icon?
+ehthumbs.db
+Thumbs.db
+
+# Project specific items #
+##########################
+phantomjs
+1. Install pre-requisites.
+
+    - Captures require PyQt4 or Phantomjs (recommended).
+    - Phantomjs:
+        - Compile Phantomjs and place the binary in the same directory as the source files.
+        - Make sure the binary is called "phantomjs".
+
+2. Run the script.
+
+    - python ./webtopng.py -h
+
+Changelog
+=========
+v1.1:
+ * no longer freezes on redirects to 401 authentication.
+ * stores each run in a unique directory.
+ * shows headers for final destination rather than redirect.
+ * denotes redirect next to the status header.
+
+NOTE: Keep in mind that there is no reliable way to follow a JavaScript redirect in an automated fashion. Pages that use JavaScript to redirect the browser will show up as a blank screenshot.
+var page = require('webpage').create(),
+    url, filename, size;
+
+url = phantom.args[0];
+filename = phantom.args[1];
+page.viewportSize = { width: 800, height: 600 };
+page.clipRect = { top: 0, left: 0, width: 800, height: 600 };
+page.open(url, function (status) {
+    //if (status !== 'success') {
+    //    console.log('Unable to load the address!');
+    //} else {
+        window.setTimeout(function () {
+            page.render(filename);
+            //console.log(url  + ' complete.');
+            phantom.exit();
+        }, 200);
+    //}
+});
+# -*- coding: utf-8 -*-
+
+"""
+This tries to do more or less the same thing as CutyCapt, but as a
+python module.
+
+Modified by Tim Tomes (@LaNMaSteR53) July 2012 to support PeepingTom:
+http://code.google.com/p/ptscripts/source/browse/trunk/peepingtom
+
+This is a derived work from CutyCapt: http://cutycapt.sourceforge.net/
+
+////////////////////////////////////////////////////////////////////
+//
+// CutyCapt - A Qt WebKit Web Page Rendering Capture Utility
+//
+// Copyright (C) 2003-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////
+
+"""
+
+import sys
+from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork
+
+
+class Capturer(object):
+    """A class to capture webpages as images"""
+
+    def __init__(self, url, filename):
+        self.url = url
+        self.filename = filename
+        self.saw_initial_layout = False
+        self.saw_document_complete = False
+
+    def loadFinishedSlot(self):
+        self.saw_document_complete = True
+        if self.saw_initial_layout and self.saw_document_complete:
+            self.doCapture()
+
+    def initialLayoutSlot(self):
+        self.saw_initial_layout = True
+        if self.saw_initial_layout and self.saw_document_complete:
+            self.doCapture()
+
+    def capture(self):
+        """Captures url as an image to the file specified"""
+        self.wb = QtWebKit.QWebPage()
+        self.network_manager = QtNetwork.QNetworkAccessManager()
+        self.network_manager.sslErrors.connect(self.on_ssl_errors)
+        self.wb.setNetworkAccessManager(self.network_manager)
+        self.wb.mainFrame().setScrollBarPolicy(
+            QtCore.Qt.Horizontal, QtCore.Qt.ScrollBarAlwaysOff)
+        self.wb.mainFrame().setScrollBarPolicy(
+            QtCore.Qt.Vertical, QtCore.Qt.ScrollBarAlwaysOff)
+
+        self.wb.loadFinished.connect(self.loadFinishedSlot)
+        self.wb.mainFrame().initialLayoutCompleted.connect(
+            self.initialLayoutSlot)
+
+        self.wb.mainFrame().load(QtCore.QUrl(self.url))
+
+    def doCapture(self):
+        # Set the size of the (virtual) browser window
+        size = self.wb.mainFrame().contentsSize()
+        size.setWidth(800)
+        size.setHeight(600)
+        self.wb.setViewportSize(size)
+        #self.wb.setViewportSize(self.wb.mainFrame().contentsSize())
+        img = QtGui.QImage(self.wb.viewportSize(), QtGui.QImage.Format_ARGB32)
+        #print self.wb.viewportSize()
+        painter = QtGui.QPainter(img)
+        self.wb.mainFrame().render(painter)
+        painter.end()
+        img.save(self.filename)
+        QtCore.QCoreApplication.instance().quit()
+
+    def on_ssl_errors(self, reply, errors):
+        url = unicode(reply.url().toString())
+        reply.ignoreSslErrors()
+        #print "SSL certificate error ignored: %s" % url
+
+if __name__ == "__main__":
+    """Run a simple capture"""
+    app = QtGui.QApplication(sys.argv)
+    c = Capturer(sys.argv[1], sys.argv[2])
+    c.capture()
+    app.exec_()
+import sys, threading, Queue, urllib2, subprocess, httplib, re, time, os
+from urlparse import urlparse
+
+#=================================================
+# MAIN FUNCTION
+#=================================================
+
+def main():
+    import optparse
+    usage = "%prog [options]\n\n%prog - Tim Tomes (@LaNMaSteR53) (www.lanmaster53.com)"
+    parser = optparse.OptionParser(usage=usage, version="%prog 1.1")
+    parser.add_option('-v', help='Enable verbose mode.', dest='verbose', default=False, action='store_true')
+    parser.add_option('-i', help='File input mode. Name of input file. [IP:PORT]', dest='infile', type='string', action='store')
+    parser.add_option('-u', help='Single URL input mode. URL as a string.', dest='url', type='string', action='store')
+    parser.add_option('-q', help='PyQt4 capture mode. PyQt4 python modules required.', dest='pyqt', default=False, action='store_true')
+    parser.add_option('-p', help='Phantonjs capture mode. Phantomjs required.', dest='phantom', default=False, action='store_true')
+    (opts, args) = parser.parse_args()
+
+    if not opts.infile and not opts.url:
+        parser.error("[!] Must provide input. Mode option required.")
+    if not opts.pyqt and not opts.phantom:
+        capture = False
+        print '[!] WARNING: No capture mode provided. Retrieving header data only.'
+    else:
+        capture = True
+    if opts.infile:
+        targets = open(opts.infile).read().split()
+    if opts.url:
+        targets = []
+        targets.append(opts.url)
+
+    dir = time.strftime('%y%m%d_%H%M%S', time.localtime())
+    os.mkdir(dir)
+    outfile = '%s/report.html' % (dir)
+
+    zombies = []
+    servers = {}
+    #logic for validating list of urls and building a new list which understands the 302 redirected sites.
+    for target in targets:
+        headers = None
+        prefix = ''
+        # best guess at protocol prefix
+        if not target.startswith('http'):
+            if target.find(':') == -1: target += ':80'
+            prefix = 'http://'
+            if target.split(':')[1].find('443') != -1:
+                prefix = 'https://'
+        # drop port suffix where not needed
+        if target.endswith(':80'): target = ':'.join(target.split(':')[:-1])
+        if target.endswith(':443'): target = ':'.join(target.split(':')[:-1])
+        # build legitimate target url
+        target = prefix + target
+        code, headers = getHeaderData(target)
+        if code == 'zombie':
+            zombies.append((target, headers))
+        else:
+            filename = '%s.png' % re.sub('\W','',target)
+            servers[target] = [code, filename, headers]
+            if capture: getCapture(code, target, '%s/%s' % (dir,filename), opts)
+    
+    generatePage(servers, zombies, outfile)
+    print 'Done.'
+
+#=================================================
+# SUPPORT FUNCTIONS
+#=================================================
+
+def getCapture(code, url, filename, opts):
+    if code != 401:
+        sys.stdout.write("[+] retrieving image for %s...\n" % url); sys.stdout.flush()
+        try:
+            if opts.pyqt:
+                proc = subprocess.Popen(['python ./capture.py %s %s' % (url, filename)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
+            elif opts.phantom:
+                proc = subprocess.Popen(['./phantomjs --ignore-ssl-errors=yes ./capture.js %s %s' % (url, filename)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
+            else:
+                print '[!] No capture mode detected.'
+                return
+            stdout, stderr = proc.communicate()
+            response = str(stdout) + str(stderr)
+            returncode = proc.returncode
+            if returncode != 0: print '[!] %d: %s' % (returncode, response)
+            elif response != 'None': print '[+] %s' % response
+        except KeyboardInterrupt:
+            pass
+
+def getHeaderData(target):
+    server = None
+    url = None
+    code = None
+    status = None
+    headers = None
+    header_str = None
+    server = urlparse(target)
+    # set up request for getting header information
+    opener = urllib2.build_opener(SmartRedirectHandler) # debug with urllib2.HTTPHandler(debuglevel=1)
+    urllib2.install_opener(opener)
+    req = urllib2.Request(server.geturl())
+    # force head request
+    #req = HeadRequest(server.geturl())
+    # spoof user-agent
+    #req.add_header('User-agent', 'Mozilla/5.0')
+    # retrieve header information
+    try:
+        res = urllib2.urlopen(req)#,'',3)
+        print '[+] %s %s, Good.' % (target, res.getcode())
+        """
+    except httplib.BadStatusLine:
+        print '[+] %s bad status, visit manually.' % (target)
+        return 'zombie', res.args[0].__str__()
+        """
+    except Exception as res:
+        try:
+            res.getcode()
+            print '[+] %s %s, Good.' % (target, res.getcode())
+        except:
+            error = res.args[0].__str__()
+            print '[+] %s Error. Visit manually from report.\n[!] %s' % (target, error)
+            return 'zombie', error
+
+    url = res.geturl()
+    code = res.code
+    status = res.msg
+    headers = res.info().headers       
+    header_str = '<br />%s %s<br />\n' % (code, status)
+    for header in headers:
+        header_str += '<span class="header">%s</span>: %s<br />\n' % (header.split(':')[0].strip(), header.split(':')[1].strip())
+    return code, header_str
+
+def generatePage(servers, zombies, outfile):
+    tmarkup = ''
+    zmarkup = ''
+    for server in servers.keys():
+        tmarkup += "<tr><td class='img'><img src='%s' /></td><td class='head'><a href='%s' target='_blank'>%s</a> %s</td></tr>\n" % (servers[server][1],server,server,servers[server][2])
+    if len(zombies) > 0:
+      zmarkup = '<tr><td><h2>Failed Requests</h2></td><td>\n'
+      for server in zombies:
+          zmarkup +=  "<a href='%s' target='_blank'>%s</a> %s<br />\n" % (server[0],server[0],server[1])
+      zmarkup += '</td></tr>\n'
+    file = open(outfile, 'w')
+    file.write("""
+<!doctype html>
+<head>
+<style>
+table, td, th {border: 1px solid black;border-collapse: collapse;padding: 5px;font-size: .9em;font-family: tahoma;}
+table {table-layout:fixed;}
+td.img {width: 400px;white-space: nowrap;}
+td.head {vertical-align: top;word-wrap:break-word;}
+.header {font-weight: bold;}
+img {width: 400px;}
+</style>
+</head>
+<body>
+<table width='100%%'>
+%s%s
+</table>
+</body>
+</html>""" % (tmarkup, zmarkup))
+    file.close()
+
+#=================================================
+# CUSTOM CLASS WRAPPERS
+#=================================================
+
+class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
+
+    def http_error_301(self, req, fp, code, msg, headers):
+        result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers)
+        result.status = code
+        result.msg = msg + ' (Redirected to here)'
+        return result
+
+    def http_error_302(self, req, fp, code, msg, headers):
+        result = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
+        result.status = code
+        result.msg = msg + ' (Redirected to here)'
+        return result
+
+"""
+class AvoidBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
+
+    def http_error_401(self, req, fp, code, msg, headers):
+        pass
+    
+class AvoidDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
+
+    def http_error_401(self, req, fp, code, msg, headers):
+        pass
+
+class AvoidRedirectHandler(urllib2.HTTPRedirectHandler):
+    
+    def http_error_302(self, req, fp, code, msg, headers):
+        pass
+    http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+class HeadRequest(urllib2.Request):
+
+    def get_method(self):
+        return "HEAD"
+"""
+
+#=================================================
+# START
+#=================================================
+
+if __name__ == "__main__": main()