Source

Recon-ng / modules / recon / hosts / gather / http / web / netcraft.py

Full commit
import framework
# unique to module
import urllib
import re
import hashlib
import time
import random

class Module(framework.module):
    """Recon module that harvests hostnames for a domain from Netcraft's DNS search.

    Result pages are scraped with regular expressions; every new hostname is
    printed and handed to ``self.add_host``.
    """

    def __init__(self, params):
        """Initialise the base module, register the 'domain' option and set the module metadata."""
        framework.module.__init__(self, params)
        self.register_option('domain', self.goptions['domain']['value'], 'yes', self.goptions['domain']['desc'])
        self.info = {
                     'Name': 'Netcraft Hostname Enumerator',
                     'Author': 'thrapt (thrapt@gmail.com)',
                     'Description': 'Harvests hosts from Netcraft.com and updates the \'hosts\' table of the database with the results.',
                     'Comments': []
                     }

    def _challenge_cookies(self, set_cookie):
        """Build the cookie dict answering Netcraft's JavaScript verification challenge.

        ``set_cookie`` is the raw 'set-cookie' header value. The challenge token
        is the text between the first '=' and the following '=' or ';'; the
        response is the SHA-1 hex digest of the URL-unquoted token (logic taken
        from the Netcraft page's JavaScript, so no heavy parser is needed).
        """
        challenge_token = set_cookie.split('=')[1].split(';')[0]
        response = hashlib.sha1(urllib.unquote(challenge_token))
        return {
            'netcraft_js_verification_response': '%s' % response.hexdigest(),
            'netcraft_js_verification_challenge': '%s' % challenge_token,
            'path' : '/'
        }

    def _next_page_params(self, content):
        """Extract the 'last' and 'from' query values of the next-page link.

        Returns a dict with both keys, or None when the page does not contain
        both parameters (i.e. there is no usable next page).
        """
        params = {}
        for name, value in re.findall(r'(\blast\=\b|\bfrom\=\b)(.*?)&', content):
            # 'name' is 'last=' or 'from='; key each value by its own parameter
            # name instead of relying on the order they appear in the page, and
            # keep only the first occurrence of each.
            params.setdefault(name.rstrip('='), value)
        if 'last' in params and 'from' in params:
            return params
        return None

    def module_run(self):
        """Query Netcraft page by page, report every unique host and store it via add_host."""
        domain = self.options['domain']['value']
        url = 'http://searchdns.netcraft.com/'
        payload = {'restriction': 'site+ends+with', 'host': domain}
        pattern = r'<td align\="left">\s*<a href="http://(.*?)/"'
        seen = set()
        cnt = 0
        cookies = {}
        # execute search engine queries and scrape results
        # loop until no Next Page is available
        while True:
            self.verbose('URL: %s?%s' % (url, urllib.urlencode(payload)))

            resp = self.request(url, payload=payload, cookies=cookies)
            if 'set-cookie' in resp.headers:
                # the server issued a verification challenge: answer it and
                # request the same page again with the computed cookies
                cookies = self._challenge_cookies(resp.headers['set-cookie'])
                resp = self.request(url, payload=payload, cookies=cookies)

            content = resp.text

            # report each host only once, in the order it appears on the page
            for site in re.findall(pattern, content):
                if site in seen:
                    continue
                seen.add(site)
                self.output('%s' % (site))
                # NOTE(review): add_host presumably returns the number of newly
                # stored hosts -- confirm against the framework
                cnt += self.add_host(site)

            # Verifies if there's more pages to look while grabbing the correct
            # values for our payload...
            next_params = self._next_page_params(content)
            if next_params is None:
                break
            if all(payload.get(key) == value for key, value in next_params.items()):
                # the page points at the query we just made; requesting it
                # again would loop forever
                break
            payload.update(next_params)
            self.verbose('Next page available! Requesting again...' )
            # sleep script to avoid lock-out
            self.verbose('Sleeping to Avoid Lock-out...')
            time.sleep(random.randint(5,15))

        self.verbose('Final Query String: %s?%s' % (url, urllib.urlencode(payload)))
        self.output('%d total hosts found.' % (len(seen)))
        if cnt: self.alert('%d NEW hosts found!' % (cnt))