# -*- coding: utf-8 -*-
# Path: sphinx/sphinx/linkcheck.py
"""
    sphinx.linkcheck
    ~~~~~~~~~~~~~~~~

    The CheckExternalLinksBuilder class.

    :copyright: 2008 by Georg Brandl, Thomas Lamb.
    :license: BSD.
"""

import socket
from os import path
from urllib2 import build_opener, HTTPError

from docutils import nodes

from sphinx.builder import Builder
from sphinx.util.console import purple, red, darkgreen

# Shared URL opener used for every link check.  Its default header list is
# replaced so that requests announce a browser-like User-agent.
_BROWSER_USER_AGENT = 'Mozilla/5.0'
opener = build_opener()
opener.addheaders = [('User-agent', _BROWSER_USER_AGENT)]


class CheckExternalLinksBuilder(Builder):
    """
    Checks for broken external links.

    Each ``http:``/``https:`` reference URI is opened once per build; the
    outcome is reported through ``self.info`` and broken, redirected or
    malformed links are appended to ``output.txt`` in the output directory.
    """
    name = 'linkcheck'

    # Status codes used as the first item of the tuples returned by
    # resolve() and stored in self.broken / self.redirected.
    _WORKING = 0
    _REDIRECTED = 1
    _BROKEN = 2

    def init(self):
        """Reset the result caches, set the socket timeout and create an
        empty ``output.txt``."""
        # URIs that resolved without being redirected
        self.good = set()
        # uri -> (status, info) tuples as returned by resolve()
        self.broken = {}
        self.redirected = {}
        # set a timeout for non-responding servers
        socket.setdefaulttimeout(5.0)
        # create (or truncate) the output file
        open(path.join(self.outdir, 'output.txt'), 'w').close()

    def get_target_uri(self, docname, typ=None):
        """Return an empty string; this builder produces no target URIs."""
        return ''

    def get_outdated_docs(self):
        """Return ``self.env.all_docs`` unconditionally."""
        return self.env.all_docs

    def prepare_writing(self, docnames):
        """Nothing to prepare."""
        return

    def write_doc(self, docname, doctree):
        """Check every reference node in *doctree* that has a ``refuri``."""
        self.info()
        for node in doctree.traverse(nodes.reference):
            # Skip reference nodes that carry no 'refuri' attribute instead
            # of catching KeyError around the whole check, which would also
            # hide unrelated errors.
            if node.get('refuri') is None:
                continue
            self.check(node, docname)

    def _find_lineno(self, node):
        """Return the ``line`` of the nearest ancestor of *node* that has
        one, or None if no ancestor provides a line number."""
        parent = node.parent
        while parent is not None:
            if parent.line is not None:
                return parent.line
            parent = parent.parent
        return None

    def check(self, node, docname):
        """
        Check the URI of a single reference *node* found in *docname*.

        ``http:``/``https:`` URIs are resolved (or looked up in the caches of
        earlier results) and reported; empty, ``mailto:`` and ``ftp:`` URIs
        are ignored; anything else is reported as malformed.

        Raises KeyError if *node* has no ``refuri`` attribute.
        """
        uri = node['refuri']

        # only the part before the fragment identifier is requested
        if '#' in uri:
            uri = uri.split('#')[0]

        if uri in self.good:
            return

        if uri.startswith('http:') or uri.startswith('https:'):
            self.info(uri, nonl=1)
            lineno = self._find_lineno(node)

            # reuse an earlier result so every URI is fetched only once
            if uri in self.broken:
                (r, s) = self.broken[uri]
            elif uri in self.redirected:
                (r, s) = self.redirected[uri]
            else:
                (r, s) = self.resolve(uri)

            if r == self._WORKING:
                self.info(' - ' + darkgreen('working'))
                self.good.add(uri)
            elif r == self._BROKEN:
                self.info(' - ' + red('broken: ') + s)
                self.broken[uri] = (r, s)
                self.write_entry('broken', docname, lineno, uri + ': ' + s)
            else:
                self.info(' - ' + purple('redirected') + ' to ' + s)
                self.redirected[uri] = (r, s)
                self.write_entry('redirected', docname, lineno,
                                 uri + ' to ' + s)

        elif (len(uri) == 0 or uri.startswith('mailto:')
              or uri.startswith('ftp:')):
            return
        else:
            self.info(uri + ' - ' + red('malformed!'))
            # the line number has to be looked up here as well; it is not
            # computed by the http branch above for this kind of URI
            self.write_entry('malformed', docname, self._find_lineno(node),
                             uri)

        return

    def write_entry(self, what, docname, line, uri):
        """Append one ``file:line: [what] uri`` record to ``output.txt``."""
        output = open(path.join(self.outdir, 'output.txt'), 'a')
        try:
            output.write("%s:%s: [%s] %s\n" % (self.env.doc2path(docname, None),
                                               line, what, uri))
        finally:
            # close the file even if formatting or writing fails
            output.close()

    def resolve(self, uri):
        """
        Open *uri* and classify the result.

        Returns a ``(status, info)`` tuple:

        * ``(0, 0)`` -- the URI could be opened and the final URL equals
          *uri* (ignoring trailing slashes);
        * ``(1, final_url)`` -- the URI could be opened but ended up at a
          different URL;
        * ``(2, message)`` -- opening failed; *message* is ``str()`` of the
          exception.
        """
        try:
            f = opener.open(uri)
            f.close()
        except HTTPError as err:
            # HTTP-level failure reported by the opener
            return (self._BROKEN, str(err))
        except Exception as err:
            # any other failure raised while opening the URI
            return (self._BROKEN, str(err))
        if f.url.rstrip('/') == uri.rstrip('/'):
            return (self._WORKING, 0)
        else:
            return (self._REDIRECTED, f.url)

    def finish(self):
        """Nothing to do after all documents have been checked."""
        return
# NOTE: the lines below are repository-browser UI hints that were captured
# together with this file; they are not part of the module.
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.