Yu-Jie Lin  committed ead56fc

add -x or --exclude-status option for specifying excluded statuses from report section. Closes #2

  • Participants
  • Parent commits c16e76c
  • Branches default

Comments (0)

Files changed (5)

 ## Development
+* add `-x` (`--exclude-status`) option for specifying statuses to exclude from the report section. (#2)
 ## Version 0.1.5 (2013-04-29T16:15:30Z)
 * [base] fix query string getting quoted by mistake by e79523b
-    linkcheck [-c CHECKER] [-u|--update] [-f FILE] [-j JSON] [-s STATUS]
+    linkcheck [-c CHECKER] [-u|--update] [-f FILE] [-j JSON] [-s STATUS] [-x EXCLUDE_STATUS]
 * `CHECKER` is the checker used to process `FILE`.
 * `-u` or `--update` instructs lnkckr to update the JSON with the input file. Normally, when `-j JSON` is present, lnkckr ignores the input file.
 * `FILE` is the input filename or URL.
 * `JSON` is the filename of the saved progress file. If `FILE` is a filename, then `FILE` can be omitted; a filename is assigned automatically unless a different filename is desired.
 * `STATUS` indicates that URLs with the specified status should be re-checked.
+* `EXCLUDE_STATUS` is a comma-separated list of statuses; links with any of the listed statuses will not be shown in the report section. Default is `unchecked,200`.
 Here is a sample:
 from __future__ import print_function
 import argparse
-import json
-from os import path
 import sys
 from lnkckr import __version__
   parser = argparse.ArgumentParser()
   parser.add_argument('-c', '--checker')
-  parser.add_argument('-u', '--update', action='store_true', help='update JSON')
+  parser.add_argument('-u', '--update', action='store_true',
+                      help='update JSON')
   parser.add_argument('-f', '--file')
   parser.add_argument('-j', '--json')
   parser.add_argument('-s', '--status',
                       help=('re-check links with status. '
                             'Valid values: all, HTTP status code'))
+  parser.add_argument('-x', '--exclude-status',
+                      default='unchecked,200',
+                      help=('Exclude links with checking status from listing '
+                            'in report section. (Default: %(default)s)'))
   parser.add_argument('--version', action='version',
                       version='%(prog)s ' + __version__)
   args = parser.parse_args()
     print('No files to process', file=sys.stderr)
-  checker = Checker()
+  f = lambda s: None if s == 'unchecked' else s
+  exclude_status = args.exclude_status.split(',')
+  exclude_status = tuple(map(f, exclude_status))
+  cfg = {
+    'exclude_status': exclude_status
+  }
+  checker = Checker(**cfg)
   checker.load(args.file, args.json, args.update)
-  links = checker.links
   f = None
   if args.status:

File lnkckr/checkers/

   QUEUE_SIZE = 20
   SAVE_INT = 100
-  def __init__(self):
+  def __init__(self, **cfg): = {}
     self.links = {}
     self.json_filename = None
+    self.exclude_status = (cfg['exclude_status'] if 'exclude_status' in cfg
+                           else (None, 200))
     # User-Agent for some website like Wikipedia. Without it, most of requests
     # result in 403.
     self.HEADERS = {
   def print_report_link(self, url, link):
-    if link['status'] in (None, '200', 'SCH', 'SKP'):
+    if link['status'] in self.exclude_status:
     self.format_status(url, link)
     self.print_report_link_data(url, link)

File lnkckr/checkers/

     # make list of (status, postlink) from links
-    f = lambda link: link['status'] not in (None, '200', 'SCH', 'SKP')
+    f = lambda link: link['status'] not in self.exclude_status
     links = filter(f, self.links.values())
     links = (product((link['status'],), link['posts']) for link in links)
     links = chain.from_iterable(links)