Yu-Jie Lin avatar Yu-Jie Lin committed ead56fc

add -x or --exclude-status option for specifying excluded statuses from report section. Closes #2

Comments (0)

Files changed (5)

 
 ## Development
 
+* add `-x` (`--exclude-status`) option for specifying statuses to exclude from the report section. (#2)
+
 ## Version 0.1.5 (2013-04-29T16:15:30Z)
 
 * [base] fix query string getting quoted by mistake by e79523b
 Usage
 -----
 
-    linkcheck [-c CHECKER] [-u|--update] [-f FILE] [-j JSON] [-s STATUS]
+    linkcheck [-c CHECKER] [-u|--update] [-f FILE] [-j JSON] [-s STATUS] [-x EXCLUDE_STATUS]
 
 * `CHECKER` is the checker used to process `FILE`.
 * `-u` or `--update` instructs lnkckr to update the JSON with the input file. Normally, when `-j JSON` is present, lnkckr ignores the input file.
 * `FILE` is the input filename or URL.
 * `JSON` is the filename of the saved progress file. If `FILE` is a filename, then `FILE` can be omitted; a filename is assigned automatically unless a different filename is desired.
 * `STATUS` tells lnkckr to re-check URLs that have the specified status.
+* `EXCLUDE_STATUS` is a comma-separated list of statuses; links with any of the listed statuses are not shown in the report section. Default is `unchecked,200`.
 
 Here is a sample:
 
 
 from __future__ import print_function
 import argparse
-import json
-from os import path
 import sys
 
 from lnkckr import __version__
 
   parser = argparse.ArgumentParser()
   parser.add_argument('-c', '--checker')
-  parser.add_argument('-u', '--update', action='store_true', help='update JSON')
+  parser.add_argument('-u', '--update', action='store_true',
+                      help='update JSON')
   parser.add_argument('-f', '--file')
   parser.add_argument('-j', '--json')
   parser.add_argument('-s', '--status',
                       help=('re-check links with status. '
                             'Valid values: all, HTTP status code'))
+  parser.add_argument('-x', '--exclude-status',
+                      default='unchecked,200',
+                      help=('Exclude links with checking status from listing '
+                            'in report section. (Default: %(default)s)'))
   parser.add_argument('--version', action='version',
                       version='%(prog)s ' + __version__)
   args = parser.parse_args()
     print('No files to process', file=sys.stderr)
     sys.exit(1)
 
-  checker = Checker()
+  f = lambda s: None if s == 'unchecked' else s
+  exclude_status = args.exclude_status.split(',')
+  exclude_status = tuple(map(f, exclude_status))
+  cfg = {
+    'exclude_status': exclude_status
+  }
+  checker = Checker(**cfg)
   checker.load(args.file, args.json, args.update)
-  links = checker.links
 
   f = None
   if args.status:

lnkckr/checkers/base.py

   QUEUE_SIZE = 20
   SAVE_INT = 100
 
-  def __init__(self):
+  def __init__(self, **cfg):
 
     self.data = {}
     self.links = {}
     self.json_filename = None
 
+    self.exclude_status = (cfg['exclude_status'] if 'exclude_status' in cfg
+                           else (None, 200))
+
     # User-Agent for some website like Wikipedia. Without it, most of requests
     # result in 403.
     self.HEADERS = {
 
   def print_report_link(self, url, link):
 
-    if link['status'] in (None, '200', 'SCH', 'SKP'):
+    if link['status'] in self.exclude_status:
       return
     self.format_status(url, link)
     self.print_report_link_data(url, link)

lnkckr/checkers/blogger.py

     self.print_heading('toplist')
 
     # make list of (status, postlink) from links
-    f = lambda link: link['status'] not in (None, '200', 'SCH', 'SKP')
+    f = lambda link: link['status'] not in self.exclude_status
     links = filter(f, self.links.values())
     links = (product((link['status'],), link['posts']) for link in links)
     links = chain.from_iterable(links)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.