Source

xemacsweb / batch-psgml-validate.el

;;;
;;; $Id$
;;;
;;; Adrian Aichner (APA), aichner@ecf.teradyne.com, Teradyne GmbH, 2000-07-16.
;;;

(require 'psgml)
(require 'psgml-parse)                  ;;; for (defstruct (sgml-dtd ...))
;;; APA: 'psgml-html does not provide itself.  Why not?
(load "psgml-html")                   ;;; for html-quote-region

(defconst psgml-validate-conform
  "<small>Conform with "
  "Phrase to insert for conforming SGML document.")

(defconst psgml-validate-non-conform
  "<small><strong>Not</strong> conform with "
  "Phrase to insert for non-conforming SGML document.")

(defgroup batch-psgml-validate nil
  "SGML document validation using PSGML."
  :group 'emacs)

(defcustom psgml-validate-ignore-directories
  (quote ("CVS" "genpage" "RSC" "SCCS"))
  "*List of directory names to be ignored by `batch-psgml-validate'."
  :type '(repeat
          :custom-show t
          :documentation-shown t
          string)
  :group 'batch-psgml-validate)

(defvar batch-psgml-last-warning nil
  "Save last warning.")

(defun psgml-find-file-hook ()
  (condition-case error
      (save-excursion
        (let (mdo)
          (goto-char (point-min))
          (setq mdo
                (sgml-with-parser-syntax
                 (let (start)
                   (sgml-skip-upto "MDO")
                   (setq start (point))
                   (sgml-skip-upto-mdc)
                   (forward-char 1)
                   (buffer-substring start (point)))))
          (string-match "\\bDTD\\s-+\\(\\w+\\)\\b" mdo)
          (cond
           ((string= (match-string 1 mdo) "XHTML")
            (xml-mode))
           ((string= (match-string 1 mdo) "XML")
            (xml-mode))
           ((string= (match-string 1 mdo) "HTML")
            (html-mode))
           (t
            nil))))
    (t nil)))

;;; (add-hook 'find-file-hook 'psgml-find-file-hook)

(defun batch-psgml-validate (&optional file-or-dir)
  "Uses `sgml-next-trouble-spot' from the PSGML package to validate
conformance of files in FILE-OR-DIR with the specified DTD.  See
`batch-psgml-validate-buffer'.  If FILE-OR-DIR is missing,
`batch-psgml-validate' is performed for each `command-line-args-left'."
  (interactive "DHTML directory to validate: ")
  (if (null file-or-dir)
      (progn
        (let (file-or-dir)
          (while command-line-args-left
            (setq file-or-dir (expand-file-name (car command-line-args-left)))
            (batch-psgml-validate file-or-dir)
            (setq command-line-args-left (cdr command-line-args-left)))))
    (if (file-directory-p file-or-dir)
        (dolist (file (directory-files file-or-dir t nil nil nil))
          (cond
           ((member (file-name-nondirectory file) (list "." ".."))
            nil)
           ((file-directory-p file)
            (batch-psgml-validate file))
           ((and
             (member (file-name-extension file) (list "htm" "html"))
             (null
              (backup-file-name-p file)))
            (batch-psgml-validate-file file t t))))
      (batch-psgml-validate-file file-or-dir t t))))

(defun batch-psgml-validate-file (file &optional insert-result indent)
  "Uses `sgml-next-trouble-spot' from the PSGML package to validate
conformance of FILE with the specified DTD.  See
`batch-psgml-validate-buffer'."
  (interactive
   (list
    (read-file-name "HTML file to validate: ")
    (yes-or-no-p "insert compliance text ")
    (yes-or-no-p "indent buffer ")))
  (when
      (catch 'file-should-be-validated
        (mapc
         (function
          (lambda (c)
            (when (member c psgml-validate-ignore-directories)
              (message "ignoring directory named %s" c)
              ;; APA: throw a nil value, causing catch to return nil
              (throw 'file-should-be-validated nil))))
         (split-string-by-char
          file
          directory-sep-char)))
    (message "validating %s" file)
    (with-current-buffer
        (find-file-noselect file)
      (batch-psgml-validate-buffer insert-result indent))
    (message "batch-psgml-validate %s is done" file)))

(defun batch-psgml-validate-buffer (&optional insert-result indent)
  "Uses `sgml-next-trouble-spot' from the PSGML package to validate
conformance of buffer with the specified DTD.  INSERT-RESULT inserts a
\(non-\)compliance messsage before \"</BODY>\".  INDENT will
`indent-according-to-mode' as well.  The buffer is saved in the end if
not called interactively."
  (interactive
   (list
    (yes-or-no-p "insert compliance text ")
    (yes-or-no-p "indent buffer ")))
  (let (old-result-begin
        old-result-end
        new-result-begin
        result
        file
        line)
    (psgml-find-file-hook)
    (goto-char (point-min))
    (condition-case err
        (progn
          (setq result (sgml-next-trouble-spot))
          (unless (string= result "Ok")
            (setf file (buffer-file-name))
            (setq line (count-lines (point-min) (point)))
            (if batch-psgml-last-warning
                (message
                 "%s:%d:%s" 
                 file line batch-psgml-last-warning))))
      (error (message ">> Error: %s" (prin1-to-string err)))
      (t (message "trouble: %s" (prin1-to-string err))))
    (when insert-result
      (goto-char (point-max))
      (setq old-result-end
            (re-search-backward "^\\s-*</body>" (point-min) t))
      (while
          (re-search-backward
           (format "^\\s-*\\(%s\\|%s\\)"
                   psgml-validate-conform
                   psgml-validate-non-conform)
           (point-min) t)
        (setq old-result-begin (point)))
      (and
       old-result-begin
       old-result-end
       (delete-region
        old-result-begin
        old-result-end))
      (insert
       (format "%s"
               (if (string= result "Ok")
                   psgml-validate-conform
                 psgml-validate-non-conform)))
      (setq new-result-begin (point))
      (insert
       (format
        "%s\n"
        ;; As suggested in
        ;; http://www.xemacs.org/list-archives/xemacs-beta/200008/msg00251.html
        (save-excursion
          (goto-char (point-min))
          (sgml-with-parser-syntax
           (let (start)
             (sgml-skip-upto "MDO")
             (setq start (point))
             (sgml-skip-upto-mdc)
             (forward-char 1)
             (buffer-substring start (point)))))))
      (html-quote-region new-result-begin (point))
      (insert
       "<br clear=\"none\"/>Automatically validated by PSGML "
       psgml-version ", "
       emacs-version ".</small>\n"))
    (when indent
      (indent-region (point-min) (point-max) nil))
    (if (noninteractive)
        (if (buffer-modified-p)
            (progn (message "Saving modified %s" (buffer-file-name))
                   (save-buffer))))))

;; APA: Encapsulate sgml-log-warning to capture batch-psgml-last-warning.
(defun batch-psgml-validate-log-warning (format &rest things)
  (setq batch-psgml-last-warning (apply 'format format things))
  (when sgml-throw-on-warning
    (throw sgml-throw-on-warning t)))
(fset 'sgml-log-warning 'batch-psgml-validate-log-warning)

;; APA: Encapsulate sgml-message to get rid of progress messages.
(defun batch-psgml-validate-message (format &rest things)
;;; APA: Original body of sgml-message as of 2000-08-13:
;;;   (let ((buf (get-buffer sgml-log-buffer-name)))
;;;     (when (and buf
;;;                (> (save-excursion (set-buffer buf)
;;;                                   (point-max))
;;;                   sgml-log-last-size))
;;;       (sgml-display-log)))
;;;   (apply 'message format things)
  )
(fset 'sgml-message 'batch-psgml-validate-message)

;; APA: Encapsulate sgml-lazy-message to get rid of progress messages.
(defun batch-psgml-lazy-message (&rest args)
;;;   (unless (= sgml-lazy-time (second (current-time)))
;;;     (apply 'message args)
;;;     (setq sgml-lazy-time (second (current-time))))
)
(fset 'sgml-lazy-message 'batch-psgml-lazy-message)