Commits

Anonymous committed 9c99d9a

Sync htmlize with upstream version 1.4.

Comments (0)

Files changed (2)

+2003-10-16  Norbert Koch  <viteno@xemacs.org>
+
+	* htmlize.el: Sync with upstream version 1.4.
+
 2003-10-15  Norbert Koch  <viteno@xemacs.org>
 
 	* Makefile (VERSION): XEmacs package 1.67 released.
 ;; <hniksic@xemacs.org> to discuss features and additions.  All
 ;; suggestions are more than welcome.
 
-;; To use this just switch to the buffer you want HTML-ized and type
-;; `M-x htmlize-buffer'.  You will be switched into a new buffer with
-;; the resulting HTML code.  You can edit and inspect this buffer, or
-;; you can just save it with C-x C-w.  `M-x htmlize-file' will find a
-;; file, font-lock it, and save the HTML version in FILE.html, without
-;; any additional intervention.  `M-x htmlize-many-files' allows you
-;; to htmlize any number of files in the same manner.  `M-x
-;; htmlize-many-files-dired' does the same for files marked in a dired
-;; buffer.
+;; To use this, just switch to the buffer you want HTML-ized and type
+;; `M-x htmlize-buffer'.  You will be switched to a new buffer that
+;; contains the resulting HTML code.  You can edit and inspect this
+;; buffer, or you can just save it with C-x C-w.  `M-x htmlize-file'
+;; will find a file, fontify it, and save the HTML version in
+;; FILE.html, without any additional intervention.  `M-x
+;; htmlize-many-files' allows you to htmlize any number of files in
+;; the same manner.  `M-x htmlize-many-files-dired' does the same for
+;; files marked in a dired buffer.
 
 ;; htmlize supports two types of HTML output, selected by setting
 ;; `htmlize-output-type': `css' and `font'.  In `css' mode, htmlize
 ;; HTML, which is not standard-compliant, but works better in older
 ;; browsers.  `css' mode is the default.
 
+;; You can also use htmlize from your Emacs Lisp code.  When called
+;; non-interactively, `htmlize-buffer' and `htmlize-region' will
+;; return the resulting HTML buffer, but will not switch current
+;; buffer or move the point.
+
 ;; I tried to make the package elisp-compatible with multiple Emacsen,
 ;; specifically aiming for XEmacs 19.14+ and GNU Emacs 19.34+.  Please
 ;; let me know if it doesn't work on some of those, and I'll try to
 ;; remove that particular dependency.  When byte-compiling under GNU
 ;; Emacs, you're likely to get lots of warnings; just ignore them.
 
-;; For htmlize to work, you need to run Emacs under a window-system --
-;; anything else is very likely to fail.
-
 ;; The latest version should be available at:
 ;;
 ;;        <http://fly.srk.fer.hr/~hniksic/emacs/htmlize.el>
   (defvar font-lock-auto-fontify)
   (defvar global-font-lock-mode))
 
-(defconst htmlize-version "1.0")
+(defconst htmlize-version "1.4")
 
 ;; Incantations to make custom stuff work without customize, e.g. on
 ;; XEmacs 19.14 or GNU Emacs 19.34.
 htmlize.  Therefore, when this variable is non-nil, htmlize inserts
 the following in the <head> section of the HTML:
 
-  <meta http-equiv=\"Content-Type\" content=\"CHARSET\">
+  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=CHARSET\">
 
 where CHARSET is the value you've set for htmlize-html-charset.  Valid
 charsets are defined by MIME and include strings like \"iso-8859-1\",
   :type 'string
   :group 'htmlize)
 
-(defcustom htmlize-use-rgb-map t
-  "*Controls when `rgb.txt' should be looked up for color values.
+(defcustom htmlize-use-rgb-txt t
+  "*Whether `rgb.txt' should be used to convert color names to RGB.
 
-When set to t (the default), htmlize will, when running under an X
-display, look for the `rgb.txt' file and use it to obtain the RGB
-values for named colors.  This is useful when the values reported by
-`color-instance-rgb-components'/`x-color-values' are incorrect because
-of color approximation.
+This conversion means determining, for instance, that the color
+\"IndianRed\" corresponds to the (205, 92, 92) RGB triple.  `rgb.txt'
+is the X color database that maps hundreds of color names to such RGB
+triples.  When this variable is non-nil, `htmlize' uses `rgb.txt' to
+look up color names.
 
-When set to nil, htmlize will never look for `rgb.txt' and will always
-use the values Emacs returns.
+If this variable is nil, htmlize queries Emacs for RGB components of
+colors using `color-instance-rgb-components' and `x-color-values'.
+This can yield incorrect results on non-true-color displays.
 
-When set to `force', htmlize will try to look for `rgb.txt' even on
-non-X devices."
-  :type '(choice (const :tag "When Appropriate" t)
-		 (const :tag "Never"          nil)
-		 (const :tag "Always"       force))
+If the `rgb.txt' file is not found (which will be the case if you're
+running Emacs on non-X11 systems), this option is ignored."
+  :type 'boolean
   :group 'htmlize)
 
 (defcustom htmlize-html-major-mode nil
 (defconst htmlize-running-xemacs (string-match "XEmacs" emacs-version))
 
 
-;;; Transformation of buffer text: untabification, HTML escapes, etc.
+;;; Transformation of buffer text: HTML escapes, untabification, etc.
+
+(if (fboundp 'char-int)
+    (defalias 'htmlize-char-int 'char-int)
+  (defalias 'htmlize-char-int 'identity))
+
+(defvar htmlize-character-table
+  ;; Map characters in the 0-255 range to strings.
+  (let ((table (make-vector 256 ?\0)))
+    ;; Map characters in the 32-126 range to themselves, others to
+    ;; &#CODE entities;
+    (dotimes (i 256)
+      (setf (aref table i) (if (and (>= i 32) (<= i 126))
+			       (char-to-string i)
+			     (format "&#%d;" i))))
+    ;; Set exceptions manually.
+    (setf
+     ;; Don't quote newline, carriage return, and TAB.
+     (aref table ?\n) "\n"
+     (aref table ?\r) "\r"
+     (aref table ?\t) "\t"
+     ;; Encode &, <, and > as symbolic entities, as is customary.
+     (aref table ?&) "&amp;"
+     (aref table ?<) "&lt;"
+     (aref table ?>) "&gt;"
+     ;; Not quoting '"' buys us a measurable speed increase.  It's
+     ;; only necessary to quote it for strings used in attribute
+     ;; values, which htmlize doesn't do.
+     ;(aref table ?\") "&quot;"
+     )
+    table))
+
+;; Table that maps extended characters to their numeric Unicode
+;; entities.  This is used by htmlize-protect-string to avoid consing
+;; "&CHAR-CODE;" strings for the characters that repeat over and over.
+(defvar htmlize-extended-character-table (make-hash-table :test 'eq))
+
+(defun htmlize-protect-string (string)
+  "HTML-protect string, escaping HTML metacharacters and I18N chars."
+  ;; Only protecting strings that actually contain unsafe chars
+  ;; removes a lot of unnecessary consing.
+  (if (not (string-match "[^\r\n\t -%'-;=?-~]" string))
+      string
+    (mapconcat (lambda (char)
+		 (cond
+		  ((> (htmlize-char-int char) 255)
+		   (if (and (fboundp 'encode-char)
+			    ;; Emacs's unicode tables are incomplete;
+			    ;; encode-char returns nil for Arabic.
+			    (encode-char char 'ucs))
+		       ;; encode-char is available: convert CHAR to
+		       ;; "&#UCS-CODE;".  Cache the resulting string
+		       ;; in htmlize-extended-character-table, so we
+		       ;; don't have to cons new strings for chars
+		       ;; we've already seen.
+		       (or (gethash char htmlize-extended-character-table)
+			   (setf (gethash char
+					  htmlize-extended-character-table)
+				 (format "&#%d;" (encode-char char 'ucs))))
+		     ;; Conversion to Unicode not available --
+		     ;; simply copy the char unchanged.
+		     (char-to-string char)))
+		  (t
+		   ;; Use htmlize-character-table to convert CHAR to
+		   ;; string without consing a new string each time.
+		   (aref htmlize-character-table char))))
+	       string "")))
+
+;; We need a function that efficiently finds the next change of a
+;; property (usually `face'), preferably regardless of whether the
+;; change occurred because of a text property or an extent/overlay.
+;; As it turns out, it is not easy to do that compatibly.
+
+;; Under XEmacs, `next-single-property-change' does that.  Under GNU
+;; Emacs beginning with version 21, `next-single-char-property-change'
+;; is available and works.  GNU Emacs 20 had
+;; `next-char-property-change', which we can use.  GNU Emacs 19 didn't
+;; provide any means for simultaneously examining overlays and text
+;; properties, so when using Emacs 19.34, we punt and fall back to
+;; `next-single-property-change', thus ignoring overlays altogether.
+
+(cond
+ (htmlize-running-xemacs
+  ;; XEmacs: good.
+  (defun htmlize-next-change (pos prop &optional limit)
+    (next-single-property-change pos prop nil (or limit (point-max)))))
+ ((fboundp 'next-single-char-property-change)
+  ;; GNU Emacs 21: good.
+  (defun htmlize-next-change (pos prop &optional limit)
+    (next-single-char-property-change pos prop nil limit)))
+ ((fboundp 'next-char-property-change)
+  ;; GNU Emacs 20: bad, but fixable.
+  (defun htmlize-next-change (pos prop &optional limit)
+    (let ((done nil)
+	  (current-value (get-char-property pos prop))
+	  newpos next-value)
+      ;; Loop over positions returned by next-char-property-change
+      ;; until the value of PROP changes or we've hit EOB.
+      (while (not done)
+	(setq newpos (next-char-property-change pos limit)
+	      next-value (get-char-property newpos prop))
+	(cond ((eq newpos pos)
+	       ;; Possibly at EOB?  Whatever, just don't infloop.
+	       (setq done t))
+	      ((eq next-value current-value)
+	       ;; PROP hasn't changed -- keep looping.
+	       )
+	      (t
+	       (setq done t)))
+	(setq pos newpos))
+      pos)))
+ (t
+  ;; GNU Emacs 19.34: hopeless, cannot properly support overlays.
+  (defun htmlize-next-change (pos prop &optional limit)
+    (unless limit
+      (setq limit (point-max)))
+    (let ((res (next-single-property-change pos prop)))
+      (if (or (null res)
+	      (> res limit))
+	  limit
+	res)))))
 
 (defun htmlize-buffer-substring (beg end)
   ;; Like buffer-substring-no-properties, but also ignores invisible
   ;; text.
-  (if (not (text-property-not-all beg end 'invisible nil))
-      ;; Make the simple case fast: if the region contains no
-      ;; invisible text, use the buffer-substring-no-properties
-      ;; builtin.
-      (buffer-substring-no-properties beg end)
-    ;; Iterate over the changes in the `invisible' property and filter
-    ;; out the portions where it's non-nil, i.e. where the text is
-    ;; invisible.
-    (let ((visible-text ())
-	  invisible next-change)
-      (save-excursion
-	(save-restriction
-	  (narrow-to-region beg end)
-	  (goto-char (point-min))
-	  (while (not (eobp))
-	    (setq invisible (get-char-property (point) 'invisible)
-		  next-change (or (htmlize-next-change (point) 'invisible)
-				  (point-max)))
-	    (unless invisible
-	      (push (buffer-substring-no-properties (point) next-change)
-		    visible-text))
-	    (goto-char next-change))))
-      (apply #'concat (nreverse visible-text)))))
+
+  ;; Iterate over the changes in the `invisible' property and filter
+  ;; out the portions where it's non-nil, i.e. where the text is
+  ;; invisible.
+  (let ((pos beg)
+	visible-list invisible next-change)
+    (while (< pos end)
+      (setq invisible (get-char-property pos 'invisible)
+	    next-change (htmlize-next-change pos 'invisible end))
+      (unless invisible
+	(push (buffer-substring-no-properties pos next-change)
+	      visible-list))
+      (setq pos next-change))
+    (apply #'concat (nreverse visible-list))))
 
 (defun htmlize-untabify-1 (line start-column)
   ;; Replaces tabs in LINE with the number of spaces sufficient to
 	;; Advance to the next position in TEXT.
 	(setq line-beg line-end)))
     (apply #'concat (nreverse output))))
-
-;; Currently we don't handle non-ASCII characters specially: they are
-;; copied to the output buffer as-is.  The user is expected to make
-;; them work, e.g. by filling in a META tag in htmlize-head-tags.
-;;
-;; This is because IMHO doing nothing is (in this case) better than
-;; doing the wrong thing and corrupting data.  Doing the right thing
-;; is *hard* because it would require converting Emacs characters to
-;; Unicode code points.  Making this work under different versions of
-;; Mule is tricky and would require large conversion tables.  What's
-;; worse, making it work under non-Mule Emacsen is next to impossible
-;; because the meaning of 8-bit characters depends on the locale and
-;; font in use.  (Contrary to popular belief, you cannot assume that
-;; characters in the 160-255 range are Latin 1.)
-
-(if (fboundp 'char-int)
-    (defalias 'htmlize-char-int 'char-int)
-  (defalias 'htmlize-char-int 'identity))
-
-(defvar htmlize-character-table
-  (let ((table (make-vector 256 ?\0)))
-    (dotimes (i 256)
-      (setf (aref table i) (char-to-string i)))
-    (setf (aref table ?&) "&amp;"
-	  (aref table ?<) "&lt;"
-	  (aref table ?>) "&gt;"
-	  ;; Not quoting '"' buys us a measurable speed increase.
-	  ;; It's only necessary to quote it for strings used in
-	  ;; attribute values, which htmlize doesn't do.
-	  ;(aref table ?\") "&quot;"
-
-	  ;; This character often shows in GNU sources, and the W3
-	  ;; validator complains of "invalid SGML character".  So we
-	  ;; convert it to an entity, which only elicits a warning.
-	  ;; We could do the same for other non-ASCII characters, but
-	  ;; we don't because it would slow us down.
-	  (aref table ?\C-l) "&#12;"
-	  )
-    table))
-
-(defun htmlize-protect-string (string)
-  ;; Checking whether STRING contains dangerous stuff removes a lot of
-  ;; unnecessary consing.
-  (if (not (string-match "[&<>\C-l]" string))
-      string
-    (mapconcat (lambda (char)
-		 (if (> (htmlize-char-int char) 255)
-		     ;; Leave multibyte characters as they are, see
-		     ;; above for explanation.
-		     (char-to-string char)
-		   (aref htmlize-character-table char)))
-	       string "")))
 
 ;;; Color handling.
 
 	     (and (buffer-live-p ,temp-buffer)
 		  (kill-buffer ,temp-buffer))))))))
 
-;; We need a function that efficiently finds the next change of a
-;; property (usually `face'), preferably regardless of whether the
-;; change occurred because of a text property or an extent/overlay.
-;; As it turns out, it is not easy to do that compatibly.
-
-;; Under XEmacs, `next-single-property-change' does that.  Under GNU
-;; Emacs beginning with version 21, `next-single-char-property-change'
-;; is available and works.  GNU Emacs 20 had
-;; `next-char-property-change', which we can use.  GNU Emacs 19 didn't
-;; provide any means for simultaneously examining overlays and text
-;; properties, so when using Emacs 19.34, we punt and fall back to
-;; `next-single-property-change', thus ignoring overlays altogether.
-
-(cond
- (htmlize-running-xemacs
-  ;; XEmacs: good.
-  (defalias 'htmlize-next-change 'next-single-property-change))
- ((fboundp 'next-single-char-property-change)
-  ;; GNU Emacs 21: good.
-  (defalias 'htmlize-next-change 'next-single-char-property-change))
- ((fboundp 'next-char-property-change)
-  ;; GNU Emacs 20: bad, but fixable.
-  (defun htmlize-next-change (pos prop)
-    (let ((done nil)
-	  (current-value (get-char-property pos prop))
-	  newpos next-value)
-      ;; Loop over positions returned by next-char-property-change
-      ;; until the value of PROP changes or we've hit EOB.
-      (while (not done)
-	(setq newpos (next-char-property-change pos)
-	      next-value (get-char-property newpos prop))
-	(cond ((eq newpos pos)
-	       ;; Possibly at EOB?  Whatever, just don't infloop.
-	       (setq done t))
-	      ((eq next-value current-value)
-	       ;; PROP hasn't changed -- keep looping.
-	       )
-	      (t
-	       (setq done t)))
-	(setq pos newpos))
-      pos)))
- (t
-  ;; GNU Emacs 19.34: hopeless.
-  (defalias 'htmlize-next-change 'next-single-property-change)))
-
 (defvar htmlize-x-library-search-path
   '("/usr/X11R6/lib/X11/"
     "/usr/X11R5/lib/X11/"
 values are the #rrggbb RGB specifications, extracted from `rgb.txt'.
 
 If RGB-FILE is nil, the function will try hard to find a suitable file
-in the system directories."
+in the system directories.
+
+If no rgb.txt file is found, return nil."
   (let ((rgb-file (or rgb-file (htmlize-locate-file
 				"rgb.txt"
 				htmlize-x-library-search-path)))
-	(hash (make-hash-table :test 'equal)))
-    (with-temp-buffer
-      (insert-file-contents rgb-file)
-      (while (not (eobp))
-	(cond ((looking-at "^\\s-*\\([!#]\\|$\\)")
-	       ;; Skip comments and empty lines.
-	       )
-	      ((looking-at "[ \t]*\\([0-9]+\\)[ \t]+\\([0-9]+\\)[ \t]+\\([0-9]+\\)[ \t]+\\(.*\\)")
-	       (setf (gethash (downcase (match-string 4)) hash)
-		     (format "#%02x%02x%02x"
-			     (string-to-number (match-string 1))
-			     (string-to-number (match-string 2))
-			     (string-to-number (match-string 3)))))
-	      (t
-	       (error "Unrecognized line in rgb.txt: %s"
-		      (buffer-substring (point) (progn (end-of-line) (point))))))
-	(forward-line 1)))
+	(hash nil))
+    (when rgb-file
+      (with-temp-buffer
+	(insert-file-contents rgb-file)
+	(setq hash (make-hash-table :test 'equal))
+	(while (not (eobp))
+	  (cond ((looking-at "^\\s-*\\([!#]\\|$\\)")
+		 ;; Skip comments and empty lines.
+		 )
+		((looking-at
+		  "[ \t]*\\([0-9]+\\)[ \t]+\\([0-9]+\\)[ \t]+\\([0-9]+\\)[ \t]+\\(.*\\)")
+		 (setf (gethash (downcase (match-string 4)) hash)
+		       (format "#%02x%02x%02x"
+			       (string-to-number (match-string 1))
+			       (string-to-number (match-string 2))
+			       (string-to-number (match-string 3)))))
+		(t
+		 (error
+		  "Unrecognized line in rgb.txt: %s"
+		  (buffer-substring (point) (progn (end-of-line) (point))))))
+	  (forward-line 1))))
     hash))
 
-(defvar htmlize-color-rgb-hash nil)
-(and (or (eq htmlize-use-rgb-map 'force)
-	 (and (eq htmlize-use-rgb-map t)
-	      (eq window-system 'x)))
-     (null htmlize-color-rgb-hash)
-     (setq htmlize-color-rgb-hash (htmlize-get-color-rgb-hash)))
+;; Compile the RGB map when loaded.  On systems where rgb.txt is
+;; missing, the value of the variable will be nil, and rgb.txt will
+;; not be used.
+(defvar htmlize-color-rgb-hash (htmlize-get-color-rgb-hash))
 
 ;;; Face handling
 
-(defun htmlize-face-has-property (face prop)
-  ;; Return t if face has PROP set rather than inherited.  The problem
-  ;; with say, `face-foreground-instance', is that it returns an
-  ;; instance for EVERY face because every face inherits from the
-  ;; default face.  However, we'd like htmlize-face-{fore,back}ground
-  ;; to return nil when called with a face that doesn't specify its
-  ;; own foreground or background.
+(defun htmlize-face-specifies-property (face prop)
+  ;; Return t if face specifies PROP, as opposed to it being inherited
+  ;; from the default face.  The problem with e.g.
+  ;; `face-foreground-instance' is that it returns an instance for
+  ;; EVERY face because every face inherits from the default face.
+  ;; However, we'd like htmlize-face-{fore,back}ground to return nil
+  ;; when called with a face that doesn't specify its own foreground
+  ;; or background.
   (if (eq face 'default)
       t
     (let ((spec-list (specifier-spec-list (face-property face prop))))
       (not (null (assq 'global spec-list))))))
 
+(defun htmlize-face-color-internal (face fg)
+  ;; Used only under GNU Emacs.  Return the color of FACE, but don't
+  ;; return "unspecified-fg" or "unspecified-bg".  If the face is
+  ;; `default' and the color is unspecified, look up the color in
+  ;; frame parameters.
+  (let ((color (if fg (face-foreground face) (face-background face))))
+    (when (and (eq face 'default) (null color))
+      (setq color (cdr (assq (if fg 'foreground-color 'background-color)
+			     (frame-parameters)))))
+    (when (or (equal color "unspecified-fg")
+	      (equal color "unspecified-bg"))
+      (setq color nil))
+    (when (and (eq face 'default)
+	       (null color))
+      ;; Assuming black on white doesn't seem right, but I can't think
+      ;; of anything better to do.
+      (setq color (if fg "black" "white")))
+    color))
+
 (defun htmlize-face-foreground (face)
   ;; Return the foreground color of the face as a string, either a
   ;; color name or #rrggbb.  If FACE does not specify a foreground
   ;; color, return nil.
   (cond (htmlize-running-xemacs
 	 ;; XEmacs.
-	 (and (htmlize-face-has-property face 'foreground)
+	 (and (htmlize-face-specifies-property face 'foreground)
 	      (color-instance-name (face-foreground-instance face))))
 	(t
-	 ;; FSF Emacs.
-	 (let ((color (face-foreground face)))
-	   (when (or (equal color "unspecified-fg")
-		     (equal color "unspecified-bg"))
-	     (setq color nil))
-	   (when (and (eq face 'default) (null color))
-	     (setq color (or (cdr (assq 'foreground-color (frame-parameters)))
-			     ;; Assuming black foreground doesn't seem
-			     ;; right, but I can't think of anything
-			     ;; better to do.
-			     "black")))
-	   color))))
+	 ;; GNU Emacs.
+	 (htmlize-face-color-internal face t))))
 
 (defun htmlize-face-background (face)
   ;; Return the background color of the face as a string, either a
   ;; color, return nil.
   (cond (htmlize-running-xemacs
 	 ;; XEmacs.
-	 (and (htmlize-face-has-property face 'background)
+	 (and (htmlize-face-specifies-property face 'background)
 	      (color-instance-name (face-background-instance face))))
 	(t
-	 (let ((color (face-background face)))
-	   (when (or (equal color "unspecified-fg")
-		     (equal color "unspecified-bg"))
-	     (setq color nil))
-	   (when (and (eq face 'default) (null color))
-	     (setq color (or (cdr (assq 'background-color (frame-parameters)))
-			     ;; Assuming white background doesn't seem
-			     ;; right, but I can't think of anything
-			     ;; better to do.
-			     "white")))
-	   color))))
+	 ;; GNU Emacs.
+	 (htmlize-face-color-internal face nil))))
 
-;; Return the #rrggbb string for foreground color of FACE.  If BG-P is
-;; non-nil, background color is used.
+;; Convert COLOR to the #RRGGBB string.  If COLOR is already in that
+;; format, it's left unchanged.
+
 (defun htmlize-color-to-rgb-string (color)
-  (apply #'format "#%02x%02x%02x"
-	 ;; Here I cannot conditionalize on (fboundp ...) because
-	 ;; ps-print under some versions of GNU Emacs defines its own
-	 ;; dummy version of color-instance-rgb-components.
-	 (if htmlize-running-xemacs
-	     (mapcar (lambda (arg)
-		       (/ arg 256))
-		     (color-instance-rgb-components
-		      (make-color-instance color)))
-	   (mapcar (lambda (arg)
-		     (/ arg 256))
-		   (x-color-values color)))))
+  (let (rgb-string)
+    (cond ((string-match "^#" color)
+	   ;; The color is alredy in #rrggbb format.
+	   (setq rgb-string color))
+	  ((and htmlize-use-rgb-txt
+		htmlize-color-rgb-hash)
+	   ;; Use of rgb.txt is requested, and it's available on the
+	   ;; system.  Use it.
+	   (setq rgb-string (gethash (downcase color) htmlize-color-rgb-hash)))
+	  (t
+	   ;; We're getting the RGB components from Emacs.
+	   (let ((rgb
+		  ;; Here I cannot conditionalize on (fboundp ...) 
+		  ;; because ps-print under some versions of GNU Emacs
+		  ;; defines its own dummy version of
+		  ;; color-instance-rgb-components.
+		  (if htmlize-running-xemacs
+		      (mapcar (lambda (arg)
+				(/ arg 256))
+			      (color-instance-rgb-components
+			       (make-color-instance color)))
+		    (mapcar (lambda (arg)
+			      (/ arg 256))
+			    (x-color-values color)))))
+	     (when rgb
+	       (setq rgb-string (apply #'format "#%02x%02x%02x" rgb))))))
+    ;; If RGB-STRING is still null, it means the color cannot be
+    ;; found, for whatever reason.  In that case just punt and return
+    ;; COLOR.  Most browsers support a decent set of color names
+    ;; anyway.
+    (or rgb-string color)))
+
+;; Return FACE's foreground or background as an RGB string.  If the
+;; face doesn't specify color, return nil.
 
 (defun htmlize-face-rgb-string (face &optional bg-p)
   (let ((color-name (if bg-p
 			(htmlize-face-background face)
 		      (htmlize-face-foreground face))))
-    (when color-name
-      (cond ((and htmlize-use-rgb-map
-		  htmlize-color-rgb-hash)
-	     (setq color-name (downcase color-name))
-	     (let ((rgb (if (string-match "^#" color-name)
-			    color-name
-			  (gethash color-name htmlize-color-rgb-hash))))
-	       (unless rgb
-		 (error "Color %s (face %s) not found" color-name face))
-	       rgb))
-	    (t
-	     (htmlize-color-to-rgb-string color-name))))))
+    (and color-name
+	 (htmlize-color-to-rgb-string color-name))))
+
+;; We abstract the face properties we care about into an
+;; `htmlize-face' structure.  That way we only have to analyze face
+;; properties, which can be time consuming, once per each face.  The
+;; mapping between Emacs faces and htmlize-faces is established by
+;; htmlize-make-face-hash.
 
 (defstruct htmlize-face
   rgb-foreground			; foreground color, #rrggbb
   boldp					; whether face is bold
   italicp				; whether face is italic
   underlinep				; whether face is underlined
-  strikep				; whether face is strikethrough
+  overlinep				; whether face is overlined
+  strikep				; whether face is striked through
   css-name				; CSS name of face
   )
 
+(defun htmlize-emacs-face-to-htmlize-face (face)
+  "Convert Emacs face FACE to htmlize-face."
+  (let ((object (make-htmlize-face
+		 :rgb-foreground (htmlize-face-rgb-string face)
+		 :rgb-background (htmlize-face-rgb-string face t))))
+    (cond (htmlize-running-xemacs
+	   ;; XEmacs doesn't provide a way to detect whether a face is
+	   ;; bold or italic, so we need to examine the font instance.
+	   ;; #### This probably doesn't work under MS Windows and/or
+	   ;; GTK devices.  I'll need help with those.
+	   (let* ((font-instance (face-font-instance face))
+		  (props (font-instance-properties font-instance)))
+	     (when (equalp (cdr (assq 'WEIGHT_NAME props)) "bold")
+	       (setf (htmlize-face-boldp object) t))
+	     (when (or (equalp (cdr (assq 'SLANT props)) "i")
+		       (equalp (cdr (assq 'SLANT props)) "o"))
+	       (setf (htmlize-face-italicp object) t))
+	     (setf (htmlize-face-strikep object)
+		   (face-strikethru-p face))
+	     (setf (htmlize-face-underlinep object)
+		   (face-underline-p face))))
+	  ((fboundp 'face-attribute)
+	   ;; GNU Emacs 21.
+	   (dolist (attr '(:weight :slant :underline :overline :strike-through))
+	     (let ((value (face-attribute face attr)))
+	       (when (and value (not (eq value 'unspecified)))
+		 (htmlize-face-emacs21-attr object attr value)))))
+	  (t
+	   ;; Older GNU Emacs.  Some of these functions are only
+	   ;; available under Emacs 20+, hence the guards.
+	   (when (fboundp 'face-bold-p)
+	     (setf (htmlize-face-boldp object) (face-bold-p face)))
+	   (when (fboundp 'face-italic-p)
+	     (setf (htmlize-face-italicp object) (face-italic-p face)))
+	   (setf (htmlize-face-underlinep object)
+		 (face-underline-p object))))
+    ;; Generate the css-name property.  Emacs places no restrictions
+    ;; on the names of symbols that represent faces -- any characters
+    ;; may be in the name, even ^@.  We try hard to beat the face name
+    ;; into shape, both esthetically and according to CSS1 specs.
+    (setf (htmlize-face-css-name object)
+	  (let ((name (downcase (symbol-name face))))
+	    (when (string-match "\\`font-lock-" name)
+	      ;; Change font-lock-FOO-face to FOO.
+	      (setq name (replace-match "" t t name)))
+	    (when (string-match "-face\\'" name)
+	      ;; Drop the redundant "-face" suffix.
+	      (setq name (replace-match "" t t name)))
+	    (while (string-match "[^-a-zA-Z0-9]" name)
+	      ;; Drop the non-alphanumerics.
+	      (setq name (replace-match "X" t t name)))
+	    (when (string-match "^[-0-9]" name)
+	      ;; CSS identifiers may not start with a digit.
+	      (setq name (concat "X" name)))
+	    ;; After these transformations, the face could come
+	    ;; out empty.
+	    (when (equal name "")
+	      (setq name "face"))
+	    ;; Apply the prefix.
+	    (setq name (concat htmlize-css-name-prefix name))
+	    name))
+    object))
+
+(defun htmlize-face-emacs21-attr (hface attr value)
+  (case attr
+    (:foreground
+     (setf (htmlize-face-rgb-foreground hface)
+	   (htmlize-color-to-rgb-string value)))
+    (:background
+     (setf (htmlize-face-rgb-background hface)
+	   (htmlize-color-to-rgb-string value)))
+    (:weight
+     (when (string-match (symbol-name value) "bold")
+       (setf (htmlize-face-boldp hface) t)))
+    (:slant
+     (setf (htmlize-face-italicp hface)
+	   (or (eq value 'italic) (eq value 'oblique))))
+    (:bold
+     (setf (htmlize-face-boldp hface) value))
+    (:italic
+     (setf (htmlize-face-italicp hface) value))
+    (:underline
+     (setf (htmlize-face-underlinep hface) value))
+    (:overline
+     (setf (htmlize-face-overlinep hface) value))
+    (:strike-through
+     (setf (htmlize-face-strikep hface) value))))
+
 (defun htmlize-make-face-hash (faces)
   ;; Return a hash table mapping faces (typically face symbols, but
   ;; under XEmacs possibly also objects returned by find-face) to the
-  ;; associated `htmlize-face' objects.
-
-  ;; Keys are faces, not strings, so `eq' suffices as test condition.
+  ;; associated `htmlize-face' objects.  Keys are faces, not strings,
+  ;; so `eq' suffices as test condition.
   (let ((face-hash (make-hash-table :test 'eq))
-	face-fancy-names b-font i-font bi-font use-bi use-i)
-    (when htmlize-running-xemacs
-      (setq b-font (face-font-name 'bold)
-	    i-font (face-font-name 'italic)
-	    bi-font (face-font-name 'bold-italic)
-	    use-bi (not (or (equal b-font bi-font) (equal i-font bi-font)))
-	    use-i (not (equal b-font i-font))))
+	face-css-names)
     (dolist (face faces)
       (unless (gethash face face-hash)
-	(let ((object (make-htmlize-face
-		       :rgb-foreground (htmlize-face-rgb-string face)
-		       :rgb-background (htmlize-face-rgb-string face t)
-		       :underlinep (face-underline-p face))))
-	  ;; Portability junk -- there is no good way of detecting
-	  ;; whether a face is bold or italic under XEmacs, so I need
-	  ;; to resort to disgusting hacks.  Please close your eyes
-	  ;; lest you vomit or spontaneously combust.
-	  (if htmlize-running-xemacs
-	      (let* ((font (face-font-name face)))
-		;; Boldness, XEmacs
-		(setf (htmlize-face-boldp object)
-		      (or (equal font (face-font-name 'bold))
-			  (and use-bi
-			       (equal font (face-font-name 'bold-italic)))))
-		;; Italic-ness, XEmacs
-		(setf (htmlize-face-italicp object)
-		      (and use-i
-			   (or (equal font (face-font-name 'italic))
-			       (and use-bi
-				    (equal font
-					   (face-font-name 'bold-italic))))))
-		;; OK, you may open them again.
-		;; Strikethrough, XEmacs
-		(setf (htmlize-face-strikep object) (face-strikethru-p face)))
-	    (when (fboundp 'face-bold-p)
-	      ;; Boldness, GNU Emacs 20
-	      (setf (htmlize-face-boldp object) (face-bold-p face)))
-	    (when (fboundp 'face-italic-p)
-	      ;; Italic-ness, GNU Emacs 19
-	      (setf (htmlize-face-italicp object) (face-italic-p face)))
-	    ;; Strikethrough is not supported by GNU Emacs.
-	    (setf (htmlize-face-strikep object) nil))
-
-	  ;; css-name.  Emacs is lenient about face names -- virtually
-	  ;; any string may name a face, even those consisting of
-	  ;; characters such as ^@.  We try hard to beat the face name
-	  ;; into shape, both esthetically and according to CSS1
-	  ;; specs.
-	  (setf (htmlize-face-css-name object)
-		(let ((name (downcase (symbol-name face))))
-		  (when (string-match "\\`font-lock-" name)
-		    (setq name (replace-match "" t t name)))
-		  (when (string-match "-face\\'" name)
-		    (setq name (replace-match "" t t name)))
-		  (while (string-match "[^-a-zA-Z0-9]" name)
-		    (setq name (replace-match "X" t t name)))
-		  (when (string-match "^[-0-9]" name)
-		    (setq name (concat "X" name)))
-		  ;; After these transformations, the face could come
-		  ;; out empty.
-		  (when (equal name "")
-		    (setq name "face"))
-		  ;; Apply the prefix.
-		  (setq name (concat htmlize-css-name-prefix name))
-		  (let ((i 1))
-		    (while (member name face-fancy-names)
-		      (setq name (format "%s-%d" name i))
-		      (incf i)))
-		  (push name face-fancy-names)
-		  name))
-	  ;; Store it in the hash table.
-	  (setf (gethash face face-hash) object))))
+	;; Convert FACE to our format.
+	(let ((face-obj (htmlize-emacs-face-to-htmlize-face face)))
+	  (setf (gethash face face-hash) face-obj)
+	  (let* ((css-name (htmlize-face-css-name face-obj))
+		 (new-name css-name)
+		 (i 0))
+	    ;; Uniquify the face's css-name by using FACE-1, FACE-2,
+	    ;; etc.
+	    (while (member new-name face-css-names)
+	      (setq new-name (format "%s-%s" css-name (incf i))))
+	    (unless (equal new-name css-name)
+	      (setf (htmlize-face-css-name face-obj) new-name))
+	    (push new-name face-css-names)))))
     face-hash))
 
 (defun htmlize-faces-in-buffer ()
     ;; Testing for (fboundp 'map-extents) doesn't work because W3
     ;; defines `map-extents' under FSF.
     (if (string-match "XEmacs" emacs-version)
-	(map-extents (lambda (extent ignored)
-		       (let ((face (extent-face extent)))
-			 ;; FACE can be a face or a list of faces.
-			 ;; Handle both cases.
-			 (if (listp face)
-			     (dolist (face face)
-			       (when face
-				 (pushnew face faces)))
-			   (pushnew face faces)))
-		       nil)
-		     nil nil nil nil nil 'face)
+	(let (face)
+	  (map-extents (lambda (extent ignored)
+			 (setq face (extent-face extent)
+			       ;; Note: FACE can be a face or a list of faces.
+			       faces (if (listp face)
+					 (union face faces)
+				       (adjoin face faces)))
+			 nil)
+		       nil nil nil nil nil 'face))
       ;; FSF Emacs code.
       (save-excursion
 	(goto-char (point-min))
 	(let (face next)
 	  (while (not (eobp))
-	    (setq face (get-char-property (point) 'face)
-		  next (or (htmlize-next-change (point) 'face)
+	    (setq face (get-text-property (point) 'face)
+		  next (or (next-single-property-change (point) 'face)
 			   (point-max)))
-	    ;; FACE can be a face or a list of faces.  Handle both
-	    ;; cases.
-	    (if (listp face)
-		(dolist (face face)
-		  (and face
-		       (facep face)
-		       (pushnew face faces)))
-	      (pushnew face faces))
-	    (goto-char next)))
-	(setq faces (delq nil faces))))
+	    ;; FACE can be a face or a list of faces.
+	    (setq faces (if (listp face)
+			    (union face faces)
+			  (adjoin face faces)))
+	    (goto-char next))
+	  ;; Add faces used by buffer overlays.
+	  (dolist (overlay (overlays-in (point-min) (point-max)))
+	    (setq face (overlay-get overlay 'face))
+	    ;; FACE can be a face or a list of faces.
+	    (setq faces (if (listp face)
+			    (union face faces)
+			  (adjoin face faces)))))
+	(setq faces (delete-if-not #'facep faces))))
     faces))
 
-;; htmlize-faces-at-point returns the faces that are in effect at
-;; point, with the exception of `default'.  The faces are sorted by
-;; increasing priority, i.e. the last face takes precedence.
+;; htmlize-faces-at-point returns the faces in use at point.  The
+;; faces are sorted by increasing priority, i.e. the last face takes
+;; precedence.
 ;;
 ;; Under XEmacs, this returns all the faces in all the extents at
-;; point.
+;; point.  Under GNU Emacs, this returns all the faces in the `face'
+;; property and all the faces in the overlays at point.
 
 (cond (htmlize-running-xemacs
        (defun htmlize-faces-at-point ()
 	 (let (extent list face)
 	   (while (setq extent (extent-at (point) nil 'face extent))
 	     (setq face (extent-face extent))
-	     (push (if (listp face) (reverse face) (list face)) list))
-	   (delq 'default (apply #'nconc list)))))
+	     (setq list (if (listp face)
+			    (nconc (reverse face) list)
+			  (cons face list))))
+	   ;; No need to reverse the list: PUSH has already
+	   ;; constructed it in the reverse display order.
+	   list)))
       (t
        (defun htmlize-faces-at-point ()
-	 (let ((face-list (get-char-property (point) 'face)))
-	   (setq face-list (if (listp face-list)
-			       (copy-list face-list)
-			     (list face-list)))
-	   ;; We don't support the non-face properties, such as
-	   ;; (foreground-color . FOO), yet.  Only leave faces in for
-	   ;; now.
-	   (setq face-list (delete-if-not 'facep face-list))
-	   (nreverse (delq 'default face-list))))))
+	 (let (all-faces)
+	   ;; Faces from text properties.
+	   (let* ((face (get-text-property (point) 'face))
+		  (list (if (listp face) (copy-list face) (list face))))
+	     (setq all-faces (nconc all-faces (nreverse list))))
+	   ;; Faces from overlays.
+	   (let ((overlays
+		  ;; Sort overlays by size, so that more specific
+		  ;; overlays set precedence.  The number of overlays
+		  ;; at each one position should be very small, so
+		  ;; this sort shouldn't slow things down.
+		  (sort (overlays-at (point))
+			(lambda (o1 o2)
+			  (< (- (overlay-end o1) (overlay-start o1))
+			     (- (overlay-end o2) (overlay-start o2))))))
+		 list face)
+	     (dolist (overlay overlays)
+	       (setq face (overlay-get overlay 'face))
+	       (setq list (if (listp face)
+			      (nconc (reverse face) list)
+			    (cons face list))))
+	     (setq all-faces (nconc all-faces list)))
+	   ;; We don't support property lists, such as (:foreground
+	   ;; ...).  (Supporting them is hard because they need to be
+	   ;; mapped to face-less classes, and those classes must be
+	   ;; known in advance.)  For now, only leave faces.
+	   (delete-if-not 'facep all-faces)))))
 
 ;;; CSS1 support
 
       (push "font-style: italic;" result))
     (when (htmlize-face-underlinep face)
       (push "text-decoration: underline;" result))
+    (when (htmlize-face-overlinep face)
+      (push "text-decoration: overline;" result))
     (when (htmlize-face-strikep face)
       (push "text-decoration: line-through;" result))
     (nreverse result)))
        (while (string-match "\\*/" cleaned-up-face-name)
 	 (setq cleaned-up-face-name (replace-match "XX" t t
 						   cleaned-up-face-name)))
-       (unless (eq face 'default)
-	 (let ((specs (htmlize-css-specs face-object)))
-	   (insert "      ." (htmlize-face-css-name face-object))
-	   (if (null specs)
-	       (insert " {")
-	     (insert " {\n        /* " cleaned-up-face-name " */\n        "
-		     (mapconcat #'identity specs "\n        ")))
-	   (insert "\n      }\n")))))
+       (let ((specs (htmlize-css-specs face-object)))
+	 (insert "      ." (htmlize-face-css-name face-object))
+	 (if (null specs)
+	     (insert " {")
+	   (insert " {\n        /* " cleaned-up-face-name " */\n        "
+		   (mapconcat #'identity specs "\n        ")))
+	 (insert "\n      }\n"))))
      face-hash)
   (insert htmlize-hyperlink-style
 	  "    -->\n    </style>\n"))
 	    (htmlize-face-rgb-background face-object))))
 
 (defun htmlize-font-insert-text (text faces buffer)
-  ;; Merge the faces.
   (let (bold italic underline strike fg)
+    ;; Merge the faces.
     (dolist (face faces)
-      ;; Any face with a boolean attribute sets the attribute.
+      ;; A non-null boolean attribute in any face sets the attribute.
       (and (htmlize-face-boldp face)      (setq bold t))
       (and (htmlize-face-italicp face)    (setq italic t))
       (and (htmlize-face-underlinep face) (setq underline t))
 	;; ones that belong to text properties.  Likewise for
 	;; `htmlize-next-change'.
 	(setq faces (htmlize-faces-at-point)
-	      next-change (or (htmlize-next-change (point) 'face)
-			      (point-max)))
+	      next-change (htmlize-next-change (point) 'face))
 	;; Convert faces to face objects.
 	(setq face-objects (mapcar (lambda (f) (gethash f face-hash)) faces))
 	;; Extract buffer text, sans the invisible parts.  Then
 	(concat file ".html")
       (concat sans-extension ".html"))))
 
-(defun htmlize-make-absolute-file-name (file dir)
-  "Create an absolute HTML file name with the desired directory.
-That means, run FILE through `htmlize-make-file-name', and
-expand it to either DIR or, if DIR is nil, to its own
-directory name."
-  (expand-file-name (htmlize-make-file-name (file-name-nondirectory file))
-		    (or dir (file-name-directory file))))
+;;;###autoload
+(defun htmlize-file (file &optional target)
+  "Find FILE, fontify it convert it to HTML, and save the result.
 
-;;;###autoload
-(defun htmlize-file (file &optional target-directory)
-  "HTML-ize FILE, and save the result to an `.html' file.
-The file name of the HTML file is determined with `html-make-file-name'.
-If TARGET-DIRECTORY is non-nil, the resulting HTML file will be saved
-to that directory, instead of to FILE's directory."
+This function does not modify current buffer or point.  If FILE is
+already being visited in a buffer, the contents of that buffer are
+used for HTML-ization.  Otherwise, FILE is read into a temporary
+buffer, which is disposed of after use.  FILE's buffer is explicitly
+fontified before HTML-ization.  If a form of highlighting other than
+font-lock is desired, please use `htmlize-buffer' directly.
+
+The function `htmlize-make-file-name', is used to determine the name
+of the resulting HTML file.  In normal cases, the FILE's extension is
+replaced with `html', e.g. \"foo.c\" becomes \"foo.html\".  See the
+documentation of `htmlize-make-file-name' for more details.
+
+If TARGET is specified and names a directory, the resulting file will
+be saved there instead of to FILE's directory.  If TARGET is specified
+and does not name a directory, it will be used as output file name."
   (interactive (list (read-file-name
 		      "HTML-ize file: "
 		      nil nil nil (and (buffer-file-name)
 				       (file-name-nondirectory
 					(buffer-file-name))))))
   (let* ((was-visited (get-file-buffer file))
-	 ;; Set these to nil to prevent double fontification; we'll
-	 ;; fontify manually below.
+	 ;; Prevent `find-file-noselect' from triggering font-lock.
+	 ;; We'll fontify manually below.  Set these to nil to prevent
+	 ;; double fontification; we'll fontify manually below.
 	 (font-lock-auto-fontify nil)
-	 (global-font-lock-mode nil))
+	 (global-font-lock-mode nil)
+	 ;; Determine the output file name.
+	 (output-file (if (and target (not (file-directory-p target)))
+			  target
+			(expand-file-name
+			 (htmlize-make-file-name (file-name-nondirectory file))
+			 (or target (file-name-directory file))))))
     ;; Find FILE, fontify it, HTML-ize it, and write it to FILE.html.
+    ;; The `unwind-protect' forms are used to make certain the
+    ;; temporary buffers go away in case of unexpected errors or C-g.
     (with-current-buffer (find-file-noselect file t)
-      (font-lock-fontify-buffer)
-      (with-current-buffer (htmlize-buffer-1)
-	(run-hooks 'htmlize-file-hook)
-	(write-region (point-min) (point-max)
-		      (htmlize-make-absolute-file-name file target-directory))
-	(kill-buffer (current-buffer)))
-      ;; If FILE was not previously visited, its buffer is temporary
-      ;; and can be killed.
-      (unless was-visited
-	(kill-buffer (current-buffer))))))
+      (unwind-protect
+	  (progn
+	    (font-lock-fontify-buffer)
+	    (with-current-buffer (htmlize-buffer-1)
+	      (unwind-protect
+		  (progn
+		    (run-hooks 'htmlize-file-hook)
+		    (write-region (point-min) (point-max) output-file))
+		(kill-buffer (current-buffer)))))
+	;; If FILE was not previously visited, its buffer is temporary
+	;; and must be killed.
+	(unless was-visited
+	  (kill-buffer (current-buffer)))))))
 
 ;;;###autoload
 (defun htmlize-many-files (files &optional target-directory)
     (let (list file)
       ;; Use empty string as DEFAULT because setting DEFAULT to nil
       ;; defaults to the directory name, which is not what we want.
-      (while (not (equal (setq file (read-file-name "HTML-ize file (RET to finish): "
-						    (and list (file-name-directory
-							       (car list)))
-						    "" t))
+      (while (not (equal (setq file (read-file-name
+				     "HTML-ize file (RET to finish): "
+				     (and list (file-name-directory
+						(car list)))
+				     "" t))
 			 ""))
 	(push file list))
       (nreverse list))))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.