Commits

Anonymous committed 7146fa9

[svn] Improve Unicode handling without encoding.

Comments (0)

Files changed (15)

 
 Version 0.6
 -----------
-(released Dec XX, 2006)
+(codename Zimtstern, released Dec XX, 2006)
 
 - Added option for the HTML formatter to write the CSS to an external file
   in "full document" mode.
 - Support for guessing input encoding added.
 
 - Encoding support added: all processing is now done with Unicode
-  strings, input and output are converted from and to byte strings
-  (see the ``encoding`` option of lexers and formatters).
+  strings, input and output are converted from and optionally to
+  byte strings (see the ``encoding`` option of lexers and formatters).
 
 - Some improvements in the C(++) lexers handling comments and line
   continuations.
 
 Version 0.5
 -----------
-(released Oct 30, 2006)
+(codename PyKleur, released Oct 30, 2006)
 
 - Initial public release.

docs/src/formatters.txt

 All formatters support this option:
 
 `encoding`
+    *New in Pygments 0.6.*
+
     If given, must be an encoding name (such as ``"utf-8"``). This will
     be used to convert the token strings (which are Unicode strings)
-    to byte strings in the output (default: ``"latin1"``).
+    to byte strings in the output (default: ``None``).
     It will also be written in an encoding declaration suitable for the
     document format if the `full` option is given (e.g. a ``meta
     content-type`` directive in HTML or an invocation of the `inputenc`
     package in LaTeX).
 
+    If this is ``""`` or ``None``, Unicode strings will be written
+    to the output file, which most file-like objects do not support.
+
 The `HtmlFormatter` and `LatexFormatter` classes support these options:
 
 `style`
     If given and greater than 0, expand tabs in the input (default: ``0``).
 
 `encoding`
+    *New in Pygments 0.6.*
+
     If given, must be an encoding name (such as ``"utf-8"``). This encoding
     will be used to convert the input string to Unicode (if it is not already
     a Unicode string). The default is ``"latin1"``.
 
 
 import sys, os
-from cStringIO import StringIO
+from StringIO import StringIO
+from cStringIO import StringIO as CStringIO
 
 
 def lex(code, lexer):
     with a ``write`` method), the result will be written to it, otherwise
     it is returned as a string.
     """
-    realoutfile = outfile or StringIO()
-    formatter.format(tokens, realoutfile)
     if not outfile:
+        # if we want Unicode output, we have to use Python StringIO
+        realoutfile = formatter.encoding and CStringIO() or StringIO()
+        formatter.format(tokens, realoutfile)
         return realoutfile.getvalue()
+    else:
+        formatter.format(tokens, outfile)
 
 
 def highlight(code, lexer, formatter, outfile=None):

pygments/formatter.py

     ``encoding``
         If given, must be an encoding name. This will be used to
         convert the Unicode token strings to byte strings in the
-        output (default: 'latin1').
+        output. If it is "" or None, Unicode strings will be written
+        to the output file, which most file-like objects do not
+        support (default: None).
     """
 
+    #: If True, this formatter outputs Unicode strings when no encoding
+    #: option is given.
+    unicodeoutput = True
+
     def __init__(self, **options):
         self.style = _lookup_style(options.get('style', 'default'))
         self.full  = get_bool_opt(options, 'full', False)
         self.title = options.get('title', '')
-        self.encoding = options.get('encoding', 'latin1')
+        self.encoding = options.get('encoding', None) or None
         self.options = options
 
     def get_style_defs(self, arg=''):

pygments/formatters/bbcode.py

         if self._mono:
             outfile.write('[font=monospace]')
 
+        enc = self.encoding
         lastval = ''
         lasttype = None
 
         for ttype, value in tokensource:
-            value = value.encode(self.encoding)
+            if enc:
+                value = value.encode(enc)
             while ttype not in self.styles:
                 ttype = ttype.parent
             if ttype == lasttype:

pygments/formatters/html.py

     def _format_nowrap(self, tokensource, outfile, lnos=False):
         lncount = 0
         nocls = self.noclasses
+        enc = self.encoding
         # for <span style=""> lookup only
         getcls = self.ttype2class.get
         c2s = self.class2style
         write = outfile.write
         lspan = ''
         for ttype, value in tokensource:
-            htmlvalue = escape_html(value.encode(self.encoding))
+            if enc:
+                value = value.encode(enc)
+            htmlvalue = escape_html(value)
             if lnos:
                 lncount += value.count("\n")
 

pygments/formatters/latex.py

 
     def format(self, tokensource, outfile):
         # TODO: add support for background colors
+        enc = self.encoding
 
         if self.full:
             realoutfile = outfile
         outfile.write(']\n')
 
         for ttype, value in tokensource:
-            value = escape_tex(value.encode(self.encoding))
+            if enc:
+                value = value.encode(enc)
+            value = escape_tex(value)
             cmd = self.ttype2cmd.get(ttype)
             while cmd is None:
                 ttype = ttype.parent

pygments/formatters/other.py

     Output the text unchanged without any formatting.
     """
     def format(self, tokensource, outfile):
+        enc = self.encoding
         for ttype, value in tokensource:
-            outfile.write(value.encode(self.encoding))
+            if enc:
+                outfile.write(value.encode(enc))
+            else:
+                outfile.write(value)
 
 
 class RawTokenFormatter(Formatter):
         the given compression algorithm (default: '').
     """
 
+    unicodeoutput = False
+
     def __init__(self, **options):
         Formatter.__init__(self, **options)
         self.compress = options.get('compress', '')

pygments/formatters/rtf.py

 class RtfFormatter(Formatter):
     """Output RTF (Rich Text Format)."""
 
+    unicodeoutput = False
+
     def __init__(self, **options):
         """
         Additional options accepted:
         return text.replace('\n', '\\par\n')
 
     def format(self, tokensource, outfile):
+        if not self.encoding:
+            outfile.write(u'')
+
         outfile.write(r'{\rtf1\ansi\deff0'
                       r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}{\colortbl;' %
                       (self.fontface and ' ' + self._escape(self.fontface) or ''))

pygments/formatters/terminal.py

         self.colorscheme = options.get('colorscheme', None) or TERMINAL_COLORS
 
     def format(self, tokensource, outfile):
+        enc = self.encoding
         for ttype, value in tokensource:
-            value = value.encode(self.encoding)
+            if enc:
+                value = value.encode(enc)
             color = self.colorscheme.get(ttype)
             while color is None:
                 ttype = ttype[:-1]

pygments/lexers/compiled.py

             (r'(\d+\.\d*|\.\d+)', Number.Float),
             (r'\d+', Number.Integer),
             (r'[~!%^&*+=|?:<>/-]', Operator),
-            (r'[()\[\],.]', Punctuation),
+            (r'[()\[\],.;]', Punctuation),
             (r'(asm|auto|break|case|catch|const|const_cast|continue|'
              r'default|delete|do|dynamic_cast|else|enum|explicit|export|'
              r'extern|for|friend|goto|if|mutable|namespace|new|operator|'

scripts/find_error.py

     :license: BSD, see LICENSE for more details.
 """
 
-import sys
+import sys, os
+
+try:
+    import pygments
+except ImportError:
+    # try parent path
+    sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 
 from pygments import highlight
 from pygments.lexers import get_lexer_for_filename, get_lexer_by_name

tests/examplefiles/apacheconf_httpd.conf

-#
-#  This is a modification of the default Apache 2 configuration
-#  file by Gentoo Linux.  .... [insert more]
-#  
-#  Support:
-#     http://www.gentoo.org/main/en/lists.xml    [mailing lists]
-#     http://forums.gentoo.org/                  [web forums]
-#
-#  Bug Reports:
-#     http://bugs.gentoo.org/      [gentoo related bugs]
-#     http://bugs.apache.org/      [apache httpd related bugs]
-
-#
-#
-#
-# Based upon the NCSA server configuration files originally by Rob McCool.
-#
-# This is the main Apache server configuration file.  It contains the
-# configuration directives that give the server its instructions.
-# See <URL:http://httpd.apache.org/docs/2.0/> for detailed information about
-# the directives.
-#
-# Do NOT simply read the instructions in here without understanding
-# what they do.  They're here only as hints or reminders.  If you are unsure
-# consult the online docs. You have been warned.  
-#
-# The configuration directives are grouped into three basic sections:
-#  1. Directives that control the operation of the Apache server process as a
-#     whole (the 'global environment').
-#  2. Directives that define the parameters of the 'main' or 'default' server,
-#     which responds to requests that aren't handled by a virtual host.
-#     These directives also provide default values for the settings
-#     of all virtual hosts.
-#  3. Settings for virtual hosts, which allow Web requests to be sent to
-#     different IP addresses or hostnames and have them handled by the
-#     same Apache server process.
-#
-# Configuration and logfile names: If the filenames you specify for many
-# of the server's control files begin with "/" (or "drive:/" for Win32), the
-# server will use that explicit path.  If the filenames do *not* begin
-# with "/", the value of ServerRoot is prepended -- so "logs/foo.log"
-# with ServerRoot set to "/usr/lib/apache2" will be interpreted by the
-# server as "/usr/lib/apache2/logs/foo.log".
-#
-
-### Section 1: Global Environment
-#
-# The directives in this section affect the overall operation of Apache,
-# such as the number of concurrent requests it can handle or where it
-# can find its configuration files.
-#
-
-#
-# ServerRoot: The top of the directory tree under which the server's
-# configuration, error, and log files are kept.
-#
-# NOTE!  If you intend to place this on an NFS (or otherwise network)
-# mounted filesystem then please read the LockFile documentation (available
-# at <URL:http://httpd.apache.org/docs/2.0/mod/mpm_common.html#lockfile>);
-# you will save yourself a lot of trouble.
-#
-# Do NOT add a slash at the end of the directory path.
-#
-ServerRoot "/usr/lib/apache2"
-
-#
-# The accept serialization lock file MUST BE STORED ON A LOCAL DISK.
-#
-#LockFile "/var/run/apache2.lock"
-
-#
-# ScoreBoardFile: File used to store internal server process information.
-# If unspecified (the default), the scoreboard will be stored in an
-# anonymous shared memory segment, and will be unavailable to third-party
-# applications.
-# If specified, ensure that no two invocations of Apache share the same
-# scoreboard file. The scoreboard file MUST BE STORED ON A LOCAL DISK.
-#
-<IfModule !perchild.c>
-    #ScoreBoardFile /var/run/apache2_runtime_status
-</IfModule>
-
-
-#
-# PidFile: The file in which the server should record its process
-# identification number when it starts.
-#
-PidFile "/var/run/apache2.pid"
-
-#
-# Timeout: The number of seconds before receives and sends time out.
-#
-Timeout 300
-
-#
-# KeepAlive: Whether or not to allow persistent connections (more than
-# one request per connection). Set to "Off" to deactivate.
-#
-KeepAlive On
-
-#
-# MaxKeepAliveRequests: The maximum number of requests to allow
-# during a persistent connection. Set to 0 to allow an unlimited amount.
-# We recommend you leave this number high, for maximum performance.
-#
-MaxKeepAliveRequests 100
-
-#
-# KeepAliveTimeout: Number of seconds to wait for the next request from the
-# same client on the same connection.
-#
-KeepAliveTimeout 15
-
-##
-## Server-Pool Size Regulation (MPM specific)
-## 
-
-# prefork MPM [DEFAULT IF USE=-threads]
-# StartServers: number of server processes to start
-# MinSpareServers: minimum number of server processes which are kept spare
-# MaxSpareServers: maximum number of server processes which are kept spare
-# MaxClients: maximum number of server processes allowed to start
-# MaxRequestsPerChild: maximum number of requests a server process serves
-<IfModule prefork.c>
-    StartServers         5
-    MinSpareServers      5
-    MaxSpareServers     10
-    MaxClients         150
-    MaxRequestsPerChild  0
-</IfModule>
-
-# worker MPM [DEFAULT IF USE=threads]
-# StartServers: initial number of server processes to start
-# MaxClients: maximum number of simultaneous client connections
-# MinSpareThreads: minimum number of worker threads which are kept spare
-# MaxSpareThreads: maximum number of worker threads which are kept spare
-# ThreadsPerChild: constant number of worker threads in each server process
-# MaxRequestsPerChild: maximum number of requests a server process serves
-<IfModule worker.c>
-    StartServers         2
-    MaxClients         150
-    MinSpareThreads     25
-    MaxSpareThreads     75 
-    ThreadsPerChild     25
-    MaxRequestsPerChild  0
-</IfModule>
-
-# perchild MPM [THIS MPM IS NOT SUPPORTED]
-# NumServers: constant number of server processes
-# StartThreads: initial number of worker threads in each server process
-# MinSpareThreads: minimum number of worker threads which are kept spare
-# MaxSpareThreads: maximum number of worker threads which are kept spare
-# MaxThreadsPerChild: maximum number of worker threads in each server process
-# MaxRequestsPerChild: maximum number of connections per server process
-<IfModule perchild.c>
-    NumServers           5
-    StartThreads         5
-    MinSpareThreads      5
-    MaxSpareThreads     10
-    MaxThreadsPerChild  20
-    MaxRequestsPerChild  0
-</IfModule>
-
-# peruser MPM [THIS MPM IS NOT SUPPORTED]
-# MinSpareServers - Minimum number of idle children, to handle request spikes 
-# MaxClients - Maximum number of children alive at the same time 
-# MaxProcessors - Maximum number of processors per vhost
-# Multiplexer - Specify a Multiplexer Child configuration.
-# Processor - Specify a User and Group for a specific child process.
-# ServerEnvironment - Specify the server environment for this virtual host.
-<IfModule peruser.c>
-    ServerLimit          256
-    MaxClients           256
-    MinSpareProcessors     2
-    MaxProcessors         10
-    MaxRequestsPerChild 1000
-    
-    # kill off idle processors after this many seconds
-    # set to 0 to disable
-    ExpireTimeout       1800
-    
-    Multiplexer nobody nobody
-    
-    Processor apache apache
-    
-    # chroot dir is optional:
-    # Processor user group /path/to/chroot
-</IfModule>
-
-# itk MPM [THIS MPM IS NOT SUPPORTED]
-# StartServers: number of server processes to start
-# MinSpareServers: minimum number of server processes which are kept spare
-# MaxSpareServers: maximum number of server processes which are kept spare
-# MaxClients: maximum number of server processes allowed to start
-# MaxRequestsPerChild: maximum number of requests a server process serves
-<IfModule itk.c>
-    StartServers           5
-    MinSpareServers        2
-    MaxSpareServers       10
-    MaxClients           150
-    MaxRequestsPerChild 1000
-</IfModule>
-
-#
-# Listen: Allows you to bind Apache to specific IP addresses and/or
-# ports, instead of the default. See also the <VirtualHost>
-# directive.
-#
-# Change this to Listen on specific IP addresses as shown below to 
-# prevent Apache from glomming onto all bound IP addresses (0.0.0.0)
-#
-#Listen 12.34.56.78:80
-Listen 80
-
-#
-# Dynamic Shared Object (DSO) Support
-#
-# To be able to use the functionality of a module which was built as a DSO you
-# have to place corresponding `LoadModule' lines at this location so the
-# directives contained in it are actually available _before_ they are used.
-# Statically compiled modules (those listed by `httpd -l') do not need
-# to be loaded here.
-#
-# The following modules are considered as the default configuration.
-# If you wish to disable one of them, you may have to alter other 
-# configuration directives.
-#
-# You should always leave these three, as they are needed for normal use.
-# mod_access (Order, Allow, etc..)
-# mod_log_config (Transferlog, etc..)
-# mod_mime (AddType, etc...)
-#
-# Example:
-# LoadModule foo_module modules/mod_foo.so
-
-
-# Authentication Modules
-#
-# These modules provide authentication and authorization for
-# clients. They should not normally be disabled.
-#
-LoadModule access_module                 modules/mod_access.so
-LoadModule auth_module                   modules/mod_auth.so
-LoadModule auth_anon_module              modules/mod_auth_anon.so
-LoadModule auth_dbm_module               modules/mod_auth_dbm.so
-LoadModule auth_digest_module            modules/mod_auth_digest.so
-
-#
-# Metadata Modules
-# 
-# These modules provide extra data to clients about
-# a file, such as the mime-type or charset.
-#
-LoadModule charset_lite_module           modules/mod_charset_lite.so
-LoadModule env_module                    modules/mod_env.so
-LoadModule expires_module                modules/mod_expires.so
-LoadModule headers_module                modules/mod_headers.so
-LoadModule mime_module                   modules/mod_mime.so
-LoadModule negotiation_module            modules/mod_negotiation.so
-LoadModule setenvif_module               modules/mod_setenvif.so
-
-#
-# Logging Modules
-# 
-# These modules provide logging services for Apache
-#
-LoadModule log_config_module             modules/mod_log_config.so
-LoadModule logio_module                  modules/mod_logio.so
-
-
-#
-# CGI Modules
-#
-# These modules provide the ability to execute CGI Scripts.
-#
-LoadModule cgi_module                    modules/mod_cgi.so
-LoadModule cgid_module                   modules/mod_cgid.so
-
-
-#
-# This `suexec` module provides the ability to execute CGI scripts under
-# a different user than the one Apache runs as.
-#
-LoadModule suexec_module                 modules/mod_suexec.so
-
-
-#
-# Mappers
-#
-# These Modules provide URL mappings or translations.
-LoadModule alias_module                  modules/mod_alias.so
-LoadModule rewrite_module                modules/mod_rewrite.so
-<IfDefine USERDIR>
-    LoadModule userdir_module            modules/mod_userdir.so
-</IfDefine>
-
-
-#
-# Handlers
-#
-# These modules create content for a client.
-#
-<IfDefine INFO>
-    LoadModule info_module               modules/mod_info.so
-    LoadModule status_module             modules/mod_status.so
-</IfDefine>
-LoadModule actions_module                modules/mod_actions.so
-LoadModule autoindex_module              modules/mod_autoindex.so
-LoadModule dir_module                    modules/mod_dir.so
-
-#
-# Filters
-#
-# These modules provide filters for Apache.
-# They perform common tasks like gzip encoding or SSI
-#
-#
-LoadModule ext_filter_module             modules/mod_ext_filter.so
-LoadModule deflate_module                modules/mod_deflate.so
-LoadModule include_module                modules/mod_include.so
-
-
-#
-# Cache Modules
-#
-# The following modules are used for storing a cache of
-# generated or proxied content.
-#
-#LoadModule cache_module                  modules/mod_cache.so
-#LoadModule disk_cache_module             modules/mod_disk_cache.so
-#LoadModule mem_cache_module              modules/mod_mem_cache.so
-#LoadModule file_cache_module             modules/mod_file_cache.so
-
-#
-# Proxy Modules
-# 
-# The following modules are only needed if you are running
-# Apache as a Forward or Reverse Proxy.
-# 
-# WARNING: Enabling these modules can be dangerous! 
-#   READ THE DOCUMENTATION FIRST:
-#   http://httpd.apache.org/docs/2.0/mod/mod_proxy.html
-<IfDefine PROXY>
-    LoadModule proxy_module                  modules/mod_proxy.so
-    LoadModule proxy_connect_module          modules/mod_proxy_connect.so
-    LoadModule proxy_ftp_module              modules/mod_proxy_ftp.so
-    LoadModule proxy_http_module             modules/mod_proxy_http.so
-</IfDefine>
-
-#
-# Uncommon Modules
-#
-# The following Modules are not commonly loaded for Apache
-#
-#LoadModule case_filter_module            modules/mod_case_filter.so
-#LoadModule case_filter_in_module         modules/mod_case_filter_in.so
-#LoadModule echo_module                   modules/mod_echo.so
-#LoadModule mime_magic_module             modules/mod_mime_magic.so
-#LoadModule speling_module                modules/mod_speling.so
-#LoadModule unique_id_module              modules/mod_unique_id.so
-#LoadModule vhost_alias_module            modules/mod_vhost_alias.so
-
-#
-# Obsolete Modules
-# 
-# The following modules are not commonly needed and use
-# obsolete technologies.
-#
-#LoadModule cern_meta_module              modules/mod_cern_meta.so
-#LoadModule imap_module                   modules/mod_imap.so
-#LoadModule usertrack_module              modules/mod_usertrack.so
-#LoadModule asis_module                   modules/mod_asis.so
-
-
-#
-# Extra Modules
-#
-# We Include extra .conf files from /etc/apache2/modules.d
-# This is used to load things like PHP and mod_ssl.
-#
-Include /etc/apache2/modules.d/*.conf
-
-### Section 2: 'Main' server configuration
-#
-# The directives in this section set up the values used by the 'main'
-# server, which responds to any requests that aren't handled by a
-# <VirtualHost> definition.  These values also provide defaults for
-# any <VirtualHost> containers you may define later in the file.
-#
-# All of these directives may appear inside <VirtualHost> containers,
-# in which case these default settings will be overridden for the
-# virtual host being defined.
-#
-
-#
-# If you wish httpd to run as a different user or group, you must run
-# httpd as root initially and it will switch.  
-#
-# User/Group: The name (or #number) of the user/group to run httpd as.
-#  . On SCO (ODT 3) use "User nouser" and "Group nogroup".
-#  . On HPUX you may not be able to use shared memory as nobody, and the
-#    suggested workaround is to create a user www and use that user.
-#  NOTE that some kernels refuse to setgid(Group) or semctl(IPC_SET)
-#  when the value of (unsigned)Group is above 60000; 
-#  don't use Group #-1 on these systems!
-#
-User apache
-Group apache
-
-#
-# ServerAdmin: Your address, where problems with the server should be
-# e-mailed.  This address appears on some server-generated pages, such
-# as error documents.  e.g. admin@your-domain.com
-#
-ServerAdmin root@localhost
-
-#
-# ServerName gives the name and port that the server uses to identify itself.
-# This can often be determined automatically, but we recommend you specify
-# it explicitly to prevent problems during startup.
-#
-# If this is not set to a valid DNS name for your host, server-generated
-# redirections will not work.  See also the UseCanonicalName directive.
-#
-# If your host doesn't have a registered DNS name, enter its IP address here.
-# You will have to access it by its address anyway, and this will make 
-# redirections work in a sensible way.
-#
-#ServerName localhost
-
-#
-# UseCanonicalName: Determines how Apache constructs self-referencing 
-# URLs and the SERVER_NAME and SERVER_PORT variables.
-# When set "Off", Apache will use the Hostname and Port supplied
-# by the client.  When set "On", Apache will use the value of the
-# ServerName directive.
-#
-UseCanonicalName Off
-
-
-#
-# Each directory to which Apache has access can be configured with respect
-# to which services and features are allowed and/or disabled in that
-# directory (and its subdirectories). 
-#
-# First, we configure the "default" to be a very restrictive set of 
-# features.  
-#
-<Directory />
-    Options FollowSymLinks
-    AllowOverride None
-</Directory>
-
-#
-# Note that from this point forward you must specifically allow
-# particular features to be enabled - so if something's not working as
-# you might expect, make sure that you have specifically enabled it
-# below.
-#
-
-#
-# UserDir: The name of the directory that is appended onto a user's home
-# directory if a ~user request is received.
-# enable by adding -D USERDIR to /etc/conf.d/apache2
-#
-<IfModule mod_userdir.c>
-    UserDir public_html
-
-#
-# Control access to UserDir directories.  The following is an example
-# for a site where these directories are restricted to read-only.
-#
-    <Directory /home/*/public_html>
-        AllowOverride FileInfo AuthConfig Limit Indexes
-        Options MultiViews Indexes SymLinksIfOwnerMatch IncludesNoExec
-        <Limit GET POST OPTIONS PROPFIND>
-            Order allow,deny
-            Allow from all
-       </Limit>
-       <LimitExcept GET POST OPTIONS PROPFIND>
-            Order deny,allow
-            Deny from all
-       </LimitExcept>
-    </Directory>
-
-
-# Enable this additional section if you would like to make use of a
-# suexec-enabled cgi-bin directory on a per-user basis.
-#
-#<Directory /home/*/public_html/cgi-bin>
-#    Options ExecCGI
-#    SetHandler cgi-script
-#</Directory>
-
-</IfModule>
-
-
-#
-# DirectoryIndex: sets the file that Apache will serve if a directory
-# is requested.
-#
-# The index.html.var file (a type-map) is used to deliver content-
-# negotiated documents.  The MultiViews Option can be used for the 
-# same purpose, but it is much slower.
-#
-DirectoryIndex index.html index.html.var
-
-#
-# AccessFileName: The name of the file to look for in each directory
-# for additional configuration directives.  See also the AllowOverride 
-# directive.
-#
-AccessFileName .htaccess
-
-#
-# The following lines prevent .htaccess and .htpasswd files from being 
-# viewed by Web clients. 
-#
-<FilesMatch "^\.ht">
-    Order allow,deny
-    Deny from all
-</FilesMatch>
-
-#
-# TypesConfig describes where the mime.types file (or equivalent) is
-# to be found.
-#
-TypesConfig /etc/mime.types
-
-#
-# DefaultType is the default MIME type the server will use for a document
-# if it cannot otherwise determine one, such as from filename extensions.
-# If your server contains mostly text or HTML documents, "text/plain" is
-# a good value.  If most of your content is binary, such as applications
-# or images, you may want to use "application/octet-stream" instead to
-# keep browsers from trying to display binary files as though they are
-# text.
-#
-DefaultType text/plain
-
-#
-# The mod_mime_magic module allows the server to use various hints from the
-# contents of the file itself to determine its type.  The MIMEMagicFile
-# directive tells the module where the hint definitions are located.
-#
-<IfModule mod_mime_magic.c>
-    MIMEMagicFile /etc/apache2/magic
-</IfModule>
-
-#
-# HostnameLookups: Log the names of clients or just their IP addresses
-# e.g., www.apache.org (on) or 204.62.129.132 (off).
-# The default is off because it'd be overall better for the net if people
-# had to knowingly turn this feature on, since enabling it means that
-# each client request will result in AT LEAST one lookup request to the
-# nameserver.
-#
-HostnameLookups Off
-
-#
-# EnableMMAP: Control whether memory-mapping is used to deliver
-# files (assuming that the underlying OS supports it).
-# The default is on; turn this off if you serve from NFS-mounted 
-# filesystems.  On some systems, turning it off (regardless of
-# filesystem) can improve performance; for details, please see
-# http://httpd.apache.org/docs/2.0/mod/core.html#enablemmap
-#
-#EnableMMAP off
-
-#
-# EnableSendfile: Control whether the sendfile kernel support is 
-# used  to deliver files (assuming that the OS supports it).
-# The default is on; turn this off if you serve from NFS-mounted 
-# filesystems.  Please see
-# http://httpd.apache.org/docs/2.0/mod/core.html#enablesendfile
-#
-#EnableSendfile off
-
-#
-# ErrorLog: The location of the error log file.
-# If you do not specify an ErrorLog directive within a <VirtualHost>
-# container, error messages relating to that virtual host will be
-# logged here.  If you *do* define an error logfile for a <VirtualHost>
-# container, that host's errors will be logged there and not here.
-#
-ErrorLog logs/error_log
-
-#
-# LogLevel: Control the number of messages logged to the error_log.
-# Possible values include: debug, info, notice, warn, error, crit,
-# alert, emerg.
-#
-LogLevel warn
-
-#
-# The following directives define some format nicknames for use with
-# a CustomLog directive (see below).
-#
-LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
-LogFormat "%h %l %u %t \"%r\" %>s %b" common
-LogFormat "%{Referer}i -> %U" referer
-LogFormat "%{User-agent}i" agent
-LogFormat "%v %h %l %u %t \"%r\" %>s %b %T" script
-LogFormat "%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" VLOG=%{VLOG}e" vhost
-
-# You need to enable mod_logio.c to use %I and %O
-#LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
-
-#
-# The location and format of the access logfile (Common Logfile Format).
-# If you do not define any access logfiles within a <VirtualHost>
-# container, they will be logged here.  Contrariwise, if you *do*
-# define per-<VirtualHost> access logfiles, transactions will be
-# logged therein and *not* in this file.
-#
-CustomLog logs/access_log common
-
-#
-# If you would like to have agent and referer logfiles, uncomment the
-# following directives.
-#
-#CustomLog logs/referer_log referer
-#CustomLog logs/agent_log agent
-
-#
-# If you prefer a single logfile with access, agent, and referer information
-# (Combined Logfile Format) you can use the following directive.
-#
-#CustomLog logs/access_log combined
-
-#
-# ServerTokens
-# This directive configures what you return as the Server HTTP response
-# Header. The default is 'Full' which sends information about the OS-Type
-# and compiled in modules.
-# Set to one of:  Full | OS | Minor | Minimal | Major | Prod
-# where Full conveys the most information, and Prod the least.
-#
-ServerTokens Prod
-
-#
-# Optionally add a line containing the server version and virtual host
-# name to server-generated pages (internal error documents, FTP directory 
-# listings, mod_status and mod_info output etc., but not CGI generated 
-# documents or custom error documents).
-# Set to "EMail" to also include a mailto: link to the ServerAdmin.
-# Set to one of:  On | Off | EMail
-#
-ServerSignature On
-
-#
-# Aliases: Add here as many aliases as you need (with no limit). The format is 
-# Alias fakename realname
-#
-# Note that if you include a trailing / on fakename then the server will
-# require it to be present in the URL.  So "/icons" isn't aliased in this
-# example, only "/icons/".  If the fakename is slash-terminated, then the 
-# realname must also be slash terminated, and if the fakename omits the 
-# trailing slash, the realname must also omit it.
-#
-# We include the /icons/ alias for FancyIndexed directory listings.  If you
-# do not use FancyIndexing, you may comment this out.
-#
-Alias /icons/ "/var/www/localhost/icons/"
-
-<Directory "/var/www/localhost/icons/">
-    Options Indexes MultiViews
-    AllowOverride None
-    Order allow,deny
-    Allow from all
-</Directory>
-
-#
-# ScriptAlias: This controls which directories contain server scripts.
-# ScriptAliases are essentially the same as Aliases, except that
-# documents in the realname directory are treated as applications and
-# run by the server when requested rather than as documents sent to the client.
-# The same rules about trailing "/" apply to ScriptAlias directives as to
-# Alias.
-#
-ScriptAlias /cgi-bin/ /var/www/localhost/cgi-bin/
-
-<IfModule mod_cgid.c>
-    #
-    # In addition to the mod_cgid.c settings, mod_cgid has Scriptsock <path>
-    # for setting the UNIX socket used for communicating with cgid.
-    #
-    #Scriptsock            /var/run/cgisock
-</IfModule>
-
-#
-# "/var/www/localhost/cgi-bin/" should be changed to wherever your ScriptAliased
-# CGI directory exists, if you have that configured.
-#
-<Directory "/var/www/localhost/cgi-bin/">
-    AllowOverride None
-    Options None
-    Order allow,deny
-    Allow from all
-</Directory>
-
-#
-# Redirect allows you to tell clients about documents which used to exist in
-# your server's namespace, but do not anymore. This allows you to tell the
-# clients where to look for the relocated document.
-# Example:
-# Redirect permanent /foo http://www.example.com/bar
-
-#
-# Directives controlling the display of server-generated directory listings.
-#
-<IfModule mod_autoindex.c>
-    #
-    # IndexOptions: Controls the appearance of server-generated directory
-    # listings.
-    #
-    IndexOptions FancyIndexing VersionSort
-
-    #
-    # AddIcon* directives tell the server which icon to show for different
-    # files or filename extensions.  These are only displayed for
-    # FancyIndexed directories.
-    #
-    AddIconByEncoding (CMP,/icons/compressed.gif) x-compress x-gzip
-
-    AddIconByType (TXT,/icons/text.gif) text/*
-    AddIconByType (IMG,/icons/image2.gif) image/*
-    AddIconByType (SND,/icons/sound2.gif) audio/*
-    AddIconByType (VID,/icons/movie.gif) video/*
-
-    AddIcon /icons/binary.gif .bin .exe
-    AddIcon /icons/binhex.gif .hqx
-    AddIcon /icons/tar.gif .tar
-    AddIcon /icons/world2.gif .wrl .wrl.gz .vrml .vrm .iv
-    AddIcon /icons/compressed.gif .Z .z .tgz .gz .zip
-    AddIcon /icons/a.gif .ps .ai .eps
-    AddIcon /icons/layout.gif .html .shtml .htm .pdf
-    AddIcon /icons/text.gif .txt
-    AddIcon /icons/c.gif .c
-    AddIcon /icons/p.gif .pl .py
-    AddIcon /icons/f.gif .for
-    AddIcon /icons/dvi.gif .dvi
-    AddIcon /icons/uuencoded.gif .uu
-    AddIcon /icons/script.gif .conf .sh .shar .csh .ksh .tcl
-    AddIcon /icons/tex.gif .tex
-    AddIcon /icons/bomb.gif core
-
-    AddIcon /icons/back.gif ..
-    AddIcon /icons/hand.right.gif README
-    AddIcon /icons/folder.gif ^^DIRECTORY^^
-    AddIcon /icons/blank.gif ^^BLANKICON^^
-
-    #
-    # DefaultIcon is which icon to show for files which do not have an icon
-    # explicitly set.
-    #
-    DefaultIcon /icons/unknown.gif
-
-    #
-    # AddDescription allows you to place a short description after a file in
-    # server-generated indexes.  These are only displayed for FancyIndexed
-    # directories.
-    # Format: AddDescription "description" filename
-    #
-    #AddDescription "GZIP compressed document" .gz
-    #AddDescription "tar archive" .tar
-    #AddDescription "GZIP compressed tar archive" .tgz
-
-    #
-    # ReadmeName is the name of the README file the server will look for by
-    # default, and append to directory listings.
-    #
-    # HeaderName is the name of a file which should be prepended to
-    # directory indexes. 
-    ReadmeName README.html
-    HeaderName HEADER.html
-
-    #
-    # IndexIgnore is a set of filenames which directory indexing should ignore
-    # and not include in the listing.  Shell-style wildcarding is permitted.
-    #
-    IndexIgnore .??* *~ *# HEADER* README* RCS CVS *,v *,t .svn
-</IfModule>
-
-#
-# DefaultLanguage and AddLanguage allow you to specify the language of 
-# a document. You can then use content negotiation to give a browser a 
-# file in a language the user can understand.
-#
-# Specify a default language. This means that all data
-# going out without a specific language tag (see below) will 
-# be marked with this one. You probably do NOT want to set
-# this unless you are sure it is correct for all cases.
-#
-# * It is generally better to not mark a page as 
-# * being a certain language than marking it with the wrong
-# * language!
-#
-# DefaultLanguage nl
-#
-# Note 1: The suffix does not have to be the same as the language
-# keyword --- those with documents in Polish (whose net-standard
-# language code is pl) may wish to use "AddLanguage pl .po" to
-# avoid the ambiguity with the common suffix for perl scripts.
-#
-# Note 2: The example entries below illustrate that in some cases 
-# the two character 'Language' abbreviation is not identical to 
-# the two character 'Country' code for its country,
-# E.g. 'Danmark/dk' versus 'Danish/da'.
-#
-# Note 3: In the case of 'ltz' we violate the RFC by using a three char
-# specifier. There is 'work in progress' to fix this and get
-# the reference data for rfc1766 cleaned up.
-#
-# Catalan (ca) - Croatian (hr) - Czech (cs) - Danish (da) - Dutch (nl)
-# English (en) - Esperanto (eo) - Estonian (et) - French (fr) - German (de)
-# Greek-Modern (el) - Hebrew (he) - Italian (it) - Japanese (ja)
-# Korean (ko) - Luxembourgeois* (ltz) - Norwegian Nynorsk (nn)
-# Norwegian (no) - Polish (pl) - Portuguese (pt)
-# Brazilian Portuguese (pt-BR) - Russian (ru) - Swedish (sv)
-# Simplified Chinese (zh-CN) - Spanish (es) - Traditional Chinese (zh-TW)
-#
-AddLanguage ca .ca
-AddLanguage cs .cz .cs
-AddLanguage da .dk
-AddLanguage de .de
-AddLanguage el .el
-AddLanguage en .en
-AddLanguage eo .eo
-AddLanguage es .es
-AddLanguage et .et
-AddLanguage fr .fr
-AddLanguage he .he
-AddLanguage hr .hr
-AddLanguage it .it
-AddLanguage ja .ja
-AddLanguage ko .ko
-AddLanguage ltz .ltz
-AddLanguage nl .nl
-AddLanguage nn .nn
-AddLanguage no .no
-AddLanguage pl .po
-AddLanguage pt .pt
-AddLanguage pt-BR .pt-br
-AddLanguage ru .ru
-AddLanguage sv .sv
-AddLanguage zh-CN .zh-cn
-AddLanguage zh-TW .zh-tw
-
-#
-# LanguagePriority allows you to give precedence to some languages
-# in case of a tie during content negotiation.
-#
-# Just list the languages in decreasing order of preference. We have
-# more or less alphabetized them here. You probably want to change this.
-#
-LanguagePriority en ca cs da de el eo es et fr he hr it ja ko ltz nl nn no pl pt pt-BR ru sv zh-CN zh-TW
-
-#
-# ForceLanguagePriority allows you to serve a result page rather than
-# MULTIPLE CHOICES (Prefer) [in case of a tie] or NOT ACCEPTABLE (Fallback)
-# [in case no accepted languages matched the available variants]
-#
-ForceLanguagePriority Prefer Fallback
-
-#
-# Commonly used filename extensions to character sets. You probably
-# want to avoid clashes with the language extensions, unless you
-# are good at carefully testing your setup after each change.
-# See http://www.iana.org/assignments/character-sets for the
-# official list of charset names and their respective RFCs.
-#
-AddCharset ISO-8859-1  .iso8859-1  .latin1
-AddCharset ISO-8859-2  .iso8859-2  .latin2 .cen
-AddCharset ISO-8859-3  .iso8859-3  .latin3
-AddCharset ISO-8859-4  .iso8859-4  .latin4
-AddCharset ISO-8859-5  .iso8859-5  .latin5 .cyr .iso-ru
-AddCharset ISO-8859-6  .iso8859-6  .latin6 .arb
-AddCharset ISO-8859-7  .iso8859-7  .latin7 .grk
-AddCharset ISO-8859-8  .iso8859-8  .latin8 .heb
-AddCharset ISO-8859-9  .iso8859-9  .latin9 .trk
-AddCharset ISO-2022-JP .iso2022-jp .jis
-AddCharset ISO-2022-KR .iso2022-kr .kis
-AddCharset ISO-2022-CN .iso2022-cn .cis
-AddCharset Big5        .Big5       .big5
-# For russian, more than one charset is used (depends on client, mostly):
-AddCharset WINDOWS-1251 .cp-1251   .win-1251
-AddCharset CP866       .cp866
-AddCharset KOI8-r      .koi8-r .koi8-ru
-AddCharset KOI8-ru     .koi8-uk .ua
-AddCharset ISO-10646-UCS-2 .ucs2
-AddCharset ISO-10646-UCS-4 .ucs4
-AddCharset UTF-8       .utf8
-
-# The set below does not map to a specific (iso) standard
-# but works on a fairly wide range of browsers. Note that
-# capitalization actually matters (it should not, but it
-# does for some browsers).
-#
-# See http://www.iana.org/assignments/character-sets
-# for a list of sorts. But browsers support few.
-#
-AddCharset GB2312      .gb2312 .gb 
-AddCharset utf-7       .utf7
-AddCharset utf-8       .utf8
-AddCharset big5        .big5 .b5
-AddCharset EUC-TW      .euc-tw
-AddCharset EUC-JP      .euc-jp
-AddCharset EUC-KR      .euc-kr
-AddCharset shift_jis   .sjis
-
-#
-# AddType allows you to add to or override the MIME configuration
-# file mime.types for specific file types.
-#
-#AddType application/x-tar .tgz
-#
-# AddEncoding allows you to have certain browsers uncompress
-# information on the fly. Note: Not all browsers support this.
-# Despite the name similarity, the following Add* directives have nothing
-# to do with the FancyIndexing customization directives above.
-#
-#AddEncoding x-compress .Z
-#AddEncoding x-gzip .gz .tgz
-#
-# If the AddEncoding directives above are commented-out, then you
-# probably should define those extensions to indicate media types:
-#
-AddType application/x-compress .Z
-AddType application/x-gzip .gz .tgz
-
-#
-# AddHandler allows you to map certain file extensions to "handlers":
-# actions unrelated to filetype. These can be either built into the server
-# or added with the Action directive (see below)
-#
-# To use CGI scripts outside of ScriptAliased directories:
-# (You will also need to add "ExecCGI" to the "Options" directive.)
-#
-#AddHandler cgi-script .cgi
-
-#
-# For files that include their own HTTP headers:
-#
-#AddHandler send-as-is asis
-
-#
-# For server-parsed imagemap files:
-#
-#AddHandler imap-file map
-
-#
-# For type maps (negotiated resources):
-# (This is enabled by default to allow the Apache "It Worked" page
-#  to be distributed in multiple languages.)
-#
-AddHandler type-map var
-
-#
-# Filters allow you to process content before it is sent to the client.
-#
-# To parse .shtml files for server-side includes (SSI):
-# (You will also need to add "Includes" to the "Options" directive.)
-#
-#AddType text/html .shtml
-#AddOutputFilter INCLUDES .shtml
-
-#
-# Action lets you define media types that will execute a script whenever
-# a matching file is called. This eliminates the need for repeated URL
-# pathnames for oft-used CGI file processors.
-# Format: Action media/type /cgi-script/location
-# Format: Action handler-name /cgi-script/location
-#
-
-#
-# Customizable error responses come in three flavors:
-# 1) plain text 2) local redirects 3) external redirects
-#
-# Some examples:
-#ErrorDocument 500 "The server made a boo boo."
-#ErrorDocument 404 /missing.html
-#ErrorDocument 404 "/cgi-bin/missing_handler.pl"
-#ErrorDocument 402 http://www.example.com/subscription_info.html
-#
-
-#
-# Putting this all together, we can internationalize error responses.
-#
-# We use Alias to redirect any /error/HTTP_<error>.html.var response to
-# our collection of by-error message multi-language collections.  We use 
-# includes to substitute the appropriate text.
-#
-# You can modify the messages' appearance without changing any of the
-# default HTTP_<error>.html.var files by adding the line:
-#
-#   Alias /error/include/ "/your/include/path/"
-#
-# which allows you to create your own set of files by starting with the
-# /var/www/localhost/error/include files and copying them to /your/includepath/ 
-# even on a per-VirtualHost basis.  The default include files will display
-# your Apache version number and your ServerAdmin email address regardless
-# of the setting of ServerSignature.
-#
-# The internationalized error documents require mod_alias, mod_include
-# and mod_negotiation.  To activate them, uncomment the following 30 lines.
-
-#    Alias /error/ "/var/www/localhost/error/"
-#
-#    <Directory "/var/www/localhost/error">
-#        AllowOverride None
-#        Options IncludesNoExec
-#        AddOutputFilter Includes html
-#        AddHandler type-map var
-#        Order allow,deny
-#        Allow from all
-#        LanguagePriority en cs de es fr it nl sv pt-br ro
-#        ForceLanguagePriority Prefer Fallback
-#    </Directory>
-#
-#    ErrorDocument 400 /error/HTTP_BAD_REQUEST.html.var
-#    ErrorDocument 401 /error/HTTP_UNAUTHORIZED.html.var
-#    ErrorDocument 403 /error/HTTP_FORBIDDEN.html.var
-#    ErrorDocument 404 /error/HTTP_NOT_FOUND.html.var
-#    ErrorDocument 405 /error/HTTP_METHOD_NOT_ALLOWED.html.var
-#    ErrorDocument 408 /error/HTTP_REQUEST_TIME_OUT.html.var
-#    ErrorDocument 410 /error/HTTP_GONE.html.var
-#    ErrorDocument 411 /error/HTTP_LENGTH_REQUIRED.html.var
-#    ErrorDocument 412 /error/HTTP_PRECONDITION_FAILED.html.var
-#    ErrorDocument 413 /error/HTTP_REQUEST_ENTITY_TOO_LARGE.html.var
-#    ErrorDocument 414 /error/HTTP_REQUEST_URI_TOO_LARGE.html.var
-#    ErrorDocument 415 /error/HTTP_UNSUPPORTED_MEDIA_TYPE.html.var
-#    ErrorDocument 500 /error/HTTP_INTERNAL_SERVER_ERROR.html.var
-#    ErrorDocument 501 /error/HTTP_NOT_IMPLEMENTED.html.var
-#    ErrorDocument 502 /error/HTTP_BAD_GATEWAY.html.var
-#    ErrorDocument 503 /error/HTTP_SERVICE_UNAVAILABLE.html.var
-#    ErrorDocument 506 /error/HTTP_VARIANT_ALSO_VARIES.html.var
-
-
-#
-# The following directives modify normal HTTP response behavior to
-# handle known problems with browser implementations.
-#
-BrowserMatch "Mozilla/2" nokeepalive
-BrowserMatch "MSIE 4\.0b2;" nokeepalive downgrade-1.0 force-response-1.0
-BrowserMatch "RealPlayer 4\.0" force-response-1.0
-BrowserMatch "Java/1\.0" force-response-1.0
-BrowserMatch "JDK/1\.0" force-response-1.0
-
-#
-# The following directive disables redirects on non-GET requests for
-# a directory that does not include the trailing slash.  This fixes a 
-# problem with Microsoft WebFolders which does not appropriately handle 
-# redirects for folders with DAV methods.
-# Same deal with Apple's DAV filesystem and Gnome VFS support for DAV.
-#
-BrowserMatch "Microsoft Data Access Internet Publishing Provider" redirect-carefully
-BrowserMatch "MS FrontPage" redirect-carefully
-BrowserMatch "^WebDrive" redirect-carefully
-BrowserMatch "^WebDAVFS/1.[0123]" redirect-carefully
-BrowserMatch "^gnome-vfs" redirect-carefully
-BrowserMatch "^XML Spy" redirect-carefully
-BrowserMatch "^Dreamweaver-WebDAV-SCM1" redirect-carefully
-
-#
-# Allow server status reports generated by mod_status,
-# with the URL of http://servername/server-status
-# Change the ".example.com" to match your domain to enable.
-#
-<IfDefine INFO>
-    ExtendedStatus On
-    <Location /server-status>
-        SetHandler server-status
-        Order deny,allow
-        Deny from all
-        Allow from localhost
-    </Location>
-</IfDefine>
-
-#
-# Allow remote server configuration reports, with the URL of
-#  http://localhost/server-info (This is useful for debugging)
-#
-<IfDefine INFO>
-    <Location /server-info>
-       SetHandler server-info
-       Order deny,allow
-       Deny from all
-       Allow from localhost
-    </Location>
-</IfDefine>
-
-
-#
-# Gentoo VHosts
-# 
-# For Gentoo we include External Virtual Hosts Files.
-# Please see vhosts.d/00_default_vhost.conf for the default virtual host.
-#
-Include /etc/apache2/vhosts.d/*.conf

tests/test_basic_api.py

 import StringIO
 import random
 
-from pygments import lexers, formatters
+from pygments import lexers, formatters, format
 from pygments.token import _TokenType
 
 test_content = [chr(i) for i in xrange(33, 128)] * 5
             inst.get_style_defs()
             inst.format(ts, out)
 
+    def test_unicode_handling(self):
+        # Test that every formatter listed in formatters.FORMATTERS accepts
+        # the `encoding` option and produces the expected string type.
+        #
+        # The snippet contains a non-ASCII character and is lexed only once;
+        # the resulting token list is reused for every formatter below.
+        tokens = list(lexers.PythonLexer(encoding='utf-8').get_tokens("def f(): 'ä'"))
+        # Only the key of each FORMATTERS entry is used (it is called to build
+        # an instance); the associated `info` value is not needed here.
+        for formatter, info in formatters.FORMATTERS.iteritems():
+            # No encoding: formatters flagged with `unicodeoutput` must hand
+            # back a unicode object; others are not checked in this case.
+            inst = formatter(encoding=None)
+            out = format(tokens, inst)
+            if formatter.unicodeoutput:
+                self.assert_(type(out) is unicode)
+
+            # Explicit encoding: the result must always be a byte string.
+            inst = formatter(encoding='utf-8')
+            out = format(tokens, inst)
+            self.assert_(type(out) is str)
+            # Cannot test for encoding, since formatters may have to escape
+            # non-ASCII characters.
+
     def test_get_formatters(self):
         a = self.assert_
         ae = self.assertEquals