Source

spiffy-cgi-handlers / cgi-handler.scm

;;;; cgi-handler.scm
;
; Copyright (c) 2007-2009, Peter Bex
; Copyright (c) 2000-2005, Felix L. Winkelmann
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
; 3. Neither the name of the author nor the names of its
;    contributors may be used to endorse or promote products derived
;    from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
; OF THE POSSIBILITY OF SUCH DAMAGE.
;
; CGI file handler
; See the spec at http://hoohoo.ncsa.uiuc.edu/cgi/interface.html
; Newer CGI spec: RFC 3875 at http://www.ietf.org/rfc/rfc3875

(module cgi-handler
  (cgi-handler cgi-handler* cgi-default-environment)

(import chicken scheme extras files posix regex data-structures)
(use spiffy srfi-1 srfi-13 intarweb uri-common (prefix uri-generic generic:))

(define (cgi-handler* #!optional interp)
  (lambda (fn) (cgi-handler fn interp)))

(define (alist->envlist alist)
  (map (lambda (entry)
         (conc (car entry) "=" (or (cdr entry) "")))
       alist))

(define (environmentize str)
  (conc "HTTP_" (string-upcase (string-translate str "-" "_"))))

(define (create-header-env headers)
  (fold
   (lambda (h result)
     ;; As per RFC 3875, section 4.1.18, remove all redundant information
     ;; all information related to authentication.
     (if (member (car h) '(content-type content-length authorization))
         result
         (append! (map (lambda (x)
                         (cons (environmentize (symbol->http-name (car h))) x))
                       (unparse-header (car h) (cdr h))) result)))
   '() (headers->list headers)))

(define (cgi-build-env req fn)
  (let* ((server-env
          `(;; TODO: Enable and find a script that requires auth, then test it!
            #;("AUTH_TYPE" . ,(header-value 'authorization
                                            (request-headers req)))
            ;; Username MUST be available when AUTH_TYPE is set
            #;("REMOTE_USER" . ,(header-value ... ))
            ("CONTENT_LENGTH" . ,(header-value 'content-length
                                             (request-headers req)))
            ("CONTENT_TYPE" . ,(and-let* ((contents (header-contents
                                                     'content-type
                                                     (request-headers req))))
                                 (car (unparse-header 'content-type contents))))
            ("PATH_INFO" . ,(and (current-pathinfo)
                                 (string-join (current-pathinfo) "/")))
            ("QUERY_STRING" . ,(generic:uri-query
                                (uri->uri-generic (request-uri req))))
            ("REMOTE_ADDR" . ,(remote-address))
            ;; This should really be the FQDN of the remote address
            ("REMOTE_HOST" . ,(remote-address))
            ("REQUEST_METHOD" . ,(request-method req))
            ("SCRIPT_NAME" . ,(current-file))
            ("SERVER_NAME" . ,(uri-host (request-uri (current-request))))
            ("SERVER_PORT" . ,(server-port)) ; OK?
            ("SERVER_PROTOCOL" . ,(sprintf "HTTP/~A.~A" ; protocol, NOT scheme
                                           (request-major req)
                                           (request-minor req)))
            ("SERVER_SOFTWARE" . ,(and-let* ((contents (header-contents
                                                        'server
                                                        (response-headers
                                                         (current-response)))))
                                    (car (unparse-header 'server contents))))
            ;; RFC 3875, section 4.1.6:
            ;; "The value is derived in this way irrespective of whether
            ;; it maps to a valid repository location."
            ;; ie, this value does not always make sense
            ("PATH_TRANSLATED" . ,(and (current-pathinfo)
                                       (not (null? (current-pathinfo)))
                                       (make-pathname
                                        (root-path)
                                        (string-join (current-pathinfo) "/"))))
            ;; PHP _always_ wants the REDIRECT_STATUS "for security",
            ;; so just initialize it unconditionally.
            ;; See http://php.net/security.cgi-bin
            ("REDIRECT_STATUS" . ,(response-code (current-response)))
            ;; More stuff needed because PHP's CGI is broken
            ;; See http://bugs.php.net/28227
            ;; (yes, that's right; it's been broken since 2004)
            ("SCRIPT_FILENAME" . ,fn)
            ;; Nonstandard but reasonably widely used Apache extension
            ("HTTPS" . ,(and (secure-connection?) "on"))))
         (header-env (create-header-env (request-headers req))))
    (alist->envlist (append (cgi-default-environment) header-env server-env))))

(define (copy-port in out #!optional limit)
  (let ((bufsize 1024))
   (let loop ((data (read-string (min (or limit bufsize) bufsize) in)))
     (unless (string-null? data)
             (display data out)
             (when limit (set! limit (- limit (string-length data))))
             (loop (read-string (min (or limit bufsize) bufsize) in))))))

;; Read a port and discard all data
(define (discard-inport in)
  (let ((bufsize 1024))
    (let loop ((data (read-string bufsize in)))
      (unless (string-null? data)
        (loop (read-string bufsize in))))))

;; "the server retains its responsibility to the client to conform to the
;;  relevant network protocol even if the CGI script fails to conform to
;;  this specification." -- RFC 3875, Section 3.1
;; The simplest way to ensure that the client conforms to the protocol
;; is to discard any content-length headers and simply close the connection.
(define (sanitize-headers script-headers)
  (headers '((connection close))
           (remove-header 'content-length script-headers)))

(define (status-parser str)
  (let ((parts (string-match "([0-9]+) (.+)" str)))
    (cons (string->number (second parts)) (third parts))))

(define (cgi-handler fn #!optional interp)
  (let* ((path (if (absolute-pathname? fn) fn (make-pathname (root-path) fn)))
         (req (current-request))
         (len (header-value 'content-length (request-headers req) 0))
	 (interp (or interp (make-pathname (root-path)
                                           (string-join (cdr (uri-path (request-uri req))) "/"))))
	 (env (cgi-build-env req path)))
    ;; TODO: stderr should be linked to spiffy error log
    (if (file-execute-access? interp)
        ;; XXX The script should be called with the query args on the
        ;; commandline but only if those do not contain any unencoded '='
        ;; characters. Otherwise, it should pass no commandline arguments.
        ;; XXX Current working directory should be the dir with the script.
	(let-values (((i o pid) (process interp (list path) env)))
          (log-to (debug-log) "(cgi) started program ~a(~a) ..." interp path)
          (copy-port (request-port (current-request)) o len)
          (close-output-port o)
          ;; TODO: Implement read timeout
          (let* ((script-headers (parameterize
                                     ((header-parsers
                                       `((status . ,(single status-parser))
                                         ,@(header-parsers))))
                                   (read-headers i)))
                 (loc (header-value 'location script-headers))
                 (status (header-value 'status script-headers))
                 (code (cond
                        (status (car status))
                        (loc 302)
                        (else (response-code (current-response)))))
                 (reason (cond
                          (status (cdr status))
                          (loc "Found")
                          (else (response-reason (current-response)))))
                 ;; Get rid of our temporary Status "header" again
                 (script-headers (remove-header 'status script-headers)))
            (parameterize ((current-response
                            (update-response (current-response)
                                             headers:  (sanitize-headers
                                                        script-headers)
                                             code: code
                                             reason: reason)))
              (write-logged-response)
              (if (eq? 'HEAD (request-method (current-request)))
                  (discard-inport i)
                  (copy-port i (response-port (current-response))))
              (close-input-port i))))
          (error (sprintf "Invalid interpreter: ~A\n" interp)))))

(define cgi-default-environment
  (make-parameter `(("GATEWAY_INTERFACE" . "CGI/1.1"))))
)