Commits

llibra  committed 9b0afe5

Initial import.

  • Participants

Comments (0)

Files changed (13)

+syntax: glob
+*~
+PERCENT-ENCODING
+================
+
+What is this?
+-------------
+
+Percent-encoding is a library for
+[percent-encoding](http://tools.ietf.org/html/rfc3986#section-2.1) defined in
+[RFC 3986](http://tools.ietf.org/html/rfc3986) and varieties.
+
+[RFC 3986](http://tools.ietf.org/html/rfc3986)で定義されている
+[percent-encoding](http://tools.ietf.org/html/rfc3986#section-2.1)とその亜種を処
+理するためのライブラリです。
+
+License
+-------
+
+It's licensed under the MIT license.
+
+Requirements
+------------
+
+* [Anaphora](http://common-lisp.net/project/anaphora/)
+* [Babel](http://common-lisp.net/project/babel/)
+
+API
+---
+
+### Encoding And Decoding
+
+#### Function: encode string &key test www-form encoding
+
+Encodes `string` to a percent-encoded string and returns the result.
+
+`test` is used for determining whether each octet is allowed. If an octet is not
+allowed, the octet is encoded. It's a function of one argument that returns a
+generalized boolean. The default value is `#'unreservedp`.
+
+If `www-form` is true, returns application/x-www-form-urlencoded string instead
+of RFC 3986 percent-encoded string.
+
+`encoding` is a character encoding scheme. `string` is encoded using the
+encoding before percent-encoding. This argument is passed to
+`babel:string-to-octets` without any change.
+
+#### Function: decode string &key test www-form encoding
+
+Decodes `string` to a decoded string and returns the result.
+
+`test` is used for determining whether each octet is decoded. It's a function of
+one argument that returns a generalized boolean. The default value is
+`(constantly t)`.
+
+If `www-form` is true, assumes that `string` is an
+application/x-www-form-urlencoded string.
+
+`encoding` is a character encoding scheme. `string` is decoded according to the
+scheme after percent-encoding. This argument is passed to
+`babel:octets-to-string` without any change.
+
+### Predicates
+
+#### Function: gen-delims-p x
+#### Function: sub-delims-p x
+#### Function: reservedp x
+#### Function: alphap x
+#### Function: digitp x
+#### Function: unreservedp x
+#### Function: userinfop x
+#### Function: reg-name-p x
+#### Function: pcharp x
+#### Function: queryp x
+#### Function: fragmentp x
+
+Returns true if the octet `x` is a member of each character set. See RFC 3986.
+
+Acknowledgements
+----------------
+
+The API of percent-encoding was inspired by Daniel Oliveira's
+[do-urlencode](https://github.com/drdo/do-urlencode) and Franz's
+[uri](https://github.com/franzinc/uri).
+
+sile's [url](http://d.hatena.ne.jp/sile/20091216/1260980935) gave some important
+hints for speed to me.

File percent-encoding.asd

+(defpackage :percent.asd (:use :cl :asdf))
+(in-package :percent.asd)
+
+(defsystem :percent-encoding
+  :version "0.1"
+  :author "Manabu Takayama <learn.libra@gmail.com>"
+  :license "MIT License"
+  :depends-on (:anaphora :babel)
+  :components ((:module "src"
+                        :serial t
+                        :components ((:file "package")
+                                     (:file "type")
+                                     (:file "util")
+                                     (:file "condition")
+                                     (:file "ascii")
+                                     (:file "pred")
+                                     (:file "encode")
+                                     (:file "decode")))))
+
+(defsystem :percent-encoding-test
+  :depends-on (:percent-encoding :fiveam)
+  :components ((:module "t"
+                        :serial t
+                        :components ((:file "package")
+                                     (:file "test")))))
+
+(defmethod perform ((o test-op) (c (eql (find-system :percent-encoding))))
+  (asdf:load-system :percent-encoding-test)
+  (funcall (intern "RUN!" :5am) (intern "ALL" :percent.test)))

File src/ascii.lisp

+(in-package :percent)
+
+(defvar *ascii-char-table*
+  #(nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
+    nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
+    #\  #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+ #\, #\- #\. #\/
+    #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\: #\; #\< #\= #\> #\?
+    #\@ #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O
+    #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\[ #\\ #\] #\^ #\_
+    #\` #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o
+    #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z #\{ #\| #\} #\~ nil))
+
+(declaim (inline ascii-char))
+(defun ascii-char (x)
+  (aref (the (simple-array (or character null) (*)) *ascii-char-table*) x))
+
+(defvar *char-ascii-table*
+  (labels ((->code (x)
+             (etypecase x
+               (character (char-code x))
+               (integer x)
+               (null 0)))
+           (max-code (x y)
+             (let ((x (->code x)) (y (->code y)))
+               (max x y))))
+    (let* ((length (1+ (reduce #'max-code *ascii-char-table*)))
+           (table (make-array length :initial-element nil)))
+      (reduce (lambda (i x)
+                (when x (setf (aref table (char-code x)) i))
+                (1+ i))
+              *ascii-char-table* :initial-value 0)
+      table)))
+
+(declaim (inline char-ascii))
+(defun char-ascii (x)
+  (aref *char-ascii-table* (char-code x)))

File src/condition.lisp

+(in-package :percent)
+
+(define-condition decode-error (error)
+  ((position :reader decode-error-position :initarg :position)))
+
+(define-condition invalid-% (decode-error)
+  ()
+  (:report (lambda (c s)
+             (format s "Encountered an invalid % at ~d."
+                     (decode-error-position c)))))
+
+(define-condition invalid-hexdig (decode-error)
+  ((c1 :reader invalid-hexdig-c1 :initarg :c1)
+   (c2 :reader invalid-hexdig-c2 :initarg :c2))
+  (:report (lambda (c s)
+             (format s "Encountered an invalid octet %~c~c at ~d."
+                     (invalid-hexdig-c1 c)
+                     (invalid-hexdig-c2 c)
+                     (decode-error-position c)))))

File src/decode.lisp

+(in-package :percent)
+
+(defun decode (string &key (test (load-time-value (constantly t)))
+                           (www-form nil)
+                           (encoding :utf-8))
+  (labels ((->octet (c1 c2 pos)
+             (declare (inline ->octet))
+             (aif (char-pair-octet c1 c2)
+                  it
+                  (error 'invalid-hexdig :position pos :c1 c1 :c2 c2))))
+    (do* ((length (length string))
+          (buffer (make-array length :element-type 'octet))
+          (i 0 (1+ i))
+          (j 0 (1+ j)))
+         ((= i length) (octets-to-string buffer :end j :encoding encoding))
+      (let ((c (char string i)))
+        (cond ((char/= c #\%)
+               (if (and www-form (char= c #\+))
+                   (setf (aref buffer j) #x20)  ; SP
+                   (setf (aref buffer j) (char-ascii c))))
+              ((> (+ i 3) length)
+               (error 'invalid-% :position i))
+              (t
+               (let* ((c1 (char string (+ i 1)))
+                      (c2 (char string (+ i 2)))
+                      (octet (->octet c1 c2 i)))
+                 (incf i 2)
+                 (cond ((funcall test octet)
+                        (setf (aref buffer j) octet))
+                       (t
+                        (setf (aref buffer j) #x25) ; '%'
+                        (setf (aref buffer (incf j)) (char-ascii c1))
+                        (setf (aref buffer (incf j)) (char-ascii c2)))))))))))

File src/encode.lisp

+(in-package :percent)
+
+(defun encode (string &key (test #'unreservedp)
+                           (www-form nil)
+                           (encoding :utf-8))
+  (declare (optimize speed (debug 0) (safety 0)))
+  (do* ((octets (string-to-octets string :encoding encoding))
+        (length (length octets))
+        (buffer (make-string (* length 3)))
+        (i 0 (1+ i))
+        (j 0 (1+ j)))
+       ((= i length) (shrink-vector buffer j))
+    (declare (type octets octets) (type fixnum i j length))
+    (let ((octet (aref octets i)))
+      (declare (type octet octet))
+      (cond ((funcall test octet)
+             (setf (aref buffer j) (ascii-char octet)))
+            ((and www-form (= octet #x0d))  ; CR
+             (setf (aref buffer j) #\return)
+             (setf (aref buffer (incf j)) #\linefeed)
+             (when (and (< i length) (= (aref octets (1+ i)) #x0a))
+               (incf i)))
+            ((and www-form (= octet #x0a))  ; LF
+             (setf (aref buffer j) #\return)
+             (setf (aref buffer (incf j)) #\linefeed))
+            ((and www-form (= octet #x20))  ; SP
+             (setf (aref buffer j) #\+))
+            (t
+             (multiple-value-bind (h l) (truncate octet #x10)
+               (setf (aref buffer j) #\%)
+               (setf (aref buffer (incf j)) (digit-char h 16))
+               (setf (aref buffer (incf j)) (digit-char l 16))))))))

File src/package.lisp

+(in-package :cl-user)
+
+(defpackage :percent-encoding
+  (:nicknames :percent)
+  (:use :cl :anaphora)
+  (:import-from :babel :string-to-octets :octets-to-string)
+  ;; Conditions
+  (:export :decode-error :invalid-% :invalid-hexdig)
+  ;; Predicates
+  (:export :gen-delims-p :sub-delims-p :reservedp :alphap :digitp :unreservedp
+           :userinfop :reg-name-p :pcharp :queryp :fragmentp)
+  ;; Encoder and decoder
+  (:export :encode :decode))

File src/pred.lisp

+(in-package :percent)
+
+(declaim (inline gen-delims-p))
+(defun gen-delims-p (x)
+  (case x
+    ;;  :    /    ?    #    [    ]    @
+    ((#x3a #x2f #x3f #x23 #x5b #x5d #x40) t)
+    (t nil)))
+
+(declaim (inline sub-delims-p))
+(defun sub-delims-p (x)
+  (case x
+    ;;  !    $    &    '    (    )    *    +    ,    ;    =
+    ((#x21 #x24 #x26 #x27 #x28 #x29 #x2a #x2b #x2c #x3b #x3d) t)
+    (t nil)))
+
+(declaim (inline reservedp))
+(defun reservedp (x)
+  (or (gen-delims-p x) (sub-delims-p x)))
+
+(declaim (inline alphap))
+(defun alphap (x)
+  (or (<= #x41 x #x5a)      ; A-Z
+      (<= #x61 x #x7a)))    ; a-z
+
+(declaim (inline digitp))
+(defun digitp (x)
+  (<= #x30 x #x39)) ; 0-9
+
+(declaim (inline unreservedp))
+(defun unreservedp (x)
+  (or (alphap x)
+      (digitp x)
+      (case x
+        ;;  -    .    _    ~
+        ((#x2d #x2e #x5f #x7e) t)
+        (t nil))))
+
+(declaim (inline userinfop))
+(defun userinfop (x)
+  (or (unreservedp x) (sub-delims-p x) (= x #x3a)))   ; :
+
+(declaim (inline reg-name-p))
+(defun reg-name-p (x)
+  (or (unreservedp x) (sub-delims-p x)))
+
+(declaim (inline pcharp))
+(defun pcharp (x)
+  (or (unreservedp x)
+      (sub-delims-p x)
+      (case x
+        ;;  :    @
+        ((#x3a #x40) t)
+        (t nil))))
+
+(declaim (inline queryp))
+(defun queryp (x)
+  (or (pcharp x)
+      (case x
+        ;;  /    ?
+        ((#x2f #x3f) t)
+        (nil t))))
+
+(declaim (inline fragmentp))
+(defun fragmentp (x)
+  (queryp x))

File src/type.lisp

+(in-package :percent)
+
+(deftype octet () '(unsigned-byte 8))
+
+(deftype octets (&optional size)
+  `(simple-array octet (,size)))

File src/util.lisp

+(in-package :percent)
+
+(defun shrink-vector (vector size)
+  #+sbcl
+  (sb-kernel:%shrink-vector vector size)
+  #+ccl
+  (ccl::shrink-vector vector size)
+  #-(or sbcl ccl)
+  (subseq vector size))
+
+(declaim (inline char-pair-octet))
+(defun char-pair-octet (c1 c2)
+  (let ((h (digit-char-p c1 16))
+        (l (digit-char-p c2 16)))
+    (if (and h l)
+        (+ (ash h 4) l)
+        nil)))

File t/package.lisp

+(in-package :cl-user)
+
+(defpackage :percent-encoding.test
+  (:nicknames :percent.test)
+  (:use :cl :percent)
+  (:export :all))
+(in-package :percent.test)
+
+(5am:in-suite* all)
+
+(5am:test encode
+  (5am:is (equal "Aa0-._~"
+                 (percent:encode "Aa0-._~")))
+  (5am:is (equal "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                 (percent:encode ":/?#[]@!$&'()*+,;="))))
+
+(5am:test encode/test
+  (5am:is (equal "Aa0-._~"
+                 (percent:encode "Aa0-._~"
+                                 :test (constantly t))))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:encode ":/?#[]@!$&'()*+,;="
+                                 :test (constantly t))))
+  (5am:is (equal "%41%61%30%2D%2E%5F%7E"
+                 (percent:encode "Aa0-._~"
+                                 :test (constantly nil))))
+  (5am:is (equal "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                 (percent:encode ":/?#[]@!$&'()*+,;="
+                                 :test (constantly nil))))
+  (5am:is (equal ":%2F%3F%23%5B%5D@!$&'()*+,;="
+                 (percent:encode ":/?#[]@!$&'()*+,;="
+                                 :test #'pcharp))))
+
+(5am:test encode/encoding
+  (5am:is (equal "Aa0-._~"
+                 (percent:encode "Aa0-._~"
+                                 :encoding :ascii)))
+  (5am:is (equal "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                 (percent:encode ":/?#[]@!$&'()*+,;="
+                                 :encoding :ascii)))
+  (5am:is (equal "Aa0-._~"
+                 (percent:encode "Aa0-._~"
+                                 :encoding :utf-8)))
+  (5am:is (equal "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                 (percent:encode ":/?#[]@!$&'()*+,;="
+                                 :encoding :utf-8)))
+  (5am:is (equal "%E6%97%A5%E6%9C%AC%E8%AA%9E"
+                 (percent:encode "日本語"
+                                 :encoding :utf-8)))
+  (5am:is (equal "%93%FA%96%7B%8C%EA"
+                 (percent:encode "日本語"
+                                 :encoding :cp932))))
+
+(5am:test decode
+  (5am:is (equal "Aa0-._~"
+                 (percent:decode "Aa0-._~")))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:decode "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"))))
+
+(5am:test decode/test
+  (5am:is (equal "Aa0-._~"
+                 (percent:decode "Aa0-._~"
+                                 :test (constantly nil))))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:decode ":/?#[]@!$&'()*+,;="
+                                 :test (constantly nil))))
+  (5am:is (equal "Aa0-._~"
+                 (percent:decode "%41%61%30%2D%2E%5F%7E"
+                                 :test (constantly t))))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:decode "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                                 :test (constantly t))))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:decode ":%2F%3F%23%5B%5D@!$&'()*+,;="
+                                 :test (complement #'pcharp)))))
+
+(5am:test decode/encoding
+  (5am:is (equal "Aa0-._~"
+                 (percent:decode "Aa0-._~"
+                                 :encoding :ascii)))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:decode "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                                 :encoding :ascii)))
+  (5am:is (equal "Aa0-._~"
+                 (percent:decode "Aa0-._~"
+                                 :encoding :utf-8)))
+  (5am:is (equal ":/?#[]@!$&'()*+,;="
+                 (percent:decode "%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D"
+                                 :encoding :utf-8)))
+  (5am:is (equal "日本語"
+                 (percent:decode "%E6%97%A5%E6%9C%AC%E8%AA%9E"
+                                 :encoding :utf-8)))
+  (5am:is (equal "日本語"
+                 (percent:decode "%93%FA%96%7B%8C%EA"
+                                 :encoding :cp932))))