Commits

Aleksey Sivokon committed f7ba7fa

Internationalized Domain Names in Applications (IDNA) support in URL validator.

Comments (0)

Files changed (1)

formencode/validators.py

         >>> URL(require_tld=False).to_python('http://localhost')
         'http://localhost'
 
+    By default, internationalized domain names (IDNA) in Unicode will be
+    accepted and encoded to ASCII using Punycode (as described in RFC 3490).
+    You may set allow_idna to False to change this behavior::
+
+        >>> URL(allow_idna=True).to_python(u'http://\u0433\u0443\u0433\u043b.\u0440\u0444')
+        'http://xn--c1aay4a.xn--p1ai'
+        >>> URL(allow_idna=False).to_python(u'http://\u0433\u0443\u0433\u043b.\u0440\u0444')
+        Traceback (most recent call last):
+            ...
+        Invalid: That is not a valid URL
     """
 
     check_exists = False
     add_http = True
     require_tld = True
+    allow_idna = True
 
     url_re = re.compile(r'''
         ^(http|https)://
 
     def _to_python(self, value, state):
         value = value.strip()
+        if self.allow_idna:
+            value = self._encode_idna(value)
         if self.add_http:
             if not self.scheme_re.search(value):
                 value = 'http://' + value
             self._check_url_exists(value, state)
         return value
 
+    def _encode_idna(self, url):
+        """
+        Return ``url`` with its network location IDNA-encoded to ASCII.
+
+        The URL is split with ``urlparse.urlparse``, the netloc component is
+        encoded with the ``idna`` codec, and the pieces are reassembled and
+        converted with ``str``.  If that raises ``UnicodeError``, ``url`` is
+        returned unchanged.
+        """
+        # urlparse is a module-level name that is imported lazily on first use.
+        global urlparse
+        if urlparse is None:
+            import urlparse
+        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
+        try:
+            # NOTE(review): the whole netloc is handed to the codec, so any
+            # userinfo or port it contains is encoded too -- confirm that
+            # 'host:port' URLs with non-ASCII host names survive this.
+            return str(urlparse.urlunparse((
+                scheme, netloc.encode('idna'), path, params, query, fragment)))
+        except UnicodeError:
+            # Best effort: hand the value back untouched.
+            return url
+
     def _check_url_exists(self, url, state):
         global httplib, urlparse, socket
         if httplib is None: