Victor Gavro avatar Victor Gavro committed 1980774

initial

Comments (0)

Files changed (1)

google_translate.py

+import urllib
+import urllib2
+import urlparse
+import gzip as gzip_
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+try:
+    import simplejson as json
+except ImportError:
+    import json
+
+
+API_URL = 'http://translate.googleapis.com/translate_a/t'  #endpoint requests are sent to
+#default User-Agent header: a regular browser string, so requests don't look suspicious to google
+DEFAULT_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 '
+                      '(KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6')
+
+
+def translate(text, target_language, source_language=None, html=True,
+              user_agent=DEFAULT_USER_AGENT, referer=None, origin=None,
+              timeout=None, flat=False, urlopen=urllib2.urlopen, gzip=True):
+
+    if isinstance(text, basestring):
+        text = (text,)
+    #TODO: split for smaller queries
+    #official clients split queries for smaller parts, but service can tranlate
+    #blocks for almost 2MB.
+
+    headers = {
+        'User-Agent': user_agent,
+        'Accept-Charset': 'utf-8',
+    }
+    if gzip:
+        headers['Accept-Encoding'] = 'gzip'
+    #better supply information below not to be banned by google
+    if referer:
+        headers['Referer'] = referer
+        headers['Origin'] = '%s://%s/' % urlparse.urlsplit(referer)[:2]
+        if origin and origin != headers['Origin']:
+            raise ValueError('Origin and referer not matched', origin,
+                             headers['Origin'])
+    elif origin:
+        headers['Origin'] = origin
+
+    qs = (
+        #('anno', '3'), #wtf? annotation?
+        #If sentences more than 1 it returns original after translation.
+        #NOTE: this isn't happening from official clients for some reason.
+
+        #('client', 'te'), #google javascript translator
+        ('client', 'te_lib'), #google-chrome translator
+        ('format', html and 'html' or ''), #encodes html-entities in response
+        ('v', '1.0'), #google translate version
+        ('logld', 'v10'), #obviously protocol version
+    )
+
+    data = []
+    for block in text:
+        if isinstance(block, unicode):
+            block = block.encode('utf8')
+        else:
+            #just to make sure this string is utf8 encoded
+            try:
+                block.decode('utf8')
+            except UnicodeDecodeError:
+                raise ValueError('Text must be utf8 or unicode')
+        data.append(('q', block))
+    data += [
+        ('sl', source_language or ''),
+        ('tl', target_language),
+        ('tc', '1'), #counter of splitted request related to one content
+        #('ctt', '1'), #wtf? absent in google-chrome translator
+    ]
+
+    req = urllib2.Request(API_URL + '?' + urllib.urlencode(qs),
+                          urllib.urlencode(data), headers=headers)
+    resp = urlopen(req, **(timeout and {'timeout': timeout} or {}))
+    if gzip:
+        resp_cont = gzip_.GzipFile(fileobj=StringIO(resp.read()), mode='r').read()
+    else:
+        resp_cont = resp.read()
+    result = json.loads(resp_cont.decode('string-escape'))
+
+    if not source_language:
+        #return detected source language as last element if it wasn't specified
+        result, source_language = result[:-1], result[-1]
+    elif len(text) == 1:
+        #this isn't json list, just plain text
+        result = (result,)
+
+    if flat:
+        return u' '.join(result)
+    return tuple(result), source_language
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.