Commits

Martin Ostrovsky committed 3761cc7

init commit of client libraries - python the only one working so far

  • Participants

Comments (0)

Files changed (5)

+syntax: glob
+
+*egg
+*egg-info
+*pyc
+*swp
+*swo
+*csv
+*mmap
+
+# Dummy files
+foo.py
+
+# External data - don't commit any of this to source control, just
+# synchronize this with rsync
+ext_data
+
+# Python distutils build output
+build
+dist
+
+.DS_Store
+
+python/*.jpg
+python/*.png
+python/*.ppt

File csharp/helloworld.cs

+using System;
+using System.IO;
+using System.Net;
+using System.Text;
+
+/// <summary>
+/// Fetches a web page over HTTP and prints its source to standard output.
+/// </summary>
+class WebFetch
+{
+    /// <summary>
+    /// Requests http://www.mayosoftware.com, decodes the response body as
+    /// ASCII text and writes it to the console.
+    /// </summary>
+    /// <param name="args">Command-line arguments (unused).</param>
+    static void Main(string[] args)
+    {
+        // prepare the web page we will be asking for
+        HttpWebRequest request = (HttpWebRequest)
+            WebRequest.Create("http://www.mayosoftware.com");
+
+        // The response, its stream and the reader all hold on to the
+        // underlying connection; the using blocks release it even when
+        // reading throws.
+        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
+        using (Stream resStream = response.GetResponseStream())
+        // Byte-order-mark detection is switched off so that every byte is
+        // decoded as ASCII, exactly as a manual Encoding.ASCII loop would.
+        using (StreamReader reader = new StreamReader(resStream, Encoding.ASCII, false))
+        {
+            // print out page source
+            Console.WriteLine(reader.ReadToEnd());
+        }
+    }
+}
+
+/*
+ASCIIEncoding encoding=new ASCIIEncoding();
+string postData="userid="+strId;
+postData += ("&username="+strName);
+byte[]  data = encoding.GetBytes(postData);
+
+// Prepare web request...
+HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create("http://localhost/MyIdentity/Default.aspx");
+myRequest.Method = "POST";
+myRequest.ContentType="application/x-www-form-urlencoded";
+myRequest.ContentLength = data.Length;
+Stream newStream=myRequest.GetRequestStream();
+// Send the data.
+newStream.Write(data,0,data.Length);
+newStream.Close();
+*/

File csharp/helloworld.exe

Binary file added.

File python/client.py

+"""
+Repustate Python API client.
+
+Requirements:
+- Python
+- A json library (builtin or simplejson works fine)
+- lxml (http://lxml.de/)
+
+Want to change it / improve it / share it? Go for it.
+
+Feedback is appreciated at info@repustate.com
+
+More documentation available at http://www.repustate.com/docs
+"""
+import base64
+from lxml import etree
+import urllib
+import urllib2
+
+try:
+    # Different versions of Python call the json library different things.
+    import json
+except ImportError:
+    import simplejson as json
+
+class Repustate(object):
+
+    url_template = (
+        'http://api.repustate.com/%(version)s/%(key)s/%(function)s.%(response_type)s'
+        )
+
+    ppt_url = 'http://api.repustate.com/%(version)s/%(key)s/powerpoint/'
+
+    def __init__(self, api_key, version='v2'):
+        self.api_key = api_key
+        self.version = version
+
+    def _call_api(self, api_function, use_http_get=False, response_type='json', **params):
+        
+        assert response_type in ('json', 'xml'), "%s is not a valid response type" % response_type
+
+        params = dict((x, y) for x, y in params.iteritems() if y is not None)
+        
+        data = urllib.urlencode(params)
+        
+        url_args = dict(
+            response_type=response_type,
+            function=api_function,
+            key=self.api_key,
+            version=self.version,
+        )
+        
+        if api_function == 'powerpoint':
+            url = self.ppt_url % url_args
+            response = urllib2.urlopen(url, data)
+            # Return the ppt file as binary data.
+            result = response.read()
+        else:
+            url = self.url_template % url_args
+        
+            if use_http_get:
+                url = '%s?%s' % (url, data)
+                response = urllib2.urlopen(url)
+            else:
+                response = urllib2.urlopen(url, data)
+
+            if response_type == 'json':
+                result = json.load(response)
+            else:
+                result = etree.ElementTree.fromstring(response)
+            
+        return result
+
+    def _call_natural_language(self, api_function, cloud=None, text=None, url=None):
+        """
+        Helper function for the NLP calls.
+        """
+        return self._call_api(api_function, use_http_get=(not text), cloud=cloud, text=text, url=url)
+
+    def sentiment(self, text=None, url=None, response_type='json'):
+        """
+        Retreive the sentiment for a single URl or block of text.
+        """
+        return self._call_api('score', text=text, url=url)
+
+    def bulk_sentiment(self, items=None, response_type='json'):
+        """
+        Bulk score multiple pieces of text (not urls!).
+        """
+        items_to_score = {}
+        
+        for idx, item in enumerate(items):
+            items_to_score['text%d' % idx] = item
+        
+        return self._call_api('bulk-score', **items_to_score)
+    
+    def clean_html(self, url=None, response_type='json'):
+        """
+        Clean up a web page. It doesn't work well on home pages - it's designed for content pages.
+        """
+        return self._call_api('clean-html', use_http_get=True, url=url)
+
+    def adjectives(self, cloud=None, text=None, url=None, response_type='json'):
+        return self._call_natural_language('adj', cloud=cloud, text=text, url=url)
+
+    def verbs(self, cloud=None, text=None, url=None, response_type='json'):
+        return self._call_natural_language('verb', cloud=cloud, text=text, url=url)
+
+    def ngrams(self, url=None, text=None, max=None, min=None, freq=None, stopwords=None, response_type='json'):
+        return self._call_api('ngrams', use_http_get=bool(url), text=text, url=url, max=max, min=min, freq=freq, stopwords=stopwords,)
+    
+    def date_extraction(self, text, response_type='json'):
+        """
+        Convert english date indicators like "today", "tomorrow", "next week"
+        into date strings like 2011-01-12.
+        """
+        return self._call_api('extract-dates', text=text)
+    
+    def powerpoint(self, report_title, author, images, titles):
+        """
+        Given a list of images and titles, generate a simple powerpoint presentation.
+        """
+        kwargs = dict(
+            title=report_title,
+            author=author,
+        )
+
+        for idx, (image, title) in enumerate(zip(images, titles)):
+            # We need to b64 encode the image.
+            image_content = base64.b64encode(open(image).read())
+            kwargs['slide_%d_image' % idx] = image_content
+            kwargs['slide_%d_title' % idx] = title
+
+        return self._call_api('powerpoint', **kwargs)
+
+if __name__ == '__main__':
+    """
+    Sample usage of the client library. You'll have to change the api_key below
+    to yours if you want to actually run this.
+    """
+    client = Repustate(api_key='demokey', version='v2')
+    
+    # Score a single piece of text.
+    score = client.sentiment(text='I hate food.', response_type='txt')
+    
+    # Score multiple pieces of text.
+    scores = client.bulk_sentiment(['I love candy', 'I hate fish', 'I want to go watch a movie'], response_type='xml')
+
+    data = client.powerpoint('Test Report', 'Martin Ostrovsky', images=['FavouritePicture.jpg'], titles=['Gillian C. FitzGerald'])
+    fd = open('test.ppt', 'w')
+    fd.write(data)
+    fd.close()

File python/test_repustate.py

+import unittest2 as unittest
+
+from client import Repustate
+
+class TestRepustate(unittest.TestCase):
+
+    key = '0c9cf39718d8ce33e5310047617f8020984d8f18'
+
+    def get_accessor(self):
+        return Repustate(self.key, 'v1')
+
+    def check_call(self, func_name, **kwargs):
+        func = getattr(self.get_accessor(), func_name)
+        result = func(**kwargs)
+        self.assertIsInstance(result, dict, 'Invalid result')
+        self.assertEqual(result.get('status'), 'OK', result)
+
+    def test_sentiment_for_text(self):
+        self.check_call('sentiment', text='iPad')
+
+    def test_sentiment_for_url(self):
+        self.check_call('sentiment', url='http://www.twitter.com')
+
+    def test_bulk_sentiment(self):
+        self.check_call('bulk_sentiment', items=['I love candy'])
+
+    def test_adjectives_for_text(self):
+        self.check_call('adjectives', text='iPad')
+
+    def test_adjectives_for_url(self):
+        self.check_call('adjectives', url='http://www.twitter.com')
+
+    def test_verbs_for_text(self):
+        self.check_call('verbs', text='iPad')
+
+    def test_verbs_for_url(self):
+        self.check_call('verbs', url='http://www.twitter.com')
+
+    def test_clean_html(self):
+        self.check_call('clean_html', url='http://tcrn.ch/aav9Ty')
+
+    def test_ngrams_for_text(self):
+        self.check_call('ngrams', text='iPad')
+
+    def test_ngrams_for_url(self):
+        self.check_call('ngrams', url='http://tcrn.ch/aav9Ty')