Source

nysl / responses.py

'''
Classes/functions for parsing nysl.nysed.gov responses.
'''
from operator import methodcaller, itemgetter, attrgetter
from collections import defaultdict
import pprint
import copy

import pyquery
from functional import compose, partial

# ---------------------------------------------
# Helpers
# ---------------------------------------------
# Fold `compose` over a sequence of callables to build one composed callable.
# NOTE(review): the pipelines built with this in `kw` only make sense if
# `compose(outer, inner)` applies `inner` first, i.e. the LAST callable in the
# sequence runs first -- confirm against the `functional` package docs.
multi_compose = partial(reduce, compose)

class obj(dict):
    '''
    A dict whose entries can also be read as attributes (``o.key``).

    Attribute access on a missing key raises AttributeError, so that
    ``getattr(o, name, default)`` and ``hasattr(o, name)`` behave as usual.
    Item access (``o[key]``) still raises KeyError like any dict.
    '''

    def __getattr__(self, name):
        # Only called after normal attribute lookup has failed, so real dict
        # methods (keys, items, ...) are never shadowed by stored entries.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

def catalog_filter(item):
    '''
    Decide whether a text fragment should be kept.

    Returns False when the fragment is empty or whitespace-only, or when it
    is exactly the string ' Single term in tag ' (compared un-stripped, with
    its surrounding spaces). Returns True for anything else.
    '''
    is_blank = not item.strip()
    is_placeholder = item == ' Single term in tag '
    return not (is_blank or is_placeholder)


def orders_filter(item):
    '''
    Decide whether a text fragment should be kept.

    Returns False for None, for empty/whitespace-only text, and for text
    that, once stripped, equals one of the fixed strings in ``skipped``.
    Returns True for anything else.
    '''
    # None was listed among the rejected values, but item.strip() ran first
    # and raised AttributeError on it; reject it explicitly instead.
    if item is None:
        return False
    skipped = ('', 'library desc', 'RECURRING order', 'parts', 'status')
    return item.strip() not in skipped

def printfunc(item):
    '''
    Print `item` and return it unchanged, so a call can wrap any value
    without altering it.
    '''
    # The parenthesised form prints exactly what `print item` did under
    # Python 2 and, unlike the bare statement, is also valid Python 3.
    print(item)
    return item
    
# ---------------------------------------------
#
# ---------------------------------------------
# Element-to-text extractors used by detail(), keyed by a short name and
# readable as attributes (kw.f, kw.g, kw.h) because they live in an `obj`.
# NOTE(review): every tuple below is folded with `compose`; the per-key
# comments assume the LAST callable in a tuple runs first (itertext/text on
# the element, then the clean-up steps) -- confirm against `functional`.
kw = obj({
    
  # 'f' (applied by detail() to th.viewmarctags / td.viewmarctags elements):
  # itertext() -> list -> keep fragments accepted by catalog_filter ->
  # take the last one -> strip it.
  # NOTE(review): nothing guards the empty case here (unlike 'h'), so
  # itemgetter(-1) presumably fails when no fragment survives -- verify.
  'f': multi_compose((
      methodcaller('strip'),
      itemgetter(-1),
      lambda _list: filter(catalog_filter, _list),
      list,
      methodcaller('itertext'))),
  
  # 'g' (applied by detail() to the <td> cells of the holdings table):
  # read the element's `text` attribute, then strip it.
  # NOTE(review): presumably breaks if `text` is None -- verify.
  'g': multi_compose((
      methodcaller('strip'),
      attrgetter('text'))),

  # 'h' (applied by detail() to the <td> cells of the orders table):
  # itertext() -> list -> keep fragments accepted by orders_filter ->
  # strip each -> substitute [None] when nothing is left -> take the last.
  'h': multi_compose((
      itemgetter(-1),
      lambda _list: _list if _list else [None],
      lambda _list: map(methodcaller('strip'), _list),
      lambda _list: filter(orders_filter, _list),
      list,
      methodcaller('itertext'))),
  
  })

# ---------------------------------------------
def _parse_item_table(p, pq, selector, columns, cell_parser):
    '''
    Convert the rows of the table matched by `selector` into a list of dicts.

    The first two <tr> elements are skipped (presumably header rows -- TODO
    confirm against a real page). Each remaining row becomes a dict pairing
    `columns` with `cell_parser` applied to the row's <td> cells, in order.
    '''
    rows = pq(p.find(selector)).find("tr")[2:]
    return [dict(zip(columns, map(cell_parser, pq(tr).find("td"))))
            for tr in rows]


def detail(html, kw=kw, pq=pyquery.PyQuery):
    '''
    Parse a record detail page into a dict.

    html -- page markup, handed to `pq`.
    kw   -- object exposing the cell extractors `f`, `g` and `h`.
    pq   -- PyQuery-compatible constructor.

    The result contains:
      * one entry per catalog-record header (th.viewmarctags paired with
        td.viewmarctags); 'Added entry', 'Note' and 'Subject' are always
        present and collect their values in lists;
      * 'holdings' and 'orders': lists of per-row dicts;
      * when an "ISBN:" entry was found, the top-level keys of the Google
        Books volumes response for that ISBN, merged into the same dict.

    Performs a network request when an ISBN is present; errors raised by
    urllib2 or by JSON decoding propagate to the caller.
    '''
    import contextlib
    import json
    import urllib
    import urllib2

    p = pq(html)

    # First get the "catalog record" info.
    headers = map(kw.f, p.find("th.viewmarctags"))
    values = map(kw.f, p.find("td.viewmarctags"))

    multivalues = ['Added entry', 'Note', 'Subject']
    ret = dict((name, []) for name in multivalues)
    for h, v in zip(headers, values):
        if h in multivalues:
            ret[h].append(v)
        else:
            ret[h] = v

    # Next get the "holdings" and "orders" data under "item information".
    # NOTE(review): the tables are addressed by position (eq(7) / eq(8)),
    # which depends on the page layout staying the same.
    ret['holdings'] = _parse_item_table(
        p, pq, "table:eq(7)",
        ('call number', 'copies', 'material', 'location'), kw.g)
    ret['orders'] = _parse_item_table(
        p, pq, "table:eq(8)",
        ('library', 'copies', 'status', 'parts'), kw.h)

    # Enrich with Google Books data, but only when the record has an ISBN;
    # previously a record without one raised KeyError after all the parsing.
    isbn = ret.get("ISBN:")
    if isbn:
        # NOTE(review): this API key is committed in source; it should be
        # moved to configuration and rotated.
        url = (
            "https://www.googleapis.com/books/v1/volumes?"
            "q=ISBN:%s"
            "&key=AIzaSyAYsoUlbM91SDLHjp798QZMZTVG2P1lujw"
        ) % urllib.quote(isbn.encode('utf-8'))
        # closing() releases the connection even if reading/decoding fails.
        with contextlib.closing(urllib2.urlopen(url)) as response:
            ret.update(json.loads(response.read()))

    return ret
     
    
    
    
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.