Commits

Uche Ogbuji committed dc5a612

Add slide demos and the schedule tweet tool for Balisage '09

Comments (0)

Files changed (8)

balisage09/schedule.py

+#25 lines to scrape Balisage schedule and generate tweets (including a URL shortener)
+import sys, urllib, urllib2
+from amara.bindery import html
+from amara.lib import iri
+
+#Conference schedule page; relative abstract links are resolved against it below
+BASE = 'http://www.balisage.net/2009/At-A-Glance.html'
+#Hashtag placed at the end of every generated tweet
+SUFFIX = '#balisage2009'
+#URL shortening service that shorten_url() posts to
+SHORTENER = 'http://ur1.ca/'
+
+#The schedule HTML is read from standard input; BASE itself is not fetched here
+doc = html.parse(sys.stdin)
+#One tweet is generated per a[@class="abstract"] link found in the page
+abstracts = doc.xml_select(u'//a[@class="abstract"]')
+
+def shorten_url(url):
+    params = {'longurl': url}
+    data = urllib.urlencode(params)
+    request = urllib2.Request(SHORTENER, data)
+    resultdoc = html.parse(urllib2.urlopen(request).read())
+    shortened = resultdoc.xml_select(u'string(//p[@class="success"]/a)')
+    return shortened
+
+for ab in abstracts:
+    bits = []
+    #Many other ways to do this but wanted to demo XPath fullnes :)
+    bits.append('"'+unicode(ab)+'"')
+    bits.append(ab.xml_select(u'string(following-sibling::*//a[@class="biolink"])'))
+    bits.append(shorten_url(iri.absolutize(ab.href, BASE)))
+    bits.append(SUFFIX)
+    msg = ' '.join(bits)
+    print msg
+    print len(msg)
+

balisage09/slide25.py

+import amara
+from amara import tree
+
+MONTY_XML = """<monty>
+  <python spam="eggs">What do you mean "bleh"</python>
+  <python ministry="abuse">But I was looking for argument</python>
+</monty>"""
+
+doc = amara.parse(MONTY_XML)
+assert doc.xml_type == tree.entity.xml_type
+m = doc.xml_children[0] #xml_children is a sequence of child nodes
+assert m.xml_local == u'monty' #local name, i.e. without any prefix
+assert m.xml_qname == u'monty' #qualified name, e.g. includes prefix
+assert m.xml_prefix == None
+assert m.xml_namespace == None
+assert m.xml_name == (None, u'monty') #The "universal name" or "expanded name"
+assert m.xml_parent == doc
+
+p1 = m.xml_children[1]
+p1.xml_attributes[(None, u'spam')] = u"greeneggs"
+p1.xml_children[0].xml_value = u"Close to the edit"
+amara.xml_print(p1)
+

balisage09/slide26.py

+from amara import bindery
+
+MONTY_XML = """<quotes>
+  <quote skit="1">This parrot is dead</quote>
+  <quote skit="2">What do you mean "bleh"</quote>
+  <quote skit="2">I don't like spam</quote>
+  <quote skit="3">But I was looking for argument</quote>
+</quotes>"""
+
+#Parse with the bindery API, which exposes elements and attributes as Python attributes
+doc = bindery.parse(MONTY_XML)
+q1 = doc.quotes.quote # or doc.quotes.quote[0]
+print q1.skit
+print q1.xml_attributes[(None, u'skit')] # XPath works too: q1.xml_select(u'@skit')
+
+for q in doc.quotes.quote: # The loop will pick up all four quote elements
+    print unicode(q) # Just the child char data
+
+from itertools import groupby #Python stdlib
+from operator import attrgetter #Python stdlib
+
+#groupby only merges adjacent items with equal keys, so this relies on the
+#quotes in MONTY_XML already being ordered by their skit attribute
+skit_key = attrgetter('skit')
+for skit, quotegroup in groupby(doc.quotes.quote, skit_key):
+    print skit, [ unicode(q) for q in quotegroup ]
+

balisage09/slide28.py

+from amara import bindery
+from amara.bindery.model import *
+
+MONTY_XML = """<monty>
+  <python spam="eggs">What do you mean "bleh"</python>
+  <python ministry="abuse">But I was looking for argument</python>
+</monty>"""
+
+doc = bindery.parse(MONTY_XML)
+
+#Add a constraint that `python` elements must have a `ministry` attribute
+c = constraint(u'@ministry')
+try:
+    doc.monty.python.xml_model.add_constraint(c, validate=True)
+except bindery.BinderyError, e:
+    # Exception will be raised because the doc doesn't meet the constraint we added
+    pass # ignore and move on
+
+#Update the doc to meet the desired constraint
+doc.monty.python.xml_attributes[None, u'ministry'] = u'argument'
+doc.monty.python.xml_model.add_constraint(c, validate=True)
+

balisage09/slide29.py

+from amara import bindery
+from amara.bindery.model import *
+
+MONTY_XML = """<monty>
+  <python spam="eggs">What do you mean "bleh"</python>
+  <python ministry="abuse">But I was looking for argument</python>
+</monty>"""
+
+#Note that the first `python` element in the sample has no `ministry` attribute
+doc = bindery.parse(MONTY_XML)
+
+#Add a constraint using a specialized model primitive that supports a default
+#NOTE(review): arguments are presumably (namespace, local name, default value) - confirm
+c = attribute_constraint(None, u'ministry', u'nonesuch')
+#The constraint is attached to the model of the `python` elements and validated right away
+doc.monty.python.xml_model.add_constraint(c, validate=True)
+
+

balisage09/slide30.py

+from amara import bindery
+from amara.bindery.model import *
+
+#Example document from which the model is derived; the bracketed text is placeholder content
+LABEL_MODEL = '''<?xml version="1.0" encoding="utf-8"?>
+<labels>
+  <label>
+    <name>[Addressee name]</name>
+    <address>
+      <street>[Address street info]</street>
+      <city>[City]</city>
+      <state>[State abbreviation]</state>
+    </address>
+  </label>
+</labels>
+'''
+
+#Instance document with the same element structure as LABEL_MODEL
+VALID_LABEL_XML = '''<?xml version="1.0" encoding="utf-8"?>
+<labels>
+  <label>
+    <name>Thomas Eliot</name>
+    <address>
+      <street>3 Prufrock Lane</street>
+      <city>Stamford</city>
+      <state>CT</state>
+    </address>
+  </label>
+</labels>
+'''
+
+#Construct a set of constraints and other model info from the example
+label_model = examplotron_model(LABEL_MODEL)
+
+#Now use this to validate an instance document, VALID_LABEL_XML
+doc = bindery.parse(VALID_LABEL_XML, model=label_model)
+doc.xml_validate()
+

balisage09/slide31.py

+MODEL_A = '''<labels xmlns:eg="http://examplotron.org/0/" xmlns:ak="http://purl.org/dc/org/xml3k/akara">
+  <label id="tse" added="2003-06-10" eg:occurs="*" ak:resource="@id">
+    <!-- use ak:resource="" for an anonymous resource -->
+    <quote eg:occurs="?">
+      <emph>Midwinter</emph> Spring is its own <strong>season</strong>...
+    </quote>
+    <name ak:rel="name()">Thomas Eliot</name>
+    <address ak:rel="'place'" ak:value="concat(city, ',', province)">
+      <street>3 Prufrock Lane</street>
+      <city>Stamford</city>
+      <province>CT</province>
+    </address>
+    <opus year="1932" ak:rel="name()" ak:resource="">
+      <title ak:rel="name()">The Wasteland</title>
+    </opus>
+    <tag eg:occurs="*" ak:rel="name()">old possum</tag>
+  </label>
+</labels>
+'''
+labelmodel = examplotron_model(MODEL_A)
+
+INSTANCE_A_1 = '''<labels>
+  <label id="co" added="2004-11-15">
+    <name>Christopher Okigbo</name>
+    <address>
+      <street>7 Heaven's Gate</street>
+      <city>Idoto</city>
+      <province>Anambra</province>
+    </address>
+    <opus>
+      <title>Heaven's Gate</title>
+    </opus>
+    <tag>biafra</tag>
+    <tag>poet</tag>
+  </label>
+</labels>
+'''
+
+from amara.bindery.model import generate_metadata
+
+doc = bindery.parse(INSTANCE_A_1, model=labelmodel)
+
+for triple in generate_metadata(doc): #Triples, but only RDF if you want it to be
+    print triple
+

balisage09/slide33.py

+import amara
+from akara.services import simple_service, response
+
+ECOUNTER_SERVICE_ID = 'http://purl.org/akara/services/demo/element_counter'
+
+#Config info is pulled in at global scope as AKARA_MODULE_CONFIG
+
+#Security demo: create a URI jail outside of which XML operations won't leak
+URI_JAIL = AKARA_MODULE_CONFIG.get('uri_jail')
+
+#Create the assertion rule for the URI jail
+ALLOWED = [(lambda uri, base=baseuri: uri.startswith(URI_JAIL), True)]
+
+#Create a URI resolver instance that enforces the jail
+restricted_resolver = irihelpers.resolver(authorizations=ALLOWED)
+
+@simple_service('GET', ECOUNTER_SERVICE_ID, 'ecounter', 'text/plain')
+def ecounter(uri):
+    #e.g.: curl http://localhost:8880/akara.xslt?http://hg.akara.info/testdoc.xml"
+    uri = inputsource(uri[0], resolver=restricted_resolver)
+    doc = amara.parse(uri)
+    ecount = doc.xml_select(u'count(//*)')
+    return str(ecount)
+