Commits

Walter Dörwald committed 8d3bd58

Be more explicit about bytes/unicode.

Comments (0)

Files changed (16)

src/ll/xist/parsers.py

 		Produces an event stream of one ``"url"`` event and one ``"bytes"`` or
 		``"unicode"`` event for the data.
 		"""
-		yield ("url", self.url)
+		yield (u"url", self.url)
 		if isinstance(self.data, str):
-			yield ("bytes", self.data)
+			yield (u"bytes", self.data)
 		elif  isinstance(self.data, unicode):
-			yield ("unicode", self.data)
+			yield (u"unicode", self.data)
 		else:
 			raise TypeError("data must be str or unicode")
 
 		Produces an event stream of one ``"url"`` event followed by the
 		``"bytes"``/``"unicode"`` events for the data from the iterable.
 		"""
-		yield ("url", self.url)
+		yield (u"url", self.url)
 		for data in self.iterable:
 			if isinstance(data, str):
-				yield ("bytes", data)
+				yield (u"bytes", data)
 			elif  isinstance(data, unicode):
-				yield ("unicode", data)
+				yield (u"unicode", data)
 			else:
 				raise TypeError("data must be str or unicode")
 
 		Produces an event stream of one ``"url"`` event followed by the
 		``"bytes"``/``"unicode"`` events for the data from the stream.
 		"""
-		yield ("url", self.url)
+		yield (u"url", self.url)
 		while True:
 			data = self.stream.read(self.bufsize)
 			if data:
 				if isinstance(data, str):
-					yield ("bytes", data)
+					yield (u"bytes", data)
 				elif  isinstance(data, unicode):
-					yield ("unicode", data)
+					yield (u"unicode", data)
 				else:
 					raise TypeError("data must be str or unicode")
 			else:
 		Produces an event stream of one ``"url"`` event followed by the
 		``"bytes"`` events for the data from the file.
 		"""
-		yield ("url", self.url)
+		yield (u"url", self.url)
 		with open(self._filename, "rb") as stream:
 			while True:
 				data = stream.read(self.bufsize)
 				if data:
-					yield ("bytes", data)
+					yield (u"bytes", data)
 				else:
 					break
 
 		``"bytes"`` events for the data from the URL.
 		"""
 		stream = self.url.open("rb", *self.args, **self.kwargs)
-		yield ("url", stream.finalurl())
+		yield (u"url", stream.finalurl())
 		with contextlib.closing(stream) as stream:
 			while True:
 				data = stream.read(self.bufsize)
 				if data:
-					yield ("bytes", data)
+					yield (u"bytes", data)
 				else:
 					break
 
 				(elementxmlns, sep, elementname) = elementname[1:].partition("}")
 			else:
 				elementxmlns = self.defaultxmlns
-			yield ("enterstarttagns", (elementname, elementxmlns))
+			yield (u"enterstarttagns", (elementname, elementxmlns))
 			for (attrname, attrvalue) in node.items():
 				if attrname.startswith("{"):
 					(attrxmlns, sep, attrname) = attrname[1:].partition("}")
 				else:
 					attrxmlns = None
-				yield ("enterattrns", (attrname, attrxmlns))
-				yield ("text", attrvalue)
-				yield ("leaveattrns", (attrname, attrxmlns))
-			yield ("leavestarttagns", (elementname, elementxmlns))
+				yield (u"enterattrns", (attrname, attrxmlns))
+				yield (u"text", attrvalue)
+				yield (u"leaveattrns", (attrname, attrxmlns))
+			yield (u"leavestarttagns", (elementname, elementxmlns))
 			if node.text:
-				yield ("text", node.text)
+				yield (u"text", node.text)
 			for child in node:
 				for event in self._asxist(child):
 					yield event
 				if hasattr(child, "tail") and child.tail:
-					yield ("text", child.tail)
-			yield ("endtagns", (elementname, elementxmlns))
+					yield (u"text", child.tail)
+			yield (u"endtagns", (elementname, elementxmlns))
 		elif "ProcessingInstruction" in name:
-			yield ("procinst", (node.target, node.text))
+			yield (u"procinst", (node.target, node.text))
 		elif "Comment" in name:
-			yield ("comment", node.text)
+			yield (u"comment", node.text)
 
 	def __iter__(self):
 		"""
 		Produces an event stream of namespaced parsing events for the ElementTree
 		object passed as :var:`data` to the constructor.
 		"""
-		yield ("url", self.url)
+		yield (u"url", self.url)
 		for event in self._asxist(self.data):
 			yield event
 
 	def __call__(self, input):
 		decoder = codecs.getincrementaldecoder("xml")(encoding=self.encoding)
 		for (evtype, data) in input:
-			if evtype == "bytes":
+			if evtype == u"bytes":
 				data = decoder.decode(data, False)
 				if data:
-					yield ("unicode", data)
-			elif evtype == "unicode":
+					yield (u"unicode", data)
+			elif evtype == u"unicode":
 				if data:
-					yield ("unicode", data)
-			elif evtype == "url":
-				yield ("url", data)
+					yield (u"unicode", data)
+			elif evtype == u"url":
+				yield (u"url", data)
 			else:
 				raise UnknownEventError(self, (evtype, data))
 		data = decoder.decode("", True)
 		if data:
-			yield ("unicode", data)
+			yield (u"unicode", data)
 
 	def __repr__(self):
 		return "<{0.__class__.__module__}.{0.__class__.__name__} object encoding={0.encoding!r} at {1:#x}>".format(self, id(self))
 	def __call__(self, input):
 		encoder = codecs.getincrementalencoder("xml")(encoding=self.encoding)
 		for (evtype, data) in input:
-			if evtype == "unicode":
+			if evtype == u"unicode":
 				data = encoder.encode(data, False)
 				if data:
-					yield ("bytes", data)
-			elif evtype == "bytes":
+					yield (u"bytes", data)
+			elif evtype == u"bytes":
 				if data:
-					yield ("bytes", data)
-			elif evtype == "url":
-				yield ("url", data)
+					yield (u"bytes", data)
+			elif evtype == u"url":
+				yield (u"url", data)
 			else:
 				raise UnknownEventError(self, (evtype, data))
 		data = encoder.encode(u"", True)
 		if data:
-			yield ("bytes", data)
+			yield (u"bytes", data)
 
 	def __repr__(self):
 		return "<{0.__class__.__module__}.{0.__class__.__name__} object encoding={0.encoding!r} at {1:#x}>".format(self, id(self))
 		decoder = codecs.getincrementaldecoder("xml")(encoding=self.fromencoding)
 		encoder = codecs.getincrementalencoder("xml")(encoding=self.toencoding)
 		for (evtype, data) in input:
-			if evtype == "bytes":
+			if evtype == u"bytes":
 				data = encoder.encode(decoder.decode(data, False), False)
 				if data:
-					yield ("bytes", data)
-			elif evtype == "url":
-				yield ("url", data)
+					yield (u"bytes", data)
+			elif evtype == u"url":
+				yield (u"url", data)
 			else:
 				raise UnknownEventError(self, (evtype, data))
 		data = encoder.encode(decoder.decode("", True), True)
 		if data:
-			yield ("bytes", data)
+			yield (u"bytes", data)
 
 	def __repr__(self):
 		return "<{0.__class__.__module__}.{0.__class__.__name__} object fromencoding={0.fromencoding!r} toencoding={0.toencoding!r} at {1:#x}>".format(self, id(self))
 	"""
 	Basic parser interface.
 	"""
-	evxmldecl = "xmldecl"
-	evbegindoctype = "begindoctype"
-	evenddoctype = "enddoctype"
-	evcomment = "comment"
-	evtext = "text"
-	evcdata = "cdata"
-	eventerstarttag = "enterstarttag"
-	eventerstarttagns = "enterstarttagns"
-	eventerattr = "enterattr"
-	eventerattrns = "enterattrns"
-	evleaveattr = "leaveattr"
-	evleaveattrns = "leaveattrns"
-	evleavestarttag = "leavestarttag"
-	evleavestarttagns = "leavestarttagns"
-	evendtag = "endtag"
-	evendtagns = "endtagns"
-	evprocinst = "procinst"
-	eventity = "entity"
-	evposition = "position"
-	evurl = "url"
+	evxmldecl = u"xmldecl"
+	evbegindoctype = u"begindoctype"
+	evenddoctype = u"enddoctype"
+	evcomment = u"comment"
+	evtext = u"text"
+	evcdata = u"cdata"
+	eventerstarttag = u"enterstarttag"
+	eventerstarttagns = u"enterstarttagns"
+	eventerattr = u"enterattr"
+	eventerattrns = u"enterattrns"
+	evleaveattr = u"leaveattr"
+	evleaveattrns = u"leaveattrns"
+	evleavestarttag = u"leavestarttag"
+	evleavestarttagns = u"leavestarttagns"
+	evendtag = u"endtag"
+	evendtagns = u"endtagns"
+	evprocinst = u"procinst"
+	eventity = u"entity"
+	evposition = u"position"
+	evurl = u"url"
 
 	@misc.notimplemented
 	def feed(self, data, final=False):
 		Return an iterator over the events produced by :var:`input`.
 		"""
 		for (evtype, data) in input:
-			if evtype == "bytes":
+			if evtype == u"bytes":
 				for event2 in self.feed(data):
 					yield event2
-			elif evtype == "url":
+			elif evtype == u"url":
 				yield (self.evurl, data)
 			else:
 				yield UnknownEventError(self, (evtype, data))
 	def _handle_xmldecl(self, version, encoding, standalone):
 		standalone = (bool(standalone) if standalone != -1 else None)
 		self._handle_position()
-		self._buffer.append((self.evxmldecl, {"version": version, "encoding": encoding, "standalone": standalone}))
+		self._buffer.append((self.evxmldecl, {u"version": version, u"encoding": encoding, u"standalone": standalone}))
 
 	def _handle_begindoctype(self, doctypename, systemid, publicid, has_internal_subset):
 		if self.doctype:
 			self._handle_position()
-			self._buffer.append((self.evbegindoctype, {"name": doctypename, "publicid": publicid, "systemid": systemid}))
+			self._buffer.append((self.evbegindoctype, {u"name": doctypename, u"publicid": publicid, u"systemid": systemid}))
 
 	def _handle_enddoctype(self):
 		if self.doctype:
 			self._buffer.append((self.evenddoctype, None))
 
 	def _handle_default(self, data):
-		if data.startswith("&") and data.endswith(";"):
+		if data.startswith(u"&") and data.endswith(u";"):
 			self._handle_position()
 			self._buffer.append((self.eventity, data[1:-1]))
 
 				yield event
 
 	def url(self, data):
-		yield ("url", data)
+		yield (u"url", data)
 
 	def xmldecl(self, data):
-		data = ("xmldecl", data)
+		data = (u"xmldecl", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def begindoctype(self, data):
-		data = ("begindoctype", data)
+		data = (u"begindoctype", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def enddoctype(self, data):
-		data = ("enddoctype", data)
+		data = (u"enddoctype", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def comment(self, data):
-		data = ("comment", data)
+		data = (u"comment", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def text(self, data):
-		data = ("text", data)
+		data = (u"text", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def cdata(self, data):
-		data = ("cdata", data)
+		data = (u"cdata", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def procinst(self, data):
-		data = ("procinst", data)
+		data = (u"procinst", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def entity(self, data):
-		data = ("entity", data)
+		data = (u"entity", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 			yield data
 
 	def position(self, data):
-		data = ("position", data)
+		data = (u"position", data)
 		if self._attr is not None:
 			self._attr.append(data)
 		else:
 
 		if self._newprefixes:
 			prefixes = oldprefixes.copy()
-			newprefixes = dict((key, "".join(d for (t, d) in value if t == "text")) for (key, value) in self._newprefixes.iteritems())
+			newprefixes = dict((key, u"".join(d for (t, d) in value if t == u"text")) for (key, value) in self._newprefixes.iteritems())
 			prefixes.update(newprefixes)
 		else:
 			prefixes = oldprefixes
 
 		self._prefixstack.append((data, prefixes))
 
-		yield ("enterstarttagns", data)
+		yield (u"enterstarttagns", data)
 		for (attrname, attrvalue) in self._attrs.iteritems():
 			if u":" in attrname:
 				(attrprefix, attrname) = attrname.split(u":", 1)
 						raise xsc.IllegalPrefixError(attrprefix)
 			else:
 				xmlns = None
-			yield ("enterattrns", (attrname, xmlns))
+			yield (u"enterattrns", (attrname, xmlns))
 			for event in attrvalue:
 				yield event
-			yield ("leaveattrns", (attrname, xmlns))
-		yield ("leavestarttagns", data)
+			yield (u"leaveattrns", (attrname, xmlns))
+		yield (u"leavestarttagns", data)
 		self._newprefixes = self._attrs = self._attr = None
 
 	def endtag(self, data):
 		(data, prefixes) = self._prefixstack.pop()
-		yield ("endtagns", data)
+		yield (u"endtagns", data)
 
 
 class Instantiate(object):
 		self._url = data
 
 	def xmldecl(self, data):
-		node = xml.XML(version=data["version"], encoding=data["encoding"], standalone=data["standalone"])
+		node = xml.XML(version=data[u"version"], encoding=data[u"encoding"], standalone=data[u"standalone"])
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
-		return ("xmldeclnode", node)
+		return (u"xmldeclnode", node)
 
 	def begindoctype(self, data):
-		if data["publicid"]:
+		if data[u"publicid"]:
 			fmt = u'{0[name]} PUBLIC "{0[publicid]}" "{0[systemid]}"'
 		elif data["systemid"]:
 			fmt = u'{0[name]} SYSTEM "{0[systemid]}"'
 		self._indoctype = True
 
 	def enddoctype(self, data):
-		result = ("doctypenode", self.doctype)
+		result = (u"doctypenode", self.doctype)
 		del self.doctype
 		self._indoctype = False
 		return result
 		node = self.pool.entity_xml(data)
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
-		node.parsed(self, "entity")
+		node.parsed(self, u"entity")
 		if self._inattr:
 			self._stack[-1].append(node)
 		elif not self._indoctype:
-		 	return ("entitynode", node)
+		 	return (u"entitynode", node)
 
 	def comment(self, data):
 		node = xsc.Comment(data)
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
-		node.parsed(self, "comment")
+		node.parsed(self, u"comment")
 		if self._inattr:
 			self._stack[-1].append(node)
 		elif not self._indoctype:
-			return ("commentnode", node)
+			return (u"commentnode", node)
 
 	def cdata(self, data):
 		node = xsc.Text(data)
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
-		node.parsed(self, "cdata")
+		node.parsed(self, u"cdata")
 		if self._inattr:
 			self._stack[-1].append(node)
 		elif not self._indoctype:
-			return ("textnode", node)
+			return (u"textnode", node)
 
 	def text(self, data):
 		node = xsc.Text(data)
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
-		node.parsed(self, "text")
+		node.parsed(self, u"text")
 		if self._inattr:
 			self._stack[-1].append(node)
 		elif not self._indoctype:
-		 	return ("textnode", node)
+		 	return (u"textnode", node)
 
 	def enterstarttagns(self, data):
 		node = self.pool.element_xml(*data)
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
 		self._stack.append(node)
-		node.parsed(self, "starttagns")
+		node.parsed(self, u"starttagns")
 
 	def enterattrns(self, data):
 		if data[1] is not None:
 		node = self._stack[-1].attrs[node]
 		self._stack.append(node)
 		self._inattr = True
-		node.parsed(self, "enterattrns")
+		node.parsed(self, u"enterattrns")
 
 	def leaveattrns(self, data):
 		node = self._stack.pop()
 		self._inattr = False
-		node.parsed(self, "leaveattrns")
+		node.parsed(self, u"leaveattrns")
 
 	def leavestarttagns(self, data):
 		node = self._stack[-1]
-		node.parsed(self, "leavestarttagns")
-		return ("startelementnode", node)
+		node.parsed(self, u"leavestarttagns")
+		return (u"startelementnode", node)
 
 	def endtagns(self, data):
 		node = self._stack.pop()
 		if self.loc:
 			node.endloc = xsc.Location(self._url, *self._position)
-		node.parsed(self, "endtagns")
-		return ("endelementnode", node)
+		node.parsed(self, u"endtagns")
+		return (u"endelementnode", node)
 
 	def procinst(self, data):
 		node = self.pool.procinst_xml(*data)
 		if self.loc:
 			node.startloc = xsc.Location(self._url, *self._position)
-		node.parsed(self, "procinst")
+		node.parsed(self, u"procinst")
 		if self._inattr:
 			self._stack[-1].append(node)
 		elif not self._indoctype:
-			return ("procinstnode", node)
+			return (u"procinstnode", node)
 
 	def position(self, data):
 		self._position = data
 		if self.loc:
 			lineno = node.lineNo()
 			if lineno != self._lastlineno:
-				result = ("position", (lineno, None))
+				result = (u"position", (lineno, None))
 				self._lastlineno = lineno
 				return result
 
 			if pos is not None:
 				yield pos
 			elementname = decode(node.name).lower()
-			yield ("enterstarttag", elementname)
+			yield (u"enterstarttag", elementname)
 			attr = node.properties
 			while attr is not None:
 				attrname = decode(attr.name).lower()
 				content = decode(attr.content) if attr.content is not None else u""
-				yield ("enterattr", attrname)
-				yield ("text", content)
-				yield ("leaveattr", attrname)
+				yield (u"enterattr", attrname)
+				yield (u"text", content)
+				yield (u"leaveattr", attrname)
 				attr = attr.next
-			yield ("leavestarttag", elementname)
+			yield (u"leavestarttag", elementname)
 			child = node.children
 			while child is not None:
 				for event in self._asxist(child):
 					yield event
 				child = child.next
-			yield ("endtag", elementname)
+			yield (u"endtag", elementname)
 		elif node.type == "text":
 			pos = self._handle_pos(node)
 			if pos is not None:
 				yield pos
-			yield ("text", decode(node.content))
+			yield (u"text", decode(node.content))
 		elif node.type == "cdata":
 			pos = self._handle_pos(node)
 			if pos is not None:
 				yield pos
-			yield ("cdata", decode(node.content))
+			yield (u"cdata", decode(node.content))
 		elif node.type == "comment":
 			pos = self._handle_pos(node)
 			if pos is not None:
 				yield pos
-			yield ("comment", decode(node.content))
+			yield (u"comment", decode(node.content))
 		# ignore all other types
 
 	def __call__(self, input):
 		url = None
 		collectdata = []
 		for (evtype, data) in input:
-			if evtype == "url":
+			if evtype == u"url":
 				if url is None:
 					url = data
 				else:
 					raise ValueError("got multiple url events")
-			elif evtype == "bytes":
+			elif evtype == u"bytes":
 				collectdata.append(data)
 			else:
 				raise UnknownEventError(self, (evtype, data))
 		data = "".join(collectdata)
 		if url is not None:
-			yield ("url", url)
+			yield (u"url", url)
 		if data:
 			self._lastlineno = None
 			try:
 	stack = [xsc.Frag()]
 	validate = kwargs.get("validate", True)
 	for (evtype, node) in events(*pipeline):
-		if evtype == "startelementnode":
+		if evtype == u"startelementnode":
 			stack[-1].append(node)
 			stack.append(node)
-		elif evtype == "endelementnode":
+		elif evtype == u"endelementnode":
 			if validate:
 				node.checkvalid()
 			stack.pop()
 
 	path = [xsc.Frag()]
 	for (evtype, node) in events(*pipeline):
-		if evtype == "startelementnode":
+		if evtype == u"startelementnode":
 			path[-1].append(node)
 			path.append(node)
 			if evtype in events_ and filter.matchpath(path): # FIXME: This requires that the ``WalkFilter`` is in fact a ``Selector``
 				yield (evtype, path)
-		elif evtype == "endelementnode":
+		elif evtype == u"endelementnode":
 			if validate:
 				node.checkvalid()
 			if evtype in events_ and filter.matchpath(path): # FIXME: This requires that the ``WalkFilter`` is in fact a ``Selector``

test/test_xist_basics.py

 	node = html.div()
 	io = cStringIO.StringIO()
 	node.write(io, xhtml=2)
-	assert io.getvalue() == "<div/>"
+	assert io.getvalue() == b"<div/>"
 
 
 def test_mul():
-	node = xsc.Frag("a")
-	assert 3*node == xsc.Frag(list("aaa"))
-	assert node*3 == xsc.Frag(list("aaa"))
+	node = xsc.Frag(u"a")
+	assert 3*node == xsc.Frag(list(u"aaa"))
+	assert node*3 == xsc.Frag(list(u"aaa"))
 
 	node = html.div()
 	assert 3*node == xsc.Frag(html.div(), html.div(), html.div())
 
 
 def test_text():
-	s = "test"
+	s = u"test"
 	node = xsc.Text(s)
 	hash(node)
 	assert len(node), 4
-	assert node[1] == xsc.Text("e")
+	assert node[1] == xsc.Text(u"e")
 	assert 3*node == xsc.Text(3*s)
 	assert node*3 == xsc.Text(s*3)
-	assert node[1:3] == xsc.Text("es")
-	assert node.capitalize() == xsc.Text("Test")
-	assert node.center(8) == xsc.Text("  test  ")
-	assert node.count("t") == 2
-	assert node.endswith("st") is True
-	assert node.index("s") == 2
+	assert node[1:3] == xsc.Text(u"es")
+	assert node.capitalize() == xsc.Text(u"Test")
+	assert node.center(8) == xsc.Text(u"  test  ")
+	assert node.count(u"t") == 2
+	assert node.endswith(u"st") is True
+	assert node.index(u"s") == 2
 	assert node.isalpha() is True
 	assert node.isalnum() is True
 	assert node.isdecimal() is False
 	assert node.isspace() is False
 	assert node.istitle() is False
 	assert node.isupper() is False
-	assert node.join(xsc.Frag(list("abc"))) == xsc.Frag("a", "test", "b", "test", "c")
-	assert node.ljust(6) == xsc.Text("test  ")
-	assert node.ljust(6, ".") == xsc.Text("test..")
-	assert node.lower() == xsc.Text("test")
-	assert xsc.Text("  test").lstrip() == xsc.Text("test")
-	assert node.replace("s", "x") == xsc.Text("text")
-	assert node.rjust(6) == xsc.Text("  test")
-	assert node.rjust(6, ".") == xsc.Text("..test")
-	assert xsc.Text("test  ").rstrip() == xsc.Text("test")
-	assert node.rfind("s") == 2
-	assert node.rindex("s") == 2
-	assert node.split("e") == xsc.Frag("t", "st")
-	assert xsc.Text("a\nb\n").splitlines() == xsc.Frag("a", "b")
-	assert node.startswith("te") is True
-	assert xsc.Text("  test  ").strip() == xsc.Text("test")
-	assert node.swapcase() == xsc.Text("TEST")
-	assert node.title() == xsc.Text("Test")
-	assert node.upper() == xsc.Text("TEST")
+	assert node.join(xsc.Frag(list(u"abc"))) == xsc.Frag(u"a", u"test", u"b", u"test", u"c")
+	assert node.ljust(6) == xsc.Text(u"test  ")
+	assert node.ljust(6, u".") == xsc.Text(u"test..")
+	assert node.lower() == xsc.Text(u"test")
+	assert xsc.Text(u"  test").lstrip() == xsc.Text(u"test")
+	assert node.replace(u"s", u"x") == xsc.Text(u"text")
+	assert node.rjust(6) == xsc.Text(u"  test")
+	assert node.rjust(6, u".") == xsc.Text(u"..test")
+	assert xsc.Text(u"test  ").rstrip() == xsc.Text(u"test")
+	assert node.rfind(u"s") == 2
+	assert node.rindex(u"s") == 2
+	assert node.split(u"e") == xsc.Frag(u"t", u"st")
+	assert xsc.Text(u"a\nb\n").splitlines() == xsc.Frag(u"a", u"b")
+	assert node.startswith(u"te") is True
+	assert xsc.Text(u"  test  ").strip() == xsc.Text(u"test")
+	assert node.swapcase() == xsc.Text(u"TEST")
+	assert node.title() == xsc.Text(u"Test")
+	assert node.upper() == xsc.Text(u"TEST")
 
 
 def test_charref():
 def test_conv():
 	def mappedmapper(node, converter):
 		if isinstance(node, xsc.Text):
-			node = node.replace("gurk", "hurz")
+			node = node.replace(u"gurk", u"hurz")
 		return node
 
 	node = common.createfrag()
 	class newa(html.a):
 		def convert(self, converter):
 			attrs = self.attrs.clone()
-			attrs["href"].insert(0, "foo")
+			attrs[u"href"].insert(0, u"foo")
 			e = html.a(self.content, attrs)
 			return e.convert(converter)
-	e = newa("gurk", href="hurz")
+	e = newa(u"gurk", href=u"hurz")
 	e = e.conv().conv()
-	assert unicode(e["href"]) == "foohurz"
-	assert str(e["href"]) == "foohurz"
+	assert unicode(e["href"]) == u"foohurz"
 
 
 def test_attributes():
-	node = html.h1("gurk", {xml.Attrs.lang: "de"}, lang="de")
-	assert node.attrs.has("lang")
-	assert node.attrs.has_xml("lang")
+	node = html.h1(u"gurk", {xml.Attrs.lang: u"de"}, lang=u"de")
+	assert node.attrs.has(u"lang")
+	assert node.attrs.has_xml(u"lang")
 
 	assert node.attrs.has(html.h1.Attrs.lang)
 	assert node.attrs.has_xml(html.h1.Attrs.lang)
 	assert node.attrs.has(xml.Attrs.lang)
 	assert node.attrs.has_xml(xml.Attrs.lang)
 
-	assert "lang" in node.attrs
+	assert u"lang" in node.attrs
 	assert html.h1.Attrs.lang in node.attrs
 	assert xml.Attrs.lang in node.attrs
 
 
 
 def test_attributeswithoutnames():
-	node = html.h1("gurk",
-		{xml.Attrs.lang: "de", xml.Attrs.base: "http://www.livinglogic.de/"},
-		lang="de",
-		style="color: #fff",
-		align="right",
-		title="gurk",
-		class_="important",
+	node = html.h1(
+		u"gurk",
+		{xml.Attrs.lang: u"de", xml.Attrs.base: u"http://www.livinglogic.de/"},
+		lang=u"de",
+		style=u"color: #fff",
+		align=u"right",
+		title=u"gurk",
+		class_=u"important",
 		id=42,
-		dir="ltr"
+		dir=u"ltr"
 	)
 	keys = set(node.attrs.keys())
 	keys.remove(html.h1.Attrs.class_)
 
-	keys1 = set(node.attrs.withoutnames("class_").keys())
+	keys1 = set(node.attrs.withoutnames(u"class_").keys())
 	assert keys == keys1
 
 	keys.remove(xml.Attrs.lang)
 	keys.remove(xml.Attrs.base)
-	keys2 = set(node.attrs.withoutnames("class_", xml.Attrs.lang, xml.Attrs.base).keys())
+	keys2 = set(node.attrs.withoutnames(u"class_", xml.Attrs.lang, xml.Attrs.base).keys())
 	assert keys == keys2
 
 	# Check that non existing attrs are handled correctly
-	keys3 = set(node.attrs.withoutnames("class_", "src", xml.Attrs.lang, xml.Attrs.base).keys())
+	keys3 = set(node.attrs.withoutnames(u"class_", u"src", xml.Attrs.lang, xml.Attrs.base).keys())
 	assert keys == keys3
 
 
 def test_attributeswithoutnames_xml():
-	node = html.h1("gurk",
-		title="gurk",
-		class_="important",
+	node = html.h1(
+		u"gurk",
+		title=u"gurk",
+		class_=u"important",
 		id=42,
 	)
 	keys = set(node.attrs.keys())
 	keys.remove(html.h1.Attrs.class_)
 
-	keys1 = set(node.attrs.withoutnames_xml("class").keys())
+	keys1 = set(node.attrs.withoutnames_xml(u"class").keys())
 	assert keys == keys1
 
 
 			class lang(html.h1.Attrs.lang):
 				default = 42
 
-	node = h1("gurk",
-		{xml.Attrs.space: 1, xml.Attrs.lang: "de"},
-		class_="gurk",
-		align="right"
+	node = h1(
+		u"gurk",
+		{xml.Attrs.space: 1, xml.Attrs.lang: u"de"},
+		class_=u"gurk",
+		align=u"right"
 	)
 
-	assert set(node.attrs.withnames("id").keys()) == set()
+	assert set(node.attrs.withnames(u"id").keys()) == set()
 
-	assert set(node.attrs.withnames("class_").keys()) == set([html.h1.Attrs.class_])
+	assert set(node.attrs.withnames(u"class_").keys()) == set([html.h1.Attrs.class_])
 
-	assert set(node.attrs.withnames("lang", "align").keys()) == set([h1.Attrs.lang, html.h1.Attrs.align])
+	assert set(node.attrs.withnames(u"lang", u"align").keys()) == set([h1.Attrs.lang, html.h1.Attrs.align])
 
-	assert set(node.attrs.withnames(h1.Attrs.lang, "align").keys()) == set([h1.Attrs.lang, html.h1.Attrs.align])
+	assert set(node.attrs.withnames(h1.Attrs.lang, u"align").keys()) == set([h1.Attrs.lang, html.h1.Attrs.align])
 
-	assert set(node.attrs.withnames(html.h1.Attrs.lang, "align").keys()) == set([h1.Attrs.lang, html.h1.Attrs.align])
+	assert set(node.attrs.withnames(html.h1.Attrs.lang, u"align").keys()) == set([h1.Attrs.lang, html.h1.Attrs.align])
 
-	node = html.h1("gurk",
-		{xml.Attrs.space: 1, xml.Attrs.lang: "de"},
-		lang="de",
-		class_="gurk",
-		align="right"
+	node = html.h1(
+		u"gurk",
+		{xml.Attrs.space: 1, xml.Attrs.lang: u"de"},
+		lang=u"de",
+		class_=u"gurk",
+		align=u"right"
 	)
 
-	assert set(node.attrs.withnames("id").keys()) == set()
+	assert set(node.attrs.withnames(u"id").keys()) == set()
 
-	assert set(node.attrs.withnames("class_").keys()) == set([html.h1.Attrs.class_])
+	assert set(node.attrs.withnames(u"class_").keys()) == set([html.h1.Attrs.class_])
 
-	assert set(node.attrs.withnames("lang", "align").keys()) == set([html.h1.Attrs.lang, html.h1.Attrs.align])
+	assert set(node.attrs.withnames(u"lang", u"align").keys()) == set([html.h1.Attrs.lang, html.h1.Attrs.align])
 
 	# no h1.Attrs.lang
-	assert set(node.attrs.withnames(h1.Attrs.lang, "align").keys()) == set([html.h1.Attrs.align])
+	assert set(node.attrs.withnames(h1.Attrs.lang, u"align").keys()) == set([html.h1.Attrs.align])
 
-	assert set(node.attrs.withnames(html.h1.Attrs.lang, "align").keys()) == set([html.h1.Attrs.lang, html.h1.Attrs.align])
+	assert set(node.attrs.withnames(html.h1.Attrs.lang, u"align").keys()) == set([html.h1.Attrs.lang, html.h1.Attrs.align])
 
 
 def test_attributeswithnames_xml():
-	node = html.h1("gurk",
+	node = html.h1(
+		u"gurk",
 		{xml.Attrs.space: 1},
-		lang="de",
-		class_="gurk",
-		align="right"
+		lang=u"de",
+		class_=u"gurk",
+		align=u"right"
 	)
-	assert set(node.attrs.withnames_xml("class").keys()) == set([html.h1.Attrs.class_])
+	assert set(node.attrs.withnames_xml(u"class").keys()) == set([html.h1.Attrs.class_])
 	assert set(node.attrs.withnames_xml(xml.Attrs.space).keys()) == set([xml.Attrs.space])
 
 
 			class withdef(xsc.TextAttr): default = 42
 			class withoutdef(xsc.TextAttr): pass
 	node = Test()
-	assert "withdef" in node.attrs
-	assert "withoutdef" not in node.attrs
-	py.test.raises(xsc.IllegalAttrError, node.attrs.__contains__, "illegal")
+	assert u"withdef" in node.attrs
+	assert u"withoutdef" not in node.attrs
+	py.test.raises(xsc.IllegalAttrError, node.attrs.__contains__, u"illegal")
 	node = Test(withdef=None)
-	assert "withdef" not in node.attrs
+	assert u"withdef" not in node.attrs
 
 
 def test_attributedictmethods():
 				default = 42
 
 	node = testelem()
-	assert unicode(node["testattr"]) == "42"
-	assert unicode(node.conv()["testattr"]) == "42"
+	assert unicode(node[u"testattr"]) == u"42"
+	assert unicode(node.conv()[u"testattr"]) == u"42"
 
-	node["testattr"].clear()
-	assert "testattr" not in node.attrs
-	assert "testattr" not in node.conv().attrs
+	node[u"testattr"].clear()
+	assert u"testattr" not in node.attrs
+	assert u"testattr" not in node.conv().attrs
 
 	node = testelem(testattr=23)
-	assert unicode(node["testattr"]) == "23"
-	assert unicode(node.conv()["testattr"]) == "23"
+	assert unicode(node[u"testattr"]) == u"23"
+	assert unicode(node.conv()[u"testattr"]) == u"23"
 
-	del node["testattr"]
-	assert unicode(node["testattr"]) == ""
-	assert unicode(node.conv()["testattr"]) == ""
+	del node[u"testattr"]
+	assert unicode(node[u"testattr"]) == u""
+	assert unicode(node.conv()[u"testattr"]) == u""
 
-	node["testattr"] = 23
-	node["testattr"] = None
-	assert "testattr" not in node.attrs
-	assert "testattr" not in node.conv().attrs
+	node[u"testattr"] = 23
+	node[u"testattr"] = None
+	assert u"testattr" not in node.attrs
+	assert u"testattr" not in node.conv().attrs
 
 	node = testelem(testattr=None)
-	assert "testattr" not in node.attrs
-	assert "testattr" not in node.conv().attrs
+	assert u"testattr" not in node.attrs
+	assert u"testattr" not in node.conv().attrs
 
 
 def test_checkisallowed():
 			testattr = None
 
 	node = testelem()
-	assert node.attrs.isallowed("testattr") is True
-	assert node.attrs.isallowed("notestattr") is False
+	assert node.attrs.isallowed(u"testattr") is True
+	assert node.attrs.isallowed(u"notestattr") is False
 
 	node = testelem2()
-	assert node.attrs.isallowed("testattr") is True
-	assert node.attrs.isallowed("notestattr") is False
+	assert node.attrs.isallowed(u"testattr") is True
+	assert node.attrs.isallowed(u"notestattr") is False
 
 	node = testelem3()
-	assert node.attrs.isallowed("testattr") is True
-	assert node.attrs.isallowed("testattr3") is True
+	assert node.attrs.isallowed(u"testattr") is True
+	assert node.attrs.isallowed(u"testattr3") is True
 
 	node = testelem4()
-	assert node.attrs.isallowed("testattr") is False
-	assert node.attrs.isallowed("testattr3") is True
+	assert node.attrs.isallowed(u"testattr") is False
+	assert node.attrs.isallowed(u"testattr3") is True
 
 
 def test_withsep():
 	for class_ in (xsc.Frag, html.div):
 		node = class_(1,2,3)
-		assert unicode(node.withsep(",")) == u"1,2,3"
+		assert unicode(node.withsep(u",")) == u"1,2,3"
 		node = class_(1)
-		assert unicode(node.withsep(",")) == u"1"
+		assert unicode(node.withsep(u",")) == u"1"
 		node = class_()
-		assert unicode(node.withsep(",")) == u""
+		assert unicode(node.withsep(u",")) == u""
 
 
 def test_allowedattr():
-	assert html.a.Attrs.allowedattr("href") is html.a.Attrs.href
-	py.test.raises(xsc.IllegalAttrError, html.a.Attrs.allowedattr, "gurk")
+	assert html.a.Attrs.allowedattr(u"href") is html.a.Attrs.href
+	py.test.raises(xsc.IllegalAttrError, html.a.Attrs.allowedattr, u"gurk")
 	assert html.a.Attrs.allowedattr(xml.Attrs.lang) is xml.Attrs.lang
 
 	# Check inherited attributes
-	assert htmlspecials.plaintable.Attrs.allowedattr("border") is htmlspecials.plaintable.Attrs.border
+	assert htmlspecials.plaintable.Attrs.allowedattr(u"border") is htmlspecials.plaintable.Attrs.border
 	assert htmlspecials.plaintable.Attrs.allowedattr(htmlspecials.plaintable.Attrs.border) is htmlspecials.plaintable.Attrs.border
 	assert html.table.Attrs.allowedattr(htmlspecials.plaintable.Attrs.border) is html.table.Attrs.border
 
 
 def test_plaintableattrs():
 	e = htmlspecials.plaintable(border=3)
-	assert isinstance(e["border"], html.table.Attrs.border)
-	assert isinstance(e["cellpadding"], html.table.Attrs.cellpadding)
+	assert isinstance(e[u"border"], html.table.Attrs.border)
+	assert isinstance(e[u"cellpadding"], html.table.Attrs.cellpadding)
 	e = e.conv()
-	assert isinstance(e["border"], html.table.Attrs.border)
-	assert isinstance(e["cellpadding"], html.table.Attrs.cellpadding)
+	assert isinstance(e[u"border"], html.table.Attrs.border)
+	assert isinstance(e[u"cellpadding"], html.table.Attrs.cellpadding)
 
 
 def test_attrupdate():
-	node = html.a(href="gurk", class_="hurz")
-	node.attrs.update(xml.Attrs(lang="de"), {"href": "gurk2", html.a.Attrs.id: 42})
-	assert unicode(node["href"]) == u"gurk2"
-	assert unicode(node["id"]) == u"42"
+	node = html.a(href=u"gurk", class_=u"hurz")
+	node.attrs.update(xml.Attrs(lang=u"de"), {u"href": u"gurk2", html.a.Attrs.id: 42})
+	assert unicode(node[u"href"]) == u"gurk2"
+	assert unicode(node[u"id"]) == u"42"
 	assert unicode(node[xml.Attrs.lang]) == u"de"
 
-	node = html.a({xml.Attrs.lang: "de"}, href="gurk", class_="hurz")
+	node = html.a({xml.Attrs.lang: u"de"}, href=u"gurk", class_=u"hurz")
 	assert unicode(node[xml.Attrs.lang]) == u"de"
 
-	node = html.a(xml.Attrs(lang="de"), href="gurk", class_="hurz")
+	node = html.a(xml.Attrs(lang=u"de"), href=u"gurk", class_=u"hurz")
 	assert unicode(node[xml.Attrs.lang]) == u"de"
 
 	class Gurk(xsc.Element):
 		model = False
 		class Attrs(xsc.Element.Attrs):
 			class gurk(xsc.TextAttr): pass
-			class hurz(xsc.TextAttr): default = "hinz+kunz"
+			class hurz(xsc.TextAttr): default = u"hinz+kunz"
 
 	node1 = Gurk()
 	node2 = Gurk(hurz=None)
 	node1.attrs.update(node2.attrs)
-	assert "hurz" not in node1.attrs
+	assert u"hurz" not in node1.attrs
 
 	node1 = Gurk(hurz=None)
 	node2 = Gurk()
 	node1.attrs.update(node2.attrs)
-	assert "hurz" in node1.attrs
+	assert u"hurz" in node1.attrs
 
 	node = Gurk(Gurk(hurz=None).attrs)
-	assert "hurz" not in node.attrs
+	assert u"hurz" not in node.attrs
 
 	attrs = Gurk.Attrs(Gurk.Attrs(hurz=None))
-	assert "hurz" not in attrs
+	assert u"hurz" not in attrs
 
 
 def test_classrepr():
 		assert xsc.Frag(e[e/html.dt]) == xsc.Frag(html.dt(0), html.dt(1), html.dt(2))
 		assert xsc.Frag(e[e.__class__/html.dt]) == xsc.Frag(html.dt(0), html.dt(1), html.dt(2))
 
-		for attr in ("class_", xml.Attrs.lang):
-			e = cls("foo", html.div("bar", {attr: "gurk"}), "baz")
+		for attr in (u"class_", xml.Attrs.lang):
+			e = cls(u"foo", html.div(u"bar", {attr: u"gurk"}), u"baz")
 			i = e[xsc.Text]
-			assert str(i.next()) == "foo"
-			assert str(i.next()) == "baz"
+			assert unicode(i.next()) == u"foo"
+			assert unicode(i.next()) == u"baz"
 			py.test.raises(StopIteration, i.next)
 
 		# list
-		for attr in ("class_", xml.Attrs.lang):
-			node = cls(html.div("foo", html.div("bar", {attr: "gurk"}), "baz"))
+		for attr in (u"class_", xml.Attrs.lang):
+			node = cls(html.div(u"foo", html.div(u"bar", {attr: u"gurk"}), u"baz"))
 			assert node[[]] == node[:]
-			assert str(node[[0, 1]]) == "bar"
-			assert str(node[[0, 1, attr]]) == "gurk"
+			assert unicode(node[[0, 1]]) == u"bar"
+			assert unicode(node[[0, 1, attr]]) == u"gurk"
 
 
 def test_setitem():
 		e[::-1] = range(6)
 		assert e == cls(range(5, -1, -1))
 
-		for attr in ("class_", xml.Attrs.lang):
-			node = cls(html.div("foo", html.div({attr: "gurk"}), "bar"))
-			node[[0, 1, attr]] = "hurz"
-			assert str(node[[0, 1, attr]]) == "hurz"
+		for attr in (u"class_", xml.Attrs.lang):
+			node = cls(html.div(u"foo", html.div({attr: u"gurk"}), u"bar"))
+			node[[0, 1, attr]] = u"hurz"
+			assert unicode(node[[0, 1, attr]]) == u"hurz"
 			py.test.raises(ValueError, node.__setitem__, [], None)
 			py.test.raises(ValueError, node.__delitem__, [])
 
 	e = html.div(id=(17, html.div(23), 42))
 	for src in (e, e.attrs):
 		dst = src.clone()
-		assert src["id"] is not dst["id"]
-		assert src["id"][0] is dst["id"][0]
-		assert src["id"][1] is not dst["id"][1]
+		assert src[u"id"] is not dst[u"id"]
+		assert src[u"id"][0] is dst[u"id"][0]
+		assert src[u"id"][1] is not dst[u"id"][1]
 
-	e["id"][1] = e # create a cycle
-	e["id"][2] = e # create a cycle
+	e[u"id"][1] = e # create a cycle
+	e[u"id"][2] = e # create a cycle
 	for src in (e, e.attrs):
 		dst = src.copy()
-		assert src["id"] is dst["id"]
-		assert src["id"][0] is dst["id"][0]
-		assert src["id"][1] is dst["id"][1]
-		assert dst["id"][1] is dst["id"][2]
+		assert src[u"id"] is dst[u"id"]
+		assert src[u"id"][0] is dst[u"id"][0]
+		assert src[u"id"][1] is dst[u"id"][1]
+		assert dst[u"id"][1] is dst[u"id"][2]
 		dst = src.deepcopy()
-		assert src["id"] is not dst["id"]
-		assert src["id"][0] is dst["id"][0]
-		assert src["id"][1] is not dst["id"][1]
-		assert dst["id"][1] is dst["id"][2]
+		assert src[u"id"] is not dst[u"id"]
+		assert src[u"id"][0] is dst[u"id"][0]
+		assert src[u"id"][1] is not dst[u"id"][1]
+		assert dst[u"id"][1] is dst[u"id"][2]
 
 
 def test_sortedreversed():
 	for class_ in (xsc.Frag, html.div):
 		node = class_(3, 2, 1)
-		node2 = node.sorted(key=str)
+		node2 = node.sorted(key=unicode)
 		assert node == class_(3, 2, 1)
 		assert node2 == class_(1, 2, 3)
 
 
 	with xsc.build():
 		with html.p() as e:
-			+xml.Attrs(lang="de")
-	assert e == html.p(xml.Attrs(lang="de"))
-	assert e.bytes() == '<p xml:lang="de"></p>'
+			+xml.Attrs(lang=u"de")
+	assert e == html.p(xml.Attrs(lang=u"de"))
+	assert e.bytes() == b'<p xml:lang="de"></p>'
 
 	with xsc.build():
 		with xsc.Frag() as e:
 
 	with xsc.build():
 		with html.p() as e:
-			xsc.add(class_="foo")
-	assert e == html.p(class_="foo")
+			xsc.add(class_=u"foo")
+	assert e == html.p(class_=u"foo")
 
 	with xsc.build():
 		with html.p() as e:
-			xsc.add(dict(class_="foo"))
-	assert e == html.p(class_="foo")
+			xsc.add(dict(class_=u"foo"))
+	assert e == html.p(class_=u"foo")
 
 	with xsc.build():
 		with html.p() as e:
-			xsc.add(xml.Attrs(lang="en"))
-	assert e == html.p(xml.Attrs(lang="en"))
+			xsc.add(xml.Attrs(lang=u"en"))
+	assert e == html.p(xml.Attrs(lang=u"en"))
 
 
 def test_with_addattr():
 	with xsc.build():
 		with html.ul() as e:
-			with xsc.addattr("id"):
-				+xsc.Text("gurk")
-	assert e == html.ul(id="gurk")
+			with xsc.addattr(u"id"):
+				+xsc.Text(u"gurk")
+	assert e == html.ul(id=u"gurk")
 
 	with xsc.build():
 		with html.ul() as e:
 			with xsc.addattr(html.ul.Attrs.id):
-				+xsc.Text("gurk")
-	assert e == html.ul(id="gurk")
+				+xsc.Text(u"gurk")
+	assert e == html.ul(id=u"gurk")

test/test_xist_conversion.py

 def test_mapped():
 	def maplang(node, converter):
 		if isinstance(node, xsc.Text):
-			node = node.replace("lang", converter.lang)
+			node = node.replace(u"lang", converter.lang)
 		return node
 
 	node = xsc.Frag(
-		"lang",
+		u"lang",
 		html.div(
-			"lang",
-			class_="lang",
+			u"lang",
+			class_=u"lang",
 		)
 	)
 	node2 = node.mapped(maplang, lang="en")
 	assert node == xsc.Frag(
-		"lang",
+		u"lang",
 		html.div(
-			"lang",
-			class_="lang",
+			u"lang",
+			class_=u"lang",
 		)
 	)
 	assert node2 == xsc.Frag(
-		"en",
+		u"en",
 		html.div(
-			"en",
-			class_="lang", # No replacement in attributes
+			u"en",
+			class_=u"lang", # No replacement in attributes
 		)
 	)

test/test_xist_css.py

 	with xsc.build():
 		with html.div(id=1) as e:
 			with html.ul(id=2):
-				+html.li("foo")
+				+html.li(u"foo")
 				+html.li()
 
-	assert list(e.walknodes(css.selector("div"))) == [e]
-	assert list(e.walknodes(css.selector("li"))) == [e[0][0], e[0][1]]
-	assert list(e.walknodes(css.selector("div#1"))) == [e]
-	assert list(e.walknodes(css.selector("#2"))) == [e[0]]
-	assert list(e.walknodes(css.selector(":empty"))) == [e[0][1]]
-	assert list(e.walknodes(css.selector("li:empty"))) == [e[0][1]]
-	assert list(e.walknodes(css.selector("div :empty"))) == [e[0][1]]
-	assert list(e.walknodes(css.selector("div>*:empty"))) == []
-	assert list(e.walknodes(css.selector("div>:empty"))) == []
-	assert list(e.walknodes(css.selector("*|li"))) == [e[0][0], e[0][1]]
-	assert list(e.walknodes(css.selector("h|li", prefixes={"h": html}))) == [e[0][0], e[0][1]]
-	assert list(e.walknodes(css.selector("h|li", prefixes={"h": specials}))) == []
+	assert list(e.walknodes(css.selector(u"div"))) == [e]
+	assert list(e.walknodes(css.selector(u"li"))) == [e[0][0], e[0][1]]
+	assert list(e.walknodes(css.selector(u"div#1"))) == [e]
+	assert list(e.walknodes(css.selector(u"#2"))) == [e[0]]
+	assert list(e.walknodes(css.selector(u":empty"))) == [e[0][1]]
+	assert list(e.walknodes(css.selector(u"li:empty"))) == [e[0][1]]
+	assert list(e.walknodes(css.selector(u"div :empty"))) == [e[0][1]]
+	assert list(e.walknodes(css.selector(u"div>*:empty"))) == []
+	assert list(e.walknodes(css.selector(u"div>:empty"))) == []
+	assert list(e.walknodes(css.selector(u"*|li"))) == [e[0][0], e[0][1]]
+	assert list(e.walknodes(css.selector(u"h|li", prefixes={u"h": html}))) == [e[0][0], e[0][1]]
+	assert list(e.walknodes(css.selector(u"h|li", prefixes={u"h": specials}))) == []
 
 	with xsc.build():
 		with xsc.Frag() as e:
-			+html.div("foo")
-			+xsc.Text("filler")
-			+html.p("foo")
-			+xsc.Text("filler")
-			+html.ul(html.li("foo"))
+			+html.div(u"foo")
+			+xsc.Text(u"filler")
+			+html.p(u"foo")
+			+xsc.Text(u"filler")
+			+html.ul(html.li(u"foo"))
 
-	assert list(e.walknodes(css.selector("div + p"))) == [e[2]]
-	assert list(e.walknodes(css.selector("div + ul"))) == []
-	assert list(e.walknodes(css.selector("ul + p"))) == []
-	assert list(e.walknodes(css.selector("div ~ p"))) == [e[2]]
-	assert list(e.walknodes(css.selector("div ~ ul"))) == [e[4]]
-	assert list(e.walknodes(css.selector("p ~ div"))) == []
-	assert list(e.walknodes(css.selector("div:first-child + p"))) == [e[2]]
-	assert list(e.walknodes(css.selector("*:first-child + p"))) == [e[2]]
+	assert list(e.walknodes(css.selector(u"div + p"))) == [e[2]]
+	assert list(e.walknodes(css.selector(u"div + ul"))) == []
+	assert list(e.walknodes(css.selector(u"ul + p"))) == []
+	assert list(e.walknodes(css.selector(u"div ~ p"))) == [e[2]]
+	assert list(e.walknodes(css.selector(u"div ~ ul"))) == [e[4]]
+	assert list(e.walknodes(css.selector(u"p ~ div"))) == []
+	assert list(e.walknodes(css.selector(u"div:first-child + p"))) == [e[2]]
+	assert list(e.walknodes(css.selector(u"*:first-child + p"))) == [e[2]]
 
 	with xsc.build():
 		with xsc.Frag() as e:
-			+html.span(html.b("hurz"), "gurk", html.em("hinz"), html.em("kunz"))
-			+html.em("hurz")
-			+html.em("hinz")
-			+xsc.Text("nix")
-			+html.i("kunz")
+			+html.span(html.b(u"hurz"), u"gurk", html.em(u"hinz"), html.em(u"kunz"))
+			+html.em(u"hurz")
+			+html.em(u"hinz")
+			+xsc.Text(u"nix")
+			+html.i(u"kunz")
 
-	assert list(e.walknodes(css.selector("*:only-of-type"))) == [e[0], e[0][0], e[4]]
-	assert list(e.walknodes(css.selector("*:nth-child(1)"))) == [e[0], e[0][0]]
-	assert list(e.walknodes(css.selector("*:nth-child(2)"))) == [e[0][2], e[1]]
-	assert list(e.walknodes(css.selector("*:nth-last-child(1)"))) == [e[0][3], e[4]]
-	assert list(e.walknodes(css.selector("*:nth-last-child(2)"))) == [e[0][2], e[2]]
-	assert list(e.walknodes(css.selector("*:nth-of-type(1)"))) == [e[0], e[0][0], e[0][2], e[1], e[4]]
-	assert list(e.walknodes(css.selector("*:nth-of-type(2)"))) == [e[0][3], e[2]]
-	assert list(e.walknodes(css.selector("*:nth-last-of-type(1)"))) == [e[0], e[0][0], e[0][3], e[2], e[4]]
-	assert list(e.walknodes(css.selector("*:nth-last-of-type(2)"))) == [e[0][2], e[1]]
+	assert list(e.walknodes(css.selector(u"*:only-of-type"))) == [e[0], e[0][0], e[4]]
+	assert list(e.walknodes(css.selector(u"*:nth-child(1)"))) == [e[0], e[0][0]]
+	assert list(e.walknodes(css.selector(u"*:nth-child(2)"))) == [e[0][2], e[1]]
+	assert list(e.walknodes(css.selector(u"*:nth-last-child(1)"))) == [e[0][3], e[4]]
+	assert list(e.walknodes(css.selector(u"*:nth-last-child(2)"))) == [e[0][2], e[2]]
+	assert list(e.walknodes(css.selector(u"*:nth-of-type(1)"))) == [e[0], e[0][0], e[0][2], e[1], e[4]]
+	assert list(e.walknodes(css.selector(u"*:nth-of-type(2)"))) == [e[0][3], e[2]]
+	assert list(e.walknodes(css.selector(u"*:nth-last-of-type(1)"))) == [e[0], e[0][0], e[0][3], e[2], e[4]]
+	assert list(e.walknodes(css.selector(u"*:nth-last-of-type(2)"))) == [e[0][2], e[1]]
 
-	e = xsc.Frag(html.span(html.b("hurz"), "gurk"))
-	assert list(e.walknodes(css.selector("*:only-child"))) == [e[0], e[0][0]]
+	e = xsc.Frag(html.span(html.b(u"hurz"), u"gurk"))
+	assert list(e.walknodes(css.selector(u"*:only-child"))) == [e[0], e[0][0]]
 
 	with xsc.build():
 		with xsc.Frag() as e:
-			+html.em(class_="gurk", lang="en")
-			+html.em(class_="gurk hurz", lang="en-us")
-			+html.em(class_="hurz", lang="de")
+			+html.em(class_=u"gurk", lang=u"en")
+			+html.em(class_=u"gurk hurz", lang=u"en-us")
+			+html.em(class_=u"hurz", lang=u"de")
 
-	assert list(e.walknodes(css.selector("em[class='gurk']"))) == [e[0]]
-	assert list(e.walknodes(css.selector("em[class~='gurk']"))) == [e[0], e[1]]
-	assert list(e.walknodes(css.selector("em[lang|='en']"))) == [e[0], e[1]]
+	assert list(e.walknodes(css.selector(u"em[class='gurk']"))) == [e[0]]
+	assert list(e.walknodes(css.selector(u"em[class~='gurk']"))) == [e[0], e[1]]
+	assert list(e.walknodes(css.selector(u"em[lang|='en']"))) == [e[0], e[1]]
 
 
 def test_applystylesheets1():
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("p {color: red;}", type="text/css")
+				+html.style(u"p {color: red;}", type=u"text/css")
 			with html.body():
-				+html.p("gurk")
+				+html.p(u"gurk")
 
 	css.applystylesheets(e)
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: red;"
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: red;"
 	assert list(e.walknodes(html.style)) == []
 
 
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("p.dont {color: red;}", type="text/css")
+				+html.style(u"p.dont {color: red;}", type=u"text/css")
 			with html.body():
-				+html.p("gurk")
+				+html.p(u"gurk")
 
 	css.applystylesheets(e)
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == ""
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u""
 	assert list(e.walknodes(html.style)) == []
 
 
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("p.do {color: red;}", type="text/css")
+				+html.style(u"p.do {color: red;}", type=u"text/css")
 			with html.body():
-				+html.p("gurk", class_="do")
+				+html.p(u"gurk", class_=u"do")
 
 	css.applystylesheets(e)
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: red;"
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: red;"
 	assert list(e.walknodes(html.style)) == []
 
 
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("#id42 {color: red;}", type="text/css")
+				+html.style(u"#id42 {color: red;}", type=u"text/css")
 			with html.body():
-				+html.p("gurk", id="id42", style="color: blue;")
+				+html.p(u"gurk", id=u"id42", style=u"color: blue;")
 
 	css.applystylesheets(e)
 
 	# style attribute wins
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: blue;"
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: blue;"
 	assert list(e.walknodes(html.style)) == []
 
 
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("p#id42 {color: red;}", type="text/css")
+				+html.style(u"p#id42 {color: red;}", type=u"text/css")
 			with html.body():
-				+html.p("gurk", id="id42", style="color: blue;")
+				+html.p(u"gurk", id=u"id42", style=u"color: blue;")
 
 	css.applystylesheets(e)
 
-	# stylesheet always wins (at least in CSS 2.1 und 3)
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: blue;"
+	# style attribute still wins, even over the more specific selector p#id42 (at least in CSS 2.1 and 3)
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: blue;"
 	assert list(e.walknodes(html.style)) == []
 
 
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("p {color: red;}", type="text/css", media="screen")
+				+html.style(u"p {color: red;}", type=u"text/css", media=u"screen")
 			with html.body():
-				+html.p("gurk")
+				+html.p(u"gurk")
 
-	css.applystylesheets(e, media="screen")
+	css.applystylesheets(e, media=u"screen")
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: red;"
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: red;"
 
 	# Check that media="screen" doesn't pick up the print stylesheet
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("p {color: red;}", type="text/css", media="screen")
+				+html.style(u"p {color: red;}", type=u"text/css", media=u"screen")
 			with html.body():
-				+html.p("gurk")
+				+html.p(u"gurk")
 
-	css.applystylesheets(e, media="print")
+	css.applystylesheets(e, media=u"print")
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == ""
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u""
 
 	# Check that @media rules are treated properly
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("@media screen { p {color: red;}}", type="text/css")
+				+html.style(u"@media screen { p {color: red;}}", type=u"text/css")
 			with html.body():
-				+html.p("gurk")
+				+html.p(u"gurk")
 
-	css.applystylesheets(e, media="screen")
+	css.applystylesheets(e, media=u"screen")
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: red;"
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: red;"
 
 	with xsc.build():
 		with html.html() as e:
 			with html.head():
-				+html.style("@media screen { p {color: red;}}", type="text/css")
+				+html.style(u"@media screen { p {color: red;}}", type=u"text/css")
 			with html.body():
-				+html.p("gurk")
+				+html.p(u"gurk")
 
-	css.applystylesheets(e, media="print")
+	css.applystylesheets(e, media=u"print")
 
-	assert str(e.walknodes(html.p)[0].attrs.style) == ""
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u""
 
 
 def test_applystylesheets_title():
 		with xsc.build():
 			with html.html() as e:
 				with html.head():
-					+html.style("p {color: red;}", type="text/css")
-					+html.style("p {color: blue;}", type="text/css", title="blue")
+					+html.style(u"p {color: red;}", type=u"text/css")
+					+html.style(u"p {color: blue;}", type=u"text/css", title=u"blue")
 				with html.body():
-					+html.p("gurk")
+					+html.p(u"gurk")
 		return e
 
 	# Check that title=None uses only the titleless stylesheets
 	e = makenode()
 	css.applystylesheets(e, title=None)
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: red;"
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: red;"
 
 	# Check that title="blue" uses only the stylesheet with the specified title
 	e = makenode()
-	css.applystylesheets(e, title="blue")
-	assert str(e.walknodes(html.p)[0].attrs.style) == "color: blue;"
+	css.applystylesheets(e, title=u"blue")
+	assert unicode(e.walknodes(html.p)[0].attrs.style) == u"color: blue;"
 
 
 def test_parse():
-	s = css.parsestring("@charset 'utf-8'; div{background-image: url(gurk.gif);}")
+	s = css.parsestring(b"@charset 'utf-8'; div{background-image: url(gurk.gif);}")
 	urls = set(css.geturls(s))
 	assert urls == set([url.URL("gurk.gif")])
 
-	s = css.parsestring("@charset 'utf-8'; div{background-image: url(gurk.gif);}", base="root:")
+	s = css.parsestring(b"@charset 'utf-8'; div{background-image: url(gurk.gif);}", base="root:")
 	urls = set(css.geturls(s))
 	assert urls == set([url.URL("root:gurk.gif")])
 
 
 def test_comments():
-	d = '<html><head><style type="text/css">/*nix*/ p{/*nix*/ color: red;}</style></head><body><p>gurk</p></body></html>'
+	d = b'<html><head><style type="text/css">/*nix*/ p{/*nix*/ color: red;}</style></head><body><p>gurk</p></body></html>'
 	node = parsers.tree(d, parsers.Expat(), parsers.NS(html), parsers.Instantiate())
 	css.applystylesheets(node)
-	assert unicode(node.walknodes(html.p)[0].attrs.style) == "color: red;"
+	assert unicode(node.walknodes(html.p)[0].attrs.style) == u"color: red;"

test/test_xist_detox.py

 		e = xsc.Frag(
 			detox.def_(self.attrs.func),
 				self.content,
-			detox.end("def")
+			detox.end(u"def")
 		)
 		return e.convert(converter)
 
 		e = xsc.Frag(
 			detox.for_(self.attrs.loop),
 				self.content,
-			detox.end("for")
+			detox.end(u"for")
 		)
 		return e.convert(converter)
 
 		e = xsc.Frag(
 			detox.while_(self.attrs.loop),
 				self.content,
-			detox.end("while")
+			detox.end(u"while")
 		)
 		return e.convert(converter)
 
 
 def makeoutput(node, function, *args, **kwargs):
 	mod = makemod(node)
-	return "".join(getattr(mod, function)(*args, **kwargs))
+	return u"".join(getattr(mod, function)(*args, **kwargs))
 
 
 def test_modulecode():
-	assert makemod(detox.code("x = 42")).x == 42
+	assert makemod(detox.code(u"x = 42")).x == 42
 
 
 def test_text():
 	with xsc.build():
 		with xsc.Frag() as e:
-			+detox.def_("gurk()")
-			+xsc.Text("foo")
-			+detox.end("def")
-	assert makeoutput(e, "gurk") == "foo"
+			+detox.def_(u"gurk()")
+			+xsc.Text(u"foo")
+			+detox.end(u"def")
+	assert makeoutput(e, u"gurk") == u"foo"
 
 
 def test_expr():
 	with xsc.build():
 		with xsc.Frag() as e:
-			with defblock(func="gurk(arg)"):
-				+detox.expr("arg")
+			with defblock(func=u"gurk(arg)"):
+				+detox.expr(u"arg")
 
-	assert makeoutput(e, "gurk", "hurz") == "hurz"
+	assert makeoutput(e, u"gurk", u"hurz") == u"hurz"
 
 
 def test_for():
 	with xsc.build():
 		with xsc.Frag() as e:
-			with defblock(func="gurk(arg)"):
-				with forblock(loop="i in xrange(arg)"):
-					+detox.expr("str(i)")
+			with defblock(func=u"gurk(arg)"):
+				with forblock(loop=u"i in xrange(arg)"):
+					+detox.expr(u"str(i)")
 
-	assert makeoutput(e, "gurk", 3) == "012"
+	assert makeoutput(e, u"gurk", 3) == u"012"
 
 
 def test_if():
 	with xsc.build():
 		with xsc.Frag() as e:
-			with defblock(func="gurk(arg)"):
-				+detox.if_("arg>2")
-				+detox.expr("str(2*arg)")
+			with defblock(func=u"gurk(arg)"):
+				+detox.if_(u"arg>2")
+				+detox.expr(u"str(2*arg)")
 				+detox.else_()
-				+detox.expr("str(3*arg)")
-				+detox.end("if")
+				+detox.expr(u"str(3*arg)")
+				+detox.end(u"if")
 
-	assert makeoutput(e, "gurk", 0) == "0"
-	assert makeoutput(e, "gurk", 1) == "3"
-	assert makeoutput(e, "gurk", 2) == "6"
-	assert makeoutput(e, "gurk", 3) == "6"
-	assert makeoutput(e, "gurk", 4) == "8"
+	assert makeoutput(e, u"gurk", 0) == u"0"
+	assert makeoutput(e, u"gurk", 1) == u"3"
+	assert makeoutput(e, u"gurk", 2) == u"6"
+	assert makeoutput(e, u"gurk", 3) == u"6"
+	assert makeoutput(e, u"gurk", 4) == u"8"
 
 
 def test_while():
 	with xsc.build():
 		with xsc.Frag() as e:
-			with defblock(func="gurk(arg)"):
-				+detox.code("i = 0")
-				with whileblock(loop="i < arg"):
-					+detox.expr("str(i)")
-					+detox.code("i += 1")
+			with defblock(func=u"gurk(arg)"):
+				+detox.code(u"i = 0")
+				with whileblock(loop=u"i < arg"):
+					+detox.expr(u"str(i)")
+					+detox.code(u"i += 1")
 
-	assert makeoutput(e, "gurk", 3) == "012"
+	assert makeoutput(e, u"gurk", 3) == u"012"
 
 
 def test_scopecheck():
 	with xsc.build():
 		with xsc.Frag() as e:
-			+detox.def_("gurk()")
-			+xsc.Text("hurz")
+			+detox.def_(u"gurk()")
+			+xsc.Text(u"hurz")
 			+detox.end()
 
-	assert makeoutput(e, "gurk") == "hurz"
+	assert makeoutput(e, u"gurk") == u"hurz"
 
 	with xsc.build():
 		with xsc.Frag() as e:
-			+detox.def_("gurk()")
-			+xsc.Text("hurz")
-			+detox.end("for")
+			+detox.def_(u"gurk()")
+			+xsc.Text(u"hurz")
+			+detox.end(u"for")
 
-	py.test.raises(SyntaxError, makeoutput, e, "gurk")
+	py.test.raises(SyntaxError, makeoutput, e, u"gurk")
 
 
 def test_textexpr():
 	with xsc.build():
 		with xsc.Frag() as e:
-			with defblock(func="gurk()"):
-				+detox.code("""s = '"a" < "b" & "b" > "a"'""")
-				+detox.textexpr("s")
+			with defblock(func=u"gurk()"):
+				+detox.code(u"""s = '"a" < "b" & "b" > "a"'""")
+				+detox.textexpr(u"s")
 
-	assert makeoutput(e, "gurk") == '&quot;a&quot; &lt; &quot;b&quot; &amp; &quot;b&quot; &gt; &quot;a&quot;'
+	assert makeoutput(e, u"gurk") == u'&quot;a&quot; &lt; &quot;b&quot; &amp; &quot;b&quot; &gt; &quot;a&quot;'

test/test_xist_htmlspecials.py

 
 def test_pixel():
 	e = htmlspecials.pixel()
-	assert str(e.conv().attrs.src) == "root:px/spc.gif"
+	assert unicode(e.conv().attrs.src) == u"root:px/spc.gif"
 
-	e = htmlspecials.pixel(src="root:nix.gif")
-	assert str(e.conv().attrs.src) == "root:nix.gif"
+	e = htmlspecials.pixel(src=u"root:nix.gif")
+	assert unicode(e.conv().attrs.src) == u"root:nix.gif"
 
 	c = converters.Converter()
-	c[htmlspecials.pixel].src = "root:spam.gif"
+	c[htmlspecials.pixel].src = u"root:spam.gif"
 	e = htmlspecials.pixel()
-	assert str(e.conv(c).attrs.src) == "root:spam.gif"
+	assert unicode(e.conv(c).attrs.src) == u"root:spam.gif"
 
-	e = htmlspecials.pixel(color="red")
-	assert str(e.conv().attrs.style) == "background-color: red;"
+	e = htmlspecials.pixel(color=u"red")
+	assert unicode(e.conv().attrs.style) == u"background-color: red;"
 
-	e = htmlspecials.pixel(color="red", style="display: block;")
-	assert str(e.conv().attrs.style) == "background-color: red; display: block;"
+	e = htmlspecials.pixel(color=u"red", style=u"display: block;")
+	assert unicode(e.conv().attrs.style) == u"background-color: red; display: block;"

test/test_xist_parse.py

 
 def test_parselocationsgmlop():
 	# sgmlop doesn't provide any location info, so check only the URL
-	node = parsers.tree("<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>", parsers.SGMLOP(), parsers.NS(doc), parsers.Instantiate())
+	node = parsers.tree(b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>", parsers.SGMLOP(), parsers.NS(doc), parsers.Instantiate())
 	assert len(node) == 1
 	assert len(node[0]) == 1
 	assert str(node[0][0].startloc.url) == "STRING"
 
 def test_parselocationexpat():
 	# Check that expat gets the location info right
-	node = parsers.tree("<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>", parsers.Expat(), parsers.NS(doc), parsers.Instantiate())
+	node = parsers.tree(b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>", parsers.Expat(), parsers.NS(doc), parsers.Instantiate())
 	assert len(node) == 1
 	assert len(node[0]) == 1
 	assert str(node[0][0].startloc.url) == "STRING"
 
 def test_nsparse():
 	# A prepopulated prefix mapping and xmlns attributes should work together
-	xml = """
+	xml = b"""
 		<x:a>
 			<x:a xmlns:x='http://www.w3.org/1999/xhtml'>
 				<x:a xmlns:x='http://www.nttdocomo.co.jp/imode'>gurk</x:a>
 
 def test_parseurls():
 	# Check proper URL handling when parsing URLAttr or StyleAttr attributes
-	node = parsers.tree('<a href="4.html" style="background-image: url(3.gif);"/>', parsers.Expat(), parsers.NS(html), parsers.Instantiate(base="root:1/2.html"))
+	node = parsers.tree(b'<a href="4.html" style="background-image: url(3.gif);"/>', parsers.Expat(), parsers.NS(html), parsers.Instantiate(base="root:1/2.html"))
 	assert str(node[0]["style"]) == "background-image: url(root:1/3.gif)"
 	assert node[0]["style"].urls() == [url.URL("root:1/3.gif")]
 	assert str(node[0]["href"]) == "root:1/4.html"
 				class required(xsc.TextAttr):
 					required = True
 
-		node = parsers.tree('<Test required="foo"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
+		node = parsers.tree(b'<Test required="foo"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
 		assert str(node[0]["required"]) == "foo"
 
-		parsers.tree('<Test/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
+		parsers.tree(b'<Test/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
 		w = recwarn.pop(xsc.RequiredAttrMissingWarning)
 
-	py.test.raises(xsc.IllegalElementError, parsers.tree, '<Test required="foo"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
+	py.test.raises(xsc.IllegalElementError, parsers.tree, b'<Test required="foo"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
 
 
 def test_parsevalueattrs(recwarn):
 				class withvalues(xsc.TextAttr):
 					values = ("foo", "bar")
 
-		node = parsers.tree('<Test withvalues="bar"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
+		node = parsers.tree(b'<Test withvalues="bar"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
 		assert str(node[0]["withvalues"]) == "bar"
 
-		parsers.tree('<Test withvalues="baz"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
+		parsers.tree(b'<Test withvalues="baz"/>', parsers.Expat(), parsers.NS(xmlns), parsers.Instantiate())
 		w = recwarn.pop(xsc.IllegalAttrValueWarning)
 
 
 	def check(parser):
 		for i in xrange(3):
 			try:
-				parsers.tree("<>gurk", parser, parsers.NS(html), parsers.Instantiate())
+				parsers.tree(b"<>gurk", parser, parsers.NS(html), parsers.Instantiate())
 			except Exception:
 				pass
 			for j in xrange(3):
-				assert parsers.tree("<a>gurk</a>", parser, parsers.NS(html), parsers.Instantiate()).bytes() == "<a>gurk</a>"
+				assert parsers.tree(b"<a>gurk</a>", parser, parsers.NS(html), parsers.Instantiate()).bytes() == "<a>gurk</a>"
 
 	# A Parser instance should be able to parse multiple XML sources, even when some of the parse calls fail
 	for parser in (parsers.SGMLOP, parsers.Expat):
 
 def test_parseentities_sgmlop():
 	def check(input, output):
-		node = parsers.tree("""<a title="{0}">{0}</a>""".format(input), parsers.SGMLOP(), parsers.NS(a.xmlns), parsers.Instantiate(pool=xsc.Pool(a, bar, foo, chars)))
+		node = parsers.tree(b"""<a title="{0}">{0}</a>""".format(input), parsers.SGMLOP(), parsers.NS(a.xmlns), parsers.Instantiate(pool=xsc.Pool(a, bar, foo, chars)))
 		node = node.walknodes(a)[0]
 		assert unicode(node) == output
 		assert unicode(node.attrs.title) == output
 
-	yield check, "a", "a"
-	yield check, ";a;", ";a;"
-	yield check, "&lt;", "<"
-	yield check, "&lt;&gt;", "<>"
-	yield check, "&gt;", ">"
-	yield check, "&apos;", "'"
-	yield check, "&quot;", '"'
-	yield check, "&amp;", "&"
-	yield check, "&amp;", "&"
-	yield check, "a&amp;b", "a&b"
-	yield check, "&foo;", "FOO"
-	yield check, "&bar;", "\x42"
-	yield check, "&#32;", " "
-	yield check, "&#x20;", " "
-	yield check, "&#x3042;", u"\u3042"
+	yield check, b"a", "a"
+	yield check, b";a;", ";a;"
+	yield check, b"&lt;", "<"
+	yield check, b"&lt;&gt;", "<>"
+	yield check, b"&gt;", ">"
+	yield check, b"&apos;", "'"
+	yield check, b"&quot;", '"'
+	yield check, b"&amp;", "&"
+	yield check, b"&amp;", "&"
+	yield check, b"a&amp;b", "a&b"
+	yield check, b"&foo;", "FOO"
+	yield check, b"&bar;", "\x42"
+	yield check, b"&#32;", " "
+	yield check, b"&#x20;", " "
+	yield check, b"&#x3042;", u"\u3042"
 
 
 def test_parseattr_sgmlop():
 		node = node.walknodes(a)[0]
 		assert unicode(node.attrs.title) == output
 
-	yield check, """<a title=x></a>""", "x"
-	yield check, """<a title=x/>""", "x"
-	yield check, """<a title=x id=42/>""", "x"
-	yield check, """<a title="x" id=42/>""", "x"
-	yield check, """<a title='x' id=42/>""", "x"
-	yield check, """<a title='x"y' id=42/>""", 'x"y'
-	yield check, """<a title="x'y" id=42/>""", "x'y"
+	yield check, b"""<a title=x></a>""", "x"
+	yield check, b"""<a title=x/>""", "x"
+	yield check, b"""<a title=x id=42/>""", "x"
+	yield check, b"""<a title="x" id=42/>""", "x"
+	yield check, b"""<a title='x' id=42/>""", "x"
+	yield check, b"""<a title='x"y' id=42/>""", 'x"y'
+	yield check, b"""<a title="x'y" id=42/>""", "x'y"
 
 
 def test_parsestringurl():
 	# Base URLs should end up in the location info of the resulting XML tree
-	node = parsers.tree("gurk", parsers.SGMLOP(), parsers.NS(), parsers.Instantiate())
+	node = parsers.tree(b"gurk", parsers.SGMLOP(), parsers.NS(), parsers.Instantiate())
 	assert str(node[0].startloc.url) == "STRING"
 
-	node = parsers.tree(parsers.StringSource("gurk", url="root:gurk.xmlxsc"), parsers.SGMLOP(), parsers.NS(), parsers.Instantiate())
+	node = parsers.tree(parsers.StringSource(b"gurk", url="root:gurk.xmlxsc"), parsers.SGMLOP(), parsers.NS(), parsers.Instantiate())
 	assert str(node[0].startloc.url) == "root:gurk.xmlxsc"
 
 
 def test_xmlns():
-	s = "<z xmlns={0!r}><rb xmlns={1!r}/><z/></z>".format(doc.xmlns, ruby.xmlns)
+	s = b"<z xmlns={0!r}><rb xmlns={1!r}/><z/></z>".format(doc.xmlns, ruby.xmlns)
 	e = parsers.tree(s, parsers.Expat(ns=True), parsers.Instantiate(pool=xsc.Pool(doc, ruby)))
 
 	assert e[0].xmlns == doc.xmlns
 	assert e[0][0].xmlns == ruby.xmlns
 
-	s = "<a xmlns={0!r}><a xmlns={1!r}/></a>".format(html.xmlns, ihtml.xmlns)
+	s = b"<a xmlns={0!r}><a xmlns={1!r}/></a>".format(html.xmlns, ihtml.xmlns)
 	e = parsers.tree(s, parsers.Expat(ns=True), parsers.Instantiate(pool=xsc.Pool(html, ihtml)))
 	assert isinstance(e[0], html.a)
 	assert isinstance(e[0][0], ihtml.a)
 
-	s = "<a><a xmlns={0!r}/></a>".format(ihtml.xmlns)
+	s = b"<a><a xmlns={0!r}/></a>".format(ihtml.xmlns)
 	py.test.raises(xsc.IllegalElementError, parsers.tree, s, parsers.Expat(), parsers.NS(html), parsers.Instantiate(pool=xsc.Pool(ihtml)))
 	e = parsers.tree(s, parsers.Expat(), parsers.NS(html), parsers.Instantiate(pool=xsc.Pool(html, ihtml)))
 	assert isinstance(e[0], html.a)
 	assert isinstance(e[0][0], ihtml.a)
 
-	s = "<z xmlns={0!r}/>".format(doc.xmlns)
+	s = b"<z xmlns={0!r}/>".format(doc.xmlns)
 	e = parsers.tree(s, parsers.Expat(ns=True), parsers.Instantiate(pool=xsc.Pool(doc.z)))
 	assert isinstance(e[0], doc.z)
 	py.test.raises(xsc.IllegalElementError, parsers.tree, s, parsers.Expat(ns=True), parsers.Instantiate(pool=xsc.Pool()))
 
 
 def test_parseemptyattribute():
-	e = parsers.tree("<a target=''/>", parsers.Expat(), parsers.NS(html), parsers.Instantiate(pool=xsc.Pool(html)))
+	e = parsers.tree(b"<a target=''/>", parsers.Expat(), parsers.NS(html), parsers.Instantiate(pool=xsc.Pool(html)))
 	assert "target" in e[0].attrs
 
 
 def test_expat_xmldecl():
-	e = parsers.tree("<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>", parsers.Expat(), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>", parsers.Expat(), parsers.NS(html), parsers.Instantiate())
 	assert not isinstance(e[0], xml.XML)
 
-	e = parsers.tree("<a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b"<a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
 	assert not isinstance(e[0], xml.XML)
 
-	e = parsers.tree("<?xml version='1.0'?><a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b"<?xml version='1.0'?><a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xml.XML)
 	assert e[0].content == u'version="1.0"'
 
-	e = parsers.tree("<?xml version='1.0' encoding='utf-8'?><a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b"<?xml version='1.0' encoding='utf-8'?><a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xml.XML)
 	assert e[0].content == u'version="1.0" encoding="utf-8"'
 
-	e = parsers.tree("<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>", parsers.Expat(xmldecl=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xml.XML)
 	assert e[0].content == u'version="1.0" encoding="utf-8" standalone="yes"'
 
 
 def test_expat_doctype():
-	e = parsers.tree('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>', parsers.Expat(), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>', parsers.Expat(), parsers.NS(html), parsers.Instantiate())
 	assert not isinstance(e[0], xsc.DocType)
 
-	e = parsers.tree('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xsc.DocType)
 	assert e[0].content == html.DocTypeXHTML11().content
 
-	e = parsers.tree('<!DOCTYPE html><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b'<!DOCTYPE html><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xsc.DocType)
 	assert e[0].content == "html"
 
-	e = parsers.tree('<!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b'<!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xsc.DocType)
 	assert e[0].content == u'html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'
 
-	e = parsers.tree('<!DOCTYPE a [<!ELEMENT a EMPTY><!--gurk-->]><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
+	e = parsers.tree(b'<!DOCTYPE a [<!ELEMENT a EMPTY><!--gurk-->]><a/>', parsers.Expat(doctype=True), parsers.NS(html), parsers.Instantiate())
 	assert isinstance(e[0], xsc.DocType)
 	assert e[0].content == u'a' # Internal subset gets dropped
 
 
 def test_htmlparse_base():
-	e = parsers.tree("<a href='gurk.gif'/>", parsers.Tidy(), parsers.NS(html), parsers.Instantiate(base="hurz/index.html"))
+	e = parsers.tree(b"<a href='gurk.gif'/>", parsers.Tidy(), parsers.NS(html), parsers.Instantiate(base="hurz/index.html"))
 	e = e.walknodes(html.a)[0]
 	assert unicode(e.attrs.href) == "hurz/gurk.gif"
 
 
 def test_parse_tidy_empty():
-	e = parsers.tree("", parsers.Tidy(), parsers.NS(), parsers.Instantiate())
+	e = parsers.tree(b"", parsers.Tidy(), parsers.NS(), parsers.Instantiate())
 	assert not e
 
 
 def test_base():
-	e = parsers.tree(parsers.StringSource('<a xmlns="http://www.w3.org/1999/xhtml" href="gurk.html"/>', 'http://www.gurk.de/'), parsers.Expat(ns=True), parsers.Instantiate(pool=xsc.Pool(html)))
+	e = parsers.tree(parsers.StringSource(b'<a xmlns="http://www.w3.org/1999/xhtml" href="gurk.html"/>', 'http://www.gurk.de/'), parsers.Expat(ns=True), parsers.Instantiate(pool=xsc.Pool(html)))
 	assert unicode(e[0].attrs.href) == "http://www.gurk.de/gurk.html"
 
 
 def test_stringsource():
-	expect = "hinz & kunz"
+	expect = b"hinz & kunz"
 	source = parsers.StringSource(expect)
 	for i in xrange(3):
-		parsed = "".join(data for (evtype, data) in source if evtype == "bytes")
+		parsed = b"".join(data for (evtype, data) in source if evtype == "bytes")
 		assert parsed == expect
 
 
 def test_itersource():
-	expect = "hinz & kunz"
-	source = parsers.IterSource(["hinz", " & ", "kunz"])
+	expect = b"hinz & kunz"
+	source = parsers.IterSource([b"hinz", b" & ", b"kunz"])
 	for i in xrange(3):
-		parsed = "".join(data for (evtype, data) in source if evtype == "bytes")
+		parsed = b"".join(data for (evtype, data) in source if evtype == "bytes")
 		assert parsed == expect
 
 
 	expect = url.URL("http://www.python.org/").openread().read()
 	source = parsers.URLSource("http://www.python.org/", bufsize=32)
 	for i in xrange(3):
-		parsed = "".join(data for (evtype, data) in source if evtype == "bytes")
+		parsed = b"".join(data for (evtype, data) in source if evtype == "bytes")
 		assert parsed == expect
 
 
 def test_itertree_large():
 	def xml():
-		yield "<ul xmlns='%s'>" % html.xmlns
+		yield b"<ul xmlns='{0}'>".format(html.xmlns)
 		for i in xrange(1000):
-			yield "<li>%d</li>" % i
-		yield "</ul>"
+			yield b"<li>{0}</li>".format(i)
+		yield b"</ul>"
 
 	for (i, (evtype, path)) in enumerate(parsers.itertree(parsers.IterSource(xml()), parsers.Expat(ns=True), parsers.Instantiate(), filter=html.li)):
 		assert int(str(path[-1])) == i

test/test_xist_pickle.py

 	e = xsc.Frag(
 		xml.XML(),
 		html.DocTypeXHTML10transitional(),
-		xsc.Comment("foo"),
-		html.html(xml.Attrs(lang="de"), lang="de"),
-		php.expression("$foo"),
+		xsc.Comment(u"foo"),
+		html.html(xml.Attrs(lang=u"de"), lang=u"de"),
+		php.expression(u"$foo"),
 		chars.nbsp(),
 		abbr.xml(),
 	)

test/test_xist_pool.py

 def test_basics():
 	# empty pool
 	r = xsc.Pool()
-	py.test.raises(xsc.IllegalElementError, r.elementclass, "a", html)
-	py.test.raises(xsc.IllegalElementError, r.elementclass_xml, "a", html)
+	py.test.raises(xsc.IllegalElementError, r.elementclass, u"a", html)
+	py.test.raises(xsc.IllegalElementError, r.elementclass_xml, u"a", html)
 
 	# register one element
 	r = xsc.Pool(html.a)
-	assert r.elementclass("a", html) is html.a
-	assert r.elementclass_xml("a", html) is html.a