Commits

Anonymous committed 537ef16

started to write real tests that actually work

Comments (0)

Files changed (4)

+# -*- coding: utf-8 -*-
+
+import hashlib
+import re
+import tidy
+
+BLOCK_TYPES = ('code', 'quote')
+
+
+class Block(object):
+	"""
+	defines one block of rsm text
+	"""
+	
+	def __init__(self, string):
+		# list of (anchor html, md5 hash) pairs, one per url found in the content
+		self.links = []
+		# type_re = re.compile(r'(?P<type>code|quote):("(?P<meta>.*?)")?\n(?P<content>.*?)', re.DOTALL | re.MULTILINE)
+		type_re = re.compile(r'^(?P<type>\w+):(?P<meta>.*?)\n(?P<content>.*)', re.DOTALL | re.UNICODE)
+		match = type_re.match(string)
+
+		# print string.strip()[0:2]
+		if match and match.group('type') and match.group('type') in BLOCK_TYPES:
+			self.type = match.group('type')
+			self.content = ''.join(string.splitlines()[1:])
+		elif string.strip()[0:2] == '* ':
+			self.type = 'ul'
+			self.content = self.make_li(string)
+		else:
+			self.type = 'p'
+			self.content = string
+		if match and match.group('meta'):
+			self.meta = match.group('meta')
+		else:
+			self.meta = None
+		
+		self.sanitize()
+		self.parse_urls()
+		# self.parse_short_markup()
+		self.parse_long_markup()
+		self.brake_long_words()
+		self.resub_links()
+		if not self.type == 'ul':
+			self.nl2br()
+
+	def sanitize(self):
+		"""
+		strip leading and trailing whitespace (including newlines) from the content
+		"""
+		self.content = self.content.strip()
+
+	def make_li(self, string):
+		li_re = re.compile(r'\n?\*\s+')
+		lis = li_re.split(string)
+		stuff = []
+		for li in lis:
+			if not li == '':
+				stuff.append('\n<li>\n%s\n</li>' % li.replace('\n', '<br />\n'))
+		# for i in stuff:
+		# 	print i
+		# print string
+		# print stuff
+		return ''.join(stuff)
+
+	def create_nice_url(self, match):
+		if match.group(0).__len__() > 30:
+			name = match.group(0)[0:30] + u'[...]'
+		else:
+			name = match.group(0)
+		link = u'<a href="' + match.group(0) + '">' + name + '</a>'
+		link_hash = hashlib.md5(link).hexdigest()
+		self.links.append((link, link_hash))
+		return '$' + link_hash + '$'
+
+	def parse_urls(self):
+		url_re = re.compile(r'((ht|f)tp(s?)\:\/\/|~/|/)?([\w]+:\w+@)?([a-zA-Z]{1}([\w\-]+\.)+([\w]{2,5}))(:[\d]{1,5})?((/?\w+/)+|/?)(\w+\.[\w]{3,4})?((\?\w+=\w+)?(&\w+=\w+)*)', re.UNICODE)
+		# url_re = re.compile(r'http:\/\/[a-zA-Z\-]+\.[\w]{2,4}')
+		self.content = url_re.sub(self.create_nice_url, self.content)
+		# print self.content
+	
+	def resub_links(self):
+		"""put back the urls that were replaced with a safe placeholder string earlier"""
+		for link, link_hash in self.links:
+			# print u'$' + link_hash + u'$', link
+			self.content = self.content.replace(u'$' + link_hash + u'$', link)
+
+	def parse_short_markup(self):
+		"""parses short markup like _this_"""
+		# bold
+		b_re = re.compile(r'\*(?P<content>[\w\d]+?)\*', re.UNICODE)
+		self.content = b_re.sub('<b>\g<content></b>', self.content)
+		# italic
+		i_re = re.compile(r'/(?P<content>[\w\d]+?)/', re.UNICODE)
+		self.content = i_re.sub('<i>\g<content></i>', self.content)
+		# deleted
+		s_re = re.compile(r'-(?P<content>[\w\d]+?)-', re.UNICODE)
+		self.content = s_re.sub('<del>\g<content></del>', self.content)
+		# underline
+		u_re = re.compile(r'_(?P<content>[\w\d]+?)_', re.UNICODE)
+		self.content = u_re.sub('<u>\g<content></u>', self.content)
+
+	def parse_long_markup(self):
+		# bold
+		db_re = re.compile(r'\*\*(?P<content>.*?)\*\*', re.UNICODE)
+		self.content = db_re.sub('<b>\g<content></b>', self.content)
+		# italic
+		di_re = re.compile(r'//(?P<content>.*?)//', re.UNICODE)
+		self.content = di_re.sub('<i>\g<content></i>', self.content)
+		# deleted
+		ds_re = re.compile(r'--(?P<content>.*?)--', re.UNICODE)
+		self.content = ds_re.sub('<del>\g<content></del>', self.content)
+		# underlined
+		du_re = re.compile(r'__(?P<content>.*?)__', re.UNICODE)
+		self.content = du_re.sub('<u>\g<content></u>', self.content)
+
+	def brake_long_word(self, match):
+		print match.group(0)[::30]
+		return match.group(0)
+	
+	def brake_long_words(self):
+		long_re = re.compile(r'[a-zA-Z0-9]{50,}')
+		self.content = long_re.sub(self.brake_long_word, self.content)
+
+	def nl2br(self):
+		"""
+		replace newlines with html breaks
+		"""
+		self.content = self.content.replace('\n', '<br />')
+	
+	# def __str__(self):
+	# 	if self.type == 'p' or self.type == 'code':
+	# 		output = '\n<%s>\n%s\n</%s>' % (self.type, self.content, self.type)
+	# 	else:
+	# 		output = '\n<blockquote>\n<p>\n%s</p>\n</blockquote>' % self.content
+	# 	return output
+
+# TODO: make it possible to escape markup characters
+# TODO: add lists
+# TODO: trim too long words/links
+# TODO: make urls nameable
+# TODO: think of something for short markup like _stuff_ so that words like cd-rom-drive are not mangled
+class SimpleMarkupText(object):
+	"""
+	instance of a text that has been parsed by sm
+
+	input: original input
+	blocks: blocks of text
+	output: final parsed text
+	"""
+	
+	def __init__(self, input, tidy=True):
+		# print type(input) is unicode
+		assert(type(input) is unicode, 'input isnt unicode string')
+		self.blocks = []
+		self.code_open = 0
+		self.quote_open = 0
+		
+		self.input = input
+		self.output = self.input
+		self.sanitize()
+		self.split_blocks()
+		self.join_blocks()
+		if tidy:
+			self.tidy()
+	
+	def sanitize(self):
+		"""
+		gets rid of tags by replacing < and > by the corresponding html entities
+		also collapses runs of three or more newlines into two
+		"""
+		# tags have to go first
+		self.output = self.output.replace('<', '&lt;')
+		self.output = self.output.replace('>', '&gt;')
+		# now we collapse superfluous linebreaks
+		br_re = re.compile(r'(\n){3,}')
+		self.output = br_re.sub('\n\n', self.output)
+	
+	def split_blocks(self):
+		"""
+		a block is a paragraph of text; blocks are separated by two consecutive newlines
+		"""
+		# print self.output.encode('utf8')
+		# print self.output.split('\n\n').__len__()
+		for blockstring in self.output.split('\n\n'):
+			block = Block(blockstring)
+			self.blocks.append(block)
+		
+	def parse_blocks(self):
+		pass
+		# code_re = re.compile(r'(?P<type>code|quote):("(?P<meta>.*?)")?\n(?P<content>.*?)\n:\1', re.DOTALL | re.MULTILINE)
+		# match = code_re.match(string)
+		# 
+		# for block in self.blocks:
+		# 	block
+	
+	def join_blocks(self):
+		self.output = ''
+		for block in self.blocks:
+			content = block.content
+			if block.type == 'p' or block.type == 'code':
+				output = '\n<%s>\n%s\n</%s>' % (block.type, block.content, block.type)
+			elif block.type == 'ul':
+				output = '\n<ul>%s\n</ul>' % block.content
+			else:
+				output = '\n<blockquote>\n<p>\n%s</p>\n</blockquote>' % block.content
+			self.output += output
+	
+	def tidy(self):
+		self.output = tidy.parseString(self.output.encode('utf8'), char_encoding='utf8', indent=True, output_xhtml=True, show_body_only=True).__str__().decode('utf8')
+
+
+def simple_markup(text, **kwargs):
+	return SimpleMarkupText(text, **kwargs).output
+
+# print simple_markup2(text)
+# print simple_markup2(text2)
+# print simple_markup2(text3)
 
 BLOCK_TYPES = ('code', 'quote')
 
+text9 = u"""# sdfsdfsdfsdfsdfsdf\r\n# sdflkjsdlfjsldfjsdlfkj\nsdflkjsdflkjsdlfjsdlf\r\nsdlfkjsldkfjlsdkf"""
+text8 = u"""- sdfsdfsdfsdfsdfsdf\r\nsdflkjsdlfjsldfjsdlfkj\nsdflkjsdflkjsdlfjsdlf\r\nsdlfkjsldkfjlsdkf"""
+text7 = u"""sdfsdf\r\n\r\n    * sdfsdfsdf\r\n    * sdfsfsdf\r\n    - 234234\r\n    - 34234\r\n\r\n"""
+text6 = u"""sdfsdf\r\n\r\n    - sdfsdfsdf\r\n    - sdfsfsdf\r\n    - 234234\r\n    - 34234\r\n\r\n"""
+
+text5 = u"""* Theaterfestpieltag in der FH-Hof\r\n* sozialkritische, kurze Stücke\r\n* Akteure sind die Studenten PM 2 und freiwillige\r\n* Zusammenarbeit mit Theater Hof\r\n* Mitte der Veranstaltung ein Buffet\r\n"""
+
+text4 = u"""
+Projektname: Theater-Theater
+Author: Andrea Heinze
+
+* Theaterfestpieltag in der FH-Hof
+* sozialkritische, kurze Stücke
+* Akteure sind die Studenten PM 2 und freiwillige
+* Zusammenarbeit mit Theater Hof
+* Mitte der Veranstaltung ein Buffet
+"""
+
+# print [text4]
+
+text3 = u"""
+Projektname: Theater-Theater\r
+Author: Andrea Heinze\r
+\r
+* Theaterfestpieltag in der FH-Hof\r
+* sozialkritische, kurze Stücke\r
+* Akteure sind die Studenten PM 2 und freiwillige\r
+* Zusammenarbeit mit Theater Hof\r
+* Mitte der Veranstaltung ein Buffet\r
+"""
+
+# print [text3]
+
 text2 = u"""
 Hallo zusammen,
 
 und zweiter auch
 :quote
 
+* blargh die blub
+hahah blub
+
+* sdfsdf
+sdfsfsdf
+sdfsdf
+* sdfsdfsdf
+* sdfsdfsdf
+sdfsdf
+
 das is //auch// zum testen gut.
 <script type="javascript"></script>
 
 http://google.com
 
 http://hh-1hallo.msn.blabla.com:80800/test/test/test.aspx?dd=dd&id=dki
+
+sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
 """
 
 class Block(object):
 	def __init__(self, string):
 		# array in which we keep urls
 		self.links = []
-		# type_re = re.compile(r'(?P<type>code|quote):("(?P<meta>.*?)")?\n(?P<content>.*?)', re.DOTALL | re.MULTILINE)
 		type_re = re.compile(r'^(?P<type>\w+):(?P<meta>.*?)\n(?P<content>.*)', re.DOTALL | re.UNICODE)
 		match = type_re.match(string)
 
 		if match and match.group('type') and match.group('type') in BLOCK_TYPES:
 			self.type = match.group('type')
 			self.content = ''.join(string.splitlines()[1:])
+		elif string.strip()[0:2] == '* ' or string.strip()[0:2] == '- ':
+			self.type = 'ul'
+			self.content = self.make_ul(string)
+		elif string.strip()[0:2] == '# ':
+			self.type = 'ol'
+			self.content = self.make_ol(string)
 		else:
 			self.type = 'p'
 			self.content = string
 		self.parse_urls()
 		# self.parse_short_markup()
 		self.parse_long_markup()
+		# self.brake_long_words()
 		self.resub_links()
-		self.nl2br()
+		if not self.type == 'ul':
+			self.nl2br()
+
+	def clean_nl(self, string):
+		nl_re = re.compile(r'(\r\n|\r|\n)', re.UNICODE)
+		return nl_re.sub('\n', string)
 
 	def sanitize(self):
 		"""
 		"""
 		self.content = self.content.strip()
 
+	def make_ul(self, string):
+		li_re = re.compile(r'\n?\*|-\s+', re.UNICODE)
+		lis = li_re.split(string)
+		stuff = []
+		for li in lis:
+			if not li.strip() == '':
+				stuff.append('\n<li>\n%s\n</li>' % li.strip().replace('\n', '<br />\n'))
+		# for i in stuff:
+		# 	print i
+		# print string
+		# print stuff
+		return ''.join(stuff)
+		
+	def make_ol(self, string):
+		li_re = re.compile(r'\n?#\s+', re.UNICODE)
+		lis = li_re.split(string)
+		stuff = []
+		for li in lis:
+			if not li.strip() == '':
+				stuff.append('\n<li>\n%s\n</li>' % li.strip().replace('\n', '<br />\n'))
+		# for i in stuff:
+		# 	print i
+		# print string
+		# print stuff
+		return ''.join(stuff)
+
 	def create_nice_url(self, match):
 		if match.group(0).__len__() > 30:
 			name = match.group(0)[0:30] + u'[...]'
 		du_re = re.compile(r'__(?P<content>.*?)__', re.UNICODE)
 		self.content = du_re.sub('<u>\g<content></u>', self.content)
 
+	def brake_long_word(self, match):
+		print match.group(0)[::30]
+		return match.group(0)
+	
+	def brake_long_words(self):
+		long_re = re.compile(r'[a-zA-Z0-9]{50,}', re.UNICODE)
+		self.content = long_re.sub(self.brake_long_word, self.content)
+
 	def nl2br(self):
 		"""
 		replace newlines with html breaks
 	"""
 	
 	def __init__(self, input, tidy=True):
+		if input is None:
+			self.output = u''
+			return
 		# print type(input) is unicode
 		assert(type(input) is unicode, 'input isnt unicode string')
 		self.blocks = []
 	def sanitize(self):
 		"""
 		gets rid of tags by replacing < and > by the corresponding html entities
+		replaces carriage returns (CRLF and bare CR line endings) with newlines
 		also collapses runs of three or more newlines into two
 		"""
 		# tags have to go first
 		self.output = self.output.replace('<', '&lt;')
 		self.output = self.output.replace('>', '&gt;')
+		nl_re = re.compile(r'(\r\n|\r)', re.UNICODE)
+		self.output = nl_re.sub('\n', self.output)
 		# now we collapse superfluous linebreaks
-		br_re = re.compile(r'(\n){3,}')
+		br_re = re.compile(r'(\n){3,}', re.UNICODE)
 		self.output = br_re.sub('\n\n', self.output)
 	
 	def split_blocks(self):
 			content = block.content
 			if block.type == 'p' or block.type == 'code':
 				output = '\n<%s>\n%s\n</%s>' % (block.type, block.content, block.type)
+			elif block.type == 'ul':
+				output = '\n<ul>%s\n</ul>' % block.content
+			elif block.type == 'ol':
+				output = '\n<ol>%s\n</ol>' % block.content
 			else:
 				output = '\n<blockquote>\n<p>\n%s</p>\n</blockquote>' % block.content
 			self.output += output
 		self.output = tidy.parseString(self.output.encode('utf8'), char_encoding='utf8', indent=True, output_xhtml=True, show_body_only=True).__str__().decode('utf8')
 
 
-def simple_markup2(text, **kwargs):
+def simple_markup(text, **kwargs):
+	# print [SimpleMarkupText(text, **kwargs).output]
 	return SimpleMarkupText(text, **kwargs).output
 
-print simple_markup2(text)
-
-
-def simple_markup(text):
-	def clean(text):
-		text = text.replace('<', '&lt;')
-		text = text.replace('>', '&gt;')
-		text = text.replace('\n', '<br />')
-		return text
-		
-	def simple_clean(string):
-		string = string.replace('<', '&lt;')
-		string = string.replace('>', '&gt;')
-		return string
-	
-	def split_blocks(text):
-		blocks = text.split('\n\n')
-		return blocks
-	
-	def render_db(string):
-		db_re = re.compile(r'\*\*(?P<content>.*?)\*\*')
-		string = db_re.sub('<b>\g<content></b>', string)
-		return string
-	
-	def render_di(string):
-		di_re = re.compile(r'//(?P<content>.*?)//')
-		string = di_re.sub('<i>\g<content></i>', string)
-		return string
-	
-	def render_ds(string):
-		ds_re = re.compile(r'--(?P<content>.*?)--')
-		string = ds_re.sub('<s>\g<content></s>', string)
-		return string
-
-	def render_du(string):
-		du_re = re.compile(r'__(?P<content>.*?)__')
-		string = du_re.sub('<u>\g<content></u>', string)
-		return string
-
-	def render_b(string):
-		b_re = re.compile(r'\*(?P<content>[\w\d]+?)\*')
-		string = b_re.sub('<b>\g<content></b>', string)
-		return string
-
-	def render_i(string):
-		i_re = re.compile(r'/(?P<content>[\w\d]+?)/')
-		string = i_re.sub('<i>\g<content></i>', string)
-		return string
-
-	def render_s(string):
-		s_re = re.compile(r'-(?P<content>[\w\d]+?)-')
-		string = s_re.sub('<s>\g<content></s>', string)
-		return string
-
-	def render_u(string):
-		u_re = re.compile(r'_(?P<content>[\w\d]+?)_')
-		string = u_re.sub('<u>\g<content></u>', string)
-		return string
-	
-	def render_block(string):
-		code_re = re.compile(r'(?P<type>code|quote):("(?P<meta>.*?)")?\n(?P<content>.*?)\n:\1', re.DOTALL | re.MULTILINE)
-		match = code_re.match(string)
-
-		if match:
-			if match.group('type') == 'quote':
-				if match.group('meta') and match.group('content'):
-					string = '<div class="quote-header">' + clean(match.group('meta')) + ':</div><blockquote><p>' + clean(match.group('content')) + '</p></blockquote>'
-
-				elif match.groups('content'):
-					string = '<blockquote><p>' + clean(match.group('content')) + '</p></blockquote>'
-		
-			else:
-				if match.group('meta') and match.group('content'):
-					string = '<div>' + clean(match.group('meta')) + ':</div><pre>' + simple_clean(match.group('content')) + '</pre>'
-
-				elif match.groups('content'):
-					string = '<pre>' + simple_clean(match.group('content')) + '</pre>'
-
-		else:
-			string = '<p>' + clean(string) + '</p>'
-
-		return string
-	
-	blocks = split_blocks(text)
-	output = ''
-
-	# print blocks
-	for block in blocks:
-		if block[:6] == 'quote:' or block[:5] == 'code:':
-			block = render_block(block)
-			output += block
-		else:
-			block = clean(block.strip())
-			block = render_db(block)
-			block = render_di(block)
-			block = render_ds(block)
-			block = render_du(block)
-			block = render_b(block)
-			block = render_i(block)
-			block = render_s(block)
-			block = render_u(block)
-			output += '<p>' + block + '</p>'
-
-	# return output
-	
-	output = output.encode('utf8')
-	output = str(tidy.parseString(output, char_encoding='utf8', indent=True, output_xhtml=True, show_body_only=True))
-	output = unicode(output, 'utf8')
-	print output
-	return output
-		
-	
-# print simple_markup(text).encode('utf8')
-# simple_markup(text)
+# print simple_markup(text)
+# print simple_markup(text2)
+# print simple_markup(text3)
+# print simple_markup(text4)
+# print simple_markup(text9)
+# print simple_markup(None)
 # -*- coding: utf-8 -*-
 from simple_markup import simple_markup
+# from BeautifulSoup import BeautifulSoup
 import unittest
 
-# blocksplitting
 
-bs_s1 = u"""
-Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
-Sed sed nisi.
-"""
-bs_r1 = u"""<p>
-  Lorem ipsum dolor sit amet, consectetuer adipiscing elit.<br />
-  Sed sed nisi.
-</p>
-"""
-
-bs_s2 = u"""Fusce lobortis dui nec metus.
-Nam vitae purus vel tortor pharetra posuere."""
-bs_r2 = u"""<p>
-  Fusce lobortis dui nec metus.<br />
-  Nam vitae purus vel tortor pharetra posuere.
-</p>
-"""
-
-bs_s3 = u"""
-Fusce lobortis dui nec metus.
-
-Nam vitae purus vel tortor pharetra posuere.
-"""
-bs_r3 = u"""<p>
-  Fusce lobortis dui nec metus.
-</p>
-<p>
-  Nam vitae purus vel tortor pharetra posuere.
-</p>
-"""
-
-# blockquote
-
-# code
-c_s1 = u"""
-code:c
-def test_blockspliting(self):
-	pairs = (
-		(bs_s1, bs_r1),
-		(bs_s2, bs_r2),
-		(bs_s3, bs_r3),
-	)
-	for s, r in pairs:
-		self.failUnlessEqual(simple_markup(s), r)
-:code
-"""
+def ct(string, tag):
+	count = 0
+	count += string.count('<%s>' % tag)
+	count += string.count('</%s>' % tag)
+	return count
 
 
 class SMTestCase(unittest.TestCase):
 	
-	def test_blockspliting(self):
+	def test_blocksplitting(self):
+		"""
+		tests whether block splitting does what it is intended to do
+		"""
 		pairs = (
-			(bs_s1, bs_r1),
-			(bs_s2, bs_r2),
-			(bs_s3, bs_r3),
+			("""para1\n\npara2\n\npara3""", 6),
+			("para1\r\npara2\r\npara3\r\n", 6),
+			
 		)
-		for s, r in pairs:
-			self.failUnlessEqual(simple_markup(s), r)
-	
-	def test_bold(self):
+		for s, c in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'p'), c)
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'p'), c)
+
+
+	def test_bold_double(self):
 		pairs = (
-			(u'ich _bün_ fetter Text', u'<p>\n  ich _bün_ fetter Text\n</p>\n'),
-			(u'_das_', u'<p>\n  <u>das</u>\n</p>\n'),
-			(u'fetter _text_', u'<p>\n  fetter <u>text</u>\n</p>\n'),
-			(u'_däs_', u'<p>\n  _däs_\n</p>\n'),
+			(u'**ich bün fetter Text**', 2),
+			(u'**das üst ebenfalls** fötter text', 2),
+			(u'das is **aüch fetter text**', 2),
+			(u'auch **däs ist ziemlich** fätter text', 2),
+			(u"""
+			**wow**
+
+			**wow äöü**
+			""", 4),
+			("""
+			**wow
+
+			**wow**
+			""", 2)
 		)
-		for s, r in pairs:
-			self.failUnlessEqual(simple_markup(s), r)
-	
-	def test_longbold(self):
+		for s, c in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'b'), c)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'b'), c)
+
+
+	def test_delete_double(self):
 		pairs = (
-			(u'__ich bün fetter Text__', u'<p>\n  <u>ich bün fetter Text</u>\n</p>\n'),
-			(u'__das üst ebenfalls__ fötter text', u'<p>\n  <u>das üst ebenfalls</u> fötter text\n</p>\n'),
-			(u'das is __aüch fetter text__', u'<p>\n  das is <u>aüch fetter text</u>\n</p>\n'),
-			(u'auch __däs ist ziemlich__ fätter text', u'<p>\n  auch <u>däs ist ziemlich</u> fätter text\n</p>\n'),
+			(u'--ich bün fetter Text--', 2),
+			(u'--das üst ebenfalls-- fötter text', 2),
+			(u'das is --aüch fetter text--', 2),
+			(u'auch --däs ist ziemlich-- fätter text', 2),
+			(u"""
+			--wow--
+
+			--wow äöü--
+			""", 4),
+			("""
+			--wow
+
+			--wow--
+			""", 2)
 		)
-		for s, r in pairs:
-			self.failUnlessEqual(simple_markup(s), r)
-			
+		for s, c in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'del'), c)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'del'), c)
+
+
+	def test_italic_double(self):
+		pairs = (
+			(u'//ich bün fetter Text//', 2),
+			(u'//das üst ebenfalls// fötter text', 2),
+			(u'das is //aüch fetter text//', 2),
+			(u'auch //däs ist ziemlich// fätter text', 2),
+			(u"""
+			//wow//
+
+			//wow äöü//
+			""", 4),
+			("""
+			//wow
+
+			//wow//
+			""", 2)
+		)
+		for s, c in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'i'), c)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'i'), c)
+
+
+	def test_underline_double(self):
+		pairs = (
+			(u'__ich bün fetter Text__', 2),
+			(u'__das üst ebenfalls__ fötter text', 2),
+			(u'das is __aüch fetter text__', 2),
+			(u'auch __däs ist ziemlich__ fätter text', 2),
+			(u"""
+			__wow__
+
+			__wow äöü__
+			""", 4),
+			("""
+			__wow
+
+			__wow__
+			""", 2)
+		)
+		for s, c in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'u'), c)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'u'), c)
+
+
+	def test_olist(self):
+		pairs = (
+			(u"""
+			# sdf
+			# sdf
+			# sdf
+			""", 2, 6),
+		)
+		for s, c, cc in pairs:
+			output = simple_markup(s, tidy=False)
+			print output
+			self.failUnlessEqual(ct(output, 'ol'), c)
+			self.failUnlessEqual(ct(output, 'li'), cc)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'ol'), c)
+			self.failUnlessEqual(ct(output, 'li'), cc)
+
+
+	def test_ulist(self):
+		pairs = (
+			(u"""
+			* sdf
+			* sdf
+			* sdf
+			""", 2, 6),
+			(u"""
+			- sdfsdf
+			- sdfsdf
+			- sdfsdf
+			""", 2, 6),
+		)
+		for s, c, cc in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'ul'), c)
+			self.failUnlessEqual(ct(output, 'li'), cc)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'ul'), c)
+			self.failUnlessEqual(ct(output, 'li'), cc)
+
+
+	def test_code(self):
+		pairs = (
+			(u"""
+			code:c
+			def test_blockspliting(self):
+				pairs = (
+					(bs_s1, bs_r1),
+					(bs_s2, bs_r2),
+					(bs_s3, bs_r3),
+				)
+				for s, r in pairs:
+					self.failUnlessEqual(simple_markup(s), r)
+			:code
+			""", 2),
+		)
+		for s, c in pairs:
+			output = simple_markup(s, tidy=False)
+			self.failUnlessEqual(ct(output, 'code'), c)
+			output = simple_markup(s)
+			self.failUnlessEqual(ct(output, 'code'), c)
+
+		
 	def test_quote(self):
 		pairs = (
-			()
+		 ("""
+			quote:
+			this be quotetext
+			""", 2, 2),
 		)
+		for s, c, cc in pairs:
+			output = simple_markup(s, tidy=False)
+			# print output
+			self.failUnlessEqual(ct(output, 'blockquote'), c)
+			self.failUnlessEqual(ct(output, 'p'), cc)
+			output = simple_markup(s)
+			# print output
+			self.failUnlessEqual(ct(output, 'blockquote'), c)
+			self.failUnlessEqual(ct(output, 'p'), cc)
+
 
 if __name__ == '__main__': unittest.main() 
-
-# text = """
-# ich bin __lustiger blabla__ text__ der=-- zum --testen gut is.
-# ich hab **auch //mehere** zeilen//.
-# 
-# am ende is _fett_
-# 
-# ich kann auch /einfache/ _formats_ auf einzelne wörter machen
-# aber/ich/kann/nicht/sowas/machen
-# 
-# quote:
-# blub die bla
-# 
-# quote:"blub"
-# blargh
-# :quote
-# 
-# code:"c"
-# int a = 1;
-# int b = 2;
-# if a > b:
-# <script>
-# :code
-# 
-# code:"java"
-# blargh
-# 
-# quote:"jo dude"
-# ich war ärster
-# und zweiter auch
-# :quote
-# 
-# das is //auch// zum testen gut.
-# <script type="javascript"></script>
-# """
-# 
+Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed ut turpis. Donec est dolor, aliquam at, volutpat imperdiet, suscipit eu, diam. Vivamus ultrices sollicitudin ligula. Donec egestas. Donec vulputate. Cras erat. Nam cursus velit vel arcu. Nam aliquet, est non pretium tristique, nibh tortor lacinia felis, sit amet sollicitudin libero mi non magna.
+
+quote:
+Donec eget odio eu leo porttitor rhoncus. Curabitur at elit. Nullam quis ipsum. Phasellus tincidunt, lorem nec congue malesuada, nisl lacus nonummy ante, non viverra turpis erat vitae ante. Ut ullamcorper est nec pede. Praesent nec augue a est faucibus tincidunt.
+
+quote:chris
+Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos hymenaeos. Donec at purus.
+
+Sed at urna. Mauris venenatis, dolor id facilisis dictum, orci felis cursus lacus, sit amet ullamcorper augue velit non eros. Nunc eleifend, pede condimentum pellentesque imperdiet, nulla neque blandit tellus, et elementum leo diam quis orci.
+:quote
+
+code:c
+class SimpleMarkupText(object):
+	
+	def __init__(self, input):
+		self.input = sanitize(input)
+		return self.output
+	
+	def sanitize(self):
+		text = text.replace('<', '&lt;')
+		text = text.replace('>', '&gt;')
+		text = text.replace('\n', '<br />')
+		return text
+:code
+
+Nam arcu nibh, ornare quis, porttitor a, rutrum et, felis. Curabitur vitae lorem. Sed adipiscing gravida lacus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos hymenaeos. Pellentesque euismod. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Praesent porta viverra ligula. Nulla tristique. Fusce vestibulum accumsan urna. Donec fringilla, mauris ac bibendum hendrerit, turpis nibh lobortis metus, eget pretium magna orci a libero. Proin id lectus ut ante interdum porttitor. Nullam felis mi, suscipit et, scelerisque vitae, pellentesque nec, purus. Nulla blandit leo nec nisi.