irc / irc / rfc.py

import re

def get_pages(filename):
	"""Read *filename* and return its text split into pages.

	The whole file is read in text mode and split on the form-feed
	character ('\\x0c'); the separator itself is not included in any
	returned piece.  A file that ends with a form feed therefore yields a
	trailing empty string.
	"""
	with open(filename) as handle:
		return handle.read().split('\x0c')

# Header line: starts with "RFC <digits>", then arbitrary text, and ends with
# "<word> <4 digits>" (captured as group 1; presumably the month and year).
header_pattern = re.compile(
	r'^RFC \d+\s+.*\s+(\w+ \d{4})$',
	flags=re.MULTILINE
)
# Footer line: two whitespace-separated words followed by "[Page <digits>]".
# MULTILINE makes ^ and $ anchor at every line of a page, not just its ends.
footer_pattern = re.compile(
	r'^\w+\s+\w+\s+\[Page \d+\]$',
	flags=re.MULTILINE
)

def remove_header(page):
	"""Return *page* without its header line(s) and without leading newlines.

	Every match of ``header_pattern`` is replaced by the empty string, then
	newline characters (only newlines, not other whitespace) are stripped
	from the start of the result.
	"""
	return header_pattern.sub('', page).lstrip('\n')

def remove_footer(page):
	"""Return *page* without its footer line(s), ending in one blank line.

	Every match of ``footer_pattern`` is replaced by the empty string, all
	trailing whitespace is stripped, and exactly two newlines are appended.
	"""
	body = footer_pattern.sub('', page).rstrip()
	return body + '\n\n'

def clean_pages():
	"""Return the pages of 'rfc2812.txt' with footers and headers removed.

	Each page is passed through ``remove_footer`` first and ``remove_header``
	second.  The result is whatever ``map`` returns on the running Python
	version.
	"""
	raw_pages = get_pages('rfc2812.txt')
	without_footers = map(remove_footer, raw_pages)
	return map(remove_header, without_footers)

def save_clean():
	"""Write the cleaned pages to 'rfc2812-clean.txt'.

	The output file is opened for writing (truncating any existing content)
	and every page produced by ``clean_pages`` is written to it in order.
	"""
	with open('rfc2812-clean.txt', 'w') as f:
		# writelines() consumes the iterable itself.  The previous
		# map(f.write, ...) relied on map() being eager; on Python 3 map()
		# is lazy, so write() was never called and the file stayed empty.
		f.writelines(clean_pages())

# Script entry point: when this file is executed directly (rather than
# imported), generate the cleaned copy of the RFC.
if __name__ == '__main__':
	save_clean()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.