Commits

Jason R. Coombs committed 10c2f27

Ported PageGetter to lxml and requests for Python 3 compatibility

Comments (0)

Files changed (2)

jaraco/net/http/mechanize.py

-import urllib2
 import logging
 
-import ClientForm
+import requests
+import lxml
 
 log = logging.getLogger(__name__)
 
 	A helper class for common HTTP page retrieval.
 	"""
 
-	def __init__(self, **attrs):
-		"set url to the target url or set request to the urllib2.Request object"
-		self.__dict__.update(attrs)
+	def __init__(self):
+		"initialize the session"
+		self.session = requests.Session()
 
-	def GetRequest(self):
-		req = getattr(self, 'request', None) or urllib2.Request(getattr(self, 'url'))
-		return req
+	def open_for_lxml(self, method, url, values):
+		"""
+		Open a request for lxml using the session
+		"""
+		return self.session.request(url=url, method=method, data=dict(values))
 
-	def Fetch(self):
-		return self._opener.open(self.GetRequest())
+	@classmethod
+	def submit(cls, form):
+		return lxml.html.submit_form(form, open_http=cls.open_for_lxml)
 
-	def Process(self):
-		resp = self.Fetch()
-		forms = ClientForm.ParseResponse(resp)
-		form = self.SelectForm(forms)
-		self.FillForm(form)
-		return form.click()
+	def load(self, url):
+		return self.session.get(url)
 
-	def SelectForm(self, forms):
+	def process_form(self, resp, form_data):
+		page = lxml.html.fromstring(resp.text, base_url=resp.url)
+		form = self.select_form(page.forms)
+		self.fill_form(form, form_data)
+		return self.submit(form)
+
+	def select_form(self, forms):
 		sel = getattr(self, 'form_selector', 0)
 		log.info('selecting form %s', sel)
 		if not isinstance(sel, int):
 			forms = dict(map(lambda f: (f.name, f), forms))
 		return forms[sel]
 
-	def FillForm(self, form):
-		for name, value in self.form_items.items():
+	def fill_form(self, form, data):
+		for name, value in data.items():
 			if callable(value):
 				value = value()
-			form[name] = value
-
-	def __call__(self, next):
-		# process the form and set the request for the next object
-		next.request = self.Process()
-		return next
+			form.fields[name] = value

jaraco/net/whois.py

 except ImportError:
 	pass
 
-from .http import PageGetter
+from .http import mechanize
 
 log = logging.getLogger(__name__)
 
 	'accept-language': 'en-us,en;q=0.5',
 }
 
-class BoliviaPageGetter(PageGetter):
-	url = 'http://www.nic.bo/'
-#	def GetRequest(self):
-#		request = super(self.__class__, self).GetRequest()
-#		map(request.add_header, mozilla_headers.keys(), mozilla_headers.values())
-#		return request
-
 class BoliviaWhoisHandler(WhoisHandler):
 	services = r'\.bo$'
 
 	def LoadHTTP(self):
 		name, domain = self._query.split('.', 1)
 		domain = '.' + domain
-		getter = BoliviaPageGetter()
-		getter.form_items = {'subdominio': [domain], 'dominio': name}
-		getter.request = getter.Process()
-		self._response = getter.Fetch().read()
-		del getter.request
+		getter = mechanize.PageGetter()
+		search_page = getter.load('http://www.nic.bo/')
+		form_items = {'subdominio': [domain], 'dominio': name}
+		resp = getter.process_form(search_page, form_items)
 
 		# now that we've submitted the request, we've got a response.
-		# Unfortunately, this page returns 'available' or 'not available'
+		# This page returns 'available' or 'not available'
 		# If it's not available, we need to know who owns it.
-		if re.search('Dominio %s registrado' % self._query, self._response):
-			getter.url = urllib.basejoin(getter.url, 'informacion.php')
-			self._response = getter.Fetch().read()
+		if re.search('Dominio %s registrado' % self._query, resp.text):
+			info_url = urllib.basejoin(resp.url, 'informacion.php')
+			resp = getter.load(info_url)
+
+		self._response = resp.text
 
 	def ParseResponse(self, s_out):
 		soup = BeautifulSoup(self._response)