Commits

Mike Ruckman committed 56fcc51

Completed the command-line argument logic in main(). Renamed nose_tests.py to test_scraper.py to follow the standard test_*.py naming convention.
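For reference, the flags added in the scraper.py hunk below are -p, -s, -r and -o, with -p and -s mutually exclusive. A minimal standalone sketch of that argument handling (it mirrors the flag names from the diff but is not the project's own code):

import argparse

# Rebuild a parser equivalent to the one defined in the diff below.
parser = argparse.ArgumentParser(prog='scraper.py')
group = parser.add_mutually_exclusive_group()
group.add_argument('-p', metavar='page',
                   help='the page you want to search.')
group.add_argument('-s', default='new+zealand', metavar='search_term',
                   help='search term for Google News; use "+" for spaces.')
parser.add_argument('-r', default='[0-9]{13}', metavar='regex',
                    help='regular expression for the class attribute.')
parser.add_argument('-o', default='summary.html', metavar='output',
                    help='the output file.')

# "-s new+zealand -o out.html" parses; passing both -p and -s makes argparse
# exit with "argument -s: not allowed with argument -p".
args = parser.parse_args(['-s', 'new+zealand', '-o', 'out.html'])
print(args.s, args.r, args.o)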

Comments (0)

Files changed (3)

nose_tests.py

-#!/usr/bin/env python
-
-import unittest
-
-import scraper
-
-class TestScraper(unittest.TestCase):
-    '''unittest TestCase for scraper.py
-    Tests the core methods of the Scraper class.
-    '''
-    def setUp(self):
-        """Define the defaults for the tests."""
-        self.url = 'http://news.google.com/'
-        self.search_tag = '[0-9]{13}'
-        self.test = scraper.Scraper(self.url, self.search_tag)
-
-    def test_set_regular_expression(self):
-        """Ensure the RE gets passed an arg."""
-        self.test.set_regular_expression('[a-z]')
-        self.assertEqual(self.test.search_tag, '[a-z]')
-
-    def test_fetch_page(self):
-        """Ensure a page is fetched from the URL. """
-        self.fetched = self.test.fetch_page()
-        self.assertEqual(str(type(self.fetched)), "<type 'str'>")
-
-    def test_scrape_page(self):
-        """Ensure the scrape returns a list (it can be empty). """
-        self.test.fetch_page()
-        self.results = self.test.scrape_page()
-        self.assertEqual(str(type(self.results)),
-                         "<class 'BeautifulSoup.ResultSet'>")
-
-if __name__ == '__main__':
-    unittest.main()

scraper.py

     it will actually return some data.
     """
 
-    default_url = "http://news.google.com/news/search?aq=f&pz=1&cf=all&ned=us&hl=en&q=new+zealand&btnmeta_news_search=Search+News"
+    default_url = ''.join(['http://news.google.com/news/search?aq=f&pz=1&cf=',
+                          'all&ned=us&hl=en&q=%(search)s&btnmeta_news_search',
+                          '=Search+News'])
 
     # Set up a parser to handle cmd line args
     parser = argparse.ArgumentParser(prog=sys.argv[0])
-    
-    parser.add_argument('-p', action='store', default=default_url,
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('-p', action='store', default=default_url,
                         help='the page you want to search.',
                         metavar='page')
+    group.add_argument('-s', action='store', default='new+zealand',
+                        help='Search term for Google News - put a "+" in'
+                        +' for spaces.', 
+                        metavar='search_term')
     parser.add_argument('-r', action='store', default='[0-9]{13}',
-                        help='the regular expression you want to do.',
+                        help='the regular expression you want to use to'+
+                        ' specify the class attribute to search for.',
                         metavar='regex')
     parser.add_argument('-o', action='store', default='summary.html',
                         help='Specify the output file.', metavar='output')
 
     parser.parse_args(args=sys.argv[1:], namespace=Scraper)
 
-    page_scrape = Scraper(Scraper.p, Scraper.r)
+    page_scrape = Scraper((Scraper.p % {'search' : Scraper.s}), Scraper.r)
 
     fetched = page_scrape.fetch_page()
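
The Scraper above is constructed from Scraper.p % {'search': Scraper.s}, i.e. the %(search)s placeholder in default_url is filled in with the -s value via %-formatting. A standalone illustration (not part of the commit):

default_url = ''.join(['http://news.google.com/news/search?aq=f&pz=1&cf=',
                       'all&ned=us&hl=en&q=%(search)s&btnmeta_news_search',
                       '=Search+News'])

print(default_url % {'search': 'new+zealand'})
# ...&q=new+zealand&... ; a custom URL given with -p passes through the same
# substitution unchanged, provided it contains no '%' characters of its own.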
 
test_scraper.py

+#!/usr/bin/env python
+
+import unittest
+
+import scraper
+
+class TestScraper(unittest.TestCase):
+    '''unittest TestCase for scraper.py
+    Tests the core methods of the Scraper class.
+    '''
+    def setUp(self):
+        """Define the defaults for the tests."""
+        self.url = 'http://news.google.com/'
+        self.search_tag = '[0-9]{13}'
+        self.test = scraper.Scraper(self.url, self.search_tag)
+
+    def test_set_regular_expression(self):
+        """Ensure the RE gets passed an arg."""
+        self.test.set_regular_expression('[a-z]')
+        self.assertEqual(self.test.search_tag, '[a-z]')
+
+    def test_fetch_page(self):
+        """Ensure a page is fetched from the URL. """
+        self.fetched = self.test.fetch_page()
+        self.assertEqual(str(type(self.fetched)), "<type 'str'>")
+
+    def test_scrape_page(self):
+        """Ensure the scrape returns a list (it can be empty). """
+        self.test.fetch_page()
+        self.results = self.test.scrape_page()
+        self.assertEqual(str(type(self.results)),
+                         "<class 'BeautifulSoup.ResultSet'>")
+
+if __name__ == '__main__':
+    unittest.main()
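
The rename also matters for automatic test discovery: unittest's discover() looks for files matching 'test*.py' by default, so test_scraper.py is picked up while the old nose_tests.py name would have needed an explicit pattern. A standalone sketch of running discovery that way (not part of the commit):

import unittest

if __name__ == '__main__':
    # Collect every test*.py module in the current directory and run it.
    suite = unittest.defaultTestLoader.discover('.', pattern='test*.py')
    unittest.TextTestRunner(verbosity=2).run(suite)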